def test_forward_words():
    """Check what match_words reports for a short list of words.

    The query is matched against the words 'CACA', 'TATA' and 'KK'. The
    first alignment returned must refer to the query and must hold exactly
    two matches: one for 'CACA' and one for 'TATA'. Each of them is
    expected to carry two match parts; 'KK' is expected to produce none.
    """
    raw_seq = 'gCACAggTGTGggTATAgg'
    query = SeqWithQuality(seq=Seq(raw_seq))
    words = ['CACA', 'TATA', 'KK']

    alignment = match_words(query, words)[0]
    assert alignment['query'] == query

    word_matches = alignment['matches']

    # The match for CACA: it spans query positions 1 to 10.
    caca_match = word_matches[0]
    assert caca_match['subject'] == 'CACA'
    assert caca_match['start'] == 1
    assert caca_match['end'] == 10
    caca_parts = caca_match['match_parts']
    assert len(caca_parts) == 2
    # The second part is the one reported on the reverse subject strand
    # (query 7-10; TGTG is the reverse complement of CACA).
    expected_reverse_part = {
        'query_start': 7,
        'query_end': 10,
        'query_strand': 1,
        'subject_start': 0,
        'subject_end': 3,
        'subject_strand': -1,
    }
    assert caca_parts[1] == expected_reverse_part

    # The match for TATA: it spans query positions 13 to 16.
    tata_match = word_matches[1]
    assert tata_match['subject'] == 'TATA'
    assert tata_match['start'] == 13
    assert tata_match['end'] == 16
    assert len(tata_match['match_parts']) == 2

    # KK matched nothing, so only the two matches above are present.
    assert len(word_matches) == 2
예제 #2
0
    def strip_words_by_matching(sequence):
        """Mark on a sequence the region left after matching the words.

        The words come from the enclosing scope (``words``). They are
        matched against the sequence with match_words and, from the result,
        the limits of the longest non-matched region are computed. The
        segments derived from those limits are then handed to
        _add_trim_segments together with the sequence.

        Returns None when the given sequence is None, or when no longest
        non-matched region could be determined. In every other case the
        very same sequence object that was passed in is returned: untouched
        if there are no words or no alignments, otherwise after
        _add_trim_segments has been called on it.
        """
        if sequence is None:
            return None

        # Without words there is nothing to match, so match_words is not
        # even called; an empty result from it is handled the same way.
        word_alignments = match_words(sequence, words) if words else None
        if not word_alignments:
            return sequence

        free_locations = _get_non_matched_locations(word_alignments)
        longest_limits = _get_longest_non_matched_seq_region_limits(
            sequence, free_locations)
        if longest_limits is None:
            return None

        # NOTE(review): presumably this turns the region to keep into the
        # segments that should be trimmed -- confirm against the helper.
        trim_segments = _get_non_matched_from_matched_locations(
            [longest_limits], len(sequence))
        _add_trim_segments(trim_segments, sequence)
        return sequence