def __call__(self, seqs):
        '''Split the given sequences wherever one of the linkers matches.

        The sequences are written to a fasta file and self.linkers are
        matched against it with a short-query blastn search. A sequence
        for which the matcher reports segments is handed over to
        self._split_by_mate_linker; a sequence for which the matcher
        returns None is passed through unchanged.

        seqs is first written to the file and afterwards iterated again,
        so it presumably has to be a re-iterable collection (e.g. a list)
        -- TODO confirm against write_seqrecords.

        It returns a new list with the resulting sequences and updates the
        packet and sequence counters kept in self._stats.
        '''
        counters = self._stats
        counters[PROCESSED_PACKETS] += 1

        fasta_fhand = write_seqrecords(seqs, file_format='fasta')
        # flush, so that the content can be read through fasta_fhand.name
        fasta_fhand.flush()

        # Only matches with at least 17 residues and an identity of 87.0 or
        # more are kept by the matcher.
        length_filter = {'kind': 'min_length', 'min_num_residues': 17,
                         'length_in_query': False,
                         'filter_match_parts': True}
        identity_filter = {'kind': 'score_threshold',
                           'score_key': 'identity', 'min_score': 87.0}

        matcher = BlastMatcher(fasta_fhand.name, self.linkers,
                               program='blastn',
                               filters=[length_filter, identity_filter],
                               params={'task': 'blastn-short'},
                               elongate_for_global=True)

        result = []
        for record in seqs:
            counters[PROCESSED_SEQS] += 1
            matches = matcher.get_matched_segments_for_read(record.id)
            if matches is None:
                # no linker found, the sequence is kept as it is
                pieces = [record]
            else:
                pieces = self._split_by_mate_linker(record, matches)
            for piece in pieces:
                result.append(piece)
                counters[YIELDED_SEQS] += 1
        return result
    def test_matching_segments(self):
        'It checks that the matcher finds the linker region in a read'
        seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
        mate_fhand = create_a_matepair_file()

        # The expected region starts at len(seq_5) and its end is the index
        # of the last residue of seq_5 + TITANIUM_LINKER. Presumably the
        # read seq1 begins with seq_5 followed by the linker -- verify
        # against create_a_matepair_file.
        seq_with_linker = seq_5 + TITANIUM_LINKER
        region_start = len(seq_5)
        region_end = len(seq_with_linker) - 1
        expected_region = (region_start, region_end)

        matcher = BlastMatcher(mate_fhand.name, LINKERS, program='blastn',
                               elongate_for_global=True)
        segments = matcher.get_matched_segments_for_read('seq1')
        linker_region = segments[0]
        expected = [expected_region]
        assert expected == linker_region
Example #3
0
 def __call__(self, seqrecords):
     '''Annotate the regions matched by the oligos as VECTOR trim segments.

     The seqrecords are written to a fasta file and self.oligos are
     matched against it with a short-query blastn search. For every
     seqrecord for which the matcher reports segments, the first item of
     that result is passed to _add_trim_segments with kind=VECTOR.

     seqrecords is first written to the file and afterwards iterated
     again, so it presumably has to be a re-iterable collection (e.g. a
     list) -- TODO confirm against write_seqrecords.

     It returns the very same seqrecords object that was received and
     updates the sequence counters kept in self.stats.
     '''
     counters = self.stats

     reads_fhand = write_seqrecords(seqrecords, file_format='fasta')
     # flush, so that the content can be read through reads_fhand.name
     reads_fhand.flush()

     # Only matches with an identity of 89 or more and at least 13
     # residues are kept by the matcher.
     identity_filter = {'kind': 'score_threshold', 'score_key': 'identity',
                        'min_score': 89}
     length_filter = {'kind': 'min_length', 'min_num_residues': 13,
                      'length_in_query': False}
     blast_params = {'task': 'blastn-short', 'expect': '0.0001'}

     matcher = BlastMatcher(reads_fhand.name, self.oligos,
                            program='blastn',
                            filters=[identity_filter, length_filter],
                            params=blast_params,
                            elongate_for_global=True)

     for record in seqrecords:
         counters[PROCESSED_SEQS] += 1
         matches = matcher.get_matched_segments_for_read(record.id)
         if matches is not None:
             _add_trim_segments(matches[0], record, kind=VECTOR)
         # every sequence is yielded, with or without annotations
         counters[YIELDED_SEQS] += 1
     return seqrecords