def __call__(self, seqs):
    """Split the given sequences wherever one of self.linkers is matched.

    The sequences are written to a FASTA file whose name is handed, together
    with the linkers, to a blastn-short BlasterForFewSubjects matcher.  The
    filters passed to the matcher ask for min_num_residues=13 and for an
    identity min_score of 87.0.

    Returns a new list: a read for which the matcher reports None is kept
    as is, any other read is replaced by the items that
    _split_by_mate_linker gives back for it.
    """
    seq_fhand = write_seqs(seqs, file_format='fasta')
    # The matcher receives the file by name, so flush the pending writes.
    seq_fhand.flush()

    min_identity = 87.0
    min_len = 13
    filters = [
        {'kind': 'min_length', 'min_num_residues': min_len,
         'length_in_query': False, 'filter_match_parts': True},
        {'kind': 'score_threshold', 'score_key': 'identity',
         'min_score': min_identity},
    ]
    matcher = BlasterForFewSubjects(seq_fhand.name, self.linkers,
                                    program='blastn', filters=filters,
                                    params={'task': 'blastn-short'},
                                    elongate_for_global=True,
                                    seqs_type=NUCL)

    new_seqs = []
    for seq in seqs:
        segments = matcher.get_matched_segments_for_read(get_name(seq))
        if segments is None:
            # No linker reported for this read: keep it untouched.
            new_seqs.append(seq)
        else:
            # extend() avoids the nested loop that used to rebind ``seq``.
            new_seqs.extend(self._split_by_mate_linker(seq, segments))
    return new_seqs
def __call__(self, seqs):
    """Return the reads in seqs, split by the linkers in self.linkers.

    A FASTA file with all the reads is created with write_seqs and its name
    is given to BlasterForFewSubjects along with the linkers (blastn, task
    blastn-short).  Each read is then looked up by name in the matcher:

    * when the matcher answers None the read goes to the output unchanged;
    * otherwise the output receives every item produced by
      _split_by_mate_linker(read, segments).
    """
    reads_fhand = write_seqs(seqs, file_format='fasta')
    reads_fhand.flush()  # make the content visible before using the name

    length_filter = {
        'kind': 'min_length',
        'min_num_residues': 13,
        'length_in_query': False,
        'filter_match_parts': True,
    }
    identity_filter = {
        'kind': 'score_threshold',
        'score_key': 'identity',
        'min_score': 87.0,
    }
    matcher = BlasterForFewSubjects(
        reads_fhand.name,
        self.linkers,
        program='blastn',
        filters=[length_filter, identity_filter],
        params={'task': 'blastn-short'},
        elongate_for_global=True,
        seqs_type=NUCL,
    )

    def pieces_for(read):
        # One-item list for unmatched reads, split result otherwise.
        segments = matcher.get_matched_segments_for_read(get_name(read))
        if segments is None:
            return [read]
        return self._split_by_mate_linker(read, segments)

    # A distinct name per level: the original inner loop reused ``seq``.
    return [piece for read in seqs for piece in pieces_for(read)]
def _pre_trim(self, trim_packet):
    """Create self._matcher for the reads under trim_packet[SEQS_PASSED].

    Every sequence of every group found under SEQS_PASSED is written to one
    FASTA file.  That file name and self.oligos are used to build a
    blastn-short BlasterForFewSubjects matcher, stored in self._matcher.
    """
    reads = []
    for group in trim_packet[SEQS_PASSED]:
        reads.extend(group)

    fasta_fhand = write_seqs(reads, file_format='fasta')
    fasta_fhand.flush()  # the matcher is given the file by name

    identity_filter = {'kind': 'score_threshold', 'score_key': 'identity',
                       'min_score': 87}
    length_filter = {'kind': 'min_length', 'min_num_residues': 13,
                     'length_in_query': False}
    blast_params = {'task': 'blastn-short', 'expect': '0.0001'}

    self._matcher = BlasterForFewSubjects(
        fasta_fhand.name, self.oligos, program='blastn',
        filters=[identity_filter, length_filter], params=blast_params,
        elongate_for_global=True)
def test_matching_segments(self):
    """The linker must be reported right after the 5' part of read seq1."""
    five_prime = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
    mate_fhand = create_a_matepair_file()
    linkers = assing_kind_to_seqs(SEQRECORD, LINKERS, None)

    # Expected match: from the end of the 5' part to the last linker base.
    first_pos = len(five_prime)
    last_pos = len(five_prime + TITANIUM_LINKER) - 1

    matcher = BlasterForFewSubjects(mate_fhand.name, linkers,
                                    program='blastn',
                                    elongate_for_global=True)
    matched = matcher.get_matched_segments_for_read('seq1')
    assert [(first_pos, last_pos)] == matched[0]
def test_matching_segments(self):
    """The titanium linker must be located inside read seq1."""
    seq_5 = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
    mate_fhand = create_a_matepair_file()

    # Two-line FASTA items (header + sequence), one per linker.
    named_linkers = (('titan', TITANIUM_LINKER), ('flx', FLX_LINKER))
    linker_items = [SeqItem(name, ['>' + name + '\n', linker + '\n'])
                    for name, linker in named_linkers]
    linker_items = assing_kind_to_seqs(SEQITEM, linker_items, 'fasta')

    region_start = len(seq_5)
    region_end = len(seq_5 + TITANIUM_LINKER) - 1

    matcher = BlasterForFewSubjects(mate_fhand.name, linker_items,
                                    program='blastn',
                                    elongate_for_global=True)
    found = matcher.get_matched_segments_for_read('seq1')[0]
    assert [(region_start, region_end)] == found
class TrimWithBlastShort(_BaseTrim):
    """Trimmer that marks the regions matched by oligos via blastn-short."""

    def __init__(self, oligos):
        """Store the oligos and run the _BaseTrim initializer."""
        self.oligos = oligos
        super(TrimWithBlastShort, self).__init__()

    def _pre_trim(self, trim_packet):
        """Build self._matcher from the reads under SEQS_PASSED.

        All reads are written to a single FASTA file; its name and
        self.oligos are handed to BlasterForFewSubjects.
        """
        reads = []
        for group in trim_packet[SEQS_PASSED]:
            reads.extend(group)

        reads_fhand = write_seqs(reads, file_format='fasta')
        reads_fhand.flush()  # the matcher is given the file by name

        by_identity = {'kind': 'score_threshold', 'score_key': 'identity',
                       'min_score': 87}
        by_length = {'kind': 'min_length', 'min_num_residues': 13,
                     'length_in_query': False}
        self._matcher = BlasterForFewSubjects(
            reads_fhand.name, self.oligos, program='blastn',
            filters=[by_identity, by_length],
            params={'task': 'blastn-short', 'expect': '0.0001'},
            elongate_for_global=True)

    def _do_trim(self, seq):
        """Add VECTOR trim segments to seq when the matcher reports any.

        The same seq object is always returned.
        """
        hits = self._matcher.get_matched_segments_for_read(get_name(seq))
        if hits is None:
            return seq
        _add_trim_segments(hits[0], seq, kind=VECTOR)
        return seq
class FilterBlastShort(_BaseFilter):
    """It filters a seq if there is a match against the given oligos."""

    def __init__(self, oligos, failed_drags_pair=True, reverse=False):
        """Store the oligos and forward the options to _BaseFilter.

        oligos is kept in self.oligos; reverse and failed_drags_pair are
        passed unchanged to the _BaseFilter initializer.
        """
        self.oligos = oligos
        super(FilterBlastShort, self).__init__(
            reverse=reverse, failed_drags_pair=failed_drags_pair)

    def _setup_checks(self, filterpacket):
        """Build self._matcher for the reads under filterpacket[SEQS_PASSED]."""
        seqs = [s for seqs in filterpacket[SEQS_PASSED] for s in seqs]

        # we create a blastdb for these reads and then we use the oligos
        # as the blast query
        db_fhand = write_seqs(seqs, file_format='fasta')
        db_fhand.flush()

        params = {'task': 'blastn-short', 'expect': '0.0001'}
        filters = [
            {'kind': 'score_threshold', 'score_key': 'identity',
             'min_score': 87},
            {'kind': 'min_length', 'min_num_residues': 13,
             'length_in_query': False},
        ]
        self._matcher = BlasterForFewSubjects(db_fhand.name, self.oligos,
                                              program='blastn',
                                              filters=filters,
                                              params=params,
                                              elongate_for_global=False)

    def _do_check(self, seq):
        """Return True when the matcher reports no segments for seq."""
        segments = self._matcher.get_matched_segments_for_read(get_name(seq))
        # The identity test already yields a bool; no ternary is needed.
        return segments is None
class TrimWithBlastShort(_BaseTrim):
    """Adaptor trimmer based on blastn-short matches against oligos."""

    def __init__(self, oligos):
        """Remember the oligos, then initialize the base trimmer."""
        self.oligos = oligos
        super(TrimWithBlastShort, self).__init__()

    def _pre_trim(self, trim_packet):
        """Write the SEQS_PASSED reads to FASTA and set up self._matcher."""
        passed_groups = trim_packet[SEQS_PASSED]
        flat_reads = []
        for group in passed_groups:
            for read in group:
                flat_reads.append(read)

        fasta = write_seqs(flat_reads, file_format="fasta")
        fasta.flush()  # flushed because only the file name is passed on

        blast_filters = []
        blast_filters.append(
            {"kind": "score_threshold", "score_key": "identity",
             "min_score": 87})
        blast_filters.append(
            {"kind": "min_length", "min_num_residues": 13,
             "length_in_query": False})
        blast_params = {"task": "blastn-short", "expect": "0.0001"}

        self._matcher = BlasterForFewSubjects(
            fasta.name,
            self.oligos,
            program="blastn",
            filters=blast_filters,
            params=blast_params,
            elongate_for_global=True,
        )

    def _do_trim(self, seq):
        """Register the matched segments of seq as VECTOR and return seq."""
        read_name = get_name(seq)
        matched = self._matcher.get_matched_segments_for_read(read_name)
        if matched is not None:
            regions = matched[0]
            _add_trim_segments(regions, seq, kind=VECTOR)
        return seq
class TrimWithBlastShort(_BaseTrim):
    """Trim adaptors located with the blast short algorithm."""

    def __init__(self, oligos):
        """Keep the oligos to look for and set up the base class."""
        self.oligos = oligos
        super(TrimWithBlastShort, self).__init__()

    def _pre_trim(self, trim_packet):
        """Prepare the matcher used later by _do_trim.

        The reads of all groups in trim_packet[SEQS_PASSED] go into one
        FASTA file, from which a BlasterForFewSubjects is built together
        with self.oligos.
        """
        every_read = [read
                      for bundle in trim_packet[SEQS_PASSED]
                      for read in bundle]
        fhand = write_seqs(every_read, file_format='fasta')
        fhand.flush()

        matcher_kwargs = {
            'program': 'blastn',
            'filters': [
                {'kind': 'score_threshold', 'score_key': 'identity',
                 'min_score': 87},
                {'kind': 'min_length', 'min_num_residues': 13,
                 'length_in_query': False},
            ],
            'params': {'task': 'blastn-short', 'expect': '0.0001'},
            'elongate_for_global': True,
        }
        self._matcher = BlasterForFewSubjects(fhand.name, self.oligos,
                                              **matcher_kwargs)

    def _do_trim(self, seq):
        """Mark as VECTOR the segments the matcher found in seq, if any."""
        found = self._matcher.get_matched_segments_for_read(get_name(seq))
        nothing_matched = found is None
        if not nothing_matched:
            _add_trim_segments(found[0], seq, kind=VECTOR)
        return seq
def _setup_checks(self, filterpacket):
    """Create self._matcher for the reads in filterpacket[SEQS_PASSED]."""
    reads = []
    for group in filterpacket[SEQS_PASSED]:
        reads.extend(group)

    # A blast database is made out of these reads; the oligos are then
    # used as the blast query.
    reads_fhand = write_seqs(reads, file_format='fasta')
    reads_fhand.flush()

    identity_filter = {'kind': 'score_threshold', 'score_key': 'identity',
                       'min_score': 87}
    length_filter = {'kind': 'min_length', 'min_num_residues': 13,
                     'length_in_query': False}

    self._matcher = BlasterForFewSubjects(
        reads_fhand.name, self.oligos, program='blastn',
        filters=[identity_filter, length_filter],
        params={'task': 'blastn-short', 'expect': '0.0001'},
        elongate_for_global=False)
class TrimNexteraAdapters(_BaseTrim):
    """It trims from Nextera adaptors found with blast short algorithm to 3'end.

    If adapter is at one end and it is not complete, it trims more bases.
    """

    def __init__(self, oligos):
        """Store the adaptor oligos and initialize the base trimmer."""
        self.oligos = oligos
        super(TrimNexteraAdapters, self).__init__()

    def _pre_trim(self, trim_packet):
        """Build self._matcher for the reads under trim_packet[SEQS_PASSED].

        The reads of every group are written to one FASTA file; the file
        name and self.oligos are given to a blastn-short
        BlasterForFewSubjects.
        """
        seqs = [s for seqs in trim_packet[SEQS_PASSED] for s in seqs]
        db_fhand = write_seqs(seqs, file_format='fasta')
        db_fhand.flush()  # the matcher is given the file by name

        params = {'task': 'blastn-short', 'expect': '0.0001'}
        filters = [
            {'kind': 'score_threshold', 'score_key': 'identity',
             'min_score': 87},
            {'kind': 'min_length', 'min_num_residues': 13,
             'length_in_query': False},
        ]
        self._matcher = BlasterForFewSubjects(db_fhand.name, self.oligos,
                                              program='blastn',
                                              filters=filters,
                                              params=params,
                                              elongate_for_global=True)

    def _do_trim(self, seq):
        """Mark everything from each match start to the read end as OTHER.

        Returns the same seq object, whether or not anything was matched.
        """
        segments = self._matcher.get_matched_segments_for_read(get_name(seq))
        if segments is not None:
            # The end of every trim segment is the last position of the
            # read, so it is computed once instead of once per match.
            last_pos = get_length(seq) - 1
            segments = [(segment[0], last_pos) for segment in segments[0]]
            _add_trim_segments(segments, seq, kind=OTHER)
        return seq
def __call__(self, seqs):
    """Add VECTOR trim segments to the reads matched by self.oligos.

    The reads are written to a FASTA file that, with self.oligos, feeds a
    blastn-short BlasterForFewSubjects.  Reads with reported segments get
    them registered through _add_trim_segments.  The seqs object received
    is the one returned.
    """
    reads_fhand = write_seqs(seqs, file_format='fasta')
    reads_fhand.flush()  # the matcher is given the file by name

    identity_filter = {'kind': 'score_threshold', 'score_key': 'identity',
                       'min_score': 89}
    length_filter = {'kind': 'min_length', 'min_num_residues': 13,
                     'length_in_query': False}
    matcher = BlasterForFewSubjects(
        reads_fhand.name, self.oligos, program='blastn',
        filters=[identity_filter, length_filter],
        params={'task': 'blastn-short', 'expect': '0.0001'},
        elongate_for_global=True)

    for read in seqs:
        hits = matcher.get_matched_segments_for_read(get_name(read))
        if hits is None:
            continue
        _add_trim_segments(hits[0], read, kind=VECTOR)
    return seqs
def test_matching_segments(self):
    """Check the region reported for the linker in read seq1."""
    upstream = 'CTAGTCTAGTCGTAGTCATGGCTGTAGTCTAGTCTACGATTCGTATCAGTTGTGTGAC'
    mate_fhand = create_a_matepair_file()

    titan_item = SeqItem('titan', ['>titan\n', TITANIUM_LINKER + '\n'])
    flx_item = SeqItem('flx', ['>flx\n', FLX_LINKER + '\n'])
    linkers = assing_kind_to_seqs(SEQITEM, [titan_item, flx_item], 'fasta')

    # The linker is expected immediately after the upstream sequence.
    expected = [(len(upstream), len(upstream + TITANIUM_LINKER) - 1)]

    matcher = BlasterForFewSubjects(mate_fhand.name, linkers,
                                    program='blastn',
                                    elongate_for_global=True)
    segments = matcher.get_matched_segments_for_read('seq1')
    linker_region = segments[0]
    assert expected == linker_region
def _pre_trim(self, trim_packet):
    """Set self._matcher up with the reads found under SEQS_PASSED.

    The groups in trim_packet[SEQS_PASSED] are flattened into one list that
    is written as FASTA; BlasterForFewSubjects then receives the file name
    and self.oligos.
    """
    flat_reads = []
    for group in trim_packet[SEQS_PASSED]:
        for read in group:
            flat_reads.append(read)

    fasta = write_seqs(flat_reads, file_format="fasta")
    fasta.flush()

    blast_filters = [
        dict(kind="score_threshold", score_key="identity", min_score=87),
        dict(kind="min_length", min_num_residues=13, length_in_query=False),
    ]
    blast_params = dict(task="blastn-short", expect="0.0001")

    self._matcher = BlasterForFewSubjects(
        fasta.name,
        self.oligos,
        program="blastn",
        filters=blast_filters,
        params=blast_params,
        elongate_for_global=True,
    )
class TrimNexteraAdapters(_BaseTrim):
    """It trims from Nextera adaptors found with blast short algorithm to 3'end.

    If adapter is at one end and it is not complete, it trims more bases.
    """

    def __init__(self, oligos):
        """Keep the adaptor oligos and run the _BaseTrim initializer."""
        self.oligos = oligos
        super(TrimNexteraAdapters, self).__init__()

    def _pre_trim(self, trim_packet):
        """Create the matcher that _do_trim queries read by read.

        All reads under trim_packet[SEQS_PASSED] are written to one FASTA
        file, used with self.oligos to build a BlasterForFewSubjects.
        """
        seqs = [s for seqs in trim_packet[SEQS_PASSED] for s in seqs]
        db_fhand = write_seqs(seqs, file_format='fasta')
        db_fhand.flush()

        params = {'task': 'blastn-short', 'expect': '0.0001'}
        filters = [{
            'kind': 'score_threshold',
            'score_key': 'identity',
            'min_score': 87,
        }, {
            'kind': 'min_length',
            'min_num_residues': 13,
            'length_in_query': False,
        }]
        self._matcher = BlasterForFewSubjects(db_fhand.name,
                                              self.oligos,
                                              program='blastn',
                                              filters=filters,
                                              params=params,
                                              elongate_for_global=True)

    def _do_trim(self, seq):
        """Add OTHER trim segments from each match start to the read end.

        The seq received is returned in every case.
        """
        segments = self._matcher.get_matched_segments_for_read(get_name(seq))
        if segments is None:
            return seq

        # Every trim segment ends at the last position of the read, so
        # the value does not need to be recomputed for each match.
        read_end = get_length(seq) - 1
        to_trim = [(segment[0], read_end) for segment in segments[0]]
        _add_trim_segments(to_trim, seq, kind=OTHER)
        return seq