def _do_check(self, seq): seq_object = seq.object try: quals = seq_object.letter_annotations['phred_quality'] except KeyError: msg = 'Some of the input sequences do not have qualities: {}' msg = msg.format(get_name(seq)) raise WrongFormatError(msg) if self.ignore_masked: str_seq = str(seq_object.seq) seg_quals = [quals[segment[0]: segment[1] + 1] for segment in get_uppercase_segments(str_seq)] qual = sum(sum(q) * len(q) for q in seg_quals) / len(quals) else: qual = sum(quals) / len(quals) return True if qual >= self.threshold else False
def test_masked_locations():
    """Check the uppercase runs reported by get_uppercase_segments.

    Each expected segment is a (start, end) pair of 0-based positions with
    an inclusive end; a sequence without uppercase letters yields nothing.
    """
    cases = [
        ("aaATTTTTTaa", [(2, 8)]),
        ("aaATTTaTTaa", [(2, 5), (7, 8)]),
        ("AAATaaa", [(0, 3)]),
        ("aaaaAAAA", [(4, 7)]),
        ("AATTaaTTaaTTT", [(0, 3), (6, 7), (10, 12)]),
        ("AATT", [(0, 3)]),
        ("aatt", []),
    ]
    for sequence, expected in cases:
        found = list(get_uppercase_segments(sequence))
        assert found == expected
def test_masked_locations():
    """Check the uppercase runs reported by get_uppercase_segments.

    Each expected segment is a (start, end) pair of 0-based positions with
    an inclusive end; a sequence without uppercase letters yields nothing.
    """
    cases = [
        ('aaATTTTTTaa', [(2, 8)]),
        ('aaATTTaTTaa', [(2, 5), (7, 8)]),
        ('AAATaaa', [(0, 3)]),
        ('aaaaAAAA', [(4, 7)]),
        ('AATTaaTTaaTTT', [(0, 3), (6, 7), (10, 12)]),
        ('AATT', [(0, 3)]),
        ('aatt', []),
    ]
    for sequence, expected in cases:
        found = list(get_uppercase_segments(sequence))
        assert found == expected
def _do_trim(self, seq):
    """Register for trimming everything outside the longest uppercase run.

    The sequence string is obtained with get_str_seq, its uppercase segments
    with get_uppercase_segments and the longest of them with
    get_longest_segment.  The stretches before and after that segment are
    handed to _add_trim_segments with kind=OTHER.  When get_longest_segment
    returns None the whole sequence is registered instead.

    Segments are (start, end) pairs with an inclusive end, measured on the
    sequence string.

    Returns the same seq object that was passed in.
    """
    str_seq = get_str_seq(seq)
    len_seq = len(str_seq)
    segment = get_longest_segment(get_uppercase_segments(str_seq))

    if segment is None:
        # No uppercase segment: trim the whole sequence.  The end is
        # inclusive and measured on the sequence string, like the segments
        # built below; an empty sequence has nothing to register.
        segments = [(0, len_seq - 1)] if len_seq else []
    else:
        segments = []
        if segment[0] != 0:
            # Stretch to the left of the kept segment.
            segments.append((0, segment[0] - 1))
        if segment[1] != len_seq - 1:
            # Stretch to the right of the kept segment.
            segments.append((segment[1] + 1, len_seq - 1))

    _add_trim_segments(segments, seq, kind=OTHER)
    return seq
def _do_check(self, seq): seq_object = seq.object try: quals = seq_object.letter_annotations['phred_quality'] except KeyError: msg = 'Some of the input sequences do not have qualities: {}' msg = msg.format(get_name(seq)) raise WrongFormatError(msg) if self.ignore_masked: str_seq = str(seq_object.seq) seg_quals = [ quals[segment[0]:segment[1] + 1] for segment in get_uppercase_segments(str_seq) ] qual = sum(sum(q) * len(q) for q in seg_quals) / len(quals) else: qual = sum(quals) / len(quals) return True if qual >= self.threshold else False