def test_check_is_pair_4b():
    '''A read without quality cannot be pair-checked against one with it.

    read1 is built without a quality string while read2 has one;
    check_is_pair is expected to raise ValueError for that combination.
    '''
    read1 = Sequence(name='seq/1', sequence='AAA')
    read2 = Sequence(name='seq/2', quality='###', sequence='AAA')

    # pytest.raises fails the test when no ValueError is raised, which is
    # what the former "assert False" inside a try block was emulating.
    with pytest.raises(ValueError):
        check_is_pair(read1, read2)
def test_SanitizedFastxParser_invalid(create_fastx):
    '''Check that the sanitizing parser rejects an invalid sequence.'''
    valid_bases = 'A' * 4
    records = [
        Sequence('seq1/1', 'XXX'),
        Sequence('seq1/2', valid_bases),
    ]

    parser = SanitizedFastxParser(create_fastx(records))
    parsed = list(parser)

    # One read is counted as bad; only the all-A read is yielded.
    assert parser.n_bad == 1
    assert len(parsed) == 1
    assert parsed[0].sequence == valid_bases
def test_SanitizedFastxParser_no_convert_Ns(create_fastx):
    '''With convert_n=False, all-N reads come back unchanged.'''
    n_runs = ['N' * 5, 'N' * 4]
    records = [
        Sequence('seq1/1', n_runs[0]),
        Sequence('seq1/2', n_runs[1]),
    ]

    parser = SanitizedFastxParser(create_fastx(records),
                                  alphabet='DNAN_SIMPLE',
                                  convert_n=False)
    parsed = list(parser)

    assert parser.n_bad == 0
    assert len(parsed) == 2
    for record, wanted in zip(parsed, n_runs):
        assert record.sequence == wanted
def test_SanitizedFastxParser_convert_Ns(create_fastx):
    '''Test that N's are converted to A's when convert_n is not disabled.'''
    expected = [
        Sequence('seq1/1', 'N' * 5),
        Sequence('seq1/2', 'N' * 4),
    ]

    parser = SanitizedFastxParser(create_fastx(expected),
                                  alphabet='DNAN_SIMPLE')
    result = list(parser)

    # No read is rejected; each N in the input is reported as an A.
    assert parser.n_bad == 0
    assert len(result) == 2
    assert result[0].sequence == 'A' * 5
    assert result[1].sequence == 'A' * 4
def test_FastxParser(create_fastx):
    '''FastxParser yields records equal to the ones written, in order.'''
    layout = (
        ('seq1/1', 5),
        ('seq1/2', 4),
        ('seq2/1', 5),
        ('seq3/1', 3),
        ('seq3/2', 5),
    )
    expected = [Sequence(name, 'A' * length) for name, length in layout]

    parser = FastxParser(create_fastx(expected))
    result = list(parser)

    assert len(expected) == len(result)
    for wanted, got in zip(expected, result):
        assert wanted == got
def test_SanitizedFastxParser_lowercase(create_fastx):
    '''Lower- and mixed-case bases are reported in upper case.'''
    raw = (
        ('seq1/1', 'acgtn'),
        ('seq1/2', 'AcGtN'),
        ('seq1/2', 'aCgTn'),
    )
    reads = [Sequence(name, bases) for name, bases in raw]

    parser = SanitizedFastxParser(create_fastx(reads), convert_n=False)
    result = list(parser)

    for index in range(3):
        assert result[index].sequence == 'ACGTN'
def test_init_sequence_only(self):
    '''A Sequence given only `sequence` reports None for every field.'''
    record = Sequence(sequence=self.sequence)

    # Note that even the sequence itself is expected to read back as None.
    for field in ('name', 'sequence', 'quality', 'description',
                  'cleaned_seq'):
        assert getattr(record, field) is None
def test_init_name_and_sequence(self):
    '''Name and sequence are stored; cleaned_seq mirrors the sequence.'''
    record = Sequence(name=self.name, sequence=self.sequence)

    assert record.name == self.name
    assert record.sequence == self.sequence

    # Fields that were not supplied stay unset.
    for unset in ('quality', 'description'):
        assert getattr(record, unset) is None

    # Without an explicit cleaned_seq, it equals the raw sequence.
    assert record.cleaned_seq == self.sequence
def test_init_with_cleaned_seq(self):
    '''An explicit cleaned_seq is kept alongside the raw sequence.'''
    record = Sequence(name=self.name,
                      sequence=self.sequence,
                      cleaned_seq=self.cleaned)

    assert record.name == self.name
    assert record.sequence == self.sequence

    # Fields that were not supplied stay unset.
    for unset in ('quality', 'description'):
        assert getattr(record, unset) is None

    assert record.cleaned_seq == self.cleaned
def testRequirePairedAndMinLength_NeitherPass(self, parser, create_fastx):
    '''Both mates of the second pair are shorter than min_length.

    Only the first pair is expected from the reader.
    '''
    layout = (('seq1/1', 5), ('seq1/2', 4), ('seq3/1', 3), ('seq3/2', 3))
    reads = [Sequence(name, 'A' * length) for name, length in layout]

    stream = parser(create_fastx(reads))
    reader = BrokenPairedReader(stream, min_length=4, require_paired=True)

    pairs = []
    for n, _is_pair, mate1, mate2 in reader:
        pairs.append((mate1, mate2))

    # n keeps the first element of the last tuple the reader yielded.
    assert n == 0
    assert len(pairs) == 1

    left, right = pairs[0]
    assert left == reads[0]
    assert right == reads[1]
def test_check_is_pair_7():
    '''Reads passed in /2, /1 order are not reported as a pair.'''
    second_mate = Sequence(name='seq/2', sequence='AAA')
    first_mate = Sequence(name='seq/1', sequence='AAA')

    is_pair = check_is_pair(second_mate, first_mate)

    assert not is_pair
def test_check_is_pair_3_fa():
    '''Names 'seq 1::' and 'seq 2::' without quality count as a pair.'''
    mate1 = Sequence(name='seq 1::', sequence='AAA')
    mate2 = Sequence(name='seq 2::', sequence='AAA')

    is_pair = check_is_pair(mate1, mate2)

    assert is_pair
def test_check_is_pair_3_broken_fq_2():
    '''A second read named just 'seq' does not pair with 'seq 1::'.'''
    mate1 = Sequence(name='seq 1::', quality='###', sequence='AAA')
    untagged = Sequence(name='seq', quality='###', sequence='AAA')

    is_pair = check_is_pair(mate1, untagged)

    assert not is_pair
def test_check_is_pair_2():
    '''Reads named seq/1 and seq/2, both with quality, form a pair.'''
    mate1 = Sequence(name='seq/1', quality='###', sequence='AAA')
    mate2 = Sequence(name='seq/2', quality='###', sequence='AAA')

    is_pair = check_is_pair(mate1, mate2)

    assert is_pair
class Test_BrokenPairedReader(object):
    '''Tests for BrokenPairedReader over mixed paired and unpaired reads.

    Each test is parametrized to run once with FastxParser and once with
    SanitizedFastxParser as the underlying record source.
    '''

    # Shared input: the seq1 and seq3 mates surround a lone seq2/1 read.
    # Lengths differ so the min_length tests can drop individual reads.
    reads = [
        Sequence(name='seq1/1', sequence='A' * 5),
        Sequence(name='seq1/2', sequence='A' * 4),
        Sequence(name='seq2/1', sequence='A' * 5),
        Sequence(name='seq3/1', sequence='A' * 3),
        Sequence(name='seq3/2', sequence='A' * 5)
    ]

    @staticmethod
    def _collect_required_pairs(parser, create_fastx, reads):
        '''Read `reads` with min_length=4 and require_paired=True.

        Returns a tuple (n, pairs): n is the first element of the last
        tuple the reader yielded (None if it yielded nothing) and pairs
        is the list of (first, second) reads in the order yielded.
        '''
        reader = BrokenPairedReader(parser(create_fastx(reads)),
                                    min_length=4,
                                    require_paired=True)
        last_n = None
        pairs = []
        for last_n, _paired, first, second in reader:
            pairs.append((first, second))
        return last_n, pairs

    @pytest.mark.parametrize("parser",
                             [FastxParser, SanitizedFastxParser])
    def testDefault(self, parser, create_fastx):
        '''min_length=1 keeps every read: two pairs plus one single.'''
        x, n, m = gather_paired(parser(create_fastx(self.reads)),
                                min_length=1)

        expected = [('seq1/1', 'seq1/2'),
                    ('seq2/1', None),
                    ('seq3/1', 'seq3/2')]
        assert x == expected, x
        assert m == 3
        assert n == 3, n

    @pytest.mark.parametrize("parser",
                             [FastxParser, SanitizedFastxParser])
    def testMinLength(self, parser, create_fastx):
        '''min_length=3 still keeps every read (the shortest has 3).'''
        x, n, m = gather_paired(parser(create_fastx(self.reads)),
                                min_length=3)

        expected = [('seq1/1', 'seq1/2'),
                    ('seq2/1', None),
                    ('seq3/1', 'seq3/2')]
        assert x == expected, x
        assert m == 3
        assert n == 3, n

    @pytest.mark.parametrize("parser",
                             [FastxParser, SanitizedFastxParser])
    def testMinLength_2(self, parser, create_fastx):
        '''min_length=4 drops seq3/1, leaving seq3/2 as a single read.'''
        x, n, m = gather_paired(parser(create_fastx(self.reads)),
                                min_length=4)

        # A read without a mate is recorded as (name, None), the same way
        # every other unpaired read in this class is expected.
        # NOTE(review): previously (None, 'seq3/2'); confirm against
        # gather_paired that single reads always fill the first slot.
        expected = [('seq1/1', 'seq1/2'),
                    ('seq2/1', None),
                    ('seq3/2', None)]
        assert x == expected, x
        assert m == 3
        assert n == 3, n

    @pytest.mark.parametrize("parser",
                             [FastxParser, SanitizedFastxParser])
    def testForceSingle(self, parser, create_fastx):
        '''force_single=True reports every read on its own.'''
        x, n, m = gather_paired(parser(create_fastx(self.reads)),
                                force_single=True)

        expected = [('seq1/1', None),
                    ('seq1/2', None),
                    ('seq2/1', None),
                    ('seq3/1', None),
                    ('seq3/2', None)]
        assert x == expected, x
        assert m == 5
        assert n == 4, n

    @pytest.mark.parametrize("parser",
                             [FastxParser, SanitizedFastxParser])
    def testForceSingleAndMinLength(self, parser, create_fastx):
        '''force_single with min_length=5 keeps only the length-5 reads.'''
        x, n, m = gather_paired(parser(create_fastx(self.reads)),
                                min_length=5,
                                force_single=True)

        expected = [('seq1/1', None),
                    ('seq2/1', None),
                    ('seq3/2', None)]
        assert x == expected, x
        assert m == 3, m
        assert n == 2, n

    @pytest.mark.parametrize("parser",
                             [FastxParser, SanitizedFastxParser])
    def testRequirePairedAndMinLength_HalfPass(self, parser, create_fastx):
        '''First mate of the second pair is too short; pair is dropped.'''
        reads = [
            Sequence('seq1/1', 'A' * 5),
            Sequence('seq1/2', 'A' * 4),
            Sequence('seq3/1', 'A' * 3),
            Sequence('seq3/2', 'A' * 5)
        ]
        n, result = self._collect_required_pairs(parser, create_fastx,
                                                 reads)

        assert len(result) == 1
        assert n == 0
        left, right = result[0]
        assert left == reads[0]
        assert right == reads[1]

    @pytest.mark.parametrize("parser",
                             [FastxParser, SanitizedFastxParser])
    def testRequirePairedAndMinLength_SwappedHalfPass(self, parser,
                                                      create_fastx):
        '''Second mate of the second pair is too short; pair is dropped.'''
        reads = [
            Sequence('seq1/1', 'A' * 5),
            Sequence('seq1/2', 'A' * 4),
            Sequence('seq3/1', 'A' * 5),
            Sequence('seq3/2', 'A' * 3)
        ]
        n, result = self._collect_required_pairs(parser, create_fastx,
                                                 reads)

        assert n == 0
        assert len(result) == 1
        left, right = result[0]
        assert left == reads[0]
        assert right == reads[1]

    @pytest.mark.parametrize("parser",
                             [FastxParser, SanitizedFastxParser])
    def testRequirePairedAndMinLength_NeitherPass(self, parser,
                                                  create_fastx):
        '''Both mates of the second pair are too short; pair is dropped.'''
        reads = [
            Sequence('seq1/1', 'A' * 5),
            Sequence('seq1/2', 'A' * 4),
            Sequence('seq3/1', 'A' * 3),
            Sequence('seq3/2', 'A' * 3)
        ]
        n, result = self._collect_required_pairs(parser, create_fastx,
                                                 reads)

        assert n == 0
        assert len(result) == 1
        left, right = result[0]
        assert left == reads[0]
        assert right == reads[1]

    @pytest.mark.parametrize("parser",
                             [FastxParser, SanitizedFastxParser])
    def testRequirePairedAndMinLength_SwappedNeitherPass(
            self, parser, create_fastx):
        '''Both mates of the first pair are too short; second pair stays.'''
        reads = [
            Sequence('seq1/1', 'A' * 3),
            Sequence('seq1/2', 'A' * 3),
            Sequence('seq3/1', 'A' * 5),
            Sequence('seq3/2', 'A' * 5)
        ]
        n, result = self._collect_required_pairs(parser, create_fastx,
                                                 reads)

        assert n == 0
        assert len(result) == 1
        left, right = result[0]
        assert left == reads[2]
        assert right == reads[3]