Exemple #1
0
 def test_write_sequence_object(self):
     """Write two records through FastaFormat and verify the file contents."""
     formatter = FastaFormat()
     records = (("name", "CCATA"), ("name2", "HELLO"))
     with open_output(self.path, "w") as out:
         for name, bases in records:
             out.write(formatter.format(Sequence(name, bases)))
     with open(self.path) as handle:
         contents = handle.read()
     assert contents == '>name\nCCATA\n>name2\nHELLO\n'
Exemple #2
0
def test_error_correction():
    """Check insert-match cutting with ``mismatch_action='liberal'``.

    Each read carries one substitution relative to ``frag`` and is given the
    higher quality character ('A' instead of '#') at the substituted base.
    Both trimmed reads are expected to equal ``correct_frag`` (read 2 as its
    reverse complement), i.e. the fragment with both substitutions applied.
    """
    adapter_seq1 = 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTC'
    adapter_seq2 = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCACGAGTTA'
    frag = 'CCAAGCAGACATTCACTCAGATTGCA'
    correct_frag = 'CCAAGTAGACATTCGCTCAGATTGCA'

    # Read 1: C>T at index 5, and that base gets the higher quality.
    bases1 = frag[:5] + 'T' + frag[6:]
    seq1 = (bases1 + adapter_seq1)[0:40]
    qual1 = ('#' * 5) + 'A' + ('#' * 34)

    # Read 2: A>G at index 14 of the fragment. Read 2 is reverse
    # complemented, so the higher quality goes at index len(frag) - 15.
    bases2 = frag[:14] + 'G' + frag[15:]
    seq2 = reverse_complement(reverse_complement(adapter_seq2) + bases2)[0:40]
    hq_index = len(frag) - 15
    qual2 = ('#' * hq_index) + 'A' + ('#' * (39 - hq_index))

    read1 = Sequence('foo', seq1, qual1)
    read2 = Sequence('foo', seq2, qual2)

    parser = AdapterParser()
    adapter1 = parser.parse_from_spec(adapter_seq1)
    adapter2 = parser.parse_from_spec(adapter_seq2)
    cutter = InsertAdapterCutter(adapter1, adapter2, mismatch_action='liberal')

    trimmed1, trimmed2 = cutter(read1, read2)

    assert len(trimmed1) == 26
    assert trimmed1.insert_overlap
    assert trimmed1.sequence == correct_frag

    assert len(trimmed2) == 26
    assert trimmed2.insert_overlap
    assert trimmed2.sequence == reverse_complement(correct_frag)
Exemple #3
0
def test_mismatched_adapter_overlaps():
    """
    This is a test case from real data. The adapter overlaps 1 less bp
    on the fw read than on the reverse read. We want to make sure that
    the extra 'A' base gets trimmed.
    adapter                                                                                                                                                          GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG
    actual                                                               TTGTTTTTATGGAGAGAGTTTTAAGGTTTATTTTAGTTTTAAAGGATATTGTAGGTTAGAGGGAAAGTGTATGATGAAGGTATATATTGGTAGATCGGAAGAGCACACGTCTGAACTTCAGTCAC
    actual rc                          TATGTTCTTTCCCTTCACGTCTCTCTTCGGATCTTTATTGTGATGAGTTGAAAATAAAGGTTAAGTATAGATAAAAAAGTTATTATAGTTTAGAGGGTAAGTGTATGATGGAGTAAAATATTGGT
    adapter rc AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
    """
    fw_seq = 'TTGTTTTTATGGAGAGAGTTTTAAGGTTTATTTTAGTTTTAAAGGATATTGTAGGTTAGAGGGAAAGTGTATGATGAAGGTATATATTGGTAGATCGGAAGAGCACACGTCTGAACTTCAGTCAC'
    rv_seq = 'ACCAATATTTTACTCCATCATACACTTACCCTCTAAACTATAATAACTTTTTTATCTATACTTAACCTTTATTTTCAACTCATCACAATAAAGATCCGAAGAGAGACGTGAAGGGAAAGAACATA'
    # TruSeq index 7
    index_adapter_seq = "GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG"
    # TruSeq universal
    universal_adapter_seq = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT"
    expected_insert = 'TTGTTTTTATGGAGAGAGTTTTAAGGTTTATTTTAGTTTTAAAGGATATTGTAGGTTAGAGGGAAAGTGTATGATGAAGGTATATATTGGT'

    parser = AdapterParser()
    adapter1 = parser.parse_from_spec(index_adapter_seq)
    adapter2 = parser.parse_from_spec(universal_adapter_seq)

    # The data has a fairly high error rate, hence the relaxed thresholds.
    cutter = InsertAdapterCutter(
        adapter1,
        adapter2,
        max_insert_mismatch_frac=0.3,
        max_adapter_mismatch_frac=0.3)

    low_quals = '#' * 125
    read1 = Sequence('foo', fw_seq, low_quals)
    read2 = Sequence('foo', rv_seq, low_quals)

    trimmed1, trimmed2 = cutter(read1, read2)

    assert len(trimmed1) == 91
    assert len(trimmed2) == 91
    assert trimmed1.sequence == expected_insert
Exemple #4
0
def test_error_correction_unequal_read_lengths():
    """Test case for issue #51: error correction when the reads differ in length.

    After correction with truncation, both reads should report three
    corrected bases, with 'N' at mirrored positions in the two reads.
    """
    short_read = Sequence(
        'read1',
        'TTTGCAGCTTTTGTAGACAAGTGCTGTGCAGCTGATGTCAAAGAGACCTGCTTTGCTCTGGAGGGTCCAAAACTTGTAGCCTCAACCCGAGAAGCCATAGCCTAA',
        'CCCCCFCGGGGGBFFAFC<?BEADCCF<FFFFGFFDFDFFGGGGCFGGC?DFFFEC;,===??DG==DDDFFFFG8DDD7+5;;DF*=)))10885D**58>6=0',
    )
    long_read = Sequence(
        'read1',
        'ATAGGCTATGGCTTCTCGAGTTGAAGCTACAAGTTTTGGACCCTCCAGAGCAAAGCAGGTCTCTTTGACATCAGCTGCACAGCACTTGTCTACAAAAGCTGCAAAAGATCGGAAGAGCGTCTCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGACGTATCATTAAAAAAAAAAACACATCACATCAACAAGATAACACGACTTCTCCATCCACAGTACCGATGACCTCAACATTAGT',
        'CCCCCG@FCFGGCFGGGGFEFGFGGFCFGGGFGFGGGGGGGGGGGGGGGGGGGGGGGGGGG9FGGGGGGGFGDFFGGGGGGGGGGGGGGGGG8;>@?@FEGGGGGGGGGGGGGGGGGGGGG=DDFAEFFFGF>B>EA):DFFBDFFB6CDEDDD9=99DD>55)580:A5)*)*;DD>**51:0118):)4))1***0:*)*)((***0*.(((((*)/.)1/(6((()1.)(((6).-----8<:C<73',
    )
    aligner = InsertAligner(
        'AGATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG',
        'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT',
    )
    # Only the insert match is needed here; the adapter matches are ignored.
    insert_match, _, _ = aligner.match_insert(
        short_read.sequence, long_read.sequence)

    corrector = ErrorCorrectorMixin('N')
    corrector.correct_errors(
        short_read, long_read, insert_match, truncate_seqs=True)

    assert short_read.corrected == 3
    assert long_read.corrected == 3
    for pos in (80, 86, 104):
        # Position ``pos`` in read 1 pairs with ``104 - pos`` in read 2.
        mirrored = 104 - pos
        assert short_read.sequence[pos] == 'N', \
            'Read 1 not corrected to N at {}'.format(pos)
        assert long_read.sequence[mirrored] == 'N', \
            'Read 2 not corrected to N at {}'.format(mirrored)
Exemple #5
0
def test_overwrite_read():
    """Check OverwriteRead(20, 40, 10) across three quality scenarios.

    Read 1 is expected to be replaced (by the reverse complement of read 2,
    with reversed qualities) only in the second scenario, where its mean
    quality is below 20 while the mean quality of read 2 is above 40.
    """
    modifier = OverwriteRead(20, 40, 10)
    low_seq = 'ACGT' * 5
    high_seq = 'TCAG' * 5

    # Scenario 1: mean low quality > 20, mean high quality > 40.
    low_quals = (11, 31, 16, 24, 16, 20, 17, 19, 21, 28) * 2
    high_quals = (22, 62, 32, 48, 32, 40, 34, 38, 42, 56) * 2
    read1 = Sequence('foo', low_seq, ints2quals(low_quals))
    read2 = Sequence('foo', high_seq, ints2quals(high_quals))
    out1, out2 = modifier(read1, read2)
    assert out1.sequence == low_seq
    assert out1.qualities == ints2quals(low_quals)
    assert out2.sequence == high_seq
    assert out2.qualities == ints2quals(high_quals)
    assert out1.corrected == out2.corrected == 0

    # Scenario 2: mean low quality < 20, mean high quality > 40.
    low_quals = tuple(q - 1 for q in low_quals)
    read1 = Sequence('foo', low_seq, ints2quals(low_quals))
    out1, out2 = modifier(read1, read2)
    assert out1.sequence == rc(high_seq)
    assert out1.qualities == ints2quals(reversed(high_quals))
    assert out2.sequence == high_seq
    assert out2.qualities == ints2quals(high_quals)
    assert out1.corrected == out2.corrected == 1

    # Scenario 3: mean low quality < 20, mean high quality < 40.
    high_quals = tuple(q - 1 for q in high_quals)
    read2 = Sequence('foo', high_seq, ints2quals(high_quals))
    out1, out2 = modifier(read1, read2)
    assert out1.sequence == low_seq
    assert out1.qualities == ints2quals(low_quals)
    assert out2.sequence == high_seq
    assert out2.qualities == ints2quals(high_quals)
    assert out1.corrected == out2.corrected == 0
Exemple #6
0
def test_overlapping():
    """Exercise MergeOverlapping on adapter-trimmed reads.

    Covers, in order: complete overlap, partial overlap after shortening
    read 2, partial overlap after shortening both reads, merging with two
    mismatches (tolerated), and merging with three mismatches (rejected).
    """
    trimmer = MergeOverlapping(min_overlap=10, error_rate=0.1)
    a1 = 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTC'
    a2 = reverse_complement('AGATCGGAAGAGCACACGTCTGAACTCCAGTCACGAGTTA')
    frag = 'CCAAGCAGACATTCACTCAGATTGCA'
    r1 = (frag + a1)[0:40]
    q1 = '#' * 40
    r2 = reverse_complement(a2 + frag)[0:40]
    q2 = '!' * 40
    parser = AdapterParser()
    adapter1 = parser.parse_from_spec(a1)
    adapter2 = parser.parse_from_spec(a2)
    # Trim the adapters first so that only the 26 bp fragment remains.
    cutter = AdapterCutter([adapter1, adapter2])
    read1 = Sequence('foo', r1, q1)
    read1 = cutter(read1)
    assert len(read1) == 26
    read2 = Sequence('foo', r2, q2)
    read2 = cutter(read2)
    assert len(read2) == 26
    # complete overlap
    read1_merged, read2_merged = trimmer(read1, read2)
    assert read1_merged.merged
    assert read2_merged is None
    assert read1 == read1_merged
    # partial overlap
    read1.merged = False
    read2 = read2.subseq(0, 24)[2]
    read1_merged, read2_merged = trimmer(read1, read2)
    assert read1_merged.merged
    assert read2_merged is None
    assert read1 == read1_merged
    # partial overlap r1, r2
    read1.merged = False
    read1 = read1.subseq(0, 24)[2]
    read1_merged, read2_merged = trimmer(read1, read2)
    assert read1_merged.merged
    assert read2_merged is None
    assert len(read1_merged) == 26
    assert read1_merged.sequence == 'CCAAGCAGACATTCACTCAGATTGCA'
    assert read1_merged.qualities == ('#' * 24) + ('!' * 2)
    # errors
    # round(0.1 * 24) = 2, so 2 errors should pass but 3 should not
    read1.merged = False
    r1_seq = list(read1.sequence)
    r1_seq[10] = reverse_complement(r1_seq[10])
    r1_seq[20] = reverse_complement(r1_seq[20])
    read1.sequence = "".join(r1_seq)
    read1_merged, read2_merged = trimmer(read1, read2)
    assert read1_merged.merged
    assert read2_merged is None
    assert len(read1_merged) == 26
    assert read1_merged.sequence == 'CCAAGCAGACTTTCACTCAGTTTGCA'
    assert read1_merged.qualities == ('#' * 24) + ('!' * 2)
    # too many errors: a third mismatch exceeds the allowed two
    read1.merged = False
    r1_seq[15] = reverse_complement(r1_seq[15])
    read1.sequence = "".join(r1_seq)
    read1_merged, read2_merged = trimmer(read1, read2)
    assert read1_merged.merged is False
    # Check the returned read 2, not the input (which is never None):
    # an unmerged pair must still carry its second read.
    assert read2_merged is not None
Exemple #7
0
def test_Swift_trimmer():
    """SwiftBisulfiteTrimmer should drop the last 10 bases of read 1 and
    the first 10 bases of read 2."""
    bases = "ACGT" * 30
    first = Sequence('read1', bases)
    second = Sequence('read2', bases)
    trimmer = SwiftBisulfiteTrimmer()
    result = trimmer(first, second)
    expected_first = bases[:-10]
    expected_second = bases[10:]
    assert result[0].sequence == expected_first
    assert result[1].sequence == expected_second
Exemple #8
0
def test_nend_trimmer():
    """NEndTrimmer strips N runs from both ends but keeps internal Ns."""
    cases = [
        ('NNNNAAACCTTGGNNN', 'AAACCTTGG'),
        ('NNNNAAACNNNCTTGGNNN', 'AAACNNNCTTGG'),
        ('NNNNNN', ''),
    ]
    trimmer = NEndTrimmer()
    for raw, expected_bases in cases:
        untrimmed = Sequence('read1', raw, qualities='#' * len(raw))
        expected = Sequence(
            'read1', expected_bases, qualities='#' * len(expected_bases))
        assert trimmer(untrimmed) == expected
Exemple #9
0
def test_quality_trimmer():
    """QualityTrimmer with cutoffs at both ends, back only, and front only."""
    read = Sequence('read1', 'ACGTTTACGTA', '##456789###')
    # (first cutoff, second cutoff, expected bases, expected qualities);
    # the third constructor argument is always 33.
    cases = (
        (10, 10, 'GTTTAC', '456789'),
        (0, 10, 'ACGTTTAC', '##456789'),
        (10, 0, 'GTTTACGTA', '456789###'),
    )
    for cutoff_a, cutoff_b, bases, quals in cases:
        trimmer = QualityTrimmer(cutoff_a, cutoff_b, 33)
        assert trimmer(read) == Sequence('read1', bases, quals)
Exemple #10
0
def test_Modifiers_paired_legacy():
    """With paired="first", the cutter is applied to read 1 only."""
    bases, quals = 'ACGTTTACGTA', '##456789###'
    read1 = Sequence('read1', bases, quals)
    read2 = Sequence('read1', bases, quals)

    modifiers = PairedEndModifiers(paired="first")
    modifiers.add_modifier(UnconditionalCutter, lengths=[5])

    out1, out2 = modifiers.modify(read1, read2)
    assert out1.sequence == 'TACGTA'
    assert out2.sequence == 'ACGTTTACGTA'
Exemple #11
0
def test_nextseq_trim():
    """nextseq_trim_index returns 0 for an empty read and 33 for the sample read."""
    empty = Sequence('n', '', '')
    assert nextseq_trim_index(empty, cutoff=22) == 0

    bases = 'TCTCGTATGCCGTCTTATGCTTGAAAAAAAAAAGGGGGGGGGGGGGGGGGNNNNNNNNNNNGGNGG'
    quals = 'AA//EAEE//A6///E//A//EA/EEEEEEAEA//EEEEEEEEEEEEEEE###########EE#EA'
    sample = Sequence('n', bases, quals)
    assert nextseq_trim_index(sample, cutoff=22) == 33
Exemple #12
0
def test_non_directional_bisulfite_trimmer():
    """NonDirectionalBisulfiteTrimmer(rrbs=True) on a back-matched read and
    on a read without match information."""
    trimmer = NonDirectionalBisulfiteTrimmer(rrbs=True)
    # read1 is built but not asserted on; its check is disabled below.
    read1 = Sequence('read1', "CAATCGATCGA")
    matched_read = Sequence('read2', "CTATCGATC")
    matched_read.match, matched_read.match_info = back_match(matched_read)
    unmatched_read = Sequence('read3', "CTATCGATCCA")
    # assert trimmer(read1).sequence == "ATCGATC"
    expectations = (
        (matched_read, "CTATCGA"),
        (unmatched_read, "CTATCGATCCA"),
    )
    for read, expected in expectations:
        assert trimmer(read).sequence == expected
Exemple #13
0
 def test_twoheaders(self):
     """Records built with ``name2`` should be written with the name
     repeated on the '+' line."""
     formatter = FastqFormat()
     records = (
         ("name", "CCATA", "!#!#!"),
         ("name2", "HELLO", "&&&!&"),
     )
     with open_output(self.path, "w") as out:
         for name, bases, quals in records:
             out.write(
                 formatter.format(Sequence(name, bases, quals, name2=name)))
     with open(self.path) as handle:
         contents = handle.read()
     assert contents == (
         '@name\nCCATA\n+name\n!#!#!\n@name2\nHELLO\n+name2\n&&&!&\n')
Exemple #14
0
def test_Modifiers_paired_both():
    """With paired="both" and read=1 | 2, the cutter is registered for and
    applied to both reads."""
    modifiers = PairedEndModifiers(paired="both")
    modifiers.add_modifier(UnconditionalCutter, read=1 | 2, lengths=[5])

    per_read = [modifiers.get_modifiers(read=which) for which in (1, 2)]
    for registered in per_read:
        assert len(registered) == 1
    for registered in per_read:
        assert isinstance(registered[0], UnconditionalCutter)

    bases, quals = 'ACGTTTACGTA', '##456789###'
    read1 = Sequence('read1', bases, quals)
    read2 = Sequence('read1', bases, quals)
    out1, out2 = modifiers.modify(read1, read2)
    assert out1.sequence == 'TACGTA'
    assert out2.sequence == 'TACGTA'
Exemple #15
0
def test_overlapping_with_error_correction():
    """Merging with ``mismatch_action='liberal'`` should yield one merged
    read whose sequence and qualities match the expected corrected values."""
    fw_bases = 'AGATCGGAAGACCGTCATGTAGGGAAAGAGTGTAGATCTC'
    fw_quals = 'FFFFFFFFFFF#FFFFFFFFFFFFFFFFFFFFF#######'
    # Read 2 is given in the forward orientation and flipped here.
    rv_bases = reverse_complement('AGATCGGTAGAGCGTCGTGTAGGGAAATAGTGTAGATCTC')
    rv_quals = 'FFFFFFFFFFFFFFFF#FFFFFFFFFF#FFFFFFFFFFFF'[::-1]

    merger = MergeOverlapping(
        min_overlap=10, error_rate=0.1, mismatch_action='liberal')
    read1 = Sequence('foo', fw_bases, fw_quals)
    read2 = Sequence('foo', rv_bases, rv_quals)

    merged1, merged2 = merger(read1, read2)
    assert merged1.merged
    assert merged2 is None
    assert merged1.sequence == 'AGATCGGTAGAGCGTCATGTAGGGAAAGAGTGTAGATCTC'
    assert merged1.qualities == 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF#######'
Exemple #16
0
 def test(self):
     """Format two read pairs with InterleavedFormatter and check the
     counters and the interleaved FASTQ text collected under "foo"."""
     pair_a = (Sequence('A/1 comment', 'TTA', '##H'),
               Sequence('A/2 comment', 'GCT', 'HH#'))
     pair_b = (Sequence('B/1', 'CC', 'HH'), Sequence('B/2', 'TG', '#H'))
     formatter = InterleavedFormatter(FastqFormat(), "foo")
     collected = defaultdict(list)
     for first, second in (pair_a, pair_b):
         formatter.format(collected, first, second)
     assert formatter.written == 2
     assert formatter.read1_bp == 5
     assert formatter.read2_bp == 5
     assert "foo" in collected
     text = "".join(collected["foo"])
     assert text == (
         '@A/1 comment\nTTA\n+\n##H\n@A/2 comment\nGCT\n+\nHH#\n'
         '@B/1\nCC\n+\nHH\n@B/2\nTG\n+\n#H\n')
Exemple #17
0
def test_min_cutter_T_T():
    """MinCutter((5, -5), True, True) combined with unconditional clipping.

    Checks the resulting sequence and the ``clipped`` bookkeeping without an
    adapter match, with a front adapter match, and after a further
    unconditional cut.
    """
    cut_before = UnconditionalCutter((2, -2))
    cut_after = UnconditionalCutter((1, -1))
    min_cutter = MinCutter((5, -5), True, True)

    original = Sequence('read1', "CAATCGATCGAACGTACCGAT")
    assert original.clipped == [0, 0, 0, 0], str(original.clipped)
    clipped_once = cut_before(original)
    assert clipped_once.sequence == "ATCGATCGAACGTACCG"
    assert clipped_once.clipped == [2, 2, 0, 0], str(clipped_once.clipped)

    # Without adapter trimming the sequence is expected to stay the same.
    assert min_cutter(clipped_once).sequence == "ATCGATCGAACGTACCG"

    # With adapter trimming: work on a copy carrying a front match.
    adapter_trimmed = clipped_once[:]
    adapter_trimmed.sequence = "ATCGAACGTACCG"
    adapter_trimmed.match, adapter_trimmed.match_info = front_match(
        adapter_trimmed)
    after_min = min_cutter(adapter_trimmed)
    assert after_min.sequence == "TCGAACGTACCG", after_min.sequence
    assert after_min.clipped == [2, 2, 1, 0]

    # With subsequent clipping.
    clipped_again = cut_after(adapter_trimmed)
    assert clipped_again.sequence == "TCGAACGTACC", clipped_again.sequence
    assert clipped_again.clipped == [2, 2, 1, 1], clipped_again.clipped
    final = min_cutter(clipped_again)
    assert final.sequence == "TCGAACGTACC", final.sequence
    assert final.clipped == [2, 2, 1, 1], final.clipped
def test_linked_adapter():
    """A LinkedAdapter('AAAA', 'TTTT') match should trim both flanks and
    keep the read name."""
    read = Sequence(name='seq', sequence='AAAACCCCCTTTT')
    adapter = LinkedAdapter('AAAA', 'TTTT')
    result = adapter.trimmed(adapter.match_to(read))
    assert result.name == 'seq'
    assert result.sequence == 'CCCCC'
Exemple #19
0
def test_unconditional_cutter():
    """UnconditionalCutter with a single positive or negative length.

    The expected values show a positive length removing bases from the
    front and a negative length removing them from the back; a length
    larger than the read leaves an empty sequence.
    """
    read = Sequence("read1", 'abcdefg')
    cases = (
        (2, 'cdefg'),
        (-2, 'abcde'),
        (100, ''),
        (-100, ''),
    )
    for length, expected in cases:
        trimmed = UnconditionalCutter(lengths=[length])(read)
        assert trimmed.sequence == expected, (length, trimmed.sequence)
def test_issue_80():
    """Pin the alignment reported for issue 80 (3 errors, full adapter span)."""
    # This issue turned out to not be an actual issue with the alignment
    # algorithm. The following alignment is found because it has more matches
    # than the 'obvious' one:
    #
    # TCGTATGCCGTCTTC
    # =========X==XX=
    # TCGTATGCCCTC--C
    #
    # This is correct, albeit a little surprising, since an alignment without
    # indels would have only two errors.
    adapter_options = dict(
        sequence="TCGTATGCCGTCTTC",
        where=BACK,
        max_error_rate=0.2,
        min_overlap=3,
        read_wildcards=False,
        adapter_wildcards=False)
    adapter = Adapter(**adapter_options)
    read = Sequence(name="seq2", sequence="TCGTATGCCCTCC")
    result = adapter.match_to(read)

    assert read.original_length == 13, result
    expected_fields = (("errors", 3), ("astart", 0), ("astop", 15))
    for field, expected in expected_fields:
        assert getattr(result, field) == expected, result
Exemple #21
0
def test_ncontentfilter_paired():
    """NContentFilter wrapped for paired reads.

    SingleWrapper must agree with filtering read 1 alone; PairedWrapper is
    compared against the expected KEEP/DISCARD result for the pair.
    """
    # (read 1 bases, read 2 bases, N count threshold, expected paired result)
    cases = [
        ('AAA', 'AAA', 0, KEEP),
        ('AAAN', 'AAA', 0, DISCARD),
        ('AAA', 'AANA', 0, DISCARD),
        ('ANAA', 'AANA', 1, KEEP),
    ]
    for bases1, bases2, threshold, expected in cases:
        n_filter = NContentFilter(count=threshold)
        legacy_wrapper = SingleWrapper(n_filter)
        paired_wrapper = PairedWrapper(n_filter)
        read1 = Sequence('read1', bases1, qualities='#' * len(bases1))
        read2 = Sequence('read1', bases2, qualities='#' * len(bases2))
        assert legacy_wrapper(read1, read2) == n_filter(read1)
        # The whole pair is discarded if either read meets the criteria.
        assert paired_wrapper(read1, read2) == expected
Exemple #22
0
    def test(self):
        """Read tests/cut/interleaved.fastq through InterleavedSequenceReader
        and through openseq(interleaved=True); both must yield the same pairs."""
        fastq_path = "tests/cut/interleaved.fastq"
        pair_one = (
            Sequence('read1/1 some text', 'TTATTTGTCTCCAGC',
                     '##HHHHHHHHHHHHH'),
            Sequence('read1/2 other text', 'GCTGGAGACAAATAA',
                     'HHHHHHHHHHHHHHH'),
        )
        pair_two = (
            Sequence('read3/1', 'CCAACTTGATATTAATAACA',
                     'HHHHHHHHHHHHHHHHHHHH'),
            Sequence('read3/2', 'TGTTATTAATATCAAGTTGG',
                     '#HHHHHHHHHHHHHHHHHHH'),
        )
        expected = [pair_one, pair_two]

        from_reader = list(InterleavedSequenceReader(fastq_path))
        # Print actual and expected pairs side by side to aid debugging.
        for (got1, got2), (want1, want2) in zip(from_reader, expected):
            print(got1, got2, want1, want2)
        assert from_reader == expected

        with openseq(fastq_path, interleaved=True) as handle:
            from_openseq = list(handle)
        assert from_openseq == expected
Exemple #23
0
def test_ncontentfilter():
    """NContentFilter on single reads with integer and fractional counts."""
    # Each case is (bases, count argument, expected filter result).
    cases = [
        ('AAA', 0, KEEP),
        ('AAA', 1, KEEP),
        ('AAACCTTGGN', 1, KEEP),
        ('AAACNNNCTTGGN', 0.5, KEEP),
        ('NNNNNN', 1, DISCARD),
        ('ANAAAA', 1 / 6, KEEP),
        ('ANAAAA', 0, DISCARD),
    ]
    for bases, threshold, expected in cases:
        n_filter = NContentFilter(count=threshold)
        read = Sequence('read1', bases, qualities='#' * len(bases))
        assert n_filter(read) == expected
Exemple #24
0
def test_min_cutter_T_F():
    """MinCutter((4, -4), True, False) applied after a (2, -2) unconditional cut."""
    cut_before = UnconditionalCutter((2, -2))
    min_cutter = MinCutter((4, -4), True, False)

    read = cut_before(Sequence('read1', "CAATCGATCGAACGTACCGAT"))
    assert read.sequence == "ATCGATCGAACGTACCG"
    assert read.clipped == [2, 2, 0, 0]

    # Without adapter trimming.
    result = min_cutter(read)
    assert result.sequence == "CGATCGAACGTAC"
Exemple #25
0
def test_statistics():
    """The bases recorded in the adapter length statistics must not exceed
    the read length."""
    read = Sequence('name', 'AAAACCCCAAAA')
    adapters = [Adapter('CCCC', BACK, 0.1)]
    cutter = AdapterCutter(adapters, times=3)
    # Run the cutter for its effect on the adapters' length statistics.
    cutter(read)
    # TODO make this a lot simpler
    trimmed_bp = sum(
        seqlen * count
        for adapter in adapters
        for histogram in (adapter.lengths_front, adapter.lengths_back)
        for seqlen, count in histogram.items())
    assert trimmed_bp <= len(read), trimmed_bp
def test_issue_52():
    """Regression test for issue 52: ``wildcards()`` on a hand-built Match.

    The Match is constructed directly with fixed alignment coordinates
    rather than obtained by aligning the adapter to the read.
    """
    adapter = Adapter(
        sequence='GAACTCCAGTCACNNNNN',
        where=BACK,
        max_error_rate=0.12,
        min_overlap=5,
        read_wildcards=False,
        adapter_wildcards=True)
    read = Sequence(name="abc", sequence='CCCCAGAACTACAGTCCCGGC')
    am = Match(
        astart=0, astop=17, rstart=5, rstop=21, matches=15, errors=2,
        front=None, adapter=adapter, read=read)
    assert am.wildcards() == 'GGC'
Exemple #27
0
def test_Modifiers_single():
    """SingleEndModifiers registers the cutter for read 1 only and returns
    a one-element result from modify()."""
    modifiers = SingleEndModifiers()
    modifiers.add_modifier(UnconditionalCutter, lengths=[5])

    for_read1 = modifiers.get_modifiers(read=1)
    for_read2 = modifiers.get_modifiers(read=2)
    assert len(for_read1) == 1
    assert isinstance(for_read1[0], UnconditionalCutter)
    assert len(for_read2) == 0

    # Single-end modification.
    result = modifiers.modify(Sequence('read1', 'ACGTTTACGTA', '##456789###'))
    assert len(result) == 1
    assert result[0].sequence == 'TACGTA'
def test_str():
    """Smoke test: str() on an adapter, a match result and a colorspace
    adapter must not raise (there are no assertions)."""
    adapter = Adapter('ACGT', where=BACK, max_error_rate=0.1)
    str(adapter)
    match = adapter.match_to(Sequence(name='seq', sequence='TTACGT'))
    str(match)
    colorspace_adapter = ColorspaceAdapter(
        '0123', where=BACK, max_error_rate=0.1)
    str(colorspace_adapter)
Exemple #29
0
 def test_too_many_qualities(self):
     """Five quality characters for four bases must raise FormatError."""
     mismatched = dict(name="name", sequence="ACGT", qualities="#####")
     with raises(FormatError):
         Sequence(**mismatched)
Exemple #30
0
def test_TruSeq_trimmer():
    """TruSeqBisulfiteTrimmer should remove the first six bases of the sample read."""
    read = Sequence('read1', "CTATCGATCCACGAGACTAAC")
    trimmed = TruSeqBisulfiteTrimmer()(read)
    assert trimmed.sequence == "ATCCACGAGACTAAC"