Esempio n. 1
0
def test_merge_targets():
    """Identify cluster for merging"""
    uid1 = "ACCT"
    uid2 = "GGGG"
    uid3 = "AAGG"

    seq1 = ["ACTGTTTGTCTAAGC"] * 2
    qual1 = ['I' * len(seq1[0])] * len(seq1)
    seq2 = ["ACTGTTTTTCTAAGC"] * 5
    qual2 = ['I' * len(seq2[0])] * len(seq2)
    seq3 = ["ACTGTTTTTCTAAGC"] * 2
    qual3 = ['I' * len(seq3[0])] * len(seq3)

    clusters = create_consensus([uid1 + uid1]*len(seq1) + \
                                   [uid2 + uid2]*len(seq2),
                                ['I'*(len(uid1)*2)]*(len(seq1) + len(seq2)),
                                seq1 + seq2, qual1 + qual2)
    seq3 = [
        pseq.SequenceWithQuality(seq, qual) for seq, qual in zip(seq3, qual3)
    ]
    uid = pseq.SequenceWithQuality(uid2 + uid3, 'I' * (len(uid2) + len(uid3)))
    cand = clusters.merge_target(uid, seq3[0], {}, 2)
    assert cand == uid2 + uid2, "%r != %r" % (cand, uid2 + uid2)
    cand = clusters.merge_target(uid, seq3[0], {}, 1)
    assert cand is None, "%r != %r" % (cand, None)
Esempio n. 2
0
def test_merge_diff(idx):
    """Propagate diffs when merging clusters"""
    uid1 = "ACCT"
    uid2 = "ACTT"

    seq1 = ["ACTGTTTGTCTAAGC"] * 2
    qual1 = ['I' * len(seq1[0])] * len(seq1)
    seq2 = ["ACTGTTTTTCTAAGC"] * 5
    qual2 = ['I' * len(seq2[0])] * len(seq2)
    seq3 = ["ACTGTTTTTCTAAGC"] * 2
    qual3 = ['I' * len(seq3[0])] * len(seq3)
    seq4 = ["ACTGTTTGTGTAAGC", "ACTGTTTGTGTAAGC", "ACTGTTTGTATAAGC"]
    qual4 = ['I' * len(seq4[0])] * len(seq4)

    clusters = create_consensus([uid1 + uid2]*len(seq1) + \
                                   [uid2 + uid1]*len(seq2) + \
                                   [uid2 + uid2]*len(seq3) + \
                                   [uid1 + uid1]*len(seq4),
                                ['I'*(len(uid1)*2)]*(len(seq1) + len(seq2) + \
                                   len(seq3) + len(seq4)),
                                seq1 + seq2 + seq3 + seq4,
                                qual1 + qual2 + qual3 + qual4)
    ids = [uid1 + uid2, uid2 + uid1, uid2 + uid2, uid1 + uid1]
    centres = [clusters[ids[i]] for i in idx]
    merged = centres[0]
    for i in range(1, len(clusters)):
        success = merged.merge(centres[i], 2, max_dist=0.5)
        assert success
    expect = "+8G4T7 10C9G2"
    obs = str(merged).splitlines()
    assert merged.size == 11, "%r != %r" % (merged.size, 11)
    assert obs[2] == expect, "%r != %r" % (obs[2], expect)
Esempio n. 3
0
def test_keys():
    """Retrieve cluster IDs"""
    uid1 = "ACCT"

    seq1 = ["ACTGTTTGTCTAAGC"] * 2
    qual1 = ['I' * len(seq1[0])] * len(seq1)

    clusters = create_consensus([uid1 + uid1] * len(seq1),
                                ['I' * (len(uid1) * 2)] * len(seq1), seq1,
                                qual1)
    assert list(clusters.keys()) == [
        'ACCTACCT'
    ], "%r != %r" % (list(clusters.keys()), ['ACCTACCT'])
Esempio n. 4
0
def test_values():
    """Retrieve cluster consensus"""
    uid1 = "ACCT"

    seq1 = ["ACTGTTTGTCTAAGC"] * 2
    qual1 = ['I' * len(seq1[0])] * len(seq1)

    clusters = create_consensus([uid1 + uid1] * len(seq1),
                                ['I' * (len(uid1) * 2)] * len(seq1), seq1,
                                qual1)
    values = list(clusters.values())
    assert len(values) == 1, "%r != %r" % (len(values), 1)
    assert  values[0].sequence.sequence == "ACTGTTTGTCTAAGC", "%r != %r" % \
                                 (values[0].sequence.sequence, ['ACCTACCT'])
Esempio n. 5
0
def test_iteritems():
    """Retrieve cluster IDs"""
    uid1 = "ACCT"

    seq1 = ["ACTGTTTGTCTAAGC"] * 2
    qual1 = ['I' * len(seq1[0])] * len(seq1)

    clusters = create_consensus([uid1 + uid1] * len(seq1),
                                ['I' * (len(uid1) * 2)] * len(seq1), seq1,
                                qual1)
    expected = [('ACCTACCT', "ACTGTTTGTCTAAGC")]
    for (obs, expect) in zip(clusters.items(), expected):
        assert obs[0] == expect[0], "%r != %r" % (obs[0], expect[0])
        assert obs[1].sequence.sequence == expect[1], "%r != %r" % \
                                 (obs[1].sequence.sequence, expect[1])
Esempio n. 6
0
def test_write():
    """Write fastq output"""
    uid1 = "ACCT"

    seq1 = ["ACTGTTTGTCTAAGC"] * 2
    qual1 = ['I' * len(seq1[0])] * len(seq1)

    clusters = create_consensus([uid1 + uid1] * len(seq1),
                                ['I' * (len(uid1) * 2)] * len(seq1), seq1,
                                qual1)
    clusters.write(TMP + "write.fastq")
    expect = str(clusters['ACCTACCT']).split("\n")
    with open(TMP + "write.fastq") as output:
        for (i, (out_line, expect_line)) in enumerate(zip(output, expect), 1):
            out_line = out_line.strip("\n")
            assert out_line == expect_line, "Error in line %r of fastq record:\n%r\n  !=\n%r" % \
                    (i, out_line, expect_line)