Exemple #1
0
def test_merge_fail_uid():
    """Don't merge sequences with very different UIDs.

    Two consensus objects are built from the same read but with UIDs that
    differ in two positions ("AAAA" vs "CCAA"). ``merge`` is called with
    ``1`` as its second argument (presumably the number of UID mismatches
    that is tolerated -- confirm against ``Consensus.merge``) and is
    expected to return a falsy value.
    """
    id1 = sequence.SequenceWithQuality("AAAA", "IIII")
    id2 = sequence.SequenceWithQuality("CCAA", "IIII")
    seq = sequence.SequenceWithQuality("ACTGTTTGTCTAAGC", "IIIDIIIIIIIIIII")
    cons1 = cons.Consensus(id1, seq)
    cons2 = cons.Consensus(id2, seq)
    merged = cons1.merge(cons2, 1)
    # Message spelling now matches the "unexpectedly" wording of the sibling tests.
    assert not merged, "Merging succeeded unexpectedly"
Exemple #2
0
def test_merge_size():
    """Update size of merged clusters.

    Each of the two consensus objects is created from one read and then
    updated once with the same read. After merging the second into the
    first, the first is expected to report a size of 4.
    """
    uid_a = sequence.SequenceWithQuality("AAAA", "IIII")
    uid_b = sequence.SequenceWithQuality("AACA", "IIII")
    read = sequence.SequenceWithQuality("ACTGTTTGTCTAAGC", "IIIDIIIIIIIIIII")

    target = cons.Consensus(uid_a, read)
    target.update(uid_a, read)
    other = cons.Consensus(uid_b, read)
    other.update(uid_b, read)

    expected_size = 4
    was_merged = target.merge(other, 1)
    assert was_merged, "Merging failed unexpectedly"
    assert target.size == expected_size, \
        "Incorrect size for merged cluster (%d != %d)" % (target.size, expected_size)
Exemple #3
0
def test_merge_simple():
    """Combine two consensus sequences.

    Two single-read consensus objects with UIDs "AAAA" and "AACA" share the
    same read. The merge is expected to succeed, leave the first object
    with size 2 and keep its sequence string equal to that of the read.
    """
    uid_a = sequence.SequenceWithQuality("AAAA", "IIII")
    uid_b = sequence.SequenceWithQuality("AACA", "IIII")
    read = sequence.SequenceWithQuality("ACTGTTTGTCTAAGC", "IIIDIIIIIIIIIII")
    target = cons.Consensus(uid_a, read)
    other = cons.Consensus(uid_b, read)

    expected_size = 2
    was_merged = target.merge(other, 1)
    assert was_merged, "Merging failed unexpectedly"
    assert target.size == expected_size, \
        "Incorrect size for merged cluster (%d != %d)" % (target.size, expected_size)
    assert target.sequence.sequence == read.sequence, \
        "Incorrect merged sequence (%r != %r)" % (target.sequence.sequence, read.sequence)
Exemple #4
0
def test_consensus_diff():
    """Update sequence diff.

    A consensus is seeded with one read and then updated with two further
    reads. The first differs from the seed at positions 3 and 11, the
    second at position 9. Both updates must be accepted, and the final
    sequence, quality string and per-position diff counts are compared
    against hand-written expectations.
    """
    tail = 'A'*45
    uid = sequence.SequenceWithQuality("AAAA", "IIII")
    read_g3_a11 = sequence.SequenceWithQuality("ACTGTTTGTCTAAGC"+tail, "IIIDIIIIIIIIIII"*4)
    seed_read = sequence.SequenceWithQuality("ACTTTTTGTCTTAGC"+tail, "IIIIIIIIIDIDIII"*4)
    read_g9 = sequence.SequenceWithQuality("ACTTTTTGTGTTAGC"+tail, "IIIIIIIIIqIDIII"*4)

    # Expected state once all three reads have been processed.
    seq_expect = "ACTTTTTGTGTAAGC"+tail
    qual_expect = "IIIIIIIIIqIIIII"*4
    diff_expect = {3:{'T':2, 'G':1},
                   11:{'A':1, 'T':2},
                   9:{'C':2, 'G':1}}

    consensus = cons.Consensus(uid, seed_read)
    accepted = consensus.update(uid, read_g3_a11)
    assert accepted, "Sequence %r was rejected" % read_g3_a11
    accepted = consensus.update(uid, read_g9)
    assert accepted, "Sequence %r was rejected" % read_g9

    assert consensus.sequence.sequence == seq_expect, \
        "Failed to update consensus (%s != %s)" % (consensus.sequence.sequence, seq_expect)
    assert consensus.sequence.quality == qual_expect, \
        "Failed to update qualities (%s != %s)" % (consensus.sequence.quality, qual_expect)
    assert consensus.diffs == diff_expect, \
        "Incorrect sequence diff (%r != %r)" % (consensus.diffs, diff_expect)
Exemple #5
0
    def merge_target(self, uid, read_seq, id_map, threshold):
        """Pick the existing cluster a read belongs to, or start a new one.

        The UID string is searched in ``self._store`` (at most 100 raw hits)
        and the hits are passed through ``self._filter``. Of the remaining
        candidates the one whose second element is smallest is chosen and
        its first element is used as the match.

        Side effects: if a match is found it is recorded in `id_map` under
        the UID string of the read. Otherwise a new ``cons.Consensus`` is
        stored in ``self.clusters`` and the UID string is added to
        ``self._store``.

        Args:
            uid (:obj:`pyrates.sequence.SequenceWithQuality`): UID sequence.
            read_seq (:obj:`pyrates.sequence.SequenceWithQuality`): Read sequence.
            id_map (:obj:`dictionary`): A mapping of known approximate matches for UIDs.
            threshold (:obj:`int`): Maximum number of differences allowed between UIDs.

        Returns:
            :obj:`string`: Either the best approximate match for the UID or `None`
                if no valid match was found.
        """
        nameid = uid.sequence
        hits = self._store.search(nameid, max_hits=100, raw=True)
        hits = self._filter(nameid, hits, read_seq, threshold)
        # Element 1 of a hit is the ranking key, element 0 the matched ID.
        best = min(hits, key=lambda hit: hit[1])[0] if hits else None
        if best is None:
            # Nothing suitable to merge with: this read founds its own cluster.
            self.clusters[nameid] = cons.Consensus(uid, read_seq)
            self._store.add(nameid)
            return None
        id_map[nameid] = best
        return best
Exemple #6
0
def create_consensus(uids, uid_qual, seqs, seq_qual):
    """Create a clustering of consensus sequences from raw sequences.

    Inputs are paired positionally with ``zip``, so surplus entries in a
    longer list are ignored. Reads sharing the same UID string are folded
    into one consensus: the first read creates it, later ones update it.

    Args:
        uids (:obj:`list`): UID sequences.
        uid_qual (:obj:`list`): Quality strings, one per entry of `uids`.
        seqs (:obj:`list`): Read sequences.
        seq_qual (:obj:`list`): Quality strings, one per entry of `seqs`.

    Returns:
        The result of ``clust.Clustering`` applied to a dictionary that maps
        UID strings to consensus sequences.
    """
    def with_quality(bases, quals):
        # Attach each quality string to the sequence at the same position.
        return [sequence.SequenceWithQuality(b, q) for b, q in zip(bases, quals)]

    tagged_uids = with_quality(uids, uid_qual)
    tagged_reads = with_quality(seqs, seq_qual)

    by_uid = {}
    for uid, read in zip(tagged_uids, tagged_reads):
        key = uid.sequence
        existing = by_uid.get(key)
        if existing is None:
            by_uid[key] = cons.Consensus(uid, read)
        else:
            existing.update(uid, read)
    return clust.Clustering(by_uid)
Exemple #7
0
def test_consensus_new():
    """Create objects of class Consensus.

    A freshly constructed consensus is expected to expose the read and the
    UID it was created from and to report a size of 1.
    """
    read = sequence.SequenceWithQuality("ACTGTTTGTCTAAGC", "IIIDIIIIIIIIIII")
    uid = sequence.SequenceWithQuality("AAA", "III")
    fresh = cons.Consensus(uid, read)
    assert fresh.sequence == read
    assert fresh.uid == uid
    assert fresh.size == 1
Exemple #8
0
def test_update_uid(qual1, qual2, expect):
    """Retain highest quality.

    Args:
        qual1: Quality string of the UID the consensus is created with.
        qual2: Quality string of the UID handed to ``_update_uid``.
        expect: Quality string the consensus UID should carry afterwards.

    NOTE(review): the arguments are presumably injected by a parametrize
    decorator or fixtures that are not visible here -- confirm.
    """
    def poly_a(quality):
        # UID made of as many 'A's as there are quality characters.
        return sequence.SequenceWithQuality("A"*len(quality), quality)

    first_uid = poly_a(qual1)
    second_uid = poly_a(qual2)
    read = sequence.SequenceWithQuality("A"*20, "I"*20)
    consensus = cons.Consensus(first_uid, read)
    consensus._update_uid(second_uid)
    assert consensus.uid.quality == expect, \
        "Failed to retain high quality sequence (%r != %r)" % (consensus.uid.quality, expect)
Exemple #9
0
def test_consensus_skip():
    """Reject sequences that are too different.

    A consensus built from one read is offered a second read of the same
    length that differs in many positions. The update must report failure,
    leave the consensus sequence untouched and set ``different`` to 1.
    """
    uid = sequence.SequenceWithQuality("AAA", "III")
    kept_read = sequence.SequenceWithQuality("ACTGTTTGTCTAAGC", "IIIDIIIIIIIIIII")
    divergent_read = sequence.SequenceWithQuality("TTCTCCCTGGTAAGC", "IIIDIIIIIIIIIII")
    consensus = cons.Consensus(uid, kept_read)
    was_added = consensus.update(uid, divergent_read)
    assert not was_added
    assert consensus.sequence == kept_read, "%r != %r" % (consensus.sequence, kept_read)
    assert consensus.different == 1, "Skipped sequence not counted"
Exemple #10
0
def test_consensus_idlen():
    """Skip sequences with incompatible IDs.

    The consensus is created with a four-base UID and then updated with a
    five-base UID. The update must report failure and the consensus must
    still carry the original UID.
    """
    short_uid = sequence.SequenceWithQuality("AAAA", "IIII")
    long_uid = sequence.SequenceWithQuality("AAAAA", "IIIII")
    read = sequence.SequenceWithQuality("ACTGTTTGTCTAAGC", "IIIDIIIIIIIIIII")

    consensus = cons.Consensus(short_uid, read)
    was_added = consensus.update(long_uid, read)
    assert not was_added
    assert consensus.uid == short_uid, "%r != %r" % (consensus.uid, short_uid)
Exemple #11
0
    def add(self, uid, sequence):
        """Add a new cluster centre.

        A new ``cons.Consensus`` built from the arguments is stored in
        ``self.clusters`` under the UID's sequence string, and that string
        is then added to ``self._store``.

        Args:
            uid (:obj:`pyrates.sequence.SequenceWithQuality`): UID for the new cluster.
            sequence (:obj:`pyrates.sequence.SequenceWithQuality`): Sequence to represent cluster.
        """
        key = uid.sequence
        # Create the cluster first so the store is untouched if this fails.
        self.clusters[key] = cons.Consensus(uid, sequence)
        self._store.add(key)
Exemple #12
0
def test_consensus_seqlen():
    """Skip sequences whose length differs from the consensus.

    Three scenarios are exercised with a 24-base and a 20-base read that
    share the same UID; see the inline comments for what each one expects.
    """
    uid = sequence.SequenceWithQuality("AAAA", "IIII")
    long_read = sequence.SequenceWithQuality("AACTGTGAGTGTAGATGTTCTGTA", "I"*24)
    short_read = sequence.SequenceWithQuality("AACTGTGAGTGTAGATGTTC", "I"*20)

    # 1) Long consensus, short update: rejected, consensus unchanged.
    consensus = cons.Consensus(uid, long_read)
    accepted = consensus.update(uid, short_read)
    assert not accepted
    assert consensus.sequence == long_read, "%r != %r" % (consensus.sequence, long_read)
    assert consensus.shorter == 1, "Skipped sequence not recorded"

    # 2) Single-read short consensus, long update: the update reports
    #    failure, yet the consensus is expected to hold the long read
    #    afterwards with ``shorter`` equal to 1.
    consensus = cons.Consensus(uid, short_read)
    accepted = consensus.update(uid, long_read)
    assert not accepted
    assert consensus.sequence == long_read, "%r != %r" % (consensus.sequence, long_read)
    assert consensus.shorter == 1, "Skipped sequence not recorded"

    # 3) Short consensus backed by two reads, long update: rejected, the
    #    short read stays and ``longer`` is expected to be 1.
    consensus = cons.Consensus(uid, short_read)
    accepted = consensus.update(uid, short_read)
    assert accepted
    accepted = consensus.update(uid, long_read)
    assert not accepted
    assert consensus.sequence == short_read, "%r != %r" % (consensus.sequence, short_read)
    assert consensus.longer == 1, "Skipped sequence not recorded"
Exemple #13
0
def test_consensus_str():
    """String representation of consensus sequences.

    Checks ``str`` and ``repr`` of a single-read consensus, then updates it
    with a second read and checks ``str`` again against a hand-written
    expectation.
    """
    tail = 'A'*45
    uid = sequence.SequenceWithQuality("AAAA", "IIII")
    first_read = sequence.SequenceWithQuality("ACTGTTTGTCTAAGC"+tail, "IIIDIIIIIIIIIII"*4,
                                              name='test')
    second_read = sequence.SequenceWithQuality("ACTTTTTGTCTTAGC"+tail, "IIIIIIIIIDIDIII"*4,
                                               name='test')
    consensus = cons.Consensus(uid, first_read)

    # Expected output for the consensus holding only the first read.
    expect_str1 = "@test:AAAA:IIII:1:0:0:0\nACTGTTTGTCTAAGC"+tail+"\n+\n"+"IIIDIIIIIIIIIII"*4
    expect_repr1 = ("Consensus(uid=SequenceWithQuality(sequence='AAAA', "
                    "quality='IIII', name=''), "
                    "sequence=SequenceWithQuality(sequence='ACTGTTTGTCTAAGC" + tail + "', "
                    "quality='" + 'IIIDIIIIIIIIIII'*4 + "', name='test'), "
                    "diffs={}, size=1)")
    # Expected output once the second read has been folded in.
    expect_str2 = ("@test:AAAA:IIII:2:0:0:0\nACTTTTTGTCTAAGC" + tail +
                   "\n+4G1T1 12A1T1\n" + "IIIIIIIIIIIIIII"*4)

    assert str(consensus) == expect_str1, "\n%s\n!=\n%s" % (consensus, expect_str1)
    assert repr(consensus) == expect_repr1, "\n%r\n!=\n%r" % (consensus, expect_repr1)
    consensus.update(uid, second_read)
    assert str(consensus) == expect_str2, "\n%s\n!=\n%s" % (str(consensus), expect_str2)