Example #1
0
def check_kmer_freq_in_read_pair(read1, read2, minkmer, debugstream=None):
    """
    Check interesting k-mer frequency in each read.

    When calculating offset between a pair of reads, do not use any interesting
    k-mers that occur multiple times in either read.

    :param read1: first read; its `ikmers` are scanned for the k-mer
    :param read2: second read; its `ikmers` are scanned for the k-mer
    :param minkmer: sequence of the interesting k-mer shared by the reads
    :param debugstream: optional stream; when truthy, an INFO message is
                        printed to it if the k-mer is rejected
    :returns: a pair `(kmer1, kmer2)` holding the single matching entry from
              each read's `ikmers`, or `(None, None)` if the k-mer matches
              more than once in either read
    :raises AssertionError: if either read has no matching interesting k-mer
    """
    # Match on either form: `kevlar.same_seq` is handed both the k-mer and
    # the result of `kevlar.revcom` on it.
    maxkmer = kevlar.revcom(minkmer)
    matches1 = [
        k for k in read1.ikmers
        if kevlar.same_seq(k.sequence, minkmer, maxkmer)
    ]
    matches2 = [
        k for k in read2.ikmers
        if kevlar.same_seq(k.sequence, minkmer, maxkmer)
    ]
    nmatches1 = len(matches1)
    nmatches2 = len(matches2)
    # Both reads must contain the k-mer; check each count (not the first one
    # twice) so a missing match in read2 fails here with a useful message
    # instead of as an IndexError below.
    assert nmatches1 > 0 and nmatches2 > 0, (nmatches1, nmatches2)
    if nmatches1 > 1 or nmatches2 > 1:
        if debugstream:
            message = (
                'stubbornly refusing to calculate offset bewteen {:s} and '
                '{:s}; interesting k-mer {:s} occurs multiple times'.format(
                    read1.name, read2.name, minkmer))
            print('[kevlar::overlap] INFO', message, file=debugstream)
        return None, None

    kmer1 = matches1[0]
    kmer2 = matches2[0]
    return kmer1, kmer2
Example #2
0
def test_allocate_sketch_graphy(count, smallcount):
    """
    Check a sketch allocated with the fifth `allocate` argument set to True.

    After consuming a sequence that contains the test k-mer exactly once, the
    sketch must report a value of 1 for that k-mer, and hashing the k-mer and
    reversing the hash must give back the same sequence (as judged by
    `kevlar.same_seq`).

    `count` and `smallcount` are supplied by the test harness and forwarded
    unchanged to `kevlar.sketch.allocate`.
    """
    sequence = 'AATCAACGCTTCTTAATAGGCATAGTGTCTCTGCTGCGCATGGACGTGCCATAGCCACTACT'
    kmer = 'GCATAGTGTCTCTGCTGCGCA'

    sketch = kevlar.sketch.allocate(21, 1e4, 4, count, True, smallcount)
    sketch.consume(sequence)
    # This comparison was previously a bare expression whose result was
    # discarded, so the stored value was never actually verified.
    assert sketch.get(kmer) == 1
    kmer_hash = sketch.hash(kmer)
    assert kevlar.same_seq(sketch.reverse_hash(kmer_hash), kmer)
Example #3
0
def test_pico(picorecord1, picorecord2, picorecord3):
    """
    Pair picorecord1 with picorecord2 and with picorecord3 on the same k-mer.

    Both pairings must report an offset of 59 and the same head read name,
    and their merged sequences must be equivalent according to
    `kevlar.same_seq`.
    """
    shared_kmer = 'TTTTTTGTTTCCCAAAGTAAGGCTG'
    head_name = 'seq1_901428_901847_3:0:0_0:0:0_87d/1'

    with_second = ReadPair(picorecord1, picorecord2, shared_kmer)
    assert with_second.offset == 59
    assert with_second.head.read.name == head_name
    print(with_second.mergedseq)

    with_third = ReadPair(picorecord1, picorecord3, shared_kmer)
    assert with_third.offset == 59
    assert with_third.head.read.name == head_name
    print(with_third.mergedseq)

    # Either partner should lead to an equivalent merged sequence.
    assert kevlar.same_seq(with_second.mergedseq, with_third.mergedseq)
Example #4
0
def test_pico_offset(picorecord1, picorecord2, picorecord3):
    """
    Compute the offset of picorecord1 against picorecord2 and picorecord3.

    Both pairings must report an offset of 59 and the same head name; only
    the second is expected to be flagged as swapped. The contigs produced by
    `kevlar.assemble.merge_pair` for the two pairings must be equivalent
    according to `kevlar.same_seq`.
    """
    shared_kmer = 'TTTTTTGTTTCCCAAAGTAAGGCTG'
    head_name = 'seq1_901350_901788_1:0:0_0:0:0_21ca1/2'
    # (partner record, expected value of the `swapped` flag)
    cases = ((picorecord2, False), (picorecord3, True))

    contigs = []
    for partner, expect_swapped in cases:
        pair = kevlar.overlap.calc_offset(picorecord1, partner, shared_kmer)
        assert pair.offset == 59
        assert pair.head.name == head_name
        assert pair.swapped is expect_swapped
        merged = kevlar.assemble.merge_pair(pair)
        print(merged)
        contigs.append(merged)

    assert kevlar.same_seq(contigs[0], contigs[1])
Example #5
0
def test_pico_contains(picorecord3, picorecord4):
    """
    Pair picorecord3 with picorecord4 and check the merged sequence.

    The merged sequence must be equivalent, according to `kevlar.same_seq`,
    to picorecord3's own sequence (presumably because picorecord4 lies
    entirely within picorecord3 -- confirm against the fixtures).
    """
    shared_kmer = 'CACTCAGCCTTACTTTGGGAAACAA'
    merged_pair = ReadPair(picorecord3, picorecord4, shared_kmer)
    print(merged_pair.mergedseq)
    assert kevlar.same_seq(merged_pair.mergedseq, picorecord3.sequence)