Esempio n. 1
0
def test_augfastx_writer():
    """Write three records with kevlar.print_augmented_fastx to one buffer.

    Every record is printed to the same in-memory StringIO object; the
    accumulated text is compared against an expected literal by the
    assertion that follows the last write.
    """
    output = StringIO()
    # First record: quality string plus two annotated k-mers, no mates.
    record = Record(
        name='BasiliscusVulgarisRead84467/1',
        sequence='TTAACTCTAGATTAGGGGCGTGACTTAATAAGGTGTGGGCCTAAGCGTCT',
        quality='BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB',
        annotations=[
            KmerOfInterest(ksize=19, offset=13, abund=(12, 1, 1)),
            KmerOfInterest(ksize=19, offset=15, abund=(20, 0, 1)),
        ],
    )
    kevlar.print_augmented_fastx(record, output)
    # Second record: two annotated k-mers and a single mate sequence.
    record = Record(
        name='BasiliscusVulgarisRead90577/2',
        sequence='CTGTAATCCCAGCACTTTGGGAGGCCGAGGCAAGCAGATGATGCGGTCAG',
        quality='BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB',
        annotations=[
            KmerOfInterest(ksize=19, offset=1, abund=(5, 7, 9)),
            KmerOfInterest(ksize=19, offset=2, abund=(7, 10, 9)),
        ],
        mates=['CAGATGTGTCTTGTGGGCAGTGCAGCGGAGAGGTGCAAATATGGGTTTGG']
    )
    kevlar.print_augmented_fastx(record, output)
    # Third record: name, sequence and quality only (no annotations or mates
    # are passed to the constructor).
    record = Record(
        name='BasiliscusVulgarisRead99037/1',
        sequence='AGCACTTTGGGAGGCCGAGGCAAGCAGATGATGCGGTCAGGATTACAGAT',
        quality='BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'
    )
    kevlar.print_augmented_fastx(record, output)

    assert output.getvalue() == """@BasiliscusVulgarisRead84467/1
Esempio n. 2
0
def dump(bamstream,
         refrseqs=None,
         pairmode='split',
         upint=50000,
         logstream=sys.stderr):
    """
    Stream read alignments from BAM/SAM input as Record objects.

    - bamstream: open file handle to the BAM/SAM file input; it is handed to
      pysam.AlignmentFile with mode 'rb'
    - refrseqs: dictionary of reference sequences, indexed by sequence ID; if
      provided, perfect matches to the reference sequence will be discarded
      (the value is forwarded unchanged to keepers())
    - pairmode: forwarded unchanged to keepers(), default 'split'
      NOTE(review): the accepted values and their meaning are defined by
      keepers(), which is not visible here -- confirm before relying on them
    - upint: update interval for the progress indicator, in record pairs
    - logstream: file handle to which the progress indicator writes output

    Yields one Record (name, sequence, quality) for every alignment that
    keepers() returns for each pair produced by bam_paired_reader().
    """
    alignments = pysam.AlignmentFile(bamstream, 'rb')
    npairs = 0
    for mate1, mate2 in bam_paired_reader(alignments):
        npairs += 1
        if npairs % upint == 0:  # pragma: no cover
            print('...processed', npairs, 'pairs of records', file=logstream)
        kept = keepers(mate1, mate2, alignments, refrseqs, pairmode)
        for aln in kept:
            yield Record(
                name=readname(aln),
                sequence=aln.seq,
                quality=aln.qual,
            )
Esempio n. 3
0
 def revcom(self):
     """Return a new ReadWithKmer built on the reverse-complemented read.

     The read sequence and the k-mer sequence are both passed through
     kevlar.revcom. The k-mer offset is recomputed relative to the new
     sequence, and the new record's k-mer lookup maps both the original
     k-mer sequence and its reverse complement to the relocated k-mer.
     """
     rcseq = kevlar.revcom(self.read.sequence)
     rckmerseq = kevlar.revcom(self.kmerseq)
     # Mirror the k-mer's start position onto the opposite strand.
     rcoffset = len(rcseq) - self.kmer.offset - self.kmer.ksize
     rckmer = KmerOfInterest(self.kmer.ksize, rcoffset, self.kmer.abund)
     rcread = Record(
         self.read.name,
         rcseq,
         annotations=[rckmer],
         ikmers={self.kmerseq: rckmer, rckmerseq: rckmer},
     )
     return ReadWithKmer(rcread, self.kmerseq)
Esempio n. 4
0
def record5():
    """Build the 'read5' fixture record annotated with two 7-mers."""
    ikmers = [
        KmerOfInterest(7, offset, abund)
        for offset, abund in ((15, [12, 0, 0]), (16, [13, 0, 0]))
    ]
    readseq = 'CTCTTCCGGCAGTCACTGTCAAGAGAGGGTGAACT'
    return Record(name='read5', sequence=readseq, annotations=ikmers)
Esempio n. 5
0
def record2a():
    """Build the 'read2' fixture record annotated with two 5-mers."""
    first = KmerOfInterest(5, 1, [15, 0, 0])
    second = KmerOfInterest(5, 15, [15, 0, 0])
    return Record(
        name='read2',
        sequence='ACGCAAAGCTATTTACGCAA',
        annotations=[first, second],
    )
Esempio n. 6
0
def record6():
    """Build the 'read6' fixture record annotated with two 7-mers."""
    ikmers = [
        KmerOfInterest(7, offset, abund)
        for offset, abund in ((3, [12, 0, 0]), (4, [13, 0, 0]))
    ]
    readseq = 'TCACTGTCAAGAGAGGCCTACGGATTCGGTTACTG'
    return Record(name='read6', sequence=readseq, annotations=ikmers)
Esempio n. 7
0
def record4():
    """Build the 'read4' fixture record annotated with two 5-mers.

    Per the original author's note: similar to record2 but with a single
    nucleotide mismatch.
    """
    first = KmerOfInterest(5, 1, [15, 0, 0])
    second = KmerOfInterest(5, 14, [19, 1, 0])
    return Record(
        name='read4',
        sequence='ACGCAATGCTATTTAAAACC',
        annotations=[first, second],
    )
Esempio n. 8
0
def record3fix():
    """Build the 'read3' fixture record annotated with two 5-mers.

    Per the original author's note: reverse complement of record2.
    """
    ikmers = [
        KmerOfInterest(5, offset, abund)
        for offset, abund in ((1, [19, 1, 0]), (14, [15, 0, 0]))
    ]
    return Record(
        name='read3',
        sequence='GGTTTTAAATAGCTTTGCGT',
        annotations=ikmers,
    )
Esempio n. 9
0
def mutate_genome(infile, mutations):
    """Yield every sequence in `infile`, applying any mutations listed for it.

    - infile: path opened with kevlar.open(..., 'r') and parsed with
      parse_augmented_fastx
    - mutations: mapping from record name to a collection of mutation
      objects, each exposing a `pos` attribute

    Records whose name has no entry in `mutations` are yielded with their
    sequence unchanged. Otherwise the mutations are handed to
    mutate_sequence() sorted by descending `pos`.
    """
    instream = kevlar.open(infile, 'r')
    for entry in parse_augmented_fastx(instream):
        newseq = entry.sequence
        if entry.name in mutations:
            by_position_desc = sorted(
                mutations[entry.name], key=lambda m: m.pos, reverse=True
            )
            newseq = mutate_sequence(newseq, by_position_desc)
        yield Record(name=entry.name, sequence=newseq)
Esempio n. 10
0
def main(args):
    """Localize contigs against a reference and write out each cutout.

    Reads contigs from `args.contigs`, passes them to localize() together
    with `args.refr`, `args.seed_size`, `args.delta`, `args.max_diff` and
    `args.logfile`, and writes one record per resulting cutout to
    `args.out`.
    """
    contigfile = kevlar.open(args.contigs, 'r')
    contigstream = kevlar.parse_augmented_fastx(contigfile)
    outstream = kevlar.open(args.out, 'w')
    cutouts = localize(
        contigstream,
        args.refr,
        seedsize=args.seed_size,
        delta=args.delta,
        maxdiff=args.max_diff,
        logstream=args.logfile,
    )
    for cutout in cutouts:
        kevlar.sequence.write_record(
            Record(name=cutout.defline, sequence=cutout.sequence),
            outstream,
        )
Esempio n. 11
0
def picorecord4():
    """Build the 'seqname' fixture record annotated with five 25-mers."""
    readseq = (
        'TGTTCACTCAGCCTTACTTTGGGAAACAAAAAAAAAACTAAGCTTTTGGATTACAGTTG'
        'GAAGTGAGGTCTCAGCCTGCACAAACGAATAAATG'
    )
    # (offset, abundances) for each annotated k-mer, in annotation order.
    kmer_specs = (
        (8, [17, 0, 0]),
        (7, [18, 0, 0]),
        (6, [18, 1, 0]),
        (5, [18, 1, 0]),
        (4, [19, 0, 0]),
    )
    ikmers = [KmerOfInterest(25, offset, abund) for offset, abund in kmer_specs]
    return Record(name='seqname', sequence=readseq, annotations=ikmers)
Esempio n. 12
0
def picorecord3():
    """Build a '.../1' fixture record annotated with five 25-mers."""
    readseq = (
        'TATTGTTCACTCAGCCTTACTTTGGGAAACAAAAAAAAAACTAAGCTTTTGGATTACAG'
        'TTGGAAGTGAGGTCTCAGCCTGCACAAACGAATAAATGTAA'
    )
    # (offset, abundances) for each annotated k-mer, in annotation order.
    kmer_specs = (
        (11, [17, 0, 0]),
        (10, [18, 0, 0]),
        (9, [18, 1, 0]),
        (8, [18, 1, 0]),
        (7, [19, 0, 0]),
    )
    ikmers = [KmerOfInterest(25, offset, abund) for offset, abund in kmer_specs]
    return Record(
        name='seq1_901428_901847_3:0:0_0:0:0_87d/1',
        sequence=readseq,
        annotations=ikmers,
    )
Esempio n. 13
0
def picorecord2():
    """Build a '.../1' fixture record annotated with five 25-mers."""
    readseq = (
        'TTACATTTATTCGTTTGTGCAGGCTGAGACCTCACTTCCAACTGTAATCCAAAAGCTTA'
        'GTTTTTTTTTTGTTTCCCAAAGTAAGGCTGAGTGAACAATA'
    )
    offsets = (64, 65, 66, 67, 68)
    abunds = ([19, 0, 0], [18, 1, 0], [18, 1, 0], [18, 0, 0], [17, 0, 0])
    ikmers = [
        KmerOfInterest(25, offset, abund)
        for offset, abund in zip(offsets, abunds)
    ]
    return Record(
        name='seq1_901428_901847_3:0:0_0:0:0_87d/1',
        sequence=readseq,
        annotations=ikmers,
    )
Esempio n. 14
0
def picorecord1():
    """Build a '.../2' fixture record annotated with five 25-mers."""
    readseq = (
        'GTTTTTTTTTTGTTTCCCAAAGTAAGGCTGAGTGAACAATATTTTCTCATAGTTTTGAC'
        'AAAAACAAAGGAATCCTTAGTTATTAAACTCGGGAGTTTGA'
    )
    offsets = (5, 6, 7, 8, 9)
    abunds = ([19, 0, 0], [18, 1, 0], [18, 1, 0], [18, 0, 0], [17, 0, 0])
    ikmers = [
        KmerOfInterest(25, offset, abund)
        for offset, abund in zip(offsets, abunds)
    ]
    return Record(
        name='seq1_901350_901788_1:0:0_0:0:0_21ca1/2',
        sequence=readseq,
        annotations=ikmers,
    )
Esempio n. 15
0
def test_align_mates():
    """Check kevlar.call.align_mates on the minitrio novel-mates data.

    A placeholder record carrying every mate sequence from the FASTQ file is
    aligned against the minitrio reference; all reported positions must be
    on 'seq1' and match the expected (start, end) intervals.
    """
    mate_seqs = kevlar.open(data_file('minitrio/novel-mates.fastq.gz'), 'r')
    mateseqs = [
        mate.sequence for mate in kevlar.parse_augmented_fastx(mate_seqs)
    ]
    record = Record(name='bogusread', sequence='NNNNN', mates=mateseqs)

    refrfile = data_file('minitrio/refr.fa')
    kevlar.reference.autoindex(refrfile)
    positions = list(kevlar.call.align_mates(record, refrfile))

    seqids = {seqid for seqid, _start, _end in positions}
    coords = sorted((start, end) for _seqid, start, end in positions)
    print('DEBUG', coords, file=sys.stderr)

    expected_coords = [
        (45332, 45432), (45377, 45477), (45393, 45493), (45428, 45528),
        (45440, 45540), (45447, 45547), (46092, 46192), (46093, 46193),
        (46099, 46199), (46127, 46227), (46131, 46231), (46146, 46246),
        (46148, 46248), (48025, 48125), (48035, 48135),
    ]
    assert seqids == {'seq1'}
    assert coords == expected_coords
Esempio n. 16
0
def record1fix():
    """Build the 'read1' fixture record annotated with a single 5-mer."""
    ikmer = KmerOfInterest(5, 14, [15, 0, 0])
    readseq = 'GCTGCACCGATGTACGCAAA'
    return Record(name='read1', sequence=readseq, annotations=[ikmer])