def test_parse_single_partition():
    """Check which partitions come back when a single label is requested.

    For every case the input file is re-opened, one partition label is
    requested, and the sizes of the partitions yielded are compared against
    the expectation. The label 'alFrED' is expected to yield nothing.
    """
    infile = data_file('part-reads-simple.fa')
    # (requested label, expected size of each partition yielded)
    cases = (
        ('1', [4]),
        ('2', [2]),
        ('alFrED', []),
    )
    for label, expected_sizes in cases:
        readstream = kevlar.parse_augmented_fastx(kevlar.open(infile, 'r'))
        found = kevlar.parse_single_partition(readstream, label)
        sizes = [len(part) for partid, part in found]
        assert sizes == expected_sizes
def test_sim_var_geno():
    """Simulate variant genotypes with a fixed seed and check exact output.

    Requests 2 inherited and 2 de novo variants and compares the sequence
    IDs, positions and genotype tuples of the 4 resulting variants.
    """
    sequences = kevlar.seqio.parse_seq_dict(
        kevlar.open(data_file('100kbx3.fa.gz'), 'r')
    )
    variants = list(kevlar.gentrio.simulate_variant_genotypes(
        sequences, ninh=2, ndenovo=2, rng=112358 ^ 853211
    ))

    # Collect the three attributes of interest as parallel lists.
    seqids, positions, genotypes = [], [], []
    for variant in variants:
        seqids.append(variant.seqid)
        positions.append(variant.position)
        genotypes.append(variant.genotypes)
    print('DEBUG', seqids, positions, genotypes)

    assert len(variants) == 4
    assert seqids == ['scaf3', 'scaf3', 'scaf1', 'scaf2']
    assert positions == [4936, 57391, 67028, 88584]
    assert genotypes == [
        ('0/1', '0/1', '1/0'),
        ('1/1', '1/1', '1/1'),
        ('1/0', '0/0', '0/0'),
        ('0/1', '0/0', '0/0'),
    ]
def test_load_reads_and_kmers():
    """Make sure augmented records are loaded correctly.

    Loads the reads and the k-mer index from an augmented Fastq file, then
    checks the number of reads, the number of k-mers, the sequence of one
    read, and the set of read names associated with one k-mer.
    """
    # Use a context manager so the input file is closed deterministically
    # rather than being left open until garbage collection.
    with open(data_file('var1.reads.augfastq'), 'r') as instream:
        reads, kmers = load_reads_and_kmers(instream, logstream=None)

    assert len(reads) == 10
    assert len(kmers) == 7

    readname = 'read8f start=8,mutations=0'
    assert reads[readname].sequence == ('CACTGTCCTTACAGGTGGATAGTCGCTTTGTAATAAA'
                                        'AGAGTTACACCCCGGTTTTTAGAAGTCTCGACTTTAA'
                                        'GGAAGTGGGCCTACGGCGGAAGCCGT')

    testset = {
        'read2f start=13,mutations=0',
        'read8f start=8,mutations=0',
        'read10f start=34,mutations=0',
        'read13f start=49,mutations=1',
        'read15f start=54,mutations=1',
        'read16f start=13,mutations=1',
        'read22f start=5,mutations=0',
        'read35f start=25,mutations=0',
        'read37f start=9,mutations=0',
    }
    assert kmers['CCGGTTTTTAGAAGTCTCGACTTTAAGGA'] == testset
def test_gentrio_smoketest():
    """Smoke test: simulate a trio and spot-check two variants in the output.

    The simulated proband, mother and father genomes are written to in-memory
    buffers, parsed back, and searched for the reference and alternate
    windows of the first and third variants.
    """
    seqstream = kevlar.open(data_file('100kbx3.fa.gz'), 'r')
    refrseqs = kevlar.seqio.parse_seq_dict(seqstream)
    outstreams = [StringIO(), StringIO(), StringIO()]
    variants = list(kevlar.gentrio.gentrio(
        refrseqs, outstreams, ninh=2, ndenovo=1, seed=1985
    ))
    for variant in variants:
        print(variant.vcf, file=sys.stderr)

    # Rewind every buffer before reading the simulated genomes back in.
    for stream in outstreams:
        stream.seek(0)
    probandseqs, motherseqs, fatherseqs = [
        kevlar.seqio.parse_seq_dict(stream) for stream in outstreams
    ]

    # First variant: checked in the proband only.
    first = variants[0]
    for haplo in ('scaf1_haplo1', 'scaf1_haplo2'):
        print(probandseqs[haplo][first.position])
    assert first.genotypes[0] == '0/1'
    assert first.refrwindow in probandseqs['scaf1_haplo1']
    assert first.refrwindow not in probandseqs['scaf1_haplo2']
    assert first.window not in probandseqs['scaf1_haplo1']
    assert first.window in probandseqs['scaf1_haplo2']

    # Third variant: checked in all three samples.
    third = variants[2]
    for seqs in (probandseqs, motherseqs, fatherseqs):
        for haplo in ('scaf3_haplo1', 'scaf3_haplo2'):
            print(seqs[haplo][third.position])
    assert third.window in probandseqs['scaf3_haplo1']
    assert third.refrwindow in probandseqs['scaf3_haplo2']
    assert third.refrwindow in motherseqs['scaf3_haplo1']
    assert third.refrwindow in motherseqs['scaf3_haplo2']
    assert third.refrwindow in fatherseqs['scaf3_haplo1']
    assert third.window in fatherseqs['scaf3_haplo2']
def test_load_mutations_y():
    """Load a TSV mutation file and check each of its three mutations.

    Every expected sequence ID must be present with exactly one mutation,
    and that mutation must equal the expected ``Mutation`` record.
    """
    instream = kevlar.open(data_file('muts-y.tsv'), 'r')
    mutations = kevlar.mutate.load_mutations(instream, stderr)
    assert len(mutations) == 3

    # (sequence ID, the single mutation expected on that sequence)
    cases = [
        ('scaffold399',
         Mutation(seq='scaffold399', pos=685357, type='ins',
                  data='AGCTACCCCAGTGAGTCGGTAATGTGATC')),
        ('scaffold982',
         Mutation(seq='scaffold982', pos=108754, type='del', data='23')),
        ('scaffold1102',
         Mutation(seq='scaffold1102', pos=260686, type='snv', data='1')),
    ]
    for seqid, expected in cases:
        assert seqid in mutations
        assert len(mutations[seqid]) == 1
        assert mutations[seqid][0] == expected
def test_augfastx_reader_e1():
    """Parse the first record of example1.augfastq and check its contents.

    Verifies the record name, its sequence, and the sequence, k-mer size,
    offset and abundances of both annotated k-mers.
    """
    infilename = data_file('example1.augfastq')
    # Only the first record is needed; the context manager makes sure the
    # file handle is closed instead of being leaked.
    with open(infilename, 'r') as infile:
        record = next(kevlar.parse_augmented_fastx(infile))

    assert record.name == 'e1'
    assert record.sequence == (
        'TTAACTCTAGATTAGGGGCGTGACTTAATAAGGTGTGGGCCTAAGCGTCT'
    )
    assert len(record.annotations) == 2

    ikmer = record.annotations[0]
    assert record.ikmerseq(ikmer) == 'AGGGGCGTGACTTAATAAG'
    assert ikmer.ksize == 19
    assert ikmer.offset == 13
    assert ikmer.abund == (12, 15, 1, 1)

    ikmer = record.annotations[1]
    assert record.ikmerseq(ikmer) == 'GGGCGTGACTTAATAAGGT'
    assert ikmer.ksize == 19
    assert ikmer.offset == 15
    assert ikmer.abund == (20, 28, 0, 1)
def test_call_homopolymers_mixed_results():
    """Call and score variants for three partitions; check filter outcomes.

    Preliminary calls are made per partition from contigs and their target
    sequences, scored with ``simlike`` against proband, parent and reference
    sketches, and the calls whose filter string is 'PASS' or 'Homopolymer'
    are then inspected one by one.
    """
    def load_partitioned(stream):
        # Group a record stream by partition and load it as contigs.
        return kevlar.call.load_contigs(
            kevlar.parse_partitioned_reads(stream)
        )

    contigfile = data_file('homopolymer/12175-3parts.contigs.augfasta')
    contigs = load_partitioned(
        kevlar.parse_augmented_fastx(kevlar.open(contigfile, 'r'))
    )
    gdnafile = data_file('homopolymer/12175-3parts.targets.fasta')
    targets = load_partitioned(
        kevlar.reference.load_refr_cutouts(kevlar.open(gdnafile, 'r'))
    )

    prelimcalls = []
    for partid, contiglist in contigs.items():
        caller = kevlar.call.call(targets[partid], contiglist, partid=partid)
        prelimcalls.extend(caller)

    sketchfiles = [
        'homopolymer/12175-kid.sct',
        'homopolymer/12175-mom.sct',
        'homopolymer/12175-dad.sct',
        'homopolymer/12175-refr.sct',
    ]
    kid, mom, dad, refr = [
        kevlar.sketch.load(data_file(name)) for name in sketchfiles
    ]
    calls = list(kevlar.simlike.simlike(
        prelimcalls, kid, [mom, dad], refr,
        samplelabels=['Proband', 'Mother', 'Father'],
    ))
    assert len(calls) == 6
    for call in calls:
        print(call.vcf)

    selected = [
        call for call in calls
        if call.filterstr in ('PASS', 'Homopolymer')
    ]
    assert len(selected) == 3
    negative, borderline, positive = selected

    # negative control
    assert negative.position == 123651924
    assert negative.filterstr == 'PASS'
    assert negative._refr == 'TAA'
    assert negative._alt == 'T'

    # borderline
    assert borderline.position == 124641259
    assert borderline.filterstr == 'PASS'
    assert borderline._refr == 'TAAA'
    assert borderline._alt == 'T'

    # positive control
    assert positive.position == 128660727
    assert positive.filterstr == 'Homopolymer'
def test_augfastx_reader_e2():
    """Parse the first record of example2.augfastq and check its contents.

    Verifies the record name, its sequence, and the sequence, k-mer size,
    offset and abundances of both annotated k-mers.
    """
    infilename = data_file('example2.augfastq')
    # Only the first record is needed; the context manager makes sure the
    # file handle is closed instead of being leaked.
    with open(infilename, 'r') as infile:
        record = next(kevlar.parse_augmented_fastx(infile))

    assert record.name == 'ERR894724.125497791/1'
    assert record.sequence == (
        'TAGCCAGTTTGGGTAATTTTAATTGTAAAACTTTTTTTTCTTTTTTTTTGATTTTTTTTTTTCAAGCAG'
        'AAGACGGCATACGAGCTCTTTTCACGTGACTGGAGTTCAGACGTGTGCTCTTCCGAT'
    )
    assert len(record.annotations) == 2

    ikmer = record.annotations[0]
    assert record.ikmerseq(ikmer) == 'GGCATACGAGCTCTTTTCACGTGACTGGAGT'
    assert ikmer.ksize == 31
    assert ikmer.offset == 74
    assert ikmer.abund == (23, 0, 0)

    ikmer = record.annotations[1]
    assert record.ikmerseq(ikmer) == 'GCTCTTTTCACGTGACTGGAGTTCAGACGTG'
    assert ikmer.ksize == 31
    assert ikmer.offset == 83
    assert ikmer.abund == (23, 0, 0)
def test_get_seed_matches():
    """Look up six 51 bp seeds in a reference and check the reported hits.

    The seeds are written to a temporary Fasta file, searched against the
    reference, and the resulting mapping of seed sequence to a set of
    (sequence ID, position) pairs is compared with the expected four entries.
    """
    seedfasta = (
        '>seed0\nATCTGTTCTTGGCCAATAGAAAAAGCAAGGAGCCCTGAAAGACTCACAGTG\n'
        '>seed1\nAAAAGGAAATGTTAACAACAAAATCACACAGATAAACCATCACAAGATCTG\n'
        '>seed2\nGATTCTAGGAGCTTGTTACTGCTGCTGAAAAAGGAAATGTTAACAACAAAA\n'
        '>seed3\nAACCAATAGAGGTCCACAGAAGTATATATAATCTGTTCTTGGCCAATAGAA\n'
        '>seed4\nTTGTGTGTAAAAACCAATAGAGGTCCACAGAAGTATATATAATCTGTTCTT\n'
        '>seed5\nAAGATACTATAATATGTTTCCCTGAGCACACCCCTTCGAAAGAGCAGAATT\n'
    )
    expected = {
        'AACCAATAGAGGTCCACAGAAGTATATATAATCTGTTCTTGGCCAATAGAA':
            {('seq1', 284819)},
        'AAGATACTATAATATGTTTCCCTGAGCACACCCCTTCGAAAGAGCAGAATT':
            {('seq1', 284722)},
        'ATCTGTTCTTGGCCAATAGAAAAAGCAAGGAGCCCTGAAAGACTCACAGTG':
            {('seq1', 284849)},
        'AAGAACAGATTATATATACTTCTGTGGACCTCTATTGGTTTTTACACACAA':
            {('seq1', 284808)},
    }
    refrfile = data_file('fiveparts-refr.fa.gz')
    with NamedTemporaryFile(suffix='.fa', mode='w') as seedfile:
        # Flush so the seeds are on disk before the file is read by name.
        print(seedfasta, file=seedfile, flush=True)
        observed = get_seed_matches(seedfile.name, refrfile, seedsize=51)
    print(observed)
    assert observed == expected
def test_graph_init():
    """Build a read graph from augmented reads and check its structure.

    Covers the node count, the neighbors and edge attributes of selected
    reads, the number of connected components and partitions, and the result
    of merging one overlapping pair of reads.
    """
    reads = kevlar.parse_augmented_fastx(
        kevlar.open(data_file('var1.reads.augfastq'), 'r')
    )
    graph = kevlar.ReadGraph()
    graph.load(reads)
    graph.populate_edges(strict=True)

    # All 10 reads become nodes, including read16f, which (per the original
    # author's note) has no valid connections because of an error.
    assert len(graph.nodes()) == 10

    # read23f is expected to have 6 neighbors; the original note attributes
    # the excluded reads (read13f and read15f) to errors.
    r23 = 'read23f start=67,mutations=0'
    assert len(graph[r23]) == 6

    # Spot-check the attributes stored on one edge.
    r35 = 'read35f start=25,mutations=0'
    edge = graph[r23][r35]
    assert edge['offset'] == 42
    assert edge['overlap'] == 58

    # Two connected components, but only one is reported as a partition.
    # NOTE(review): presumably the second component is the unconnected
    # read16f -- confirm against ReadGraph.partitions().
    assert len(list(connected_components(graph))) == 2
    assert len(list(graph.partitions())) == 1

    r8 = 'read8f start=8,mutations=0'
    r37 = 'read37f start=9,mutations=0'
    assert graph[r37][r8]['offset'] == 1
    assert graph[r37][r8]['overlap'] == 99

    # Merge the two reads using the offset and overlap asserted just above.
    pair = OverlappingReadPair(
        tail=graph.get_record(r8),
        head=graph.get_record(r37),
        offset=1,
        overlap=99,
        sameorient=True,
        swapped=False,
    )
    assert merge_pair(pair) == ('CACTGTCCTTACAGGTGGATAGTCGCTTTGTAATAAAAGAGTTAC'
                                'ACCCCGGTTTTTAGAAGTCTCGACTTTAAGGAAGTGGGCCTACGG'
                                'CGGAAGCCGTC')
def test_augfastx_reader_withmates():
    """Read four augmented records and check their annotations and mates.

    After the fourth record the parser must be exhausted.
    """
    reader = kevlar.parse_augmented_fastx(
        kevlar.open(data_file('seqs-mates.augfastq'), 'r')
    )

    first = next(reader)
    assert len(first.annotations) == 5
    assert len(first.mates) == 1
    assert first.mates[0].startswith('CTGATAAGCAACTTCAGCAAA')

    second = next(reader)
    assert len(second.annotations) == 4
    assert len(second.mates) == 1
    assert second.mates[0].startswith('ATTAGAAAAAAAAAGTGCATT')

    # The third record has annotations but no mate sequences.
    third = next(reader)
    assert len(third.annotations) == 21
    assert len(third.mates) == 0

    fourth = next(reader)
    assert len(fourth.annotations) == 2
    assert fourth.mates[0].startswith('CAGATGTGTCTTGTGGGCAGT')

    # No fifth record: advancing the parser again must raise StopIteration.
    with pytest.raises(StopIteration):
        next(reader)
def test_simlike_cli(fmtstr, sampleargs, minitrio, capsys):
    """Run ``simlike`` via the CLI parser and check the captured output.

    The four sketches from the ``minitrio`` fixture are saved to temporary
    files, passed to the command together with ``sampleargs``, and stdout is
    searched for ``fmtstr`` and for the expected score annotations.
    """
    kid, mom, dad, ref = minitrio
    with NamedTemporaryFile(suffix='.ct') as kidct, \
            NamedTemporaryFile(suffix='.ct') as momct, \
            NamedTemporaryFile(suffix='.ct') as dadct, \
            NamedTemporaryFile(suffix='.sct') as refrsct:
        # Write each sketch to its matching temporary file.
        pairs = ((kid, kidct), (mom, momct), (dad, dadct), (ref, refrsct))
        for sketch, tmpfile in pairs:
            sketch.save(tmpfile.name)

        arglist = [
            'simlike', '--case', kidct.name,
            '--controls', momct.name, dadct.name,
        ]
        arglist.extend(sampleargs)
        arglist += ['--refr', refrsct.name, data_file('minitrio/calls.vcf')]
        print(arglist)
        kevlar.simlike.main(kevlar.cli.parser().parse_args(arglist))

    out, err = capsys.readouterr()
    assert fmtstr in out
    assert 'LIKESCORE=214.103' in out
    assert 'LLDN=-221.908;LLFP=-785.714;LLIH=-436.011' in out
def test_load_sample_seqfile(count, smallcount, extension, shortext):
    """Check where ``load_sample_seqfile`` writes its output sketch.

    Two scenarios are covered:

    * the output name has no suffix: a file named ``outfile + extension``
      must exist afterwards;
    * the output name already ends with ``shortext``: the file must exist
      under exactly the requested name and no ``outfile + extension`` file
      may exist.

    In both scenarios the resulting sketch must contain a k-mer from the
    input and must not contain an unrelated k-mer.
    """
    infile = data_file('bogus-genome/refr.fa')

    with NamedTemporaryFile() as outfile:
        derived = outfile.name + extension
        try:
            sketch = kevlar.count.load_sample_seqfile(
                [infile], 21, 1e6, count=count, smallcount=smallcount,
                outfile=outfile.name,
            )
            assert sketch.get('GAATCGGTGGCTGGTTGCCGT') > 0
            assert sketch.get('GATTACAGATTACAGATTACA') == 0
            assert os.path.exists(derived)
        finally:
            # The derived file is not managed by NamedTemporaryFile, so
            # remove it explicitly to avoid littering the temp directory.
            if derived != outfile.name and os.path.exists(derived):
                os.remove(derived)

    with NamedTemporaryFile(suffix=shortext) as outfile:
        sketch = kevlar.count.load_sample_seqfile(
            [infile], 21, 1e6, count=count, smallcount=smallcount,
            outfile=outfile.name,
        )
        assert sketch.get('GAATCGGTGGCTGGTTGCCGT') > 0
        assert sketch.get('GATTACAGATTACAGATTACA') == 0
        assert not os.path.exists(outfile.name + extension)
        assert os.path.exists(outfile.name)
def test_gen_muts():
    """Generate mutations with a fixed seed and check their alleles.

    The reference and alternate alleles of every generated mutation are
    compared against the expected lists, and the first mutation is expected
    to have no genotypes assigned.
    """
    sequences = kevlar.seqio.parse_seq_dict(
        kevlar.open(data_file('100kbx3.fa.gz'), 'r')
    )
    weights = {'snv': 0.7, 'ins': 0.15, 'del': 0.15}
    mutations = list(
        kevlar.gentrio.generate_mutations(sequences, weights=weights, rng=42)
    )

    refrs = [mutation._refr for mutation in mutations]
    alts = [mutation._alt for mutation in mutations]
    print('DEBUG refrs', refrs, file=sys.stderr)
    print('DEBUG alts', alts, file=sys.stderr)

    # Adjacent string literals without a comma between them are concatenated
    # into a single allele; the line breaks below are purely cosmetic.
    testrefrs = [
        'ATTACGACAGAGTTTGTAGGTGTACGAGCCCAATCCAACGTCGGCCATCCGAGACTCTTTAAGTACCCG'
        'GCCATACACTGTGCGCCGAAAAATCAGCGATCATACCACCGTTTGAAGCTTCACGGCCGAGTGTTCTGG'
        'CGATTCGT', 'TATATGAGCTCTCGACGGAATTTACGAGCGCGTATAAGCCTTTTGCAGTTACAACAT'
        'T', 'A', 'GAGTTGGGTATAATAACGTAGTCGGGGGAGCAGATGGAGCAGTGCGACCGCCG', 'C',
        'G', 'A', 'T', 'G', 'C'
    ]
    testalts = [
        'A', 'T', 'C', 'G', 'G', 'C', 'ATGCGCAGAGGATATGTTAGTGACTATTGAAGGTGGAAC'
        'TTGCAAGGGAATGGGTTCACCCTTGCGATTTCGGGGCTACTAAGCACATAGGCTAACGGCAGATGGAGT'
        'AAGCTACGCCAAAACTAATTAGCGTGCTCGGGGCGTAGGCGGGACCCCGGAAATGATAACCAGGATCAA'
        'ACATCCCTTCTTCGACCGAAGGCTGTTGCGCACGTATGACAGCTCTGTGACGCTCTAGATTCAGCTTTG'
        'AAGTCGTGACACGTTGCGATACCTTGACCTGGATGAAACTTCGCCGGGACTTCCCTGACAA', 'TTTG'
        'TTCCCATGACTTACGCTACACACGAGCCAGCTAGCTGCGAAAACCTAAGAGCCTCCG', 'A', 'CTA'
        'GCGAAACACGGAATAACATCAAATGACAGCTATCTCCCAAGATGGTGGGTAGGTTTATAGTAGAGTGGG'
        'CGGCTACATTCGTCTCCCCGGCCCGCAGCCCGCGCACTATAGCAAAATGTTAATGCAGGTTCTGCCCTC'
        'CATATAGATCACACGCTAAGTCAAAATACGACCCTGTGACCAGCCGCAATCACTTGCTGAATTCCGCAC'
        'CTTGCTCCAGCGACTATCTTCTTCCTTAAGCCCCTGGT'
    ]
    assert refrs == testrefrs
    assert alts == testalts
    assert mutations[0].genotypes is None
def test_no_refr():
    """Dump a BAM file without supplying a reference; expect 8 records."""
    bamstream = kevlar.open(data_file('bogus-genome/reads.bam'), 'r')
    records = list(kevlar.dump.dump(bamstream))
    assert len(records) == 8
filtermem=1e7, filterfpr=0.005, logstream=sys.stderr) variants = [v for v in workflow] variants = sorted(variants, key=lambda v: v._pos) startpos = [v._pos + 1 for v in variants] teststartpos = [ 4073, 185752, 226611, 636699, 834646, 901124, 1175768, 1527139, 1631013, 2265795 ] assert len(variants) == 10 assert startpos == teststartpos def test_simplex_trio1(capsys): case = data_file('trio1/case1.fq') controls = data_glob('trio1/ctrl[1,2].fq') refr = data_file('bogus-genome/refr.fa') arglist = [ 'simplex', '--case', case, '--control', controls[0], '--control', controls[1], '--case-min', '6', '--ctrl-max', '0', '--novel-memory', '1M', '--novel-fpr', '0.2', '--filter-memory', '50K', '--mask-files', refr, '--mask-memory', '1M', '--filter-fpr', '0.005', '--ksize', '21', refr ] args = kevlar.cli.parser().parse_args(arglist) kevlar.simplex.main(args) out, err = capsys.readouterr() testvcf = '\t'.join([ 'bogus-genome-chr1', '3567', '.', 'A', 'C', '.', 'PASS', 'RW=GAAGGGCAC'
def test_sketch_load_badfilename():
    """Loading a file with an unrecognized extension must raise an error.

    The exception message is expected to mention the offending file name.
    """
    infile = data_file('test.notasketchtype')
    with pytest.raises(kevlar.sketch.KevlarSketchTypeError) as kste:
        kevlar.sketch.load(infile)
    # Inspect the exception itself (``.value``) rather than the ExceptionInfo
    # wrapper, whose string form is a repr and not the bare message.
    assert ('sketch type from filename ' + infile) in str(kste.value)
def test_sketch_load(filename, testkmer):
    """Load a sketch from ``filename`` and probe two k-mers.

    ``testkmer`` must have a positive count, while a repeated 'GATTACA'
    sequence must have a count of zero.
    """
    sketch = kevlar.sketch.load(data_file(filename))
    absent = 'GATTACA' * 3
    assert sketch.get(testkmer) > 0
    assert sketch.get(absent) == 0
def test_mutate_bogus():
    """A mutation file with an unknown variant type must raise ValueError."""
    instream = kevlar.open(data_file('muts-w.txt'), 'r')
    with pytest.raises(ValueError) as ve:
        kevlar.mutate.load_mutations(instream, stderr)
    # Check the exception's own message via ``.value``; the string form of
    # the ExceptionInfo wrapper is a repr, not the message.
    assert 'invalid variant type "slippage"' in str(ve.value)
def test_load_mutations_z():
    """A mutation file that cannot be parsed must raise ValueError."""
    instream = kevlar.open(data_file('muts-z.csv'), 'r')
    with pytest.raises(ValueError) as ve:
        kevlar.mutate.load_mutations(instream, stderr)
    # Check the exception's own message via ``.value``; the string form of
    # the ExceptionInfo wrapper is a repr, not the message.
    assert 'error parsing mutation' in str(ve.value)
def test_reader_format_mismatch(filename, errormsg):
    """Reading ``filename`` as VCF must fail with a matching error message.

    The reader is fully consumed inside the ``raises`` block, and the
    ``VariantAnnotationError`` message must match ``errormsg``.
    """
    reader = kevlar.vcf.VCFReader(kevlar.open(data_file(filename), 'r'))
    with pytest.raises(kevlar.vcf.VariantAnnotationError, match=errormsg):
        list(reader)
def test_assembly_edgeless(cc):
    """Assembling the reads of edgeless component ``cc`` yields no contigs."""
    path = data_file('edgeless/cc{:d}.afq.gz'.format(cc))
    reads = list(kevlar.parse_augmented_fastx(kevlar.open(path, 'r')))
    contigs = list(kevlar.assembly.fml_asm(reads))
    assert not contigs
record.annotate('GATGAGGATGAGGATGAGGATGAGG', 8, (11, 1, 0)) kevlar.print_augmented_fastx(record, stdout) out, err = capsys.readouterr() assert read in out def test_iter_read_multi_file(): infiles = kevlar.tests.data_glob('bogus-genome/mask-chr[1,2].fa') print(infiles) records = [r for r in kevlar.multi_file_iter_khmer(infiles)] assert len(records) == 4 def test_novel_abund_screen(capsys): case = data_file('screen-case.fa') ctrl = data_file('screen-ctrl.fa') arglist = [ 'novel', '--ksize', '25', '--ctrl-max', '1', '--case-min', '8', '--case', case, '--control', ctrl, '--abund-screen', '3' ] args = kevlar.cli.parser().parse_args(arglist) kevlar.novel.main(args) out, err = capsys.readouterr() assert '>seq_error' not in out def test_skip_until(capsys): readname = 'bogus-genome-chr1_115_449_0:0:0_0:0:0_1f4/1' case = data_file('trio1/case1.fq')
# ----------------------------------------------------------------------------- # Copyright (c) 2018 The Regents of the University of California # # This file is part of kevlar (http://github.com/dib-lab/kevlar) and is # licensed under the MIT license: see LICENSE. # ----------------------------------------------------------------------------- import sys import kevlar from kevlar.cigar import AlignmentBlock, AlignmentTokenizer from kevlar.tests import data_file import pytest @pytest.mark.parametrize('contig,gdna', [ (data_file('cigar/a.contig.fa'), data_file('cigar/a.gdna.fa')), (data_file('cigar/b.contig.fa'), data_file('cigar/b.gdna.fa')), (data_file('cigar/c.contig.fa'), data_file('cigar/c.gdna.fa')), (data_file('phony-snv-01.contig.fa'), data_file('phony-snv-01.gdna.fa')), (data_file('phony-snv-02.contig.fa'), data_file('phony-snv-02.gdna.fa')), ]) def test_blocks(contig, gdna): query = next(kevlar.parse_augmented_fastx(kevlar.open(contig, 'r'))) target = next(kevlar.parse_augmented_fastx(kevlar.open(gdna, 'r'))) cigar, score = kevlar.align(target.sequence, query.sequence) tok = AlignmentTokenizer(query.sequence, target.sequence, cigar) for block in tok.blocks: assert block.type in ('M', 'D', 'I') if block.type in ('M', 'D'): assert len(block.target) == block.length else:
def test_sketch_load_badfilename():
    """Loading a file with an unrecognized extension must raise an error.

    The exception message is expected to mention the offending file name.
    """
    import re

    infile = data_file('test.notasketchtype')
    # ``match`` is applied as a regular expression, so the file path must be
    # escaped: characters such as '.', '+' or a backslash in the path would
    # otherwise be read as regex syntax.
    errormsg = r'sketch type from filename ' + re.escape(infile)
    with pytest.raises(kevlar.sketch.KevlarSketchTypeError, match=errormsg):
        kevlar.sketch.load(infile)
badkmers = ['CAGGCCAGGGATCGCCGTG'] goodkmers = [ 'AGGGGCGTGACTTAATAAG', 'GGGCGTGACTTAATAAGGT', 'TAGGGGCGTGACTTAATAA', 'GGGGCGTGACTTAATAAGG', ] for record in validated: for kmer in record.annotations: seq = record.ikmerseq(kmer) assert seq not in badkmers and kevlar.revcom(seq) not in badkmers assert seq in goodkmers or kevlar.revcom(seq) in goodkmers @pytest.mark.parametrize('mask,nkmers,nkmerinstances', [ (None, 424, 5782), (bogusrefr(), 424, 5782), (kevlar.sketch.load(data_file('bogus-genome/mask.nt')), 13, 171) ]) def test_ctrl3(mask, nkmers, nkmerinstances): readfile = data_file('trio1/novel_3_1,2.txt') ikmers = defaultdict(int) for read in kevlar.filter.filter(readfile, memory=1e7, mask=mask): for ikmer in read.annotations: kmerseq = kevlar.revcommin(read.ikmerseq(ikmer)) ikmers[kmerseq] += 1 assert len(ikmers) == nkmers assert sum(ikmers.values()) == nkmerinstances def test_filter_abundfilt(): readfile = data_file('worm.augfasta') ikmers = defaultdict(int)
def test_ikmer_filter_cli():
    """Run ``alac`` with ``--min-ikmers 3`` through the CLI parser.

    There are no assertions: the test passes if the command completes
    without raising.
    """
    arglist = [
        'alac', '--ksize', '31', '--min-ikmers', '3',
        data_file('min_ikmers_filt.augfastq.gz'),
        data_file('localize-refr.fa'),
    ]
    kevlar.alac.main(kevlar.cli.parser().parse_args(arglist))
def test_assemble_no_edges(capsys):
    """Running ``assemble`` on the no-edges input must print nothing."""
    readfile = data_file('asmbl-no-edges.augfastq.gz')
    args = kevlar.cli.parser().parse_args(['assemble', readfile])
    kevlar.assemble.main(args)
    out, _ = capsys.readouterr()
    assert out == ''
def test_bwa_failure():
    """A BWA command that names a nonexistent file must raise an error."""
    cmd = ['bwa', 'mem', data_file('not-a-real-file.fa'), '-']
    with pytest.raises(KevlarBWAError):
        # Both the call and the consumption of its result stay inside the
        # block, so an error raised at either point is caught.
        list(kevlar.reference.bwa_align(cmd, '>seq1\nACGT'))
def test_assemble_no_edges(capsys):
    """Running ``assemble`` on the no-edges input must abort with an error.

    The command is expected to raise ``KevlarEdgelessGraphError`` with a
    message saying that there is nothing to be done.
    """
    cliargs = ['assemble', data_file('asmbl-no-edges.augfastq.gz')]
    args = kevlar.cli.parser().parse_args(cliargs)
    with pytest.raises(kevlar.assemble.KevlarEdgelessGraphError) as ege:
        kevlar.assemble.main(args)
    # Check the exception's own message via ``.value``; the string form of
    # the ExceptionInfo wrapper is a repr, not the message.
    assert 'nothing to be done, aborting' in str(ege.value)