def test_kraken_fifo(kraken_db):
    """Run kraken on two BAM inputs that are streamed through named pipes.

    Two ``cat`` feeder processes write the test BAM files into the FIFO paths
    yielded by ``util.file.fifo`` while the kraken entry point is given those
    FIFO paths as its inputs. Afterwards every reads and report output is
    checked to be non-empty.

    Args:
        kraken_db: kraken database path, passed as the first positional
            argument of the kraken command line.
    """
    test_root = util.file.get_test_input_path()
    in_bams = [
        os.path.join(test_root, d, 'test-reads.bam')
        for d in ('TestMetagenomicsSimple', 'TestMetagenomicsViralMix')
    ]
    out_reports = [util.file.mkstempfname('.out_{}.report.txt'.format(i)) for i in (1, 2)]
    out_reads = [util.file.mkstempfname('.out_{}.reads.txt.gz'.format(i)) for i in (1, 2)]

    with util.file.fifo(names=('inbam1.bam', 'inbam2.bam')) as (inbam1, inbam2):
        # Opening a FIFO write-only blocks until some process opens it for
        # reading, and a reader only sees EOF once *every* writer has closed.
        # The feeders therefore open the FIFO themselves (via the shell
        # redirect inside the child) so this process never blocks on, or
        # holds open, the write end.
        # NOTE(review): assumes util.file.fifo yields paths of ordinary named
        # pipes -- confirm against its implementation.
        feeders = [
            subprocess.Popen(['sh', '-c', 'exec cat "$1" > "$2"', 'sh', src, pipe])
            for src, pipe in zip(in_bams, (inbam1, inbam2))
        ]
        try:
            cmd = [kraken_db, inbam1, inbam2] \
                + ['--outReports'] + out_reports \
                + ['--outReads'] + out_reads
            parser = metagenomics.parser_kraken(argparse.ArgumentParser())
            args = parser.parse_args(cmd)
            args.func_main(args)

            print("waiting for kraken to drain fifo for first bam file")
            feeders[0].wait()
            print("waiting for kraken to drain fifo for second bam file")
            feeders[1].wait()
        finally:
            # If kraken raised before draining the pipes the feeders are still
            # blocked; reap them so the test fails instead of leaking children.
            for feeder in feeders:
                if feeder.poll() is None:
                    feeder.kill()
                    feeder.wait()

    # just check for non-empty outputs
    for outfile in out_reads:
        with util.file.open_or_gzopen(outfile, 'r') as inf:
            assert len(inf.read()) > 0
    for outfile in out_reports:
        with util.file.open_or_gzopen(outfile) as inf:
            assert len(inf.read()) > 0
def test_kraken(kraken_db, input_bam):
    """Run kraken on ``input_bam`` and sanity-check its reads and report outputs.

    Both outputs must be non-empty. When the database is the
    ``TestMetagenomicsSimple`` one, the report must additionally contain a
    'Zaire ebolavirus' row above 90 percent and no 'Tai Forest' row.

    Args:
        kraken_db: kraken database path; its name selects the extra checks.
        input_bam: BAM file handed to kraken as input.
    """
    out_report = util.file.mkstempfname('.report')
    out_reads = util.file.mkstempfname('.reads.gz')
    cmd = [input_bam, kraken_db, '--outReport', out_report, '--outReads', out_reads]
    parser = metagenomics.parser_kraken(argparse.ArgumentParser())
    args = parser.parse_args(cmd)
    args.func_main(args)

    with util.file.open_or_gzopen(out_reads, 'r') as inf:
        assert len(inf.read()) > 0

    with util.file.open_or_gzopen(out_report) as inf:
        # Report rows are tab-separated; splitting on any whitespace would
        # break multi-word taxon names such as 'Zaire ebolavirus' apart.
        report_lines = [x.strip().split('\t') for x in inf.readlines()]
    # Drop blank lines so every remaining row has real fields.
    report_lines = [fields for fields in report_lines if fields != ['']]

    assert os.path.getsize(out_report) > 0

    # The guard was previously misspelled ('TestMetaegenomicsSimple'), so the
    # checks below never ran.
    if 'TestMetagenomicsSimple' in kraken_db:
        zaire_found = False
        tai_found = False
        for line in report_lines:
            # Strip the name field in case it carries indentation
            # (presumably taxonomy-depth padding -- TODO confirm).
            taxon_name = line[-1].strip()
            if taxon_name == 'Zaire ebolavirus' and float(line[0]) > 90:
                zaire_found = True
            elif 'Tai Forest' in taxon_name:
                tai_found = True
        assert zaire_found
        assert not tai_found
def test_kraken(kraken_db, input_bam):
    """Run the kraken entry point and require non-empty report and reads files.

    Args:
        kraken_db: kraken database path given on the command line.
        input_bam: BAM file given as the kraken input.
    """
    report_path = util.file.mkstempfname('.report')
    reads_path = util.file.mkstempfname('.reads.gz')

    kraken_parser = metagenomics.parser_kraken(argparse.ArgumentParser())
    args = kraken_parser.parse_args(
        [input_bam, kraken_db, '--outReport', report_path, '--outReads', reads_path]
    )
    args.func_main(args)

    # Both outputs must have been written with some content.
    assert os.path.getsize(report_path) > 0
    assert os.path.getsize(reads_path) > 0
def test_kraken(kraken_db, input_bam):
    """Smoke-test kraken: run it once and check that both outputs are non-empty.

    Args:
        kraken_db: kraken database path given on the command line.
        input_bam: BAM file given as the kraken input.
    """
    out_report = util.file.mkstempfname('.report')
    out_reads = util.file.mkstempfname('.reads.gz')

    positional = [input_bam, kraken_db]
    options = ['--outReport', out_report, '--outReads', out_reads]
    args = metagenomics.parser_kraken(argparse.ArgumentParser()).parse_args(positional + options)
    args.func_main(args)

    # Report first, then reads: each must exist and have a non-zero size.
    for produced in (out_report, out_reads):
        assert os.path.getsize(produced) > 0
def test_kraken_krona(tmpdir, kraken_db, krona_db, input_bam):
    """Run kraken, then feed its reads output to the krona entry point.

    The test makes no explicit assertions; it passes when both entry points
    return without raising.

    Args:
        tmpdir: not referenced in the body (presumably the pytest fixture).
        kraken_db: kraken database path.
        krona_db: krona database path.
        input_bam: BAM file given as the kraken input.
    """
    # Stage 1: kraken classification.
    report_path = util.file.mkstempfname('.report')
    reads_path = util.file.mkstempfname('.reads.gz')
    kraken_parser = metagenomics.parser_kraken(argparse.ArgumentParser())
    kraken_args = kraken_parser.parse_args(
        [input_bam, kraken_db, '--outReport', report_path, '--outReads', reads_path]
    )
    kraken_args.func_main(kraken_args)

    # Stage 2: krona rendering of the kraken reads output.
    html_path = util.file.mkstempfname('.krona.html')
    krona_parser = metagenomics.parser_krona(argparse.ArgumentParser())
    krona_args = krona_parser.parse_args([reads_path, krona_db, html_path])
    krona_args.func_main(krona_args)
def test_kraken_krona(kraken_db, krona_db, input_bam):
    """Chain kraken and krona: kraken's reads output becomes krona's input.

    There are no explicit assertions; success means neither entry point raised.

    Args:
        kraken_db: kraken database path.
        krona_db: krona database path.
        input_bam: BAM file given as the kraken input.
    """
    out_report = util.file.mkstempfname('.report')
    out_reads = util.file.mkstempfname('.reads.gz')

    kraken_cmd = [input_bam, kraken_db]
    kraken_cmd += ['--outReport', out_report]
    kraken_cmd += ['--outReads', out_reads]
    args = metagenomics.parser_kraken(argparse.ArgumentParser()).parse_args(kraken_cmd)
    args.func_main(args)

    out_html = util.file.mkstempfname('.krona.html')
    krona_cmd = [out_reads, krona_db, out_html]
    args = metagenomics.parser_krona(argparse.ArgumentParser()).parse_args(krona_cmd)
    args.func_main(args)
def test_kraken(kraken_db, input_bam): out_report = util.file.mkstempfname('.report') out_reads = util.file.mkstempfname('.reads.gz') cmd = [kraken_db, input_bam, '--outReports', out_report, '--outReads', out_reads] parser = metagenomics.parser_kraken(argparse.ArgumentParser()) args = parser.parse_args(cmd) args.func_main(args) with util.file.open_or_gzopen(out_reads, 'r') as inf: assert len(inf.read()) > 0 with util.file.open_or_gzopen(out_report) as inf: report_lines = [x.strip().split() for x in inf.readlines()] assert os.path.getsize(out_report) > 0 '''
def kraken_db(request, tmpdir_module, kraken, db_type):
    """Build a kraken database for ``db_type`` and return its path.

    The library and taxonomy inputs are taken from
    ``<test input path>/<db_type>/db`` and the database is written to
    ``<tmpdir_module>/kraken_db_<db_type>``.

    Args:
        request: not referenced in the body (presumably the pytest request
            fixture).
        tmpdir_module: directory under which the database is created.
        kraken: not referenced in the body (presumably a fixture requested
            for its side effects -- confirm).
        db_type: name of the test-input subdirectory holding the ``db`` folder.

    Returns:
        Path of the freshly built kraken database.
    """
    data_dir = join(util.file.get_test_input_path(), db_type)
    db_dir = join(data_dir, 'db')
    db = os.path.join(tmpdir_module, 'kraken_db_{}'.format(db_type))

    cmd = [
        db,
        '--library', join(db_dir, 'library'),
        '--taxonomy', join(db_dir, 'taxonomy'),
        '--subsetTaxonomy',
        '--minimizerLen', '10',
        '--clean',
    ]
    # Only the build parser is needed. The previous version also built an
    # unused classification parser and parsed the command line twice.
    parser = metagenomics.parser_kraken_build(argparse.ArgumentParser())
    args = parser.parse_args(cmd)
    args.func_main(args)
    return db
def test_kraken_on_empty(kraken_db, input_bam):
    """Run kraken on an empty BAM and check the degenerate outputs.

    Only runs against the ``TestMetagenomicsViralMix`` database; for any other
    database the test returns immediately. The reads output must be empty and
    the report must hold exactly one tab-separated 'unclassified' row.

    Args:
        kraken_db: kraken database path; its name gates the test.
        input_bam: ignored -- the test always uses ``empty.bam`` from the
            test input directory instead.
    """
    if 'TestMetagenomicsViralMix' not in kraken_db:
        return

    empty_bam = os.path.join(util.file.get_test_input_path(), 'empty.bam')
    report_path = util.file.mkstempfname('.report')
    reads_path = util.file.mkstempfname('.reads.gz')

    argv = [empty_bam, kraken_db, '--outReport', report_path, '--outReads', reads_path]
    args = metagenomics.parser_kraken(argparse.ArgumentParser()).parse_args(argv)
    args.func_main(args)

    with util.file.open_or_gzopen(reads_path, 'r') as reads_handle:
        assert len(reads_handle.read()) == 0

    with open(report_path, 'rt') as report_handle:
        report_rows = report_handle.readlines()
    assert len(report_rows) == 1

    only_row = report_rows[0].rstrip('\n').split('\t')
    assert only_row == ['100.00', '0', '0', 'U', '0', 'unclassified']
def test_kraken_multi(kraken_db):
    """Run kraken once on two BAM inputs and check all four outputs.

    Args:
        kraken_db: kraken database path (first positional argument).
    """
    input_root = util.file.get_test_input_path()
    in_bams = [
        os.path.join(input_root, dataset, 'test-reads.bam')
        for dataset in ('TestMetagenomicsSimple', 'TestMetagenomicsViralMix')
    ]
    out_reports = [util.file.mkstempfname('.out_{}.report.txt'.format(idx)) for idx in (1, 2)]
    out_reads = [util.file.mkstempfname('.out_{}.reads.txt.gz'.format(idx)) for idx in (1, 2)]

    argv = [kraken_db]
    argv.extend(in_bams)
    argv.append('--outReports')
    argv.extend(out_reports)
    argv.append('--outReads')
    argv.extend(out_reads)

    args = metagenomics.parser_kraken(argparse.ArgumentParser()).parse_args(argv)
    args.func_main(args)

    # just check for non-empty outputs
    for reads_path in out_reads:
        with util.file.open_or_gzopen(reads_path, 'r') as handle:
            assert len(handle.read()) > 0
    for report_path in out_reports:
        with util.file.open_or_gzopen(report_path) as handle:
            assert len(handle.read()) > 0