def test_dory_multifasta_query(location):
    """Run the multifasta index/query/extract scripts on the dory data.

    Builds the k-mer index, runs index_cdbg_by_multifasta and
    index_cdbg_by_minhash, then query_multifasta_by_sig and
    extract_cdbg_by_multifasta. Passes when both output CSV files exist.

    ``location`` is not used in the body; presumably a fixture that prepares
    the working directory -- confirm against its definition.
    """
    copy_dory_head()
    copy_dory_catlas()
    copy_dory_sig()

    # make k-mer search index - FIXTURE
    index_contigs_by_kmer.main('-k 21 dory_k21_r1'.split())

    # index by multifasta
    os.mkdir('dory_k21_r1_multifasta')
    multifasta_cmd = 'dory_k21_r1 dory_k21_r1_multifasta/multifasta.pickle --query dory-head.fa -k 21'
    index_cdbg_by_multifasta.main(multifasta_cmd.split())

    # index by minhash
    minhash_cmd = '-k 21 --scaled 100 dory_k21_r1/contigs.fa.gz dory_k21_r1_multifasta/hashval.pickle'
    index_cdbg_by_minhash.main(minhash_cmd.split())

    # query the multifasta index with the signature
    query_cmd = '--hashvals dory_k21_r1_multifasta/hashval.pickle --multi-idx dory_k21_r1_multifasta/multifasta.pickle --query-sig dory-subset.fq.sig --output dory_k21_r1_multifasta/query-results.csv -k 21 --scaled 100'
    query_multifasta_by_sig.main(query_cmd.split())

    # extract cDBG nodes per record
    extract_cmd = '--multi-idx dory_k21_r1_multifasta/multifasta.pickle --output dory_k21_r1_multifasta/multifasta.cdbg_by_record.csv --info-csv dory_k21_r1/contigs.fa.gz.info.csv'
    extract_cdbg_by_multifasta.main(extract_cmd.split())

    # both CSV outputs must have been written
    expected_outputs = (
        'dory_k21_r1_multifasta/multifasta.cdbg_by_record.csv',
        'dory_k21_r1_multifasta/query-results.csv',
    )
    for csv_path in expected_outputs:
        assert os.path.exists(csv_path)
def test_dory_query_workflow(location):
    """Run bcalm-to-gxt conversion, catlas build, indexing and a query.

    After the search, checks that the expected files exist under
    ``dory_k21_r1_search_oh0/`` and that ``results.csv`` has exactly two
    lines, the last of which matches the expected record.

    ``location`` is not used in the body; presumably a fixture that prepares
    the working directory -- confirm against its definition.
    """
    from spacegraphcats.cdbg import bcalm_to_gxt

    copy_dory_head()
    copy_dory_subset()

    # make the output directory
    try:
        os.mkdir('dory_k21_r1')
    except FileExistsError:
        pass

    # convert the bcalm file to gxt
    gxt_args = [
        '-k', '21', '-P',
        relative_file('dory/bcalm.dory.k21.unitigs.fa'),
        'dory_k21_r1/cdbg.gxt',
        'dory_k21_r1/contigs.fa.gz',
    ]
    bcalm_to_gxt.main(gxt_args)

    # build catlas
    catlas_args = pytest_utils.Args()
    catlas_args.no_checkpoint = True
    catlas_args.level = 0
    catlas_args.radius = 1
    catlas_args.project = 'dory_k21_r1'
    print('** running catlas')
    catlas.main(catlas_args)

    # make k-mer search index
    index_args = '-k 21 dory_k21_r1'.split()
    print('** running index_contigs_by_kmer')
    index_contigs_by_kmer.main(index_args)

    # do search!!
    search_args = 'dory_k21_r1 dory_k21_r1_search_oh0 --query dory-head.fa -k 21'.split()
    try:
        query_by_sequence.main(search_args)
    except SystemExit as exc:
        # a zero exit code is acceptable; anything else fails the test
        assert exc.code == 0, str(exc)

    # check output!
    output_path = 'dory_k21_r1_search_oh0/'
    expected_files = (
        'command.txt',
        'dory-head.fa.frontier.txt.gz',
        'dory-head.fa.cdbg_ids.txt.gz',
        'dory-head.fa.response.txt',
        'dory-head.fa.contigs.sig',
        'results.csv',
    )
    for filename in expected_files:
        assert os.path.exists(output_path + filename)

    with open(output_path + 'results.csv') as results_fp:
        rows = results_fp.readlines()

    assert len(rows) == 2
    final_row = rows[-1].strip()
    assert final_row == 'dory-head.fa,1.0,1.0,1671,2,21,1631,1.0,0.0,0.0,dory_k21_r1'
def test_dory_extract_unassembled_nodes(location):
    """Run extract_unassembled_nodes on the dory catlas.

    The test only checks that the two script entry points complete without
    raising; it makes no assertion about the output.

    ``location`` is not used in the body; presumably a fixture that prepares
    the working directory -- confirm against its definition.
    """
    copy_dory_catlas()
    copy_dory_head()

    # make k-mer search index - FIXTURE
    index_contigs_by_kmer.main('-k 21 dory_k21_r1'.split())

    # run extract_unassembled_regions
    extract_args = 'dory_k21_r1 dory-head.fa dory.regions -k 21'.split()
    extract_unassembled_nodes.main(extract_args)
def test_dory_estimate_query_abundance(location):
    """Run estimate_query_abundance on the dory catlas and read its output.

    Builds the k-mer index, runs the abundance script with
    ``-o abundances.csv``, then reads that file back. The test fails if the
    file cannot be opened.

    ``location`` is not used in the body; presumably a fixture that prepares
    the working directory -- confirm against its definition.
    """
    copy_dory_catlas()
    copy_dory_head()

    # make k-mer search index - FIXTURE
    args = '-k 21 dory_k21_r1'.split()
    index_contigs_by_kmer.main(args)

    # calculate query abundances
    args = 'dory_k21_r1 dory-head.fa -o abundances.csv -k 21'.split()
    print('** running estimate_query_abundance')
    estimate_query_abundance.main(args)

    # Read the output back so a missing file fails the test. The context
    # manager closes the handle, which the previous bare open().read() left
    # to the garbage collector.
    # NOTE(review): the contents are read but never checked -- consider
    # asserting on the expected abundance record.
    with open('abundances.csv', 'rt') as fp:
        fp.read()
def test_dory_search_nomatch(location):
    """Search with a query that shares no k-mers; this should not fail.

    Copies ``data/random-query-nomatch.fa`` to ``random-query.fa``, builds
    the k-mer index and runs query_by_sequence. A ``SystemExit`` is accepted
    only when its code is 0.

    ``location`` is not used in the body; presumably a fixture that prepares
    the working directory -- confirm against its definition.
    """
    # test situations where zero k-mers match - should not fail.
    copy_dory_catlas()

    nomatch_source = relative_file('data/random-query-nomatch.fa')
    shutil.copyfile(nomatch_source, 'random-query.fa')

    # make k-mer search index
    index_args = '-k 21 dory_k21_r1'.split()
    print('** running index_contigs_by_kmer')
    index_contigs_by_kmer.main(index_args)

    # do search!!
    search_args = 'dory_k21_r1 dory_k21_r1_search_oh0 --query random-query.fa -k 21'.split()
    try:
        query_by_sequence.main(search_args)
    except SystemExit as exc:
        assert exc.code == 0, str(exc)
def test_dory():
    """End-to-end run of the dory workflow inside a temporary directory.

    Steps, in order: bcalm-to-gxt conversion, catlas build, k-mer indexing,
    extract_nodes_by_query (with output checks), characterize_catlas_regions,
    extract_unassembled_nodes, catlas_info, extract_contigs, make_bgzf,
    label_cdbg and extract_reads.

    NOTE(review): another ``test_dory`` is defined later in this file; if
    both live in the same module, the later definition replaces this one.
    """
    with TempDirectory() as location:
        from spacegraphcats.cdbg import bcalm_to_gxt

        # make the output directory
        try:
            os.mkdir('dory_k21_r1')
        except FileExistsError:
            pass

        # convert the bcalm file to gxt
        # NOTE(review): k is passed as '-21' here, whereas
        # test_dory_query_workflow passes '21' -- confirm which is intended.
        gxt_args = [
            '-k', '-21', '-P',
            relative_filename('dory/bcalm.dory.k21.unitigs.fa'),
            'dory_k21_r1/cdbg.gxt',
            'dory_k21_r1/contigs.fa.gz',
        ]
        bcalm_to_gxt.main(gxt_args)

        # build catlas
        catlas_args = Args()
        catlas_args.no_checkpoint = True
        catlas_args.level = 0
        catlas_args.radius = 1
        catlas_args.project = 'dory_k21_r1'
        catlas.main(catlas_args)

        # make k-mer search index
        index_contigs_by_kmer.main('-k 21 dory_k21_r1'.split())

        # do search!! extract_nodes_by_query
        search_cmd = 'dory_k21_r1 dory_k21_r1_search_oh0 --query data/dory-head.fa -k 21 --overhead=0.0'
        try:
            extract_nodes_by_query.main(search_cmd.split())
        except SystemExit as exc:
            # a zero exit code is acceptable; anything else fails the test
            assert exc.code == 0, str(exc)

        # check output!
        output_path = 'dory_k21_r1_search_oh0/'
        expected_files = (
            'command.txt',
            'dory-head.fa.frontier.txt.gz',
            'dory-head.fa.cdbg_ids.txt.gz',
            'dory-head.fa.response.txt',
            'dory-head.fa.contigs.sig',
            'results.csv',
        )
        for filename in expected_files:
            assert os.path.exists(output_path + filename)

        with open(output_path + 'results.csv') as results_fp:
            rows = results_fp.readlines()

        assert len(rows) == 2
        final_row = rows[-1].strip()
        assert final_row == 'data/dory-head.fa,1.0,1.0,1671,2,21,1631,1.0,0.0,0.0'

        # run characterize_catlas_regions
        characterize_catlas_regions.main('dory_k21_r1 dory_k1_r1.vec'.split())

        # run extract_unassembled_regions
        unassembled_cmd = 'dory_k21_r1 data/dory-head.fa dory.regions -k 21'
        extract_unassembled_nodes.main(unassembled_cmd.split())

        # run catlas info
        catlas_info.main(['dory_k21_r1'])

        # run extract_contigs
        contigs_args = [
            'dory_k21_r1',
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
            '-o',
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.contigs.fa.gz',
        ]
        extract_contigs.main(contigs_args)
        assert os.path.exists(
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.contigs.fa.gz')

        # run make_bgzf
        bgzf_args = ['data/dory-subset.fa', '-o', 'dory/dory.reads.bgz']
        make_bgzf.main(bgzf_args)

        # run label_cdbg
        label_args = [
            'dory_k21_r1',
            'dory/dory.reads.bgz',
            'dory_k21_r1/reads.bgz.labels',
        ]
        label_cdbg.main(label_args)

        # run extract_reads
        reads_args = [
            'dory/dory.reads.bgz',
            'dory_k21_r1/reads.bgz.labels',
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
            '-o',
            'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.reads.fa.gz',
        ]
        extract_reads.main(reads_args)
def test_dory():
    """End-to-end run of the dory workflow inside a temporary directory.

    Steps, in order: bcalm-to-gxt conversion, catlas build, k-mer indexing,
    extract_nodes_by_query (with output checks), characterize_catlas_regions,
    extract_unassembled_nodes, catlas_info, extract_contigs, make_bgzf,
    label_cdbg, extract_reads and estimate_query_abundance (with a check on
    the abundance record).

    NOTE(review): an earlier ``test_dory`` is also defined in this file; if
    both live in the same module, this later definition replaces it.
    """
    with TempDirectory() as location:
        from spacegraphcats.cdbg import bcalm_to_gxt

        # make the output directory
        try:
            os.mkdir('dory_k21_r1')
        except FileExistsError:
            pass

        # convert the bcalm file to gxt
        # NOTE(review): k is passed as '-21' here, whereas
        # test_dory_query_workflow passes '21' -- confirm which is intended.
        args = ['-k', '-21', '-P',
                relative_filename('dory/bcalm.dory.k21.unitigs.fa'),
                'dory_k21_r1/cdbg.gxt', 'dory_k21_r1/contigs.fa.gz']
        bcalm_to_gxt.main(args)

        # build catlas
        args = Args()
        args.no_checkpoint = True
        args.level = 0
        args.radius = 1
        args.project = 'dory_k21_r1'
        catlas.main(args)

        # make k-mer search index
        args = '-k 21 dory_k21_r1'.split()
        index_contigs_by_kmer.main(args)

        # do search!!
        args = 'dory_k21_r1 dory_k21_r1_search_oh0 --query data/dory-head.fa -k 21 --overhead=0.0'.split()
        try:
            extract_nodes_by_query.main(args)
        except SystemExit as e:
            # a zero exit code is acceptable; anything else fails the test
            assert e.code == 0, str(e)

        # check output!
        output_path = 'dory_k21_r1_search_oh0/'
        assert os.path.exists(output_path + 'command.txt')
        assert os.path.exists(output_path + 'dory-head.fa.frontier.txt.gz')
        assert os.path.exists(output_path + 'dory-head.fa.cdbg_ids.txt.gz')
        assert os.path.exists(output_path + 'dory-head.fa.response.txt')
        assert os.path.exists(output_path + 'dory-head.fa.contigs.sig')
        assert os.path.exists(output_path + 'results.csv')

        with open(output_path + 'results.csv') as fp:
            lines = fp.readlines()
            assert len(lines) == 2

            last_line = lines[-1].strip()
            assert last_line == 'data/dory-head.fa,1.0,1.0,1671,2,21,1631,1.0,0.0,0.0'

        # run characterize_catlas_regions
        args = 'dory_k21_r1 dory_k1_r1.vec'.split()
        characterize_catlas_regions.main(args)

        # run extract_unassembled_regions
        args = 'dory_k21_r1 data/dory-head.fa dory.regions -k 21'.split()
        extract_unassembled_nodes.main(args)

        # run catlas info
        catlas_info.main(['dory_k21_r1'])

        # run extract_contigs
        args = ['dory_k21_r1',
                'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
                '-o',
                'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.contigs.fa.gz']
        extract_contigs.main(args)

        assert os.path.exists('dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.contigs.fa.gz')

        # run make_bgzf
        args = ['data/dory-subset.fa', '-o', 'dory/dory.reads.bgz']
        make_bgzf.main(args)

        # run label_cdbg
        args = ['dory_k21_r1',
                'dory/dory.reads.bgz', 'dory_k21_r1/reads.bgz.labels']
        label_cdbg.main(args)

        # run extract_reads
        args = ['dory/dory.reads.bgz',
                'dory_k21_r1/reads.bgz.labels',
                'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
                '-o',
                'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.reads.fa.gz']
        extract_reads.main(args)

        # calculate query abundances
        args = 'dory_k21_r1 data/dory-head.fa -o abundances.csv -k 21'.split()
        estimate_query_abundance.main(args)

        # Use a context manager so the handle is closed deterministically;
        # the previous bare open().read() left it to the garbage collector.
        with open('abundances.csv', 'rt') as fp:
            abunds = fp.read()

        assert 'data/dory-head.fa,1.0,1.05' in abunds