def test_dory_extract_reads_fq(location):
    """Check that FASTQ input is passed through extract_reads properly.

    Runs make_bgzf on 'dory-subset.fq', labels the cDBG with the result,
    extracts reads for the 'dory-head.fa' search output, and verifies that
    two records come back and that the first one has a non-empty quality.

    NOTE(review): `location` is presumably a pytest fixture that prepares
    the working directory -- confirm against the test configuration.
    """
    copy_dory_catlas()
    copy_dory_catlas_search()
    copy_dory_subset()

    labels_path = 'dory_k21_r1/reads.bgz.labels'
    reads_filename = 'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.reads.fa.gz'

    # FIXTURE step: build the BGZF reads file from the FASTQ subset.
    print('** running make_bgzf')
    make_bgzf.main(
        ['dory-subset.fq', '-o', relative_file('dory/dory.reads.bgz')]
    )

    # FIXTURE step: label the cDBG with the reads.
    print('** running label_cdbg')
    label_cdbg.main(
        ['dory_k21_r1', relative_file('dory/dory.reads.bgz'), labels_path]
    )

    # Step under test: extract the reads matching the search results.
    print('** running extract_reads')
    extract_reads.main([
        relative_file('dory/dory.reads.bgz'),
        labels_path,
        'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
        '-o',
        reads_filename,
    ])

    extracted = list(screed.open(reads_filename))
    assert len(extracted) == 2
    # A non-empty quality string means the FASTQ record was preserved.
    assert len(extracted[0].quality)
def test_dory_extract_reads(location):
    """Run make_bgzf, label_cdbg and extract_reads on the FASTA dory subset.

    Verifies that exactly two reads are written to the extract_reads
    output for the 'dory-head.fa' search results.

    NOTE(review): `location` is presumably a pytest fixture that prepares
    the working directory -- confirm against the test configuration.
    """
    copy_dory_catlas()
    copy_dory_catlas_search()
    copy_dory_subset()

    labels_path = 'dory_k21_r1/reads.bgz.labels'
    reads_filename = 'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.reads.fa.gz'

    # FIXTURE step: build the BGZF reads file from the FASTA subset.
    print('** running make_bgzf')
    make_bgzf.main(
        ['dory-subset.fa', '-o', relative_file('dory/dory.reads.bgz')]
    )

    # FIXTURE step: label the cDBG with the reads.
    print('** running label_cdbg')
    label_cdbg.main(
        ['dory_k21_r1', relative_file('dory/dory.reads.bgz'), labels_path]
    )

    # Step under test: extract the reads matching the search results.
    print('** running extract_reads')
    extract_reads.main([
        relative_file('dory/dory.reads.bgz'),
        labels_path,
        'dory_k21_r1_search_oh0/dory-head.fa.cdbg_ids.txt.gz',
        '-o',
        reads_filename,
    ])

    extracted = list(screed.open(reads_filename))
    assert len(extracted) == 2
def test_dory_label_cdbg(location):
    """Smoke-test label_cdbg on a BGZF file built from the FASTA subset.

    The test has no explicit assertions; it passes when both commands run
    without raising.

    NOTE(review): `location` is presumably a pytest fixture that prepares
    the working directory -- confirm against the test configuration.
    """
    copy_dory_catlas()
    copy_dory_subset()

    # FIXTURE step: build the BGZF reads file.
    print('** running make_bgzf')
    make_bgzf.main(
        ['dory-subset.fa', '-o', relative_file('dory/dory.reads.bgz')]
    )

    # Step under test.
    print('** running label_cdbg')
    label_cdbg.main([
        'dory_k21_r1',
        relative_file('dory/dory.reads.bgz'),
        'dory_k21_r1/reads.bgz.labels',
    ])
def test_dory_make_bgzf(location):
    """Smoke-test make_bgzf on the FASTA dory subset.

    The test has no explicit assertions; it passes when make_bgzf.main
    runs without raising.

    NOTE(review): `location` is presumably a pytest fixture that prepares
    the working directory -- confirm against the test configuration.
    """
    copy_dory_subset()

    print('** running make_bgzf')
    make_bgzf.main(
        ['dory-subset.fa', '-o', relative_file('dory/dory.reads.bgz')]
    )
def test_dory_query_workflow(location):
    """Run the full dory query workflow and check its outputs.

    Steps: convert the bcalm unitigs to gxt, build the catlas, build the
    k-mer search index, run query_by_sequence with 'dory-head.fa', then
    verify the expected output files exist and that 'results.csv' holds
    the expected single result row.

    NOTE(review): `location` is presumably a pytest fixture that prepares
    the working directory -- confirm against the test configuration.
    """
    from spacegraphcats.cdbg import bcalm_to_gxt

    copy_dory_head()
    copy_dory_subset()

    # Make the output directory; an already-existing directory is fine.
    os.makedirs('dory_k21_r1', exist_ok=True)

    # Convert the bcalm file to gxt.
    args = ['-k', '21', '-P',
            relative_file('dory/bcalm.dory.k21.unitigs.fa'),
            'dory_k21_r1/cdbg.gxt', 'dory_k21_r1/contigs.fa.gz']
    bcalm_to_gxt.main(args)

    # Build the catlas.
    args = pytest_utils.Args()
    args.no_checkpoint = True
    args.level = 0
    args.radius = 1
    args.project = 'dory_k21_r1'
    print('** running catlas')
    catlas.main(args)

    # Make the k-mer search index.
    args = ['-k', '21', 'dory_k21_r1']
    print('** running index_contigs_by_kmer')
    index_contigs_by_kmer.main(args)

    # Do the search. A SystemExit is tolerated only with exit code 0.
    args = ['dory_k21_r1', 'dory_k21_r1_search_oh0',
            '--query', 'dory-head.fa', '-k', '21']
    try:
        query_by_sequence.main(args)
    except SystemExit as e:
        assert e.code == 0, str(e)

    # Check that every expected output file was written; the assertion
    # message names the missing file to make failures easy to diagnose.
    output_path = 'dory_k21_r1_search_oh0/'
    expected_outputs = (
        'command.txt',
        'dory-head.fa.frontier.txt.gz',
        'dory-head.fa.cdbg_ids.txt.gz',
        'dory-head.fa.response.txt',
        'dory-head.fa.contigs.sig',
        'results.csv',
    )
    for name in expected_outputs:
        assert os.path.exists(output_path + name), \
            'missing output file: ' + output_path + name

    # results.csv must contain exactly two lines; the last one is the
    # result row for the dory-head.fa query.
    with open(output_path + 'results.csv') as fp:
        lines = fp.readlines()

    assert len(lines) == 2

    last_line = lines[-1].strip()
    assert last_line == 'dory-head.fa,1.0,1.0,1671,2,21,1631,1.0,0.0,0.0,dory_k21_r1'
def test_dory_query_by_hashval(location):
    """Index the dory cDBG by minhash and query it by hash value.

    Copies the k=31 hashval query file into the working directory, runs
    index_cdbg_by_minhash followed by query_by_hashval, and checks that
    'hashval_results.csv' was produced.

    NOTE(review): `location` is presumably a pytest fixture that prepares
    the working directory -- confirm against the test configuration.
    """
    queries_source = relative_file('data/dory-k31-hashval-queries.txt')
    shutil.copyfile(queries_source, 'dory-k31-hashval-queries.txt')

    copy_dory_catlas()

    # Index by hashval.
    index_cmd = '-k 31 dory_k21_r1/contigs.fa.gz dory_k21_r1_mh.pickle'
    index_cdbg_by_minhash.main(index_cmd.split())

    # Query the index with the copied hash values.
    query_cmd = '-k 31 dory_k21_r1 dory_k21_r1_mh.pickle dory-k31-hashval-queries.txt dory_k21_r1_hashval_k31'
    query_by_hashval.main(query_cmd.split())

    assert os.path.exists('dory_k21_r1_hashval_k31/hashval_results.csv')
def test_dory_search_nomatch(location):
    """Check that a query with zero matching k-mers does not fail.

    Copies 'random-query-nomatch.fa' in as 'random-query.fa', builds the
    k-mer index, and runs query_by_sequence; a SystemExit is accepted only
    when its exit code is 0.

    NOTE(review): `location` is presumably a pytest fixture that prepares
    the working directory -- confirm against the test configuration.
    """
    copy_dory_catlas()

    nomatch_source = relative_file('data/random-query-nomatch.fa')
    shutil.copyfile(nomatch_source, 'random-query.fa')

    # Make the k-mer search index.
    index_args = '-k 21 dory_k21_r1'.split()
    print('** running index_contigs_by_kmer')
    index_contigs_by_kmer.main(index_args)

    # Run the search; it should complete or exit cleanly.
    query_args = 'dory_k21_r1 dory_k21_r1_search_oh0 --query random-query.fa -k 21'.split()
    try:
        query_by_sequence.main(query_args)
    except SystemExit as exit_exc:
        assert exit_exc.code == 0, str(exit_exc)
def copy_dory_subset():
    """Copy the dory subset FASTA and FASTQ test files into the cwd.

    Each source path is resolved through relative_file() and copied to a
    file of the same base name in the current working directory.
    """
    copies = (
        ('data/dory-subset.fa', 'dory-subset.fa'),
        ('data/dory-subset.fq', 'dory-subset.fq'),
    )
    for source, target in copies:
        shutil.copyfile(relative_file(source), target)
def copy_dory_head():
    """Copy the 'dory-head.fa' test file into the current working directory."""
    shutil.copyfile(relative_file('data/dory-head.fa'), 'dory-head.fa')
def copy_dory_sig():
    """Copy the 'dory-subset.fq.sig' test file into the current working directory."""
    shutil.copyfile(
        relative_file('data/dory-subset.fq.sig'), 'dory-subset.fq.sig'
    )