def shogun_bt2_db(input, output, annotater, extract_id, prefixes, depth, depth_force):
    """Annotate a FASTA file and build a Bowtie2 index from the annotated copy.

    The annotated FASTA and its map are written to
    ``<output>/<stem>.annotated.fna`` and ``<output>/<stem>.annotated.map``.
    If both files already exist the annotation phase is skipped. The index is
    then built under ``<output>/bt2/<stem>``.

    Args:
        input: Path of the FASTA file to annotate, or ``'-'`` to read from
            standard input (the stem is then ``'stdin'``).
        output: Directory that receives the annotated files and the ``bt2``
            sub-directory.
        annotater: ``'refseq'`` selects ``RefSeqAnnotater``, ``'nt'`` selects
            ``NTAnnotater``; any other value selects ``GIAnnotater``.
        extract_id: Forwarded unchanged to the annotater constructor.
        prefixes: Forwarded to ``RefSeqAnnotater`` / ``NTAnnotater``; not
            passed to ``GIAnnotater``.
        depth: Forwarded to the annotater constructor as ``depth``.
        depth_force: Forwarded to the annotater constructor as ``depth_force``.

    Returns:
        None. The value returned by ``bowtie2_build`` is printed.
    """
    import sys

    verify_make_dir(output)

    # Verify the FASTA is annotated
    if input == '-':
        output_fn = 'stdin'
    else:
        # splitext keeps the full name when there is no extension; joining
        # split('.')[:-1] produced an empty stem for such inputs.
        output_fn = os.path.splitext(os.path.basename(input))[0]

    outf_fasta = os.path.join(output, output_fn + '.annotated.fna')
    outf_map = os.path.join(output, output_fn + '.annotated.map')

    if not os.path.isfile(outf_fasta) or not os.path.isfile(outf_map):
        tree = NCBITree()
        db = RefSeqDatabase()

        if annotater == 'refseq':
            annotater_class = RefSeqAnnotater(extract_id, prefixes, db, tree,
                                              depth=depth, depth_force=depth_force)
        elif annotater == 'nt':
            annotater_class = NTAnnotater(extract_id, prefixes, db, tree,
                                          depth=depth, depth_force=depth_force)
        else:
            annotater_class = GIAnnotater(extract_id, db, tree,
                                          depth=depth, depth_force=depth_force)

        # Open the input before creating the outputs: if the input is missing
        # we must not leave empty output files behind, because their mere
        # existence makes the next run skip annotation.
        # '-' means standard input; open('-') would look for a file named '-'.
        inf = sys.stdin if input == '-' else open(input)
        try:
            with open(outf_fasta, 'w') as output_fna, open(outf_map, 'w') as output_map:
                inf_fasta = FASTA(inf)
                for lines_fna, lines_map in annotater_class.annotate(inf_fasta.read()):
                    output_fna.write(lines_fna)
                    output_map.write(lines_map)
        finally:
            # Never close the process-wide stdin handle.
            if inf is not sys.stdin:
                inf.close()
        # NOTE(review): a failure during annotation still leaves partial
        # output files that a later run will treat as complete.
    else:
        print("Found the output files \"%s\" and \"%s\". Skipping the annotation phase for this file." % (
            outf_fasta, outf_map))

    # Build the output BT2 database
    verify_make_dir(os.path.join(output, 'bt2'))
    print(bowtie2_build(outf_fasta, os.path.join(output, 'bt2', output_fn)))
def shogun_utree_db(input, output, annotater, extract_id, threads, prefixes, depth, depth_force):
    """Annotate a FASTA file and build a compressed UTree database from it.

    The annotated FASTA and its map are written to
    ``<output>/<stem>.annotated.fna`` and ``<output>/<stem>.annotated.map``.
    If both files already exist the annotation phase is skipped. The tree is
    then built as ``<output>/utree/<stem>.utr``, compressed to
    ``<output>/utree/<stem>.ctr``, and the uncompressed file is removed. If
    the ``.ctr`` file already exists the whole tree step is skipped.

    Args:
        input: Path of the FASTA file to annotate, or ``'-'`` to read from
            standard input (the stem is then ``'stdin'``).
        output: Directory that receives the annotated files and the ``utree``
            sub-directory.
        annotater: ``'refseq'`` selects ``RefSeqAnnotater``, ``'nt'`` selects
            ``NTAnnotater``; any other value selects ``GIAnnotater``.
        extract_id: Forwarded unchanged to the annotater constructor.
        threads: Forwarded to ``utree_build`` as ``threads``.
        prefixes: Forwarded to ``RefSeqAnnotater`` / ``NTAnnotater``; not
            passed to ``GIAnnotater``.
        depth: Forwarded to the annotater constructor as ``depth``.
        depth_force: Forwarded to the annotater constructor as ``depth_force``.

    Returns:
        None. The values returned by ``utree_build`` and ``utree_compress``
        are printed.
    """
    import sys

    verify_make_dir(output)

    # Verify the FASTA is annotated
    if input == '-':
        output_fn = 'stdin'
    else:
        # splitext keeps the full name when there is no extension; joining
        # split('.')[:-1] produced an empty stem for such inputs.
        output_fn = os.path.splitext(os.path.basename(input))[0]

    outf_fasta = os.path.join(output, output_fn + '.annotated.fna')
    outf_map = os.path.join(output, output_fn + '.annotated.map')

    if not os.path.isfile(outf_fasta) or not os.path.isfile(outf_map):
        tree = NCBITree()
        db = RefSeqDatabase()

        if annotater == 'refseq':
            annotater_class = RefSeqAnnotater(extract_id, prefixes, db, tree,
                                              depth=depth, depth_force=depth_force)
        elif annotater == 'nt':
            annotater_class = NTAnnotater(extract_id, prefixes, db, tree,
                                          depth=depth, depth_force=depth_force)
        else:
            annotater_class = GIAnnotater(extract_id, db, tree,
                                          depth=depth, depth_force=depth_force)

        # Open the input before creating the outputs: if the input is missing
        # we must not leave empty output files behind, because their mere
        # existence makes the next run skip annotation.
        # '-' means standard input; open('-') would look for a file named '-'.
        inf = sys.stdin if input == '-' else open(input)
        try:
            with open(outf_fasta, 'w') as output_fna, open(outf_map, 'w') as output_map:
                inf_fasta = FASTA(inf)
                for lines_fna, lines_map in annotater_class.annotate(inf_fasta.read()):
                    output_fna.write(lines_fna)
                    output_map.write(lines_map)
        finally:
            # Never close the process-wide stdin handle.
            if inf is not sys.stdin:
                inf.close()
        # NOTE(review): a failure during annotation still leaves partial
        # output files that a later run will treat as complete.
    else:
        print("Found the output files \"%s\" and \"%s\". Skipping the annotation phase for this file." % (
            outf_fasta, outf_map))

    # Build the output CTR
    verify_make_dir(os.path.join(output, 'utree'))
    path_uncompressed_tree = os.path.join(output, 'utree', output_fn + '.utr')
    path_compressed_tree = os.path.join(output, 'utree', output_fn + '.ctr')

    if os.path.exists(path_compressed_tree):
        print('Compressed tree database file %s exists, skipping this step.' % path_compressed_tree)
    else:
        # Reuse an uncompressed tree left on disk by an earlier run instead
        # of rebuilding it.
        if not os.path.exists(path_uncompressed_tree):
            print(utree_build(outf_fasta, outf_map, path_uncompressed_tree, threads=threads))
        print(utree_compress(path_uncompressed_tree, path_compressed_tree))
        # The .utr file is only an intermediate; drop it once compressed.
        os.remove(path_uncompressed_tree)