def run_dnadiff_pairwise(fasta_files, fasta_names, output_folder):
    """Run MUMmer's dnadiff once for every unordered pair of fasta files.

    Pairs are visited in input order (first file against each later file,
    then second file against each later file, and so on). For a pair the
    directory output_folder/<name1>_vs_<name2> is passed to
    dir_utils.mkdir_p, then run_dnadiff is called with the two fasta paths
    and the path "out" inside that directory.

    fasta_names is matched to fasta_files by position, so both must have
    the same length (checked with an assert).
    """
    assert len(fasta_files) == len(fasta_names)
    count = len(fasta_files)
    # All (earlier, later) index combinations, in the order they are run.
    index_pairs = [
        (earlier, later)
        for earlier in range(count)
        for later in range(earlier + 1, count)
    ]
    for earlier, later in index_pairs:
        pair_dir = ospj(
            output_folder,
            "{fn1}_vs_{fn2}".format(
                fn1=fasta_names[earlier], fn2=fasta_names[later]
            ),
        )
        dir_utils.mkdir_p(pair_dir)
        run_dnadiff(
            fasta_files[earlier], fasta_files[later], ospj(pair_dir, "out")
        )
def run_dnadiff_pairwise(fasta_files, fasta_names, output_folder):
    """Pairwise MUMmer dnadiff over the given fasta files.

    Every file is compared with each file that follows it in fasta_files.
    The comparison of file i with file j uses the directory
    output_folder/<fasta_names[i]>_vs_<fasta_names[j]>: it is handed to
    dir_utils.mkdir_p and then run_dnadiff receives both fasta paths plus
    the path "out" within that directory.

    The two input lists must be equally long; this is asserted up front.
    """
    assert len(fasta_files) == len(fasta_names)
    n_fastas = len(fasta_files)
    for left in range(n_fastas):
        for right in range(left + 1, n_fastas):
            # Directory name encodes which two fastas are being compared.
            pair_label = "{fn1}_vs_{fn2}".format(
                fn1=fasta_names[left], fn2=fasta_names[right]
            )
            pair_dir = ospj(output_folder, pair_label)
            dir_utils.mkdir_p(pair_dir)
            run_dnadiff(
                fasta_files[left], fasta_files[right], ospj(pair_dir, "out")
            )
def parallel_run_dnadiff_pairwise(fasta_files, fasta_names, output_folder):
    """Run MUMmer's dnadiff pairwise for the given fasta files in parallel.

    Every file is compared with each file that follows it in fasta_files.
    For a pair the directory output_folder/<name1>_vs_<name2> is passed to
    dir_utils.mkdir_p, and the job (fasta1, fasta2, <that directory>/out)
    is queued. All queued jobs are then mapped over run_dnadiff_star with a
    multiprocessing Pool using its default number of workers.

    fasta_names is matched to fasta_files by position, so both must have
    the same length (checked with an assert). With fewer than two fasta
    files there is nothing to compare and no worker processes are started.

    Any exception raised while preparing directories or by a job propagates
    to the caller; the pool is closed and joined in either case.
    """
    assert len(fasta_files) == len(fasta_names)

    # Prepare every output directory and job before any worker process
    # exists, so a failure here cannot leave a pool behind.
    args = []
    for i in range(len(fasta_files)):
        for j in range(i + 1, len(fasta_files)):
            out_dir = ospj(
                output_folder,
                "{fn1}_vs_{fn2}".format(
                    fn1=fasta_names[i], fn2=fasta_names[j]
                ),
            )
            dir_utils.mkdir_p(out_dir)
            args.append((fasta_files[i], fasta_files[j], ospj(out_dir, "out")))

    if not args:
        # No pairs to compare: avoid spawning worker processes for nothing.
        return

    pool = Pool()
    try:
        pool.map(run_dnadiff_star, args)
    finally:
        # Always release the workers, also when a job raised; close + join
        # lets jobs that are already running finish instead of killing them.
        pool.close()
        pool.join()
def write_approved_bins(app_binsdf, fasta_file, output_folder, prefix):
    """Write each approved bin to output_folder/<prefix>_bin<bin_id>.fa.

    app_binsdf: DataFrame read through its "Cluster" column (used as the
        bin id in the file name) and its "Contigs" column (contig ids of
        the bin joined by "|").
    fasta_file: fasta file parsed with SeqIO; it has to contain every
        contig id listed in app_binsdf.
    output_folder: passed to dir_utils.mkdir_p before any bin is written.
    prefix: first part of every output file name.

    Each output file holds one ">contig_id" header line followed by the
    sequence on a single line, for every contig of the bin in listed order.
    A contig id missing from the parsed fasta dict raises KeyError.
    """
    # get fasta dict
    with open(fasta_file) as handle:
        rdict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))

    # print approved bins
    dir_utils.mkdir_p(output_folder)
    # Iterate the two columns directly instead of DataFrame.as_matrix(),
    # which was deprecated in pandas 0.23 and removed in pandas 1.0.
    for bin_id, contigs in zip(app_binsdf["Cluster"], app_binsdf["Contigs"]):
        out_file = ospj(
            output_folder,
            "{prefix}_bin{bin_id}.fa".format(bin_id=bin_id, prefix=prefix),
        )
        with open(out_file, "w") as handle:
            handle.writelines(
                ">{}\n{}\n".format(cid, rdict[cid].seq)
                for cid in contigs.split("|")
            )
def parallel_run_dnadiff_pairwise(fasta_files, fasta_names, output_folder):
    """Run MUMmer's dnadiff pairwise for the given fasta files in parallel.

    Every file is compared with each file that follows it in fasta_files.
    For a pair the directory output_folder/<name1>_vs_<name2> is passed to
    dir_utils.mkdir_p, and the job (fasta1, fasta2, <that directory>/out)
    is queued. All queued jobs are then mapped over run_dnadiff_star with a
    multiprocessing Pool using its default number of workers.

    fasta_names is matched to fasta_files by position, so both must have
    the same length (checked with an assert). With fewer than two fasta
    files there is nothing to compare and no worker processes are started.

    Any exception raised while preparing directories or by a job propagates
    to the caller; the pool is closed and joined in either case.
    """
    assert len(fasta_files) == len(fasta_names)

    # Prepare every output directory and job before any worker process
    # exists, so a failure here cannot leave a pool behind.
    args = []
    for i in range(len(fasta_files)):
        for j in range(i + 1, len(fasta_files)):
            out_dir = ospj(
                output_folder,
                "{fn1}_vs_{fn2}".format(
                    fn1=fasta_names[i], fn2=fasta_names[j]
                ),
            )
            dir_utils.mkdir_p(out_dir)
            args.append((fasta_files[i], fasta_files[j], ospj(out_dir, "out")))

    if not args:
        # No pairs to compare: avoid spawning worker processes for nothing.
        return

    pool = Pool()
    try:
        pool.map(run_dnadiff_star, args)
    finally:
        # Always release the workers, also when a job raised; close + join
        # lets jobs that are already running finish instead of killing them.
        pool.close()
        pool.join()
def setUp(self):
    """Start each test from a freshly created temporary directory.

    Calls self.tearDown() first, which per the original note deletes the
    temporary directory if it exists, then passes TMP_BASENAME_DIR to
    dir_utils.mkdir_p to create it again.
    """
    self.tearDown()
    dir_utils.mkdir_p(TMP_BASENAME_DIR)