def evaluate(tree_file, ref_msa, out_dir):
    """Run the RAxML executable in ``--evaluate`` mode on a fixed tree.

    The tool is started with ``out_dir`` as its working directory and the
    relative prefix ``eval``, so its output files are written into
    ``out_dir``. Standard output of the tool is discarded.

    Args:
        tree_file: Path to the tree passed via ``--tree``; must exist.
        ref_msa: Path to the alignment passed via ``--msa``; must exist.
        out_dir: Directory to run in; created via ``util.make_path``.

    Returns:
        The path ``<out_dir>/eval.raxml.bestModel``. The existence of that
        file is not verified here.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    util.expect_file_exists(tree_file)
    util.expect_file_exists(ref_msa)
    util.make_path(out_dir)

    prefix = "eval"

    # The child process runs with cwd=out_dir. Relative input paths were just
    # validated against the caller's working directory, so make them absolute
    # to ensure the tool opens the very same files.
    msa_path = os.path.abspath(ref_msa)
    tree_path = os.path.abspath(tree_file)

    cmd = [
        common.raxml,
        "--evaluate",
        "--msa", msa_path,
        "--model", common.subst_model,
        "--tree", tree_path,
        "--prefix", prefix,
        "--threads", str(common.iqtree_threads),
        "--blopt", "nr_safe",
        "--redo",
        "--force", "perf_threads",
        "--blmin", common.raxml_min_bl,
    ]
    sub.check_call(cmd, cwd=out_dir, stdout=sub.DEVNULL)

    return os.path.join(out_dir, prefix + ".raxml.bestModel")
def launch_epa(tree, modelfile, ref_msa, query_msa, out_dir, thorough=True):
    """Run the ``common.epa`` executable to place queries on a reference tree.

    Args:
        tree: Value passed to ``--tree``.
        modelfile: Value passed to ``--model``.
        ref_msa: Value passed to ``--msa``.
        query_msa: Value passed to ``--query``.
        out_dir: Output directory (``--out-dir``); created via
            ``util.make_path``.
        thorough: When true, ``--no-heur`` is added to the command line.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    util.make_path(out_dir)

    command = [
        common.epa,
        "--tree", tree,
        "--model", modelfile,
        "--msa", ref_msa,
        "--query", query_msa,
        "--threads",
    ]
    # Under slurm a fixed thread count is used; otherwise all available cores.
    command.append("4" if util.is_slurm() else str(common.available_cores))

    if thorough:
        command.append("--no-heur")

    # NOTE(review): the filter options are applied regardless of `thorough`
    # -- confirm this matches the intended nesting.
    command += [
        "--filter-max", "50",
        "--filter-acc-lwr", "1.0",
        "--out-dir", out_dir,
        "--redo",
        "--verbose",
    ]
    sub.check_call(command, stdout=sub.DEVNULL)
def ggplot_lwr_histogram(hist_csv_file, out_dir):
    """Run the ``lwr_hist.r`` script on a histogram CSV file.

    The script found in ``common.scripts_dir`` is called with two arguments:
    the CSV file and the PDF path to produce.

    Args:
        hist_csv_file: Path of the histogram CSV file.
        out_dir: Directory for the PDF; created via ``util.make_path``.

    Returns:
        ``<out_dir>/<csv base name without extension>.pdf``.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    util.make_path(out_dir)

    script = os.path.join(common.scripts_dir, "lwr_hist.r")

    # Name the PDF after the input file, minus directory and extension.
    stem, _ext = os.path.splitext(os.path.basename(hist_csv_file))
    pdf_path = os.path.join(out_dir, stem + ".pdf")

    sub.check_call([script, hist_csv_file, pdf_path])
    return pdf_path
def launch_split4epa(ref_phylip, aln_result, out_dir):
    """Run ``common.epa --split`` on a reference and a combined alignment.

    Args:
        ref_phylip: First positional argument after ``--split``.
        aln_result: Second positional argument after ``--split``.
        out_dir: Output directory (``--out-dir``); created via
            ``util.make_path``.

    Returns:
        Tuple ``(<out_dir>/reference.fasta, <out_dir>/query.fasta)``. The
        existence of these files is not verified here.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    util.make_path(out_dir)

    command = [
        common.epa,
        "--split", ref_phylip, aln_result,
        "--out-dir", out_dir,
        "--redo",
    ]
    sub.check_call(command)

    reference_path = os.path.join(out_dir, "reference.fasta")
    query_path = os.path.join(out_dir, "query.fasta")
    return reference_path, query_path
def launch_raxml(alignment, model, output_dir, seed, starting_trees=1,
                 parsimony=False, bs_trees=0, cores=16, debug=False,
                 parse=False):
    """Assemble an ``mpiexec`` RAxML command line and submit it.

    The output prefix is built from ``output_dir``, the model name (with
    ``+`` removed), the starting-tree and bootstrap settings, and the seed.
    The finished command is handed to ``launcher.submit`` together with the
    prefix, ``cores`` and ``debug``.

    Args:
        alignment: Path to the alignment (``--msa``); must exist.
        model: Substitution model string (``--model``).
        output_dir: Directory for the run; created via ``util.make_path``.
        seed: Random seed (``--seed``).
        starting_trees: Number of starting trees. When greater than zero,
            ``--tree rand{N}`` or ``--tree pars{N}`` is added.
        parsimony: Select ``pars{N}`` instead of ``rand{N}`` starting trees.
        bs_trees: Number of bootstrap trees. When greater than zero,
            ``--bootstrap --bs-trees N`` is added.
        cores: Number of MPI ranks (``mpiexec -np``), also forwarded to
            ``launcher.submit``.
        debug: Forwarded unchanged to ``launcher.submit``.
        parse: When true, ``--parse`` is added.
    """
    util.expect_file_exists(alignment)
    util.make_path(output_dir)

    prefix = os.path.join(output_dir, "raxml")
    prefix += "_" + model.replace("+", "")

    cmd = [
        "mpiexec", "-np", str(cores),
        common.raxml,
        "--msa", alignment,
        "--model", model,
        "--seed", str(seed),
        "--blmin", common.raxml_min_bl,
    ]

    if parse:
        cmd.append("--parse")

    # Only emit --tree together with its value. Previously the flag was added
    # even when starting_trees <= 0, which left it without an argument and
    # made it swallow the following option.
    if starting_trees > 0:
        kind = "pars" if parsimony else "rand"
        cmd += ["--tree", kind + "{" + str(starting_trees) + "}"]
        prefix += "_" + kind + str(starting_trees)

    if bs_trees > 0:
        # Fixed typo: the option is spelled "--bootstrap" (was "--boostrap").
        cmd += ["--bootstrap", "--bs-trees", str(bs_trees)]
        prefix += "_bs" + str(bs_trees)

    prefix += "_seed" + str(seed)

    cmd += ["--prefix", prefix, "--force", "perf_threads"]
    launcher.submit(prefix, cmd, cores, debug)
def outgroup_check(jplace_files, out_dir):
    """Run ``common.genesis_outgroup_check`` and capture its standard output.

    Args:
        jplace_files: Paths passed to the tool as positional arguments; each
            one must exist.
        out_dir: Directory for the log file; created via ``util.make_path``.

    Returns:
        ``<out_dir>/outgroup_check.txt``, the file that received the tool's
        standard output.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    for jplace in jplace_files:
        util.expect_file_exists(jplace)
    util.make_path(out_dir)

    command = [common.genesis_outgroup_check]
    command.extend(jplace_files)

    log_path = os.path.join(out_dir, "outgroup_check.txt")
    with open(log_path, "w+") as log_handle:
        sub.check_call(command, stdout=log_handle)
    return log_path
def launch_hmmbuild(ref_msa, out_dir):
    """Run ``hmmbuild`` from ``common.hmmer_dir`` on a reference alignment.

    The tool runs with ``out_dir`` as its working directory and writes to the
    relative file name ``reference.hmm``.

    Args:
        ref_msa: Alignment passed as the last positional argument.
        out_dir: Working directory; created via ``util.make_path``.

    Returns:
        ``<out_dir>/reference.hmm``.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    util.make_path(out_dir)

    hmm_name = "reference.hmm"
    command = [os.path.join(common.hmmer_dir, "hmmbuild")]

    # An explicit CPU count is requested only outside of slurm.
    if not util.is_slurm():
        command += ["--cpu", str(common.available_cores)]

    command += [hmm_name, ref_msa]
    sub.check_call(command, cwd=out_dir)

    return os.path.join(out_dir, hmm_name)
def split_alignment_outgroups(input_msa, outgroup_spec, out_dir):
    """Run ``common.genesis_remove_sequences`` to split an alignment.

    The tool runs with ``out_dir`` as its working directory and receives the
    relative output names ``reference.fasta`` and ``query.fasta``.

    Args:
        input_msa: Path to the input alignment; must exist.
        outgroup_spec: Path to the outgroup specification file; must exist.
        out_dir: Working directory; created via ``util.make_path``.

    Returns:
        Tuple ``(<out_dir>/reference.fasta, <out_dir>/query.fasta)``. The
        existence of these files is not verified here.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    util.expect_file_exists(input_msa)
    util.expect_file_exists(outgroup_spec)
    util.make_path(out_dir)

    new_ref_msa = "reference.fasta"
    query_msa = "query.fasta"

    # The child process runs with cwd=out_dir. Relative input paths were just
    # validated against the caller's working directory, so make them absolute
    # to ensure the tool opens the very same files.
    cmd = [
        common.genesis_remove_sequences,
        os.path.abspath(input_msa),
        os.path.abspath(outgroup_spec),
        new_ref_msa,
        query_msa,
    ]
    sub.check_call(cmd, cwd=out_dir)

    return os.path.join(out_dir, new_ref_msa), os.path.join(out_dir, query_msa)
def gappa_examine_lwr(jplace_path, out_dir):
    """Run ``gappa examine lwr`` on all files matching a glob pattern.

    Equivalent shell call, for orientation:
    ``gappa examine lwr --jplace-path <matches...> --no-list-file ...
    --out-dir <out_dir>``

    Args:
        jplace_path: Glob pattern; every match is passed after
            ``--jplace-path``.
        out_dir: Output directory (``--out-dir``); created via
            ``util.make_path``.

    Returns:
        ``<out_dir>/lwr_histogram.csv``. The existence of that file is not
        verified here.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    util.make_path(out_dir)

    command = [common.gappa, "examine", "lwr", "--jplace-path"]
    command.extend(glob(jplace_path))
    command.extend([
        "--no-list-file",
        "--no-compat-check",
        "--allow-file-overwriting",
        "--histogram-bins", "20",
        "--out-dir", out_dir,
    ])
    sub.check_call(command)

    return os.path.join(out_dir, "lwr_histogram.csv")
def setup_new_version(date=None, datasets=None):
    """Create the directory layout for a new version and its datasets.

    The version name is ``"<date>_<id>"`` where the id comes from
    ``get_current_version_id(date)``. The name is printed to standard output.

    Args:
        date: Date string used in the version name. Defaults to today's date
            formatted as ``YYYY-MM-DD``, evaluated at call time.
        datasets: Iterable of dataset names. Defaults to
            ``["fmsao", "fmsan", "smsao", "smsan"]``.

    Returns:
        List of ``common.Paths`` objects, one per dataset, in input order.
    """
    # Resolve defaults at call time. As a default argument the date would be
    # computed once at import and go stale in a long-running process; the
    # list default would be a single object shared across all calls.
    if date is None:
        date = datetime.datetime.now().strftime("%Y-%m-%d")
    if datasets is None:
        datasets = ["fmsao", "fmsan", "smsao", "smsan"]

    version_id = get_current_version_id(date)
    version = "{}_{}".format(date, version_id)

    # Make the base path for the version.
    util.make_path(util.versioned_path(version, ""))

    # Generate the per-dataset paths.
    paths = []
    for ds in datasets:
        p = common.Paths([version, ds], 0)
        setup_new_dataset(p)
        paths.append(p)

    print(version)
    return paths
def launch_hmmalign(ref_hmm, ref_msa, query_fasta, out_dir):
    """Run ``hmmalign`` from ``common.hmmer_dir`` to align query sequences.

    The tool runs with ``out_dir`` as its working directory and writes to the
    relative file name ``both.afa`` in ``afa`` output format. No ``--cpu``
    option is passed to this tool.

    Args:
        ref_hmm: HMM file, passed as the first positional argument.
        ref_msa: Alignment passed to ``--mapali``.
        query_fasta: Query sequences, passed as the last positional argument.
        out_dir: Working directory; created via ``util.make_path``.

    Returns:
        ``<out_dir>/both.afa``.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    util.make_path(out_dir)

    aligned_name = "both.afa"
    command = [
        os.path.join(common.hmmer_dir, "hmmalign"),
        "-o", aligned_name,
        "--outformat", "afa",
        "--mapali", ref_msa,
        ref_hmm,
        query_fasta,
    ]
    sub.check_call(command, cwd=out_dir)

    return os.path.join(out_dir, aligned_name)
def launch_papara(tree, ref_phylip, query_fasta, out_dir):
    """Run the ``common.papara`` executable to align queries to a reference.

    The tool runs with ``out_dir`` as its working directory and the run name
    ``aln``.

    Args:
        tree: Value passed to ``-t``.
        ref_phylip: Value passed to ``-s``.
        query_fasta: Value passed to ``-q``.
        out_dir: Working directory; created via ``util.make_path``.

    Returns:
        ``<out_dir>/papara_alignment.aln``. The existence of that file is not
        verified here.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status.
    """
    util.make_path(out_dir)

    run_name = "aln"
    command = [
        common.papara,
        "-t", tree,
        "-s", ref_phylip,
        "-q", query_fasta,
    ]

    # An explicit thread count is requested only outside of slurm.
    if not util.is_slurm():
        command += ["-j", str(common.available_cores)]

    command += ["-r", "-n", run_name]
    sub.check_call(command, cwd=out_dir)

    return os.path.join(out_dir, "papara_alignment." + run_name)
# Inference script: load a trained model and run it on one selected batch.
#
# NOTE(review): the arguments are hard-coded below, so the real command line
# is ignored. Switch to the commented-out call to honor it.
args = parser.parse_args(
    '--name test --ep best --whichset train_val --batch_id 0 --shuffle'.split(
    ))
# args = parser.parse_args()

os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

dataset = util.DataSet('./dataset/' + args.whichset, args.whichset)
dataloader = DataLoader(dataset,
                        batch_size=args.batch_size,
                        num_workers=args.num_workers,
                        shuffle=args.shuffle)

exp_path, train_path, val_path, infer_path, ckpt_path = util.make_path(args)

# map_location makes the CPU fallback above actually work: without it,
# torch.load tries to restore tensors onto the device they were saved from
# and fails on a machine without CUDA.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
model = torch.load(ckpt_path + '/' + 'model.archi',
                   map_location=device).to(device)
# model = nn.DataParallel(model).to(device)
checkpoint = torch.load(ckpt_path + '/' + 'weight_' + args.ep + '.pth',
                        map_location=device)
model.load_state_dict(checkpoint['state_dict'])

# Advance the loader until the batch with index args.batch_id is reached.
dataiter = iter(dataloader)
for _ in range(args.batch_id + 1):
    data2show = next(dataiter)

img, state, gt = data2show
# img, state, gt = util.NaN2Zero(img), util.NaN2Zero(state), util.NaN2Zero(gt)
img, state, gt = img.to(device), state.to(device), gt.to(device)

prediction = model(img, state)
def setup_new_dataset(path):
    """Create the data, runs and results directories of a dataset.

    Args:
        path: Object exposing ``data_dir``, ``runs_dir`` and ``results_dir``
            attributes; each is passed to ``util.make_path`` in that order.
    """
    for attribute in ("data_dir", "runs_dir", "results_dir"):
        util.make_path(getattr(path, attribute))
def setup_directory(directory, subdirectory, version):
    """Create ``<directory>/<version>/<subdirectory>`` via ``util.make_path``.

    Args:
        directory: Base directory.
        subdirectory: Innermost path component.
        version: Path component placed between the other two.
    """
    util.make_path(os.path.join(directory, version, subdirectory))