def main(args):
    """Draw raw and filtered comparison panels for two score directories.

    Reads ``input_dir_1``/``input_dir_2``, ``scoretype1``/``scoretype2`` and
    ``rmsd_cutoff`` from *args*.  Both directories are loaded with
    ``scorefileparse.read_dec_nat``, filtered by RMSD, normalised and
    intersected; one row of two axes is then drawn per PDB (sorted by key):
    column 0 from the intersected data, column 1 from the data that was
    additionally passed through ``scorefileparse.filter_norm_pdbs``.

    The figure object returned by ``conv.create_ax`` is neither saved nor
    returned by this function.
    """
    # The directory basenames are handed to plot() as the two titles.
    label_a = os.path.basename(args.input_dir_1)
    label_b = os.path.basename(args.input_dir_2)

    # Presumably (decoys, natives) per directory -- confirm in scorefileparse.
    decoys_a, natives_a = scorefileparse.read_dec_nat(
        args.input_dir_1, [], args.scoretype1, True)
    decoys_b, natives_b = scorefileparse.read_dec_nat(
        args.input_dir_2, [], args.scoretype2, True)

    kept_dec_a = scorefileparse.filter_pdbs_by_rmsd(decoys_a, args.rmsd_cutoff)
    kept_nat_a = scorefileparse.filter_pdbs_by_rmsd(natives_a, args.rmsd_cutoff)
    kept_dec_b = scorefileparse.filter_pdbs_by_rmsd(decoys_b, args.rmsd_cutoff)
    kept_nat_b = scorefileparse.filter_pdbs_by_rmsd(natives_b, args.rmsd_cutoff)

    # The second set of each pair is normalised with the matching first set
    # passed as an extra argument.
    norm_dec_a = scorefileparse.norm_pdbs(kept_dec_a)
    norm_nat_a = scorefileparse.norm_pdbs(kept_nat_a, kept_dec_a)
    norm_dec_b = scorefileparse.norm_pdbs(kept_dec_b)
    norm_nat_b = scorefileparse.norm_pdbs(kept_nat_b, kept_dec_b)

    common_dec_a, common_nat_a, common_dec_b, common_nat_b = \
        scorefileparse.pdbs_intersect(
            [norm_dec_a, norm_nat_a, norm_dec_b, norm_nat_b])
    common_dec_a, common_dec_b = scorefileparse.pdbs_scores_intersect(
        [common_dec_a, common_dec_b])
    common_nat_a, common_nat_b = scorefileparse.pdbs_scores_intersect(
        [common_nat_a, common_nat_b])

    # The filtered variants start again from the normalised (not the
    # intersected) dictionaries.
    filt_dec_a = scorefileparse.filter_norm_pdbs(norm_dec_a)
    filt_nat_a = scorefileparse.filter_norm_pdbs(norm_nat_a)
    filt_dec_b = scorefileparse.filter_norm_pdbs(norm_dec_b)
    filt_nat_b = scorefileparse.filter_norm_pdbs(norm_nat_b)

    filt_common_dec_a, filt_common_dec_b = scorefileparse.pdbs_scores_intersect(
        [filt_dec_a, filt_dec_b])
    filt_common_nat_a, filt_common_nat_b = scorefileparse.pdbs_scores_intersect(
        [filt_nat_a, filt_nat_b])

    fig, axarr = conv.create_ax(2, len(common_dec_a))

    # Column 0 shows the intersected data, column 1 the filtered data.
    columns = (
        (common_dec_a, common_dec_b, common_nat_a, common_nat_b),
        (filt_common_dec_a, filt_common_dec_b,
         filt_common_nat_a, filt_common_nat_b),
    )
    for row, pdb in enumerate(sorted(common_dec_a.keys())):
        for col, (dec_a, dec_b, nat_a, nat_b) in enumerate(columns):
            ax = axarr[row, col]
            plot(dec_a, dec_b, nat_a, nat_b, ax, pdb, label_a, label_b)
def main(input_dir, output_pre, repl_orig):
    """Compute metrics for two score directories and plot them together.

    Parameters:
        input_dir: indexable pair of entries; ``input_dir[k][0]`` is used as a
            directory path and ``input_dir[k][1]`` is passed as the second
            argument to ``scorefileparse.read_dir`` and to
            ``discparse.pdbs_dict_to_metrics`` (presumably the score type --
            confirm against the caller).
        output_pre: forwarded unchanged to ``plot``.
        repl_orig: forwarded unchanged to ``scorefileparse.read_dir``.

    Prints the sizes of the two intersected score sets and the second metrics
    object to stdout before plotting.
    """
    # The directory basenames are handed to plot() as the two titles.
    title1 = os.path.basename(input_dir[0][0])
    title2 = os.path.basename(input_dir[1][0])

    d1 = scorefileparse.read_dir(input_dir[0][0], input_dir[0][1], repl_orig)
    d2 = scorefileparse.read_dir(input_dir[1][0], input_dir[1][1], repl_orig)

    dec_inter1, dec_inter2 = scorefileparse.pdbs_scores_intersect([d1, d2])

    disc_metrics_1 = discparse.pdbs_dict_to_metrics(dec_inter1, input_dir[0][1])
    disc_metrics_2 = discparse.pdbs_dict_to_metrics(dec_inter2, input_dir[1][1])

    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3; the former print statements were Python 2 only.
    print(len(dec_inter1))
    print(len(dec_inter2))
    print(disc_metrics_2)

    plot(disc_metrics_1, disc_metrics_2, title1, title2, output_pre)
def main(input_dir_rosetta_sf, input_dir_amber_sf, input_dir_rosetta_pdb, input_dir_amber_pdb, output_dir, n_results):
    """Copy the top ``n_results`` structures per PDB under three selections.

    Score files are read from ``input_dir_rosetta_sf`` and
    ``input_dir_amber_sf``, normalised and intersected.  For every PDB key
    (in sorted order) and every rank ``i`` in ``1..n_results`` three files are
    copied to ``<output_dir>/<kind>/<pdb>_<i>.pdb``:

    * ``rosetta``  -- ``find_lowest_energy`` pick on the Rosetta scores, read
      from ``input_dir_rosetta_pdb/<pdb>/<name>_0001.pdb``;
    * ``amber``    -- ``find_lowest_energy`` pick on the Amber scores, read
      from ``input_dir_amber_pdb/<pdb>/min_NoH_<name>.pdb``;
    * ``combined`` -- ``find_pareto`` pick, read from the Rosetta PDB
      directory with the Rosetta file-name suffix.

    ``output_dir`` itself must already exist; the three sub-directories are
    created when missing.
    """
    d1 = scorefileparse.read_dir(input_dir_rosetta_sf, 'rosetta', repl_orig=True)
    d2 = scorefileparse.read_dir(input_dir_amber_sf, 'amber', repl_orig=True)

    dec_norm1 = scorefileparse.norm_pdbs(d1)
    dec_norm2 = scorefileparse.norm_pdbs(d2)

    dec_inter1, dec_inter2 = scorefileparse.pdbs_scores_intersect([dec_norm1, dec_norm2])

    # Independent copies for the pareto selection: entries are popped from
    # the per-method dictionaries and from these copies at different keys.
    dec1_cp = copy.deepcopy(dec_inter1)
    dec2_cp = copy.deepcopy(dec_inter2)

    # copyfile() does not create directories, so make sure the targets exist.
    for subdir in ("rosetta", "amber", "combined"):
        subdir_path = os.path.join(output_dir, subdir)
        if not os.path.isdir(subdir_path):
            os.mkdir(subdir_path)

    for pdb in sorted(dec_inter1.keys()):
        for i in range(1, n_results + 1):
            rosetta_lowest_energy = find_lowest_energy(dec_inter1[pdb])
            amber_lowest_energy = find_lowest_energy(dec_inter2[pdb])
            pareto_lowest_energy = find_pareto(dec1_cp, dec2_cp, pdb)

            out_name = "{0}_{1}.pdb".format(pdb, i)

            #copy rosetta file
            src = os.path.join(input_dir_rosetta_pdb, pdb, rosetta_lowest_energy + "_0001.pdb")
            dst = os.path.join(output_dir, "rosetta", out_name)
            copyfile(src, dst)

            #copy amber file (from the Amber PDB directory)
            src = os.path.join(input_dir_amber_pdb, pdb, "min_NoH_" + amber_lowest_energy + ".pdb")
            dst = os.path.join(output_dir, "amber", out_name)
            copyfile(src, dst)

            #copy pareto file
            src = os.path.join(input_dir_rosetta_pdb, pdb, pareto_lowest_energy + "_0001.pdb")
            dst = os.path.join(output_dir, "combined", out_name)
            copyfile(src, dst)

            #delete from original scoredict so that next round will get the next-lowest
            dec_inter1[pdb].pop(rosetta_lowest_energy)
            dec_inter2[pdb].pop(amber_lowest_energy)
            dec1_cp[pdb].pop(pareto_lowest_energy)
            dec2_cp[pdb].pop(pareto_lowest_energy)
def main(input_dir_rosetta_sf, input_dir_amber_sf, input_dir_rosetta_pdb, input_dir_amber_pdb, output_dir, n_results, alternate_rmsd, start_ind_desc_amber, start_ind_desc_rosetta, end_ind_desc_amber, end_ind_desc_rosetta, add_pdb_to_path):
    """Copy the top ``n_results`` structures per PDB under three selections.

    Score files are read from the two ``*_sf`` directories, normalised and
    intersected.  For every PDB key (sorted) and every rank ``i`` in
    ``1..n_results`` one file each is copied into ``<output_dir>/rosetta``,
    ``<output_dir>/amber`` and ``<output_dir>/combined`` under the name
    ``<pdb>_<i>.pdb``.  The "combined" file is the ``find_pareto`` pick and is
    read from the Rosetta PDB directory with the Rosetta suffix.

    Parameters:
        alternate_rmsd: only the exact value ``True`` switches ``read_dir`` to
            ``rmsd='total_score'`` (Rosetta) / ``rmsd='tot'`` (Amber).
        start_ind_desc_* / end_ind_desc_*: forwarded to ``read_dir``; they
            also select the source file-name prefix/suffix (see below).
        add_pdb_to_path: only the *string* ``"True"`` inserts the PDB key as
            a path component between the PDB directory and the file name;
            any other value (including the boolean ``True``) does not.

    ``output_dir`` must already exist; its three sub-directories are created
    when missing.
    """
    if alternate_rmsd is True:
        d1 = scorefileparse.read_dir(input_dir_rosetta_sf, 'rosetta', repl_orig=False, rmsd='total_score', start_ind_desc=start_ind_desc_rosetta, end_ind_desc=end_ind_desc_rosetta)
        d2 = scorefileparse.read_dir(input_dir_amber_sf, 'amber', repl_orig=False, rmsd='tot', start_ind_desc=start_ind_desc_amber, end_ind_desc=end_ind_desc_amber)
    else:
        d1 = scorefileparse.read_dir(input_dir_rosetta_sf, 'rosetta', repl_orig=False, start_ind_desc=start_ind_desc_rosetta, end_ind_desc=end_ind_desc_rosetta)
        d2 = scorefileparse.read_dir(input_dir_amber_sf, 'amber', repl_orig=False, start_ind_desc=start_ind_desc_amber, end_ind_desc=end_ind_desc_amber)

    dec_norm1 = scorefileparse.norm_pdbs(d1)
    dec_norm2 = scorefileparse.norm_pdbs(d2)

    dec_inter1, dec_inter2 = scorefileparse.pdbs_scores_intersect([dec_norm1, dec_norm2])

    # Independent copies for the pareto selection: entries are popped from
    # the per-method dictionaries and from these copies at different keys.
    dec1_cp = copy.deepcopy(dec_inter1)
    dec2_cp = copy.deepcopy(dec_inter2)

    # Create the output sub-directories.  An existing directory is fine; any
    # other mkdir failure (permissions, missing parent, a file in the way) is
    # raised here instead of surfacing later as a copy error.
    for subdir in ("rosetta", "amber", "combined"):
        subdir_path = os.path.join(output_dir, subdir)
        if not os.path.isdir(subdir_path):
            os.mkdir(subdir_path)

    # The file-name decorations depend only on the arguments, so they are
    # computed once.  NOTE(review): 1000 looks like a sentinel end index for
    # the Rosetta description -- confirm against scorefileparse.read_dir.
    r_suffix = ".pdb" if end_ind_desc_rosetta == 1000 else "_0001.pdb"
    a_prefix = "min_NoH_" if start_ind_desc_amber is None else "min_"
    a_suffix = ".pdb" if end_ind_desc_amber is None else ".pdb.pdb"

    for pdb in sorted(dec_inter1.keys()):
        # An empty component is ignored by os.path.join.
        path_p = pdb if add_pdb_to_path == "True" else ""

        for i in range(1, n_results + 1):
            rosetta_lowest_energy = find_lowest_energy(dec_inter1[pdb])
            amber_lowest_energy = find_lowest_energy(dec_inter2[pdb])
            pareto_lowest_energy = find_pareto(dec1_cp, dec2_cp, pdb)

            out_name = "{0}_{1}.pdb".format(pdb, i)

            #copy rosetta file
            src = os.path.join(input_dir_rosetta_pdb, path_p, rosetta_lowest_energy + r_suffix)
            dst = os.path.join(output_dir, "rosetta", out_name)
            copyfile(src, dst)

            #copy amber file
            src = os.path.join(input_dir_amber_pdb, path_p, a_prefix + amber_lowest_energy + a_suffix)
            dst = os.path.join(output_dir, "amber", out_name)
            copyfile(src, dst)

            #copy pareto file
            src = os.path.join(input_dir_rosetta_pdb, path_p, pareto_lowest_energy + r_suffix)
            dst = os.path.join(output_dir, "combined", out_name)
            copyfile(src, dst)

            #delete from original scoredict so that next round will get the next-lowest
            dec_inter1[pdb].pop(rosetta_lowest_energy)
            dec_inter2[pdb].pop(amber_lowest_energy)
            dec1_cp[pdb].pop(pareto_lowest_energy)
            dec2_cp[pdb].pop(pareto_lowest_energy)
def main(input_dir_rosetta_sf, input_dir_amber_sf, input_dir_rosetta_pdb, input_dir_amber_pdb, output_dir, n_results, alternate_rmsd, start_ind_desc_amber, start_ind_desc_rosetta, end_ind_desc_amber, end_ind_desc_rosetta, add_pdb_to_path):
    """Export the ``n_results`` best structures per PDB for three rankings.

    The Rosetta and Amber score directories are read, normalised and
    intersected.  Then, for each PDB key in sorted order and each rank ``i``
    from 1 to ``n_results``, three source files are copied to
    ``<output_dir>/{rosetta,amber,combined}/<pdb>_<i>.pdb``.  The "combined"
    entry is whatever ``find_pareto`` selects; its file is taken from the
    Rosetta PDB directory and uses the Rosetta suffix.

    Parameters:
        alternate_rmsd: ``read_dir`` receives ``rmsd='total_score'`` /
            ``rmsd='tot'`` only when this is exactly ``True``.
        start_ind_desc_* / end_ind_desc_*: passed through to ``read_dir`` and
            also used to choose the source file prefix/suffix.
        add_pdb_to_path: compared against the string ``"True"``; only then is
            the PDB key added as a sub-directory of the source path.

    ``output_dir`` has to exist already; missing sub-directories are created.
    """
    if alternate_rmsd is True:
        d1 = scorefileparse.read_dir(input_dir_rosetta_sf, 'rosetta', repl_orig=False, rmsd='total_score', start_ind_desc=start_ind_desc_rosetta, end_ind_desc=end_ind_desc_rosetta)
        d2 = scorefileparse.read_dir(input_dir_amber_sf, 'amber', repl_orig=False, rmsd='tot', start_ind_desc=start_ind_desc_amber, end_ind_desc=end_ind_desc_amber)
    else:
        d1 = scorefileparse.read_dir(input_dir_rosetta_sf, 'rosetta', repl_orig=False, start_ind_desc=start_ind_desc_rosetta, end_ind_desc=end_ind_desc_rosetta)
        d2 = scorefileparse.read_dir(input_dir_amber_sf, 'amber', repl_orig=False, start_ind_desc=start_ind_desc_amber, end_ind_desc=end_ind_desc_amber)

    dec_norm1 = scorefileparse.norm_pdbs(d1)
    dec_norm2 = scorefileparse.norm_pdbs(d2)

    dec_inter1, dec_inter2 = scorefileparse.pdbs_scores_intersect([dec_norm1, dec_norm2])

    # The pareto ranking consumes its own copies so that its pops do not
    # interfere with the per-method rankings.
    dec1_cp = copy.deepcopy(dec_inter1)
    dec2_cp = copy.deepcopy(dec_inter2)

    # Tolerate pre-existing output sub-directories, but let every other
    # mkdir error propagate rather than hiding it.
    for kind in ("rosetta", "amber", "combined"):
        kind_dir = os.path.join(output_dir, kind)
        if not os.path.isdir(kind_dir):
            os.mkdir(kind_dir)

    # Loop-invariant naming rules.  NOTE(review): the meaning of the value
    # 1000 is not visible here -- confirm against scorefileparse.read_dir.
    if end_ind_desc_rosetta == 1000:
        r_suffix = ".pdb"
    else:
        r_suffix = "_0001.pdb"
    if start_ind_desc_amber is None:
        a_prefix = "min_NoH_"
    else:
        a_prefix = "min_"
    if end_ind_desc_amber is None:
        a_suffix = ".pdb"
    else:
        a_suffix = ".pdb.pdb"

    for pdb in sorted(dec_inter1.keys()):
        # os.path.join drops an empty component, so "" means "no sub-directory".
        if add_pdb_to_path == "True":
            path_p = pdb
        else:
            path_p = ""

        for i in range(1, n_results + 1):
            rosetta_lowest_energy = find_lowest_energy(dec_inter1[pdb])
            amber_lowest_energy = find_lowest_energy(dec_inter2[pdb])
            pareto_lowest_energy = find_pareto(dec1_cp, dec2_cp, pdb)

            out_name = "{0}_{1}.pdb".format(pdb, i)

            # (source root, source file name, output sub-directory),
            # copied in this order: rosetta, amber, pareto.
            copy_jobs = (
                (input_dir_rosetta_pdb, rosetta_lowest_energy + r_suffix, "rosetta"),
                (input_dir_amber_pdb, a_prefix + amber_lowest_energy + a_suffix, "amber"),
                (input_dir_rosetta_pdb, pareto_lowest_energy + r_suffix, "combined"),
            )
            for src_root, src_name, kind in copy_jobs:
                src = os.path.join(src_root, path_p, src_name)
                dst = os.path.join(output_dir, kind, out_name)
                copyfile(src, dst)

            #delete from original scoredict so that next round will get the next-lowest
            dec_inter1[pdb].pop(rosetta_lowest_energy)
            dec_inter2[pdb].pop(amber_lowest_energy)
            dec1_cp[pdb].pop(pareto_lowest_energy)
            dec2_cp[pdb].pop(pareto_lowest_energy)
def main(input_dir_1, scoretype1, input_dir_2, scoretype2, rmsd_cutoff, output_pre ):
    """Compare two score directories per PDB and save three summary figures.

    Both directories are read with ``scorefileparse.read_dec_nat``, filtered
    with ``rmsd_cutoff``, normalised and intersected.  For every PDB key (in
    sorted order) ``plot_r_v_r`` and ``plot_pareto`` draw into one row of a
    two-column axis grid, and the mapping returned by ``plot_pareto`` is
    collected per label and per PDB.

    Figures saved through ``conv.save_fig`` (all using the same base name
    ``<output_pre>/<title1>_<title2>.txt`` plus a suffix):

    * ``_line_R`` and ``_line_A`` -- cumulative line plots of RMSD per PDB;
    * ``_distdeltas``             -- one ``gen_dist_plot`` row per comparison;
    * ``_scattdeltas``            -- ``gen_scatterplot`` panels for
      ParetoR1..10 and ParetoA1..10.

    The first figure (``fig``) is drawn but not saved: its ``save_fig`` call
    is commented out.

    The ``plot_pareto`` result must contain the labels "All", "Amber",
    "Rosetta", "ParetoR1".."ParetoR10" and "ParetoA1".."ParetoA10"; they are
    looked up with plain indexing.

    NOTE(review): this function was reconstructed from flattened text; the
    nesting shown below is inferred from data flow -- confirm against the
    original file.
    """
    #read in and rename arguments
    title1 = os.path.basename(input_dir_1)
    title2 = os.path.basename(input_dir_2)

    # presumably (decoys, natives) for each directory -- TODO confirm
    d1, n1 = scorefileparse.read_dec_nat(input_dir_1, scoretype1, repl_orig=False)
    d2, n2 = scorefileparse.read_dec_nat(input_dir_2, scoretype2, repl_orig=False)

    dec1 = scorefileparse.filter_pdbs_by_rmsd(d1, rmsd_cutoff)
    nat1 = scorefileparse.filter_pdbs_by_rmsd(n1, rmsd_cutoff)
    dec2 = scorefileparse.filter_pdbs_by_rmsd(d2, rmsd_cutoff)
    nat2 = scorefileparse.filter_pdbs_by_rmsd(n2, rmsd_cutoff)

    # nat* sets are normalised with the matching dec* set as second argument
    dec_norm1 = scorefileparse.norm_pdbs(dec1)
    nat_norm1 = scorefileparse.norm_pdbs(nat1,dec1)
    dec_norm2 = scorefileparse.norm_pdbs(dec2)
    nat_norm2 = scorefileparse.norm_pdbs(nat2,dec2)

    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = scorefileparse.pdbs_intersect([dec_norm1, nat_norm1, dec_norm2, nat_norm2])
    [dec_inter1, dec_inter2] = scorefileparse.pdbs_scores_intersect([dec_inter1, dec_inter2])
    [nat_inter1, nat_inter2] = scorefileparse.pdbs_scores_intersect([nat_inter1, nat_inter2])

    # The *_filt* / *_finter* values below are computed but not used further
    # in this function.
    dec_filt1 = scorefileparse.filter_norm_pdbs(dec_norm1)
    nat_filt1 = scorefileparse.filter_norm_pdbs(nat_norm1)
    dec_filt2 = scorefileparse.filter_norm_pdbs(dec_norm2)
    nat_filt2 = scorefileparse.filter_norm_pdbs(nat_norm2)

    [dec_finter1, dec_finter2] = scorefileparse.pdbs_scores_intersect([dec_filt1, dec_filt2])
    [nat_finter1, nat_finter2] = scorefileparse.pdbs_scores_intersect([nat_filt1, nat_filt2])

    fig, axarr = conv.create_ax(2, len(dec_inter1))

    # label -> ([pdb, ...], [rmsd, ...]) in PDB iteration order
    line_plot_data = {}

    # pdb -> {label: rmsd}
    min_naive_by_pdb = {}

    for x_ind,pdb in enumerate(sorted(dec_inter1.keys())):

        ax = axarr[x_ind, 0]

        plot_r_v_r(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2)

        ax = axarr[x_ind, 1]

        min_naive = plot_pareto(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2)

        # only referenced by the commented-out filter below
        keys_to_include = ["Amber", "Rosetta","All","Pareto10"]
        # each value unpacks into three items; only the third (rmsd) is kept
        for key, (rank1, rank2, rmsd) in min_naive.items():
            #if key not in keys_to_include:
            #    continue
            if line_plot_data.get(key) is None:
                line_plot_data[key] = ([],[])
            line_plot_data[key][0].append(pdb)
            line_plot_data[key][1].append(rmsd)
            if min_naive_by_pdb.get(pdb) is None:
                min_naive_by_pdb[pdb] = {}
            min_naive_by_pdb[pdb][key] = rmsd

    #organize data
    # positions of the "All" series ordered by ascending rmsd
    indices = list(range(len(line_plot_data["All"][1])))
    indices.sort(key=lambda x: line_plot_data["All"][1][x])

    # pdb -> rank of that pdb within the "All" series
    ranked_pdbs_by_rmsd_all = {}
    for i, x in enumerate(indices):
        ranked_pdbs_by_rmsd_all[line_plot_data["All"][0][x]] = i

    # reorder every series by the "All" ranking; each value becomes a
    # (pdbs, rmsds) tuple of tuples
    for label, (pdbs, rmsds) in line_plot_data.items():
        line_plot_data[label] = tuple(zip(*sorted(zip(pdbs,rmsds), key=lambda x: ranked_pdbs_by_rmsd_all[x[0]] )))

    # base name handed to every conv.save_fig call below
    filename = output_pre + "/" + title1 + "_" + title2 + ".txt"

    #suffix="rmsd_v_rmsd_{0}".format(rmsd_cutoff)

    #conv.save_fig(fig, filename, suffix, 7, len(dec_inter1)*3)

    #plot line plot

    # filled below but not read again in this function
    all_pareto_labels = []

    for initial in ["R","A"]:
        ordered_labels = ["All", "Amber", "Rosetta"]
        for i in range(1,11):
            ordered_labels.append("Pareto{0}{1}".format(initial,i))
            all_pareto_labels.append("Pareto{0}{1}".format(initial,i))

        lines = [ (line_plot_data[label][0], line_plot_data[label][1], label) for label in ordered_labels ]

        fig2, axarr2 = conv.create_ax(1, len(ordered_labels), shx=True, shy=True)

        # row i receives the first i+1 series, so each row adds one series
        for i, label in enumerate(ordered_labels):
            line.plot_series(axarr2[i,0], lines[0:i+1], "RMSD vs. pdb", "PDB", "RMSD", linestyle='')
            conv.add_legend(axarr2[i,0])

        # NOTE(review): assumed to run once per initial (inside this loop) -- confirm
        conv.save_fig(fig2, filename, "_line_{0}".format(initial), 10, len(ordered_labels)*5)

    #plot histogram plot

    # (top, bottom) label pairs, passed in that order to gen_dist_plot
    hist_comp = [ ("Amber","All"), ("Rosetta", "All"), ("ParetoR10", "All"), ("ParetoA10", "All")]
    hist_comp.extend([ ("ParetoR{0}".format(ind),"Rosetta") for ind in range(1,11) ])
    hist_comp.extend([ ("ParetoR{0}".format(ind),"Amber") for ind in range(1,11) ])
    hist_comp.extend([ ("ParetoA{0}".format(ind),"Rosetta") for ind in range(1,11) ])
    hist_comp.extend([ ("ParetoA{0}".format(ind), "Amber") for ind in range(1,11) ])

    fig3, axarr3 = conv.create_ax(2, len(hist_comp), shx=False, shy=False)

    for ind, (top, bottom) in enumerate(hist_comp):
        gen_dist_plot(axarr3[ind,0], axarr3[ind,1], top, bottom, min_naive_by_pdb)

    conv.save_fig(fig3, filename, "_distdeltas", 7, len(hist_comp)*5, tight=False)

    #plot scatterplot

    fig4, axarr4 = conv.create_ax(10, 2)

    # first index 0 holds the ParetoR panels, first index 1 the ParetoA panels
    for i in range(1,11):
        gen_scatterplot(axarr4[0,i-1], "ParetoR{0}".format(i), "Rosetta", "Amber", min_naive_by_pdb)
        gen_scatterplot(axarr4[1,i-1], "ParetoA{0}".format(i), "Rosetta", "Amber", min_naive_by_pdb)

    conv.save_fig(fig4, filename, "_scattdeltas", 30, 6)