def main(args):
    """Plot two score sets against each other for every shared PDB.

    Reads the two input directories with ``scorefileparse.read_dec_nat``,
    applies the RMSD cutoff, normalises the scores, and draws two panels per
    PDB: column 0 uses the intersected normalised data, column 1 uses the
    data additionally passed through ``scorefileparse.filter_norm_pdbs``.

    Args:
        args: object exposing ``input_dir_1``, ``input_dir_2``,
            ``scoretype1``, ``scoretype2`` and ``rmsd_cutoff`` attributes
            (presumably an ``argparse.Namespace`` -- confirm with the caller).

    NOTE(review): the figure is built but neither saved nor returned in the
    code visible here.
    """
    # Titles are the last path component of each input directory.
    # normpath strips a trailing separator first; without it
    # basename("runs/amber/") is '' and the titles come out empty.
    title1 = os.path.basename(os.path.normpath(args.input_dir_1))
    title2 = os.path.basename(os.path.normpath(args.input_dir_2))

    # read_dec_nat returns a pair per directory; the dec*/nat* naming
    # presumably stands for decoys/natives -- TODO confirm.
    d1, n1 = scorefileparse.read_dec_nat(
        args.input_dir_1, [], args.scoretype1, True)
    d2, n2 = scorefileparse.read_dec_nat(
        args.input_dir_2, [], args.scoretype2, True)

    dec1 = scorefileparse.filter_pdbs_by_rmsd(d1, args.rmsd_cutoff)
    nat1 = scorefileparse.filter_pdbs_by_rmsd(n1, args.rmsd_cutoff)
    dec2 = scorefileparse.filter_pdbs_by_rmsd(d2, args.rmsd_cutoff)
    nat2 = scorefileparse.filter_pdbs_by_rmsd(n2, args.rmsd_cutoff)

    # The nat sets are normalised with the matching dec set passed as the
    # second argument.
    dec_norm1 = scorefileparse.norm_pdbs(dec1)
    nat_norm1 = scorefileparse.norm_pdbs(nat1, dec1)
    dec_norm2 = scorefileparse.norm_pdbs(dec2)
    nat_norm2 = scorefileparse.norm_pdbs(nat2, dec2)

    # Unfiltered data: intersect across all four sets, then pairwise on
    # scores.
    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = (
        scorefileparse.pdbs_intersect(
            [dec_norm1, nat_norm1, dec_norm2, nat_norm2]))
    [dec_inter1, dec_inter2] = scorefileparse.pdbs_scores_intersect(
        [dec_inter1, dec_inter2])
    [nat_inter1, nat_inter2] = scorefileparse.pdbs_scores_intersect(
        [nat_inter1, nat_inter2])

    # Filtered data: filter each normalised set, then intersect pairwise.
    dec_filt1 = scorefileparse.filter_norm_pdbs(dec_norm1)
    nat_filt1 = scorefileparse.filter_norm_pdbs(nat_norm1)
    dec_filt2 = scorefileparse.filter_norm_pdbs(dec_norm2)
    nat_filt2 = scorefileparse.filter_norm_pdbs(nat_norm2)

    [dec_finter1, dec_finter2] = scorefileparse.pdbs_scores_intersect(
        [dec_filt1, dec_filt2])
    [nat_finter1, nat_finter2] = scorefileparse.pdbs_scores_intersect(
        [nat_filt1, nat_filt2])

    # One row per PDB, two columns (unfiltered | filtered).
    fig, axarr = conv.create_ax(2, len(dec_inter1))

    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):
        plot(dec_inter1, dec_inter2, nat_inter1, nat_inter2,
             axarr[x_ind, 0], pdb, title1, title2)
        plot(dec_finter1, dec_finter2, nat_finter1, nat_finter2,
             axarr[x_ind, 1], pdb, title1, title2)
def main(input_dir_1, scoretype1, input_dir_2, scoretype2, rmsd_cutoff, output_pre):
    """Compare two score sets per PDB and save line, histogram and scatter plots.

    Reads both input directories with ``scorefileparse.read_dec_nat``,
    applies the RMSD cutoff, normalises and intersects the data, then for
    every shared PDB draws an ``plot_r_v_r`` panel and a ``plot_pareto``
    panel. The per-label RMSD values returned by ``plot_pareto`` are
    collected and used for three saved figures (suffixes ``_line_R`` /
    ``_line_A``, ``_distdeltas`` and ``_scattdeltas``).

    Args:
        input_dir_1: first input directory; its last path component is used
            as the first title.
        scoretype1: score type passed to ``read_dec_nat`` for directory 1.
        input_dir_2: second input directory; its last path component is used
            as the second title.
        scoretype2: score type passed to ``read_dec_nat`` for directory 2.
        rmsd_cutoff: cutoff forwarded to ``filter_pdbs_by_rmsd``.
        output_pre: prefix (directory) under which the output name
            ``<title1>_<title2>.txt`` is built and handed to ``conv.save_fig``.

    Raises:
        KeyError: if ``plot_pareto`` never reports one of the labels used
            below ("All", "Amber", "Rosetta", "ParetoR1".."ParetoR10",
            "ParetoA1".."ParetoA10"), e.g. when no PDB is shared at all.
    """
    # normpath strips a trailing separator first; without it
    # basename("runs/amber/") is '' and the output name collapses to "_.txt".
    title1 = os.path.basename(os.path.normpath(input_dir_1))
    title2 = os.path.basename(os.path.normpath(input_dir_2))

    # read_dec_nat returns a pair per directory; the dec*/nat* naming
    # presumably stands for decoys/natives -- TODO confirm.
    d1, n1 = scorefileparse.read_dec_nat(input_dir_1, scoretype1, repl_orig=False)
    d2, n2 = scorefileparse.read_dec_nat(input_dir_2, scoretype2, repl_orig=False)

    dec1 = scorefileparse.filter_pdbs_by_rmsd(d1, rmsd_cutoff)
    nat1 = scorefileparse.filter_pdbs_by_rmsd(n1, rmsd_cutoff)
    dec2 = scorefileparse.filter_pdbs_by_rmsd(d2, rmsd_cutoff)
    nat2 = scorefileparse.filter_pdbs_by_rmsd(n2, rmsd_cutoff)

    # The nat sets are normalised with the matching dec set passed as the
    # second argument.
    dec_norm1 = scorefileparse.norm_pdbs(dec1)
    nat_norm1 = scorefileparse.norm_pdbs(nat1, dec1)
    dec_norm2 = scorefileparse.norm_pdbs(dec2)
    nat_norm2 = scorefileparse.norm_pdbs(nat2, dec2)

    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = (
        scorefileparse.pdbs_intersect(
            [dec_norm1, nat_norm1, dec_norm2, nat_norm2]))
    [dec_inter1, dec_inter2] = scorefileparse.pdbs_scores_intersect(
        [dec_inter1, dec_inter2])
    [nat_inter1, nat_inter2] = scorefileparse.pdbs_scores_intersect(
        [nat_inter1, nat_inter2])

    # NOTE(review): the filtered sets below are never read in this function.
    # The calls are kept unchanged because it is not visible from here
    # whether the scorefileparse helpers have side effects -- confirm and
    # remove if they are pure.
    dec_filt1 = scorefileparse.filter_norm_pdbs(dec_norm1)
    nat_filt1 = scorefileparse.filter_norm_pdbs(nat_norm1)
    dec_filt2 = scorefileparse.filter_norm_pdbs(dec_norm2)
    nat_filt2 = scorefileparse.filter_norm_pdbs(nat_norm2)

    [dec_finter1, dec_finter2] = scorefileparse.pdbs_scores_intersect(
        [dec_filt1, dec_filt2])
    [nat_finter1, nat_finter2] = scorefileparse.pdbs_scores_intersect(
        [nat_filt1, nat_filt2])

    # One row per PDB, two columns (plot_r_v_r | plot_pareto).
    fig, axarr = conv.create_ax(2, len(dec_inter1))

    line_plot_data = {}    # label -> (pdbs, rmsds), filled in PDB order
    min_naive_by_pdb = {}  # pdb -> {label: rmsd}

    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):
        plot_r_v_r(dec_inter1, dec_inter2, nat_inter1, nat_inter2,
                   axarr[x_ind, 0], pdb, title1, title2)
        min_naive = plot_pareto(dec_inter1, dec_inter2, nat_inter1, nat_inter2,
                                axarr[x_ind, 1], pdb, title1, title2)

        # Each value is a 3-tuple; only its last element (rmsd) is used here.
        for key, (_rank1, _rank2, rmsd) in min_naive.items():
            pdbs, rmsds = line_plot_data.setdefault(key, ([], []))
            pdbs.append(pdb)
            rmsds.append(rmsd)
            min_naive_by_pdb.setdefault(pdb, {})[key] = rmsd

    # Organize data: rank the PDBs by their "All" RMSD (ascending, ties keep
    # their original order) and reorder every series to follow that ranking.
    all_pdbs, all_rmsds = line_plot_data["All"]
    order = sorted(range(len(all_rmsds)), key=all_rmsds.__getitem__)
    ranked_pdbs_by_rmsd_all = {
        all_pdbs[idx]: rank for rank, idx in enumerate(order)
    }

    # Only values of existing keys are replaced, so iterating while
    # assigning is safe.
    for label, (pdbs, rmsds) in line_plot_data.items():
        line_plot_data[label] = tuple(zip(*sorted(
            zip(pdbs, rmsds),
            key=lambda pair: ranked_pdbs_by_rmsd_all[pair[0]])))

    filename = output_pre + "/" + title1 + "_" + title2 + ".txt"
    # NOTE: the per-PDB figure `fig` is intentionally not saved here; that
    # save call was disabled in the original code.

    # Line plots: one figure per Pareto family ("R" and "A"). Panel i shows
    # the first i + 1 series, so the series accumulate down the figure.
    for initial in ["R", "A"]:
        ordered_labels = ["All", "Amber", "Rosetta"]
        ordered_labels.extend(
            "Pareto{0}{1}".format(initial, i) for i in range(1, 11))

        lines = [
            (line_plot_data[label][0], line_plot_data[label][1], label)
            for label in ordered_labels
        ]

        fig2, axarr2 = conv.create_ax(1, len(ordered_labels), shx=True, shy=True)

        for i in range(len(ordered_labels)):
            line.plot_series(axarr2[i, 0], lines[0:i + 1],
                             "RMSD vs. pdb", "PDB", "RMSD", linestyle='')
            conv.add_legend(axarr2[i, 0])

        conv.save_fig(fig2, filename, "_line_{0}".format(initial),
                      10, len(ordered_labels) * 5)

    # Histogram plots: (top, bottom) label pairs handed to gen_dist_plot.
    hist_comp = [
        ("Amber", "All"),
        ("Rosetta", "All"),
        ("ParetoR10", "All"),
        ("ParetoA10", "All"),
    ]
    # Order: ParetoR vs Rosetta, ParetoR vs Amber, ParetoA vs Rosetta,
    # ParetoA vs Amber -- ten entries each.
    for initial in ("R", "A"):
        for baseline in ("Rosetta", "Amber"):
            hist_comp.extend(
                ("Pareto{0}{1}".format(initial, ind), baseline)
                for ind in range(1, 11))

    fig3, axarr3 = conv.create_ax(2, len(hist_comp), shx=False, shy=False)

    for ind, (top, bottom) in enumerate(hist_comp):
        gen_dist_plot(axarr3[ind, 0], axarr3[ind, 1], top, bottom, min_naive_by_pdb)

    conv.save_fig(fig3, filename, "_distdeltas", 7, len(hist_comp) * 5, tight=False)

    # Scatter plots: row 0 holds ParetoR1..10, row 1 holds ParetoA1..10.
    fig4, axarr4 = conv.create_ax(10, 2)

    for i in range(1, 11):
        gen_scatterplot(axarr4[0, i - 1], "ParetoR{0}".format(i),
                        "Rosetta", "Amber", min_naive_by_pdb)
        gen_scatterplot(axarr4[1, i - 1], "ParetoA{0}".format(i),
                        "Rosetta", "Amber", min_naive_by_pdb)

    conv.save_fig(fig4, filename, "_scattdeltas", 30, 6)