def plot_coeff_pval(ax, counters, coeffs, pvals, title=""):
    """Plot coefficients and p-values against counts and mark cutoff points.

    Coefficients are drawn in blue on ``ax``; p-values are drawn in red with
    ``secondary_y=True``.  Horizontal threshold lines are always added, and up
    to three vertical markers are added when the condition is met:

    * red   -- first counter whose p-value exceeds 0.05,
    * blue  -- first counter whose coefficient exceeds -0.1,
    * green -- one past the first counter at which five consecutive
      successive differences of ``coeffs`` are all below 0.01 in magnitude.

    Args:
        ax: Axes object forwarded to the ``scatterplot`` / ``conv`` helpers.
        counters: Count values, parallel to ``coeffs`` and ``pvals``.
        coeffs: Correlation coefficient for each counter.
        pvals: P-value for each counter.
        title: Title forwarded to both scatter plots.
    """
    pval_threshold = 0.05
    coeff_threshold = -0.1
    flat_tolerance = 0.01
    flat_window = 5

    scatterplot.draw_actual_plot(ax, counters, coeffs, 'b', title,
                                 "Counts", "PCC")
    # With secondary_y=True the helper returns a pair; the second item is the
    # axes used below for the p-value threshold line.
    _, sec_ax = scatterplot.draw_actual_plot(ax, counters, pvals, 'r', title,
                                             "Counts", "Pvals",
                                             secondary_y=True)

    conv.add_hor_line(sec_ax, y=pval_threshold, color='r')
    first_above_pval = next(
        (counter for pval, counter in zip(pvals, counters)
         if pval > pval_threshold),
        None)
    if first_above_pval is not None:
        conv.add_ver_line(ax, x=first_above_pval, color='r')

    conv.add_hor_line(ax, y=coeff_threshold, color='b')
    first_above_coeff = next(
        (counter for coeff, counter in zip(coeffs, counters)
         if coeff > coeff_threshold),
        None)
    if first_above_coeff is not None:
        conv.add_ver_line(ax, x=first_above_coeff, color='b')

    # Mark where the coefficient curve flattens: the first index at which
    # `flat_window` consecutive differences all stay below `flat_tolerance`.
    dy = np.diff(coeffs)
    flat_start = None
    # "+ 1" so that the final full window (ending on the last difference) is
    # examined as well; the range is empty when there are too few points.
    for i in range(len(dy) - flat_window + 1):
        if np.all(np.abs(dy[i:i + flat_window]) < flat_tolerance):
            flat_start = counters[i]
            break
    # None (rather than -1) is the "not found" marker so that it can never be
    # confused with a real counter value.
    if flat_start is not None:
        conv.add_ver_line(ax, x=flat_start + 1, color='g')
def draw_plot(ax, x, y, color, x_axis, y_axis, title):
    """Scatter ``y`` against ``x`` and annotate the plot with summary statistics.

    Computes the Pearson coefficient, the Spearman rho and the value of
    ``mean_abs_error(x, y)``, writes them onto ``ax`` through
    ``conv.add_text_dict`` and then calls ``scatterplot.add_x_y_line`` with
    the minimum and maximum of ``x``.

    Returns:
        list: ``[pearson_coefficient, spearman_rho, mean_abs_error]``.
    """
    # NOTE(review): the label arguments are forwarded in the order
    # (x_axis, y_axis, title); confirm that this is the order
    # draw_actual_plot expects.
    scatterplot.draw_actual_plot(ax, x, y, color, x_axis, y_axis, title,
                                 size=40)

    # Only the statistics are reported; both p-values are discarded.
    pearson, _ = pearsonr(x, y)
    spearman, _ = spearmanr(x, y)
    abs_error = mean_abs_error(x, y)

    annotations = {"PCC": pearson, "Rho": spearman, "MAE": abs_error}
    conv.add_text_dict(ax, annotations)

    lower = min(x)
    upper = max(x)
    scatterplot.add_x_y_line(ax, min_val=lower, max_val=upper)

    return [pearson, spearman, abs_error]
def find_coeff_pval(merged, ax, title):
    """Plot counts against ratios and report their Pearson correlation.

    Args:
        merged: Sequence of ``(count, ratio)`` pairs.
        ax: Axes object forwarded to the plotting helpers.
        title: Title forwarded to the scatter plot.

    Returns:
        tuple: ``(coefficient, p_value)`` as returned by ``pearsonr``.
    """
    counts = [count for count, _ in merged]
    ratios = [ratio for _, ratio in merged]

    scatterplot.draw_actual_plot(ax, counts, ratios, 'k', title,
                                 "Counts", "Ratios")

    coeff, pval = pearsonr(counts, ratios)
    annotations = {"PCC": coeff, "p-val": pval}
    conv.add_text_dict(ax, annotations)
    return coeff, pval
def gen_scatterplot(ax, x_axis, y_axis, z_axis, min_naive_by_pdb):
    """Overlay two delta series against a shared x series on ``ax``.

    ``get_dist_deltas(<axis>, "All", min_naive_by_pdb)`` is evaluated for the
    x, y and z axis names (in that order).  The y series is drawn in black
    and the z series in red, both against the x series, each labelled with
    its axis name; an x/y guide line and a legend are added afterwards.
    """
    x_deltas = get_dist_deltas(x_axis, "All", min_naive_by_pdb)
    # All deltas are computed before anything is drawn.
    overlays = (
        ('k', y_axis, get_dist_deltas(y_axis, "All", min_naive_by_pdb)),
        ('r', z_axis, get_dist_deltas(z_axis, "All", min_naive_by_pdb)),
    )

    for color, series_name, deltas in overlays:
        scatterplot.draw_actual_plot(ax, x_deltas, deltas, color, x_axis,
                                     x_axis + " Delta to Min RMSD (A)",
                                     "Delta to Min RMSD (A)",
                                     size=15, label=series_name)

    scatterplot.add_x_y_line(ax)
    conv.add_legend(ax)
def find_coeff_pval(merged, ax, title):
    """Scatter the ``(count, ratio)`` pairs in ``merged`` and correlate them.

    The two columns are plotted in black on ``ax`` with axis labels
    "Counts" and "Ratios"; the Pearson coefficient and its p-value are
    written onto the axes via ``conv.add_text_dict``.

    Returns:
        tuple: ``(coefficient, p_value)``.
    """
    count_column = [count for count, _ratio in merged]
    ratio_column = [ratio for _count, ratio in merged]
    scatterplot.draw_actual_plot(
        ax, count_column, ratio_column, 'k', title, "Counts", "Ratios")

    pearson_coeff, pearson_pval = pearsonr(count_column, ratio_column)
    conv.add_text_dict(ax, {"PCC": pearson_coeff, "p-val": pearson_pval})
    return pearson_coeff, pearson_pval
def plot(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2):
    """Scatter the decoy energies of two score sets for one PDB.

    Decoy energies from ``dec_inter1[pdb]`` (x) and ``dec_inter2[pdb]`` (y)
    are plotted, with the values from ``scorefileparse.get_rmsd`` on the
    first decoy set passed in the colour position.  A regression is then
    drawn over the decoy and native energies combined with ``+``.

    Args:
        dec_inter1, dec_inter2: Per-PDB decoy score data for the two sets.
        nat_inter1, nat_inter2: Per-PDB native score data for the two sets.
        ax: Axes object forwarded to the plotting helpers.
        pdb: Key selecting the structure to plot; also used as the title.
        title1, title2: Axis labels for the first and second score set.
    """
    d1e = scorefileparse.get_energies(dec_inter1[pdb])
    d2e = scorefileparse.get_energies(dec_inter2[pdb])
    n1e = scorefileparse.get_energies(nat_inter1[pdb])
    n2e = scorefileparse.get_energies(nat_inter2[pdb])
    rmsds = scorefileparse.get_rmsd(dec_inter1[pdb])

    scatterplot.draw_actual_plot(ax, d1e, d2e, rmsds, pdb, title1, title2)
    # Reuse the energies extracted above instead of extracting them again.
    # Assumes get_energies is deterministic and that draw_actual_plot does not
    # modify its inputs -- TODO confirm.
    scatterplot.plot_regression(ax, d1e + n1e, d2e + n2e, False)
def main(args):
    """Plot the discrimination value against the native/decoy energy gap.

    For every PDB shared by the decoy, native and discrimination data, the
    energy gap is the lowest native value minus the lowest decoy value, and
    it is plotted against the first discrimination entry of that PDB.  The
    figure is saved through ``conv.save_fig`` using ``<inp_dir>/test.txt`` as
    the file name argument and ``"disc_v_egap"`` as the suffix.

    Args:
        args: Argument vector; ``args[1]`` is the input directory and
            ``args[2]`` the score type passed to ``scorefileparse``.

    Raises:
        ValueError: If a PDB has no native entry passing the ``< 2.0``
            filter (``min`` of an empty sequence).
    """
    inp_dir = args[1]
    scoretype = args[2]

    dec, nat = scorefileparse.read_dec_nat(inp_dir, [], scoretype)
    disc = discparse.read_dir(inp_dir)

    dec_norm = scorefileparse.norm_pdbs(dec)
    nat_norm = scorefileparse.norm_pdbs(nat, dec)
    dec_inter, nat_inter, disc_inter = scorefileparse.pdbs_intersect(
        [dec_norm, nat_norm, disc])

    labels = ["Average"]
    energy_gap = [[] for _ in labels]
    avg_disc = [[] for _ in labels]

    for pdb in dec_inter.keys():
        # These values do not depend on the label, so compute them once per
        # PDB rather than once per (PDB, label) pair.
        # Presumably entry[0] is an energy and entry[1] an RMSD -- confirm
        # against scorefileparse.
        lowest_dec = min(e[0] for e in dec_inter[pdb].values())
        lowest_nat = min(n[0] for n in nat_inter[pdb].values() if n[1] < 2.0)
        gap = lowest_nat - lowest_dec
        pdb_disc = disc_inter[pdb][0]
        for ind in range(len(labels)):
            energy_gap[ind].append(gap)
            avg_disc[ind].append(pdb_disc)

    fig, axarr = conv.create_ax(len(labels), 1)
    for x_ind, label in enumerate(labels):
        ax = axarr[0, x_ind]
        scatterplot.draw_actual_plot(ax, avg_disc[x_ind], energy_gap[x_ind],
                                     [], label, "Disc", "Energy Gap")
        scatterplot.plot_regression(ax, avg_disc[x_ind], energy_gap[x_ind],
                                    False, False)

    filename = inp_dir + "/test.txt"
    conv.save_fig(fig, filename, "disc_v_egap", len(labels) * 3, 4)
def plot_coeff_pval(ax, counters, coeffs, pvals, title=""):
    """Plot coefficients and p-values against counts and mark cutoff points.

    Coefficients are drawn in blue on ``ax``; p-values are drawn in red with
    ``secondary_y=True``.  Horizontal threshold lines are always added, and up
    to three vertical markers are added when the condition is met:

    * red   -- first counter whose p-value exceeds 0.05,
    * blue  -- first counter whose coefficient exceeds -0.1,
    * green -- one past the first counter at which five consecutive
      successive differences of ``coeffs`` are all below 0.01 in magnitude.

    Args:
        ax: Axes object forwarded to the ``scatterplot`` / ``conv`` helpers.
        counters: Count values, parallel to ``coeffs`` and ``pvals``.
        coeffs: Correlation coefficient for each counter.
        pvals: P-value for each counter.
        title: Title forwarded to both scatter plots.
    """
    pval_threshold = 0.05
    coeff_threshold = -0.1
    flat_tolerance = 0.01
    flat_window = 5

    scatterplot.draw_actual_plot(ax, counters, coeffs, 'b', title,
                                 "Counts", "PCC")
    # With secondary_y=True the helper returns a pair; the second item is the
    # axes used below for the p-value threshold line.
    _, sec_ax = scatterplot.draw_actual_plot(ax, counters, pvals, 'r', title,
                                             "Counts", "Pvals",
                                             secondary_y=True)

    conv.add_hor_line(sec_ax, y=pval_threshold, color='r')
    first_above_pval = next(
        (counter for pval, counter in zip(pvals, counters)
         if pval > pval_threshold),
        None)
    if first_above_pval is not None:
        conv.add_ver_line(ax, x=first_above_pval, color='r')

    conv.add_hor_line(ax, y=coeff_threshold, color='b')
    first_above_coeff = next(
        (counter for coeff, counter in zip(coeffs, counters)
         if coeff > coeff_threshold),
        None)
    if first_above_coeff is not None:
        conv.add_ver_line(ax, x=first_above_coeff, color='b')

    # Mark where the coefficient curve flattens: the first index at which
    # `flat_window` consecutive differences all stay below `flat_tolerance`.
    dy = np.diff(coeffs)
    flat_start = None
    # "+ 1" so that the final full window (ending on the last difference) is
    # examined as well; the range is empty when there are too few points.
    for i in range(len(dy) - flat_window + 1):
        if np.all(np.abs(dy[i:i + flat_window]) < flat_tolerance):
            flat_start = counters[i]
            break
    # None (rather than -1) is the "not found" marker so that it can never be
    # confused with a real counter value.
    if flat_start is not None:
        conv.add_ver_line(ax, x=flat_start + 1, color='g')
def plot(disc_metrics_1, disc_metrics_2, title1, title2, output_pre, add_slash=True):
    """Draw one scatter panel per metric comparing two sets of per-PDB metrics.

    The metric names are taken from the entry of the first PDB (in sorted
    order) of ``disc_metrics_1``.  For each metric, the values from
    ``disc_metrics_1`` (x) are plotted against those from ``disc_metrics_2``
    (y) over all PDBs, followed by a regression.  The figure is then saved
    through ``conv.save_fig``.

    Args:
        disc_metrics_1, disc_metrics_2: ``{pdb: {metric_name: value}}``.
        title1, title2: Axis labels; also used to build the file name.
        output_pre: Prefix of the file name handed to ``conv.save_fig``.
        add_slash: When true, a "/" is inserted between ``output_pre`` and
            the rest of the file name.
    """
    pdbs = sorted(disc_metrics_1.keys())
    reference_entry = disc_metrics_1[pdbs[0]]
    n_metrics = len(reference_entry)

    fig, axarr = conv.create_ax(n_metrics, 1)

    for column, metric_name in enumerate(reference_entry.keys()):
        ax = axarr[0, column]
        first_values = [disc_metrics_1[pdb][metric_name] for pdb in pdbs]
        second_values = [disc_metrics_2[pdb][metric_name] for pdb in pdbs]
        scatterplot.draw_actual_plot(ax, first_values, second_values, 'b',
                                     metric_name, title1, title2,
                                     size=20, edgecolors='k')
        scatterplot.plot_regression(ax, first_values, second_values, False)

    separator = "/" if add_slash else ""
    filename = output_pre + separator + title1 + "_" + title2 + ".txt"

    conv.save_fig(fig, filename, "disc_v_disc", n_metrics * 3, 3, size=9)
def plot_pareto(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2):
    """Plot energy rank against energy rank for one PDB, coloured by category.

    Energies from both decoy sets are converted to ranks with ``gen_ranks``,
    the non-dominated points are found with ``cull(pts, dominates)``, and each
    point is coloured by whether it is among those points and/or has a rank
    of at most 10 in either set.  A set of representative points is selected
    and returned, and a tab-separated summary line is printed.

    The variable names below suggest the first set is Rosetta and the second
    Amber; that naming is kept but is not checked here.

    Args:
        dec_inter1, dec_inter2: Per-PDB decoy score data for the two sets.
        nat_inter1, nat_inter2: Unused; kept for signature compatibility.
        ax: Axes object forwarded to ``scatterplot.draw_actual_plot``.
        pdb: Key selecting the structure; also used as the plot title.
        title1, title2: Axis labels.

    Returns:
        dict: Maps a label to a point.  "All", "Rosetta" and "Amber" hold
        ``(rank1, rank2, rmsd)`` tuples; "ParetoR1".."ParetoR10" and
        "ParetoA1".."ParetoA10" hold whatever ``find_lowest_point`` returns
        for the candidate list (unpacked below as a 3-item sequence).
    """
    top_n = 10

    d1e = scorefileparse.get_energies(dec_inter1[pdb])
    d2e = scorefileparse.get_energies(dec_inter2[pdb])
    r1 = scorefileparse.get_rmsd(dec_inter1[pdb])

    d1e_ranks = gen_ranks(d1e)
    d2e_ranks = gen_ranks(d2e)

    # Materialised as a list because it is iterated several times below.
    pts = [list(pair) for pair in zip(d1e_ranks, d2e_ranks)]

    cleared, dominated = cull(pts, dominates)
    # Maps rank1 -> rank2 for every non-dominated point.
    cleared_d = dict(cleared)

    # Lowest-RMSD point overall; the start value acts as an upper bound.
    best_overall = (1000, 1000, 60)
    # One pass suffices: classification depends only on the point itself.
    color = []
    for (e1, e2), r in zip(pts, r1):
        if r < best_overall[2]:
            best_overall = (e1, e2, r)

        on_front = cleared_d.get(e1) == e2
        top_first = e1 <= top_n
        top_second = e2 <= top_n
        if on_front and top_first and top_second:
            color.append((0, 0, 0))        # black
        elif on_front and top_first:
            color.append((161, 8, 8))      # dark red
        elif on_front and top_second:
            color.append((0, 153, 153))    # dark cyan
        elif top_first:
            color.append((255, 51, 51))    # red
        elif top_second:
            color.append((51, 255, 255))   # cyan
        elif on_front:
            color.append((128, 128, 128))  # gray
        else:
            color.append((255, 255, 51))   # yellow

    # Points with the best rank in each individual set.
    rosetta_min_e1 = min([e1 for e1, e2 in pts if e1 <= top_n])
    amber_min_e2 = min([e2 for e1, e2 in pts if e2 <= top_n])
    pts_r = list(zip(d1e_ranks, d2e_ranks, r1))

    min_naive = {
        "All": best_overall,
        "Rosetta": [(rosetta, amber, r) for rosetta, amber, r in pts_r
                    if rosetta_min_e1 == rosetta][0],
        "Amber": [(rosetta, amber, r) for rosetta, amber, r in pts_r
                  if amber_min_e2 == amber][0],
    }

    # For weights 0.1 .. 1.0, pick the non-dominated point(s) minimising a
    # weighted rank sum, down-weighting the first set (ParetoR<i>) or the
    # second set (ParetoA<i>).
    front = list(cleared_d.items())
    for i in range(1, 11):
        w = i * 0.1

        pareto_equal_min = min([e1 * w + e2 for e1, e2 in front])
        list_pts = [(rosetta, amber, r) for rosetta, amber, r in pts_r
                    if amber + rosetta * w == pareto_equal_min]
        min_naive["ParetoR{0}".format(i)] = find_lowest_point(list_pts)

        pareto_equal_min = min([e1 + e2 * w for e1, e2 in front])
        list_pts = [(rosetta, amber, r) for rosetta, amber, r in pts_r
                    if amber * w + rosetta == pareto_equal_min]
        min_naive["ParetoA{0}".format(i)] = find_lowest_point(list_pts)

    # Convert 0-255 RGB triples to the 0-1 range.
    color_converted = [(c[0] / 255.0, c[1] / 255.0, c[2] / 255.0) for c in color]
    scatterplot.draw_actual_plot(ax, d1e_ranks, d2e_ranks, color_converted,
                                 pdb, title1, title2, cm="summer", size=20)

    # Summary line: "<pdb>\t2" followed by "<label> <rmsd>" for chosen labels.
    keys_to_include = ["All", "Amber", "Rosetta", "ParetoR10"]
    fields = ["{0}\t2".format(pdb)]
    for k, (e1, e2, r) in min_naive.items():
        if k not in keys_to_include:
            continue
        fields.append("{0} {1:.1f}".format(k, r))
    # Parenthesised single argument prints identically on Python 2 and 3.
    print("\t".join(fields))

    return min_naive
def plot_r_v_r(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2):
    """Scatter the RMSDs of one PDB from two decoy sets against each other.

    Both RMSD lists come from ``scorefileparse.get_rmsd`` called with
    ``sort_by="energy"``.  ``nat_inter1`` and ``nat_inter2`` are accepted but
    not used.
    """
    first_rmsds = scorefileparse.get_rmsd(dec_inter1[pdb], sort_by="energy")
    second_rmsds = scorefileparse.get_rmsd(dec_inter2[pdb], sort_by="energy")
    scatterplot.draw_actual_plot(
        ax, first_rmsds, second_rmsds, 'k', pdb, title1, title2)
def main(list_input_dirs, energies_names, output_pre):
    """Scan pairs of weights for two score sets and plot the resulting disc values.

    For every PDB shared by both inputs, each score set is scaled by
    ``2 ** k`` for ``k`` in ``-10, -8, ..., 8``, the two weighted sets are
    merged, and ``disc.given_data_run_disc`` is evaluated.  One panel per PDB
    shows the first returned value over the (weight_1, weight_2) grid on
    log2 axes; a final panel shows the per-PDB mean.  The figure is saved
    through ``conv.save_fig`` with the suffix ``"_weights_v_disc"``.

    Args:
        list_input_dirs: Two ``(input_dir, scoretype)`` entries.
        energies_names: Column names per score type, either as rows of
            ``[scoretype, name, name, ...]`` or as a mapping
            ``{scoretype: names}``.
        output_pre: Output prefix forwarded to ``conv.save_fig``.

    Raises:
        KeyError: If a score type has no entry in ``energies_names``.
    """
    inp_dir1, scoretype1 = list_input_dirs[0][0], list_input_dirs[0][1]
    inp_dir2, scoretype2 = list_input_dirs[1][0], list_input_dirs[1][1]

    # The row form cannot be indexed by score type, so look the columns up in
    # a mapping; a mapping that is passed in is used as it is.
    if isinstance(energies_names, dict):
        column_dict = energies_names
    else:
        column_dict = {}
        for row in energies_names:
            column_dict[row[0]] = row[1:]

    dec1, nat1 = scorefileparse.read_dec_nat(inp_dir1, column_dict[scoretype1], scoretype1)
    dec2, nat2 = scorefileparse.read_dec_nat(inp_dir2, column_dict[scoretype2], scoretype2)

    dec_inter1, nat_inter1, dec_inter2, nat_inter2 = scorefileparse.pdbs_intersect(
        [dec1, nat1, dec2, nat2])

    disc_divs = (1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 6.0)
    exponents = range(-10, 10, 2)
    n_pdbs = len(dec_inter1)

    # Running per-weight-pair totals over all PDBs (Counter.update adds).
    sum_discs = Counter()

    fig, axarr = conv.create_ax(1, n_pdbs + 1, True, True)

    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):
        discs_per_pdb = {}
        for w_1 in exponents:
            for w_2 in exponents:
                weight_1 = 2 ** w_1
                weight_2 = 2 ** w_2

                weighted_1 = scorefileparse.weight_dict(dec_inter1[pdb], weight_1)
                weighted_2 = scorefileparse.weight_dict(dec_inter2[pdb], weight_2)
                merged = scorefileparse.merge_dicts([weighted_1, weighted_2])
                ddata = scorefileparse.convert_disc(merged)

                # A fresh list per call, in case the callee modifies it.
                disc_value, _, _ = disc.given_data_run_disc(
                    ddata, True, list(disc_divs))
                discs_per_pdb[(weight_1, weight_2)] = disc_value

        ordered = sorted(discs_per_pdb.items())
        x = [w1 for (w1, w2), v in ordered]
        y = [w2 for (w1, w2), v in ordered]
        d = [v for (w1, w2), v in ordered]

        ax = axarr[x_ind, 0]
        # NOTE(review): newer Matplotlib releases name these keywords `base`
        # instead of `basex` / `basey`; confirm against the installed version.
        ax.set_xscale('log', basex=2)
        ax.set_yscale('log', basey=2)

        s = scatterplot.draw_actual_plot(ax, x, y, d, pdb, scoretype1, scoretype2, 'bwr')
        fig.colorbar(s, ax=ax)
        scatterplot.add_x_y_line(ax, 0, 600)

        sum_discs.update(discs_per_pdb)

    # Final panel: mean over all PDBs for every weight pair.
    ordered = sorted(sum_discs.items())
    x = [w1 for (w1, w2), total in ordered]
    y = [w2 for (w1, w2), total in ordered]
    d = [total / n_pdbs for (w1, w2), total in ordered]

    ax = axarr[n_pdbs, 0]
    ax.set_xscale('log', basex=2)
    ax.set_yscale('log', basey=2)

    s = scatterplot.draw_actual_plot(ax, x, y, d, "All", scoretype1, scoretype2, cm='bwr')
    fig.colorbar(s, ax=ax)
    scatterplot.add_x_y_line(ax, 0, 600)

    conv.save_fig(fig, output_pre, "_weights_v_disc", 3, n_pdbs * 3)
def plot_corr(c_1, c_2, ax, x_axis, y_axis):
    """Scatter ``c_2`` against ``c_1`` in blue and add a regression.

    Args:
        c_1, c_2: The two value sequences to compare.
        ax: Axes object forwarded to the plotting helpers.
        x_axis, y_axis: Axis labels forwarded to the scatter plot.
    """
    plot_title = "Counts vs. Counts"
    scatterplot.draw_actual_plot(
        ax, c_1, c_2, 'b', plot_title, x_axis, y_axis, size=10)
    scatterplot.plot_regression(ax, c_1, c_2)