def plot_coeff_pval(ax, counters, coeffs, pvals, title=""):
    """Plot coefficients and p-values against counts and mark three cutoffs.

    The coefficients (labelled "PCC") are drawn in blue on ``ax``.  The
    p-values are drawn in red through a second ``draw_actual_plot`` call made
    with ``secondary_y=True``; the second value that call returns is used as
    the axes for the p-value threshold line.

    Vertical markers added to ``ax`` when the condition is met:

    * red   -- first count whose p-value is above 0.05
    * blue  -- first count whose coefficient is above -0.1
    * green -- one past the first count at which five successive first
      differences of ``coeffs`` are all below 0.01 in magnitude

    Args:
        ax: Axes object handed to the ``scatterplot`` / ``conv`` helpers.
        counters: Count values, parallel to ``coeffs`` and ``pvals``.
        coeffs: One coefficient per entry of ``counters``.
        pvals: One p-value per entry of ``counters``.
        title: Title forwarded to ``draw_actual_plot``.
    """
    flat_window = 5
    flat_tolerance = 0.01

    scatterplot.draw_actual_plot(ax, counters, coeffs, 'b', title, "Counts",
                                 "PCC")
    _, sec_ax = scatterplot.draw_actual_plot(ax,
                                             counters,
                                             pvals,
                                             'r',
                                             title,
                                             "Counts",
                                             "Pvals",
                                             secondary_y=True)

    # p-value threshold and the first count that crosses it
    conv.add_hor_line(sec_ax, y=0.05, color='r')
    above_pval = [c for pval, c in zip(pvals, counters) if pval > 0.05]
    if above_pval:
        conv.add_ver_line(ax, x=above_pval[0], color='r')

    # coefficient threshold and the first count that crosses it
    conv.add_hor_line(ax, y=-0.1, color='b')
    above_coeff = [c for coeff, c in zip(coeffs, counters) if coeff > -0.1]
    if above_coeff:
        conv.add_ver_line(ax, x=above_coeff[0], color='b')

    # Flat-curve cutoff: first position where `flat_window` successive first
    # differences are all smaller than `flat_tolerance`.  The range includes
    # the final window (start index len(dy) - flat_window), which the previous
    # bound skipped.
    dy = np.diff(coeffs)
    flat_start = None
    for i in range(len(dy) - flat_window + 1):
        if all(abs(step) < flat_tolerance for step in dy[i:i + flat_window]):
            flat_start = counters[i]
            break
    # None (rather than -1) is the "not found" marker so that it can never be
    # confused with a real counter value.
    if flat_start is not None:
        conv.add_ver_line(ax, x=flat_start + 1, color='g')
def draw_plot(ax, x, y, color, x_axis, y_axis, title):
    """Scatter ``y`` against ``x`` and annotate the axes with three statistics.

    After drawing the points, the Pearson coefficient, the Spearman rho and
    the value returned by ``mean_abs_error`` are written onto ``ax`` through
    ``conv.add_text_dict``, and ``scatterplot.add_x_y_line`` is called with
    the minimum and maximum of ``x``.

    Returns:
        ``[pearson_coefficient, spearman_rho, mean_abs_error_value]``.
    """
    # NOTE(review): the labels are passed in the order (x_axis, y_axis, title);
    # confirm this matches draw_actual_plot's positional parameters.
    scatterplot.draw_actual_plot(
        ax, x, y, color, x_axis, y_axis, title, size=40)

    # Only the statistic itself is kept from each test; p-values are ignored.
    pearson_coeff, _ = pearsonr(x, y)
    spearman_rho, _ = spearmanr(x, y)
    abs_error = mean_abs_error(x, y)

    annotations = {"PCC": pearson_coeff, "Rho": spearman_rho, "MAE": abs_error}
    conv.add_text_dict(ax, annotations)

    lower, upper = min(x), max(x)
    scatterplot.add_x_y_line(ax, min_val=lower, max_val=upper)

    return [pearson_coeff, spearman_rho, abs_error]
def find_coeff_pval(merged, ax, title):
    """Plot the pairs in ``merged`` and return their Pearson statistics.

    ``merged`` is iterated twice as a collection of two-item entries; the
    first items are plotted on the "Counts" axis and the second items on the
    "Ratios" axis.  The coefficient and p-value from ``pearsonr`` are written
    onto ``ax`` and returned.

    Returns:
        ``(coefficient, p_value)``.
    """
    counts = [count for count, _ in merged]
    ratios = [ratio for _, ratio in merged]

    scatterplot.draw_actual_plot(
        ax, counts, ratios, 'k', title, "Counts", "Ratios")

    pcc, p_value = pearsonr(counts, ratios)
    conv.add_text_dict(ax, {"PCC": pcc, "p-val": p_value})
    return pcc, p_value
def gen_scatterplot(ax, x_axis, y_axis, z_axis, min_naive_by_pdb):
    """Overlay two delta series against a third on the same axes.

    ``get_dist_deltas(<axis>, "All", min_naive_by_pdb)`` is evaluated for
    ``x_axis``, ``y_axis`` and ``z_axis`` in that order.  The ``y_axis``
    series is drawn in black and the ``z_axis`` series in red, both against
    the ``x_axis`` series, each labelled with its own axis name.  A line from
    ``scatterplot.add_x_y_line`` and a legend are added afterwards.
    """
    x_deltas = get_dist_deltas(x_axis, "All", min_naive_by_pdb)

    # Compute every series before any drawing happens.
    overlays = [
        (axis_name, shade, get_dist_deltas(axis_name, "All", min_naive_by_pdb))
        for axis_name, shade in ((y_axis, 'k'), (z_axis, 'r'))
    ]

    for axis_name, shade, deltas in overlays:
        scatterplot.draw_actual_plot(
            ax, x_deltas, deltas, shade, x_axis,
            x_axis + " Delta to Min RMSD (A)", "Delta to Min RMSD (A)",
            size=15, label=axis_name)

    scatterplot.add_x_y_line(ax)
    conv.add_legend(ax)
def find_coeff_pval(merged, ax, title):
    """Scatter the two columns of ``merged`` and report their Pearson result.

    Each entry of ``merged`` is unpacked into two values.  The first values
    go on the axis labelled "Counts", the second on the axis labelled
    "Ratios".  The ``pearsonr`` coefficient and p-value are added to ``ax``
    as text.

    Returns:
        The ``(coefficient, p_value)`` pair from ``pearsonr``.
    """
    first_column = [left for left, _right in merged]
    second_column = [right for _left, right in merged]

    scatterplot.draw_actual_plot(ax, first_column, second_column, 'k', title,
                                 "Counts", "Ratios")

    coefficient, p_value = pearsonr(first_column, second_column)
    text_entries = {"PCC": coefficient, "p-val": p_value}
    conv.add_text_dict(ax, text_entries)
    return coefficient, p_value
def plot(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2):
    """Plot the decoy energies of two score sets for one PDB, with a regression.

    The decoy energies of ``dec_inter1[pdb]`` and ``dec_inter2[pdb]`` are
    drawn against each other, with the ``get_rmsd`` values of
    ``dec_inter1[pdb]`` passed in the color position of ``draw_actual_plot``.
    The regression is fitted over decoy and native energies combined.

    Args:
        dec_inter1, dec_inter2: Mappings indexed by ``pdb`` holding decoy data.
        nat_inter1, nat_inter2: Mappings indexed by ``pdb`` holding native data.
        ax: Axes object handed to the ``scatterplot`` helpers.
        pdb: Key selecting the structure to plot; also used as the title.
        title1, title2: Axis labels forwarded to ``draw_actual_plot``.
    """
    # Parse each energy set once and reuse it for both the scatter and the
    # regression (previously every set was parsed a second time).
    d1e = scorefileparse.get_energies(dec_inter1[pdb])
    d2e = scorefileparse.get_energies(dec_inter2[pdb])
    n1e = scorefileparse.get_energies(nat_inter1[pdb])
    n2e = scorefileparse.get_energies(nat_inter2[pdb])

    r = scorefileparse.get_rmsd(dec_inter1[pdb])

    scatterplot.draw_actual_plot(ax, d1e, d2e, r, pdb, title1, title2)

    # NOTE(review): `+` joins decoys and natives only if get_energies returns
    # lists -- confirm the return type.
    scatterplot.plot_regression(ax, d1e + n1e, d2e + n2e, False)
def main(args):
    """Plot the native/decoy energy gap against a per-PDB disc value.

    Args:
        args: Indexable argument list; ``args[1]`` is the input directory and
            ``args[2]`` the score type passed to ``read_dec_nat``.

    For every PDB present in all three data sets, the gap is the lowest
    first-field value among native entries whose second field is below 2.0
    minus the lowest first-field value among decoy entries.  The figure is
    saved through ``conv.save_fig`` using ``<inp_dir>/test.txt`` as the file
    name and ``"disc_v_egap"`` as the suffix.

    Raises:
        ValueError: From ``min`` if a PDB has no native entry whose second
            field is below 2.0 (or no decoy entries at all).
    """
    inp_dir = args[1]
    scoretype = args[2]

    dec, nat = scorefileparse.read_dec_nat(inp_dir, [], scoretype)
    disc = discparse.read_dir(inp_dir)

    dec_norm = scorefileparse.norm_pdbs(dec)
    nat_norm = scorefileparse.norm_pdbs(nat, dec)

    dec_inter, nat_inter, disc_inter = scorefileparse.pdbs_intersect(
        [dec_norm, nat_norm, disc])

    # Only the "Average" panel is enabled; the other candidate labels were
    # "1.0", "1.5", "2.0", "2.5", "3.0", "4.0" and "6.0".
    labels = ["Average"]
    energy_gap = [[] for _ in labels]
    avg_disc = [[] for _ in labels]

    for pdb in dec_inter:
        # These do not depend on the label, so compute them once per PDB.
        # NOTE(review): entry[1] is presumably the RMSD -- confirm.
        lowest_dec = min(e[0] for e in dec_inter[pdb].values())
        lowest_nat = min(
            n[0] for n in nat_inter[pdb].values() if n[1] < 2.0)
        gap = lowest_nat - lowest_dec

        for ind in range(len(labels)):
            energy_gap[ind].append(gap)
            avg_disc[ind].append(disc_inter[pdb][0])

    fig, axarr = conv.create_ax(len(labels), 1)

    for x_ind, label in enumerate(labels):
        ax = axarr[0, x_ind]

        scatterplot.draw_actual_plot(ax, avg_disc[x_ind], energy_gap[x_ind],
                                     [], label, "Disc", "Energy Gap")
        scatterplot.plot_regression(ax, avg_disc[x_ind], energy_gap[x_ind],
                                    False, False)

    filename = inp_dir + "/test.txt"

    conv.save_fig(fig, filename, "disc_v_egap", len(labels) * 3, 4)
def plot_coeff_pval(ax, counters, coeffs, pvals, title=""):
    """Draw coefficient and p-value curves over counts, plus cutoff markers.

    ``coeffs`` is plotted in blue (y label "PCC") on ``ax``.  ``pvals`` is
    plotted in red (y label "Pvals") by a ``draw_actual_plot`` call with
    ``secondary_y=True``, whose second return value receives the horizontal
    0.05 line.

    Vertical lines drawn on ``ax``, each only if its condition is ever met:

    * red at the first count with p-value > 0.05
    * blue at the first count with coefficient > -0.1
    * green at (count + 1) for the first count where five consecutive first
      differences of ``coeffs`` each have magnitude below 0.01

    Args:
        ax: Axes object handed to the ``scatterplot`` / ``conv`` helpers.
        counters: Count values, parallel to ``coeffs`` and ``pvals``.
        coeffs: One coefficient per entry of ``counters``.
        pvals: One p-value per entry of ``counters``.
        title: Title forwarded to ``draw_actual_plot``.
    """
    window = 5
    tolerance = 0.01

    scatterplot.draw_actual_plot(ax, counters, coeffs, 'b', title, "Counts", "PCC")
    _, sec_ax = scatterplot.draw_actual_plot(ax, counters, pvals, 'r', title, "Counts", "Pvals", secondary_y=True)

    conv.add_hor_line(sec_ax, y=0.05, color='r')
    pval_hits = [counter for pval, counter in zip(pvals, counters) if pval > 0.05]
    if pval_hits:
        conv.add_ver_line(ax, x=pval_hits[0], color='r')

    conv.add_hor_line(ax, y=-0.1, color='b')
    coeff_hits = [counter for coeff, counter in zip(coeffs, counters) if coeff > -0.1]
    if coeff_hits:
        conv.add_ver_line(ax, x=coeff_hits[0], color='b')

    # Plot the coefficient cutoff where the first difference stays below
    # `tolerance` for `window` successive points.  The upper bound is
    # len(dy) - window + 1 so that the last possible window is examined too
    # (the earlier bound stopped one window short).
    dy = np.diff(coeffs)
    plateau = None
    for start in range(len(dy) - window + 1):
        if all(abs(delta) < tolerance for delta in dy[start:start + window]):
            plateau = counters[start]
            break
    # `None` marks "no plateau"; a numeric marker could collide with a
    # legitimate counter value.
    if plateau is not None:
        conv.add_ver_line(ax, x=plateau + 1, color='g')
def plot(disc_metrics_1, disc_metrics_2, title1, title2, output_pre, add_slash=True):
    """Plot each metric of one data set against the same metric of another.

    One panel is created per metric found under the first (sorted) PDB key of
    ``disc_metrics_1``.  In each panel, every PDB contributes one point whose
    x value comes from ``disc_metrics_1`` and y value from ``disc_metrics_2``;
    a regression is drawn on top.

    Args:
        disc_metrics_1, disc_metrics_2: Mappings ``pdb -> {metric: value}``.
            Every PDB and metric read from the first must exist in the second.
        title1, title2: Axis labels; also joined with "_" in the file name.
        output_pre: Prefix of the output file name.
        add_slash: When true, "/" is inserted between ``output_pre`` and the
            rest of the file name.

    Raises:
        IndexError: If ``disc_metrics_1`` is empty.
    """
    pdbs = sorted(disc_metrics_1)
    # Read the metric names once so the panel count and the loop agree.
    metric_names = list(disc_metrics_1[pdbs[0]].keys())
    n_metrics = len(metric_names)

    fig, axarr = conv.create_ax(n_metrics, 1)

    for x_ind, metric_name in enumerate(metric_names):
        ax = axarr[0, x_ind]
        x = [disc_metrics_1[pdb][metric_name] for pdb in pdbs]
        y = [disc_metrics_2[pdb][metric_name] for pdb in pdbs]

        scatterplot.draw_actual_plot(ax, x, y, 'b', metric_name, title1, title2, size=20, edgecolors='k')
        scatterplot.plot_regression(ax, x, y, False)

    # Space-indented throughout: the original `else` branch used a tab, which
    # Python 3 rejects with TabError.
    if add_slash:
        filename = output_pre + "/" + title1 + "_" + title2 + ".txt"
    else:
        filename = output_pre + title1 + "_" + title2 + ".txt"
    suffix = "disc_v_disc"

    conv.save_fig(fig, filename, suffix, n_metrics * 3, 3, size=9)
def plot_pareto(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2):
    """Plot the energy ranks of two score sets for one PDB and pick minima.

    The energies of ``dec_inter1[pdb]`` and ``dec_inter2[pdb]`` are turned
    into ranks with ``gen_ranks`` and every decoy becomes a point
    ``[rank1, rank2]``.  ``cull(pts, dominates)`` selects the retained points;
    these are stored as a ``rank1 -> rank2`` dict.  Points are colored by
    whether they are retained and whether either rank is at most 10, then
    drawn on ``ax``.

    A tab-separated summary line (``pdb``, the literal ``2``, then the third
    field of the "All", "Amber", "Rosetta" and "ParetoR10" entries) is
    printed.

    Args:
        dec_inter1, dec_inter2: Mappings indexed by ``pdb`` holding decoy data.
        nat_inter1, nat_inter2: Unused here.
        ax: Axes object handed to ``scatterplot.draw_actual_plot``.
        pdb: Key selecting the structure; also used as the plot title.
        title1, title2: Axis labels forwarded to ``draw_actual_plot``.

    Returns:
        dict with keys "All", "Rosetta", "Amber", "ParetoR1".."ParetoR10" and
        "ParetoA1".."ParetoA10".  The first three map to
        ``(rank1, rank2, r)`` tuples; the Pareto entries hold whatever
        ``find_lowest_point`` returns (unpacked as three values below).

    Raises:
        ValueError: From ``min`` if no point has a rank of at most 10 in one
            of the score sets, or if ``cull`` retains no points.
    """
    d1e = scorefileparse.get_energies(dec_inter1[pdb])
    d2e = scorefileparse.get_energies(dec_inter2[pdb])

    r1 = scorefileparse.get_rmsd(dec_inter1[pdb])

    d1e_ranks = gen_ranks(d1e)
    d2e_ranks = gen_ranks(d2e)

    # A real list (not a lazy map/zip object): pts is traversed several times.
    pts = [list(pair) for pair in zip(d1e_ranks, d2e_ranks)]

    cleared, _dominated = cull(pts, dominates)

    # rank1 -> rank2 for the points retained by cull(); a point is "retained"
    # when cleared_d.get(rank1) == rank2.
    cleared_d = dict(cleared)

    min_naive = { "All" : [], "Rosetta" : [], "Amber" : []  }

    # Point with the smallest r seen so far; starts from a placeholder that
    # any decoy with r below 60 replaces.
    lowest_r_point = (1000, 1000, 60)

    color = []
    for (e1, e2), r in zip(pts, r1):
        if r < lowest_r_point[2]:
            lowest_r_point = (e1, e2, r)

        retained = cleared_d.get(e1) == e2
        top_1 = e1 <= 10
        top_2 = e2 <= 10

        # RGB on a 0-255 scale; converted to 0-1 before plotting.
        if retained and top_1 and top_2:
            color.append((0, 0, 0)) #black
        elif retained and top_1:
            color.append((161, 8, 8)) #dark red
        elif retained and top_2:
            color.append((0, 153, 153)) #dark cyan
        elif top_1:
            color.append((255, 51, 51)) #red
        elif top_2:
            color.append((51, 255, 255)) #cyan
        elif retained:
            color.append((128, 128, 128)) #gray
        else:
            color.append((255,255,51)) #yellow

    #assign min_naive
    rosetta_min_e1 = min([ e1 for e1,e2 in pts if e1 <= 10 ])
    amber_min_e2 = min([ e2 for e1,e2 in pts if e2 <= 10 ])

    # Materialised because it is scanned once per key below; a bare zip()
    # would be exhausted after the first scan on Python 3.
    pts_r = list(zip(d1e_ranks, d2e_ranks, r1))

    min_naive["All"] = lowest_r_point
    min_naive["Rosetta"] = [ (rosetta,amber,r) for rosetta,amber,r in pts_r if rosetta_min_e1 == rosetta ][0]
    min_naive["Amber"] = [ (rosetta,amber,r) for rosetta,amber,r in pts_r if amber_min_e2 == amber ][0]

    # For weights 0.1 .. 1.0: find the retained point(s) minimising a weighted
    # rank sum, down-weighting the first rank ("ParetoR<i>") or the second
    # rank ("ParetoA<i>"), and let find_lowest_point choose among ties.
    for i in range(1, 11):
        w = i * 0.1
        key = "ParetoR{0}".format(i)
        pareto_equal_min = min([ e1*w+e2 for e1,e2 in cleared_d.items() ])
        list_pts =  [ (rosetta,amber,r) for rosetta,amber, r in pts_r if amber+rosetta*w == pareto_equal_min ]
        min_naive[key] = find_lowest_point( list_pts )
        key = "ParetoA{0}".format(i)
        pareto_equal_min = min([ e1+e2*w for e1,e2 in cleared_d.items() ])
        list_pts =  [ (rosetta,amber,r) for rosetta,amber, r in pts_r if amber*w+rosetta == pareto_equal_min ]
        min_naive[key] = find_lowest_point( list_pts )

    color_converted = [ (c[0]/255.0, c[1]/255.0, c[2]/255.0) for c in color ]

    scatterplot.draw_actual_plot(ax, d1e_ranks, d2e_ranks, color_converted, pdb, title1, title2, cm="summer", size=20)

    s = "{0}\t2".format(pdb)
    keys_to_include = ["All", "Amber", "Rosetta", "ParetoR10"]
    for k, (e1, e2, r) in min_naive.items():
        if k not in keys_to_include:
            continue
        s += "\t{0}".format(k)
        s += " {0:.1f}".format(r)

    # Parenthesised single argument: prints the same text on Python 2 and 3.
    print(s)

    return min_naive
def plot_r_v_r(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2):
    """Plot the energy-sorted RMSD series of two decoy sets against each other.

    ``scorefileparse.get_rmsd(..., sort_by="energy")`` is evaluated for
    ``dec_inter1[pdb]`` and then ``dec_inter2[pdb]``; the two results are
    drawn in black with ``pdb`` as the title.  ``nat_inter1`` and
    ``nat_inter2`` are accepted but not used.
    """
    rmsd_first, rmsd_second = [
        scorefileparse.get_rmsd(decoys[pdb], sort_by="energy")
        for decoys in (dec_inter1, dec_inter2)
    ]

    scatterplot.draw_actual_plot(
        ax, rmsd_first, rmsd_second, 'k', pdb, title1, title2)
def main(list_input_dirs, energies_names, output_pre):
    """Scan weight pairs for two score sets and plot the resulting disc values.

    For every PDB common to both inputs, each of the 10 x 10 weight pairs
    ``(2 ** w_1, 2 ** w_2)`` with ``w_1, w_2`` in ``-10, -8, ..., 8`` is
    applied to the two decoy dicts, the weighted dicts are merged, and the
    first value returned by ``disc.given_data_run_disc`` is stored under the
    weight pair.  One panel per PDB shows the weights on log-2 axes, colored
    by that value.  A final panel shows, per weight pair, the sum over PDBs
    divided by the number of PDBs.  The figure is saved with
    ``conv.save_fig`` using ``output_pre`` and the suffix
    ``"_weights_v_disc"``.

    Args:
        list_input_dirs: Indexable; entries 0 and 1 are each indexed as
            ``[0]`` (input directory) and ``[1]`` (score type).
        energies_names: Indexed by score type to obtain the second argument
            of ``read_dec_nat``.  NOTE(review): it is also iterated below as
            a sequence of sequences -- confirm which shape callers pass.
        output_pre: Output name forwarded to ``conv.save_fig``.
    """
    #read in and rename arguments
    inp_dir1=list_input_dirs[0][0]
    scoretype1=list_input_dirs[0][1]
    inp_dir2=list_input_dirs[1][0]
    scoretype2=list_input_dirs[1][1]

    # NOTE(review): title1 and title2 are assigned but never read below.
    title1 = os.path.basename(inp_dir1)
    title2 = os.path.basename(inp_dir2)

    # NOTE(review): column_dict is filled here but never read; the calls
    # below index energies_names directly -- possibly column_dict was meant.
    column_dict = {}

    for c in energies_names:
        column_dict[c[0]] = c[1:]

    dec1, nat1 = scorefileparse.read_dec_nat(inp_dir1, energies_names[scoretype1], scoretype1)
    dec2, nat2 = scorefileparse.read_dec_nat(inp_dir2, energies_names[scoretype2], scoretype2)

    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = scorefileparse.pdbs_intersect([dec1, nat1, dec2, nat2]) 

    # Running per-weight-pair total across PDBs (Counter.update adds values).
    sum_discs = Counter()

    # One panel per PDB plus one for the cross-PDB average.
    fig, axarr = conv.create_ax(1, len(dec_inter1)+1, True,True)

    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):

        # (weight_1, weight_2) -> first value returned by given_data_run_disc
        discs_per_pdb = {}

        # Exponents -10, -8, ..., 8: ten weights per score set.
        for w_1 in xrange(-10,10,2):
            for w_2 in xrange(-10,10,2): 
                weight_1 = 2 ** w_1
                weight_2 = 2 ** w_2
                weighted_1 = scorefileparse.weight_dict(dec_inter1[pdb], weight_1)
                weighted_2 = scorefileparse.weight_dict(dec_inter2[pdb], weight_2)
                merged = scorefileparse.merge_dicts([weighted_1, weighted_2])
                ddata1 = scorefileparse.convert_disc(merged)

                disc_divs = [1.0,1.5,2.0,2.5,3.0,4.0,6.0]

                disc1, d, counts = disc.given_data_run_disc(ddata1, True, disc_divs)
                discs_per_pdb[(weight_1,weight_2)] = disc1

        # NOTE(review): sorted_disc and max_title are only referenced by the
        # commented-out reporting code below.
        sorted_disc = sorted(discs_per_pdb.values())
        max_title = [ t for t,v in discs_per_pdb.items() if v == sorted_disc[0] ]
        
        #header_string = "\t".join("{0:.3f}-{1:.3f}".format(x,y) for x,y in sorted(discs_per_pdb.keys())) + "\tMax_Weight"
        #values_string = "\t".join(format(x, "10.3f") for (w1,w2),x in sorted(discs_per_pdb.items())) + "\t{0:.3f}".format(max_title[0])
        
        #print header_string
        #print values_string

        ax = axarr[x_ind, 0]

        #ax.set_xlim(-10, 600)
        #ax.set_ylim(-10, 600)

        ax.set_xscale('log', basex=2)
        ax.set_yscale('log', basey=2)

        # x, y and d are all ordered by sorted weight pair, so they line up.
        x = [ w1 for (w1,w2) in sorted(discs_per_pdb.keys()) ]
        y = [ w2 for (w1,w2) in sorted(discs_per_pdb.keys()) ]
        d = [ v for k,v in sorted(discs_per_pdb.items()) ]
  
        # NOTE(review): min_y and max_y are only referenced by commented-out code.
        min_y = min(discs_per_pdb.values())
        max_y = max(discs_per_pdb.values())
        #print min_y, max_y
        s = scatterplot.draw_actual_plot(ax, x, y, d, pdb, scoretype1, scoretype2, 'bwr')
        fig.colorbar(s,ax=ax)
        #ax.axhline(y=min_y)
        #ax.set_ylim(min_y-0.05,max_y+0.05)
        scatterplot.add_x_y_line(ax, 0,600)

        sum_discs.update(discs_per_pdb)

    #print "All PDBs {0}".format(len(dec_inter1))

    #sorted_disc = sorted(sum_discs.values())
    #max_title = [ t for t,v in sum_discs.items() if v == sorted_disc[0] ]

    #header_string = "\t".join(format(x, "10.3f") for x in sorted(sum_discs.keys())) + "\tMax_Weight"
    #values_string = "\t".join(format(x/len(dec_inter1), "10.3f") for key,x in sorted(sum_discs.items())) + "\t{0:.3f}".format(max_title[0])
  
    #print header_string
    #print values_string 

    # Last panel: per-weight-pair mean over all PDBs.
    ax = axarr[len(dec_inter1), 0]

    # NOTE(review): min_y and max_y are unused apart from the commented-out
    # axhline; min()/max() raise ValueError here when sum_discs is empty.
    # NOTE(review): `/` is integer division on Python 2 if the summed values
    # are ints -- confirm they are floats.
    min_y = min(x/len(dec_inter1) for x in sum_discs.values())   
    max_y = max(x/len(dec_inter1) for x in sum_discs.values())

    x = [ w1 for w1,w2 in sorted(sum_discs.keys()) ]
    y = [ w2 for w1,w2 in sorted(sum_discs.keys()) ]
    d = [ v/len(dec_inter1) for k,v in sorted(sum_discs.items()) ]
    #fix titles of axes

    ax.set_xscale('log', basex=2)
    ax.set_yscale('log', basey=2)

    s = scatterplot.draw_actual_plot(ax, x,y,d, "All", scoretype1, scoretype2, cm='bwr')
    fig.colorbar(s,ax=ax)
    scatterplot.add_x_y_line(ax, 0,600)
    #ax.axhline(y=min_y)

    conv.save_fig(fig, output_pre, "_weights_v_disc", 3, len(dec_inter1)*3)
# Example #13
# 0
def plot_corr(c_1, c_2, ax, x_axis, y_axis):
    """Scatter ``c_2`` against ``c_1`` in blue and overlay a regression.

    The plot is titled "Counts vs. Counts"; ``x_axis`` and ``y_axis`` are
    forwarded to ``draw_actual_plot`` as the remaining label arguments.
    """
    plot_title = "Counts vs. Counts"
    scatterplot.draw_actual_plot(
        ax, c_1, c_2, 'b', plot_title, x_axis, y_axis, size=10)
    scatterplot.plot_regression(ax, c_1, c_2)