def draw_plot(ax, x, y, color, x_axis, y_axis, title):
    """Draw a scatter plot of x against y on ax and annotate it with statistics.

    The points are drawn through scatterplot.draw_actual_plot (marker size 40),
    the statistics are written onto the axes with conv.add_text_dict, and
    scatterplot.add_x_y_line is called over the range [min(x), max(x)].

    Returns:
        [PCC, Rho, MAE]: the first value returned by pearsonr(x, y), the first
        value returned by spearmanr(x, y), and mean_abs_error(x, y).
    """
    scatterplot.draw_actual_plot(ax, x, y, color, x_axis, y_axis, title, size=40)

    # Only the statistic of each correlation test is used; the second
    # returned value is discarded.
    pearson_r, _ = pearsonr(x, y)
    spearman_rho, _ = spearmanr(x, y)
    abs_err = mean_abs_error(x, y)

    stats = {"PCC": pearson_r, "Rho": spearman_rho, "MAE": abs_err}
    conv.add_text_dict(ax, stats)

    # NOTE(review): the line range is taken from x only, not from y.
    lowest, highest = min(x), max(x)
    scatterplot.add_x_y_line(ax, min_val=lowest, max_val=highest)

    return [pearson_r, spearman_rho, abs_err]
def gen_scatterplot(ax, x_axis, y_axis, z_axis, min_naive_by_pdb):
    """Overlay two delta series against a third on a single axes.

    get_dist_deltas(<name>, "All", min_naive_by_pdb) is evaluated for x_axis,
    y_axis and z_axis (in that order). The y_axis deltas are then plotted in
    black ('k') and the z_axis deltas in red ('r'), both against the x_axis
    deltas, each labelled with its own name. Finally
    scatterplot.add_x_y_line and conv.add_legend are applied to ax.
    """
    x_deltas, y_deltas, z_deltas = [
        get_dist_deltas(name, "All", min_naive_by_pdb)
        for name in (x_axis, y_axis, z_axis)
    ]

    x_label = x_axis + " Delta to Min RMSD (A)"
    overlays = (
        (y_deltas, 'k', y_axis),
        (z_deltas, 'r', z_axis),
    )
    for series_deltas, series_color, series_name in overlays:
        scatterplot.draw_actual_plot(ax, x_deltas, series_deltas, series_color,
                                     x_axis, x_label, "Delta to Min RMSD (A)",
                                     size=15, label=series_name)

    scatterplot.add_x_y_line(ax)
    conv.add_legend(ax)
def main(rec_corr_path, ddg_path, amber_pdb_path, rosetta_pdb_path, out_csv_path):
    """Plot Amber, Rosetta and predicted ddG values against known ddG values.

    Every file matching ``rec_corr_path + "*.rc"`` is read with read_csv_list.
    Rows are unpacked as (record_id, prefix, filename, known_ddg, pred_ddg).
    For each file, the dG values of every row are loaded from the Amber and
    Rosetta result files under ddg_path, the values of the wild-type row (the
    first row whose prefix contains "wt") are subtracted from every non-wt
    row, and the resulting ddG values are plotted against the known ddG.

    Three kinds of figure are produced:
      * one per .rc file - known ddG vs. each protocol / filtering method
        (mean, bottom 3, ...), one row each for Rosetta, Amber and Pred;
      * "all" - correlation values of Rosetta/Amber vs. those of Pred, one row
        per correlation value (PCC, Rho, MAE) and one column per protocol;
      * "all_corr" - same layout as the per-file figure, over all files.

    Args:
        rec_corr_path: Prefix that "*.rc" is appended to when globbing inputs.
        ddg_path: Directory containing the "amber", "rosetta",
            "amber_inter_mean", "rosetta_inter_mean" and "rosetta_inter"
            sub-directories.
        amber_pdb_path: Not used by this function.
        rosetta_pdb_path: Not used by this function.
        out_csv_path: Directory the figure paths are built in.
    """
    amber_csv_path = os.path.join(ddg_path, "amber")
    rosetta_csv_path = os.path.join(ddg_path, "rosetta")
    amber_inter_mean_path = os.path.join(ddg_path, "amber_inter_mean")
    rosetta_inter_mean_path = os.path.join(ddg_path, "rosetta_inter_mean")
    rosetta_inter_path = os.path.join(ddg_path, "rosetta_inter")

    list_rec_corr_names = glob.glob(rec_corr_path + "*.rc")

    # Passed to every plot_ddg_dict call, which is expected to fill it
    # (plot_ddg_dict is defined elsewhere). Expected shape:
    # "Pred" : { "Pred" : [ddg_vals] },
    # "Amber" : { "Mean.." : [ddg_vals], "Bott.." : [ddg_vals] },
    # "Rosetta" : { "Mean.." : [ddg_vals], "Bott.." : [ddg_vals] }
    corr_values_dict = {}

    all_amber_ddg_dict = {}
    all_rosetta_ddg_dict = {}
    all_known_ddg_dict = {}
    all_pred_ddg_dict = {}

    for rec_corr in list_rec_corr_names:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(rec_corr)
        rec_corr_list = read_csv_list(rec_corr)
        # Skip files without known ddG values (3 columns only). Empty files
        # are skipped as well instead of raising IndexError.
        if len(rec_corr_list) == 0 or len(rec_corr_list[0]) == 3:
            continue

        # Read the Amber and Rosetta result files named by column 3 of the
        # .rc file.
        amber_dg_dict = {}
        rosetta_dg_dict = {}
        for _, prefix, filename, known, pred in rec_corr_list:
            amber_csv = os.path.join(amber_csv_path, filename + ".csv")
            rosetta_csv = os.path.join(rosetta_csv_path, filename + ".csv")
            amber_dg_dict[filename] = {
                "Mean Binding Energy": get_mean_csv(amber_csv, protocol="amber"),
                "Bottom 3 Binding Energy": get_bottom3_csv(amber_csv, protocol="amber"),
                "Mean Interaction Energy": get_mean_txt(os.path.join(amber_inter_mean_path, filename + ".txt")),
            }
            rosetta_dg_dict[filename] = {
                "Mean Binding Energy": get_mean_csv(rosetta_csv, protocol="rosetta"),
                "Bottom 3 Binding Energy": get_bottom3_csv(rosetta_csv, protocol="rosetta"),
                "Mean Interaction Energy": get_mean_txt(os.path.join(rosetta_inter_mean_path, filename + ".txt")),
                "Bottom 3 Interaction Energy": get_bottom3_csv(os.path.join(rosetta_inter_path, filename + ".csv")),
            }

        # The wild-type entry is the first row whose prefix (column 2)
        # contains "wt"; raises IndexError when the file has no such row.
        wt_csv_name = [rec[2] for rec in rec_corr_list if "wt" in rec[1]][0]

        amber_ddg_dict = {}
        rosetta_ddg_dict = {}
        known_ddg_dict = {}
        pred_ddg_dict = {}

        # ddG of every non-wt record = its dG minus the wild-type dG.
        for _, prefix, filename, known, pred in rec_corr_list:
            if "wt" in prefix:
                continue
            amber_ddg = amber_ddg_dict.setdefault(filename, {})
            rosetta_ddg = rosetta_ddg_dict.setdefault(filename, {})
            for key, wt_dg in amber_dg_dict[wt_csv_name].items():
                amber_ddg[key] = amber_dg_dict[filename][key] - wt_dg
            for key, wt_dg in rosetta_dg_dict[wt_csv_name].items():
                rosetta_ddg[key] = rosetta_dg_dict[filename][key] - wt_dg
            known_ddg_dict[filename] = {"Known": float(known)}
            pred_ddg_dict[filename] = {"Pred": float(pred)}

        all_amber_ddg_dict.update(amber_ddg_dict)
        all_rosetta_ddg_dict.update(rosetta_ddg_dict)
        all_known_ddg_dict.update(known_ddg_dict)
        all_pred_ddg_dict.update(pred_ddg_dict)

        # One column per protocol / filtering method; computed once and used
        # for both the axes grid and the saved figure width.
        n_cols = max([len(d) for d in amber_dg_dict.values()] +
                     [len(d) for d in rosetta_dg_dict.values()])
        fig, axarr = conv.create_ax(n_cols, 3, shx=True, shy=True)
        plot_ddg_dict(rosetta_ddg_dict, known_ddg_dict, axarr, 0, "Rosetta", corr_values_dict)
        plot_ddg_dict(amber_ddg_dict, known_ddg_dict, axarr, 1, "Amber", corr_values_dict)
        plot_ddg_dict(pred_ddg_dict, known_ddg_dict, axarr, 2, "Pred", corr_values_dict)

        fig_name = os.path.splitext(os.path.basename(rec_corr))[0] + ".txt"
        conv.save_fig(fig, os.path.join(out_csv_path, fig_name), "ddg", n_cols * 4, 12)

    # Plot all correlation values collected by the per-file plots above.
    fig_all, axarr_all = conv.create_ax(len(corr_values_dict["Rosetta"]), 3)

    # Assumes that Rosetta has more protocols than Amber: columns follow the
    # Rosetta protocols and Amber is added only where it has the same one.
    labels = ["-PCC", "-Rho", "-Mae"]
    for x_ind, (protocol, vals) in enumerate(corr_values_dict["Rosetta"].items()):
        amber_vals = corr_values_dict["Amber"].get(protocol)
        pred_vals = corr_values_dict["Pred"]["Pred"]
        for ind, (val_list, label) in enumerate(zip(vals, labels)):
            series = [[val_list, pred_vals[ind], "Rosetta " + protocol]]
            if amber_vals is not None:
                series.append([amber_vals[ind], pred_vals[ind], "Amber " + protocol])
            scatterplot.plot_series(axarr_all[ind, x_ind], series, protocol, "Pred", label,
                                    colors=['coral', 'cyan'], size=40)
            scatterplot.add_x_y_line(axarr_all[ind, x_ind])

    conv.save_fig(fig_all, os.path.join(out_csv_path, "all.txt"), "ddg", 16, 12)

    # Combined figure over every processed file. Its width is derived from the
    # same all_* dicts the axes grid is built from, not from whatever the
    # last loop iteration happened to leave behind.
    n_cols_all = max([len(d) for d in all_amber_ddg_dict.values()] +
                     [len(d) for d in all_rosetta_ddg_dict.values()])
    fig_all_corr, axarr_all_corr = conv.create_ax(n_cols_all, 3, shx=True, shy=True)
    plot_ddg_dict(all_rosetta_ddg_dict, all_known_ddg_dict, axarr_all_corr, 0, "Rosetta", corr_values_dict)
    plot_ddg_dict(all_amber_ddg_dict, all_known_ddg_dict, axarr_all_corr, 1, "Amber", corr_values_dict)
    plot_ddg_dict(all_pred_ddg_dict, all_known_ddg_dict, axarr_all_corr, 2, "Pred", corr_values_dict)

    conv.save_fig(fig_all_corr, os.path.join(out_csv_path, "all_corr.txt"), "ddg", n_cols_all * 4, 12)
def _draw_weight_disc_plot(fig, ax, disc_by_weights, title, scoretype1, scoretype2):
    """Scatter a {(weight_1, weight_2): value} grid on ax with base-2 log axes.

    The values are passed to scatterplot.draw_actual_plot together with the
    'bwr' colormap, and a colorbar for the returned artist is added to fig.
    """
    # Sorting the items orders them by their (weight_1, weight_2) key, so the
    # three lists below stay aligned.
    ordered = sorted(disc_by_weights.items())
    weights_1 = [w1 for (w1, w2), value in ordered]
    weights_2 = [w2 for (w1, w2), value in ordered]
    values = [value for key, value in ordered]

    ax.set_xscale('log', basex=2)
    ax.set_yscale('log', basey=2)

    # The colormap is always passed by keyword so that both call sites agree.
    points = scatterplot.draw_actual_plot(ax, weights_1, weights_2, values,
                                          title, scoretype1, scoretype2, cm='bwr')
    fig.colorbar(points, ax=ax)
    scatterplot.add_x_y_line(ax, 0, 600)


def main(list_input_dirs, energies_names, output_pre):
    """Plot the result of disc.given_data_run_disc over a grid of score weights.

    Two score sets are read with scorefileparse.read_dec_nat and restricted to
    the PDBs they share. For every shared PDB and every pair of weights
    (2**-10, 2**-8, ..., 2**8 for each score set) the two weighted score sets
    are merged and the first value returned by disc.given_data_run_disc is
    recorded. One subplot is drawn per PDB, plus a final "All" subplot holding
    the per-weight-pair mean over all PDBs.

    Args:
        list_input_dirs: Sequence whose first two items are each indexed as
            (input_dir, scoretype).
        energies_names: Indexed by scoretype to obtain the second argument of
            scorefileparse.read_dec_nat.
        output_pre: Passed to conv.save_fig together with "_weights_v_disc".

    Raises:
        ValueError: If the two inputs have no PDB in common.
    """
    inp_dir1 = list_input_dirs[0][0]
    scoretype1 = list_input_dirs[0][1]
    inp_dir2 = list_input_dirs[1][0]
    scoretype2 = list_input_dirs[1][1]

    # NOTE(review): column_dict is built here but never read; the lookups
    # below index energies_names directly. Confirm whether
    # column_dict[scoretype] was intended.
    column_dict = {}
    for c in energies_names:
        column_dict[c[0]] = c[1:]

    dec1, nat1 = scorefileparse.read_dec_nat(inp_dir1, energies_names[scoretype1], scoretype1)
    dec2, nat2 = scorefileparse.read_dec_nat(inp_dir2, energies_names[scoretype2], scoretype2)

    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = scorefileparse.pdbs_intersect([dec1, nat1, dec2, nat2])

    n_pdbs = len(dec_inter1)
    if n_pdbs == 0:
        # The mean over PDBs is undefined; fail with a clear message.
        raise ValueError("no PDBs in common between the two input directories")

    sum_discs = Counter()

    # One row per PDB plus one final row for the mean over all PDBs.
    fig, axarr = conv.create_ax(1, n_pdbs + 1, True, True)

    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):
        discs_per_pdb = {}

        for w_1 in range(-10, 10, 2):
            for w_2 in range(-10, 10, 2):
                weight_1 = 2 ** w_1
                weight_2 = 2 ** w_2
                weighted_1 = scorefileparse.weight_dict(dec_inter1[pdb], weight_1)
                weighted_2 = scorefileparse.weight_dict(dec_inter2[pdb], weight_2)
                merged = scorefileparse.merge_dicts([weighted_1, weighted_2])
                ddata1 = scorefileparse.convert_disc(merged)

                disc_divs = [1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 6.0]

                # Only the first returned value is kept.
                disc1, _, _ = disc.given_data_run_disc(ddata1, True, disc_divs)
                discs_per_pdb[(weight_1, weight_2)] = disc1

        _draw_weight_disc_plot(fig, axarr[x_ind, 0], discs_per_pdb, pdb, scoretype1, scoretype2)

        # Counter.update adds the values per (weight_1, weight_2) key.
        sum_discs.update(discs_per_pdb)

    # Mean over all PDBs; the float divisor avoids Python 2 floor division
    # should the summed values be integers.
    divisor = float(n_pdbs)
    mean_discs = dict((key, total / divisor) for key, total in sum_discs.items())
    _draw_weight_disc_plot(fig, axarr[n_pdbs, 0], mean_discs, "All", scoretype1, scoretype2)

    # The figure has n_pdbs + 1 rows, so the height has to cover the "All"
    # row as well.
    conv.save_fig(fig, output_pre, "_weights_v_disc", 3, (n_pdbs + 1) * 3)