Python read_dec_nat Examples, customIO.scorefileparse.read_dec_nat Python Examples

Example #1

0

Show file

File: plot_energy_v_energy.py Project: kmb413/DecoyDiscrimination

def main(args):
    """Draw energy-vs-energy panels for two score sets, one row per PDB.

    Decoy and native scores are read from both input directories, filtered
    with ``filter_pdbs_by_rmsd`` and normalized with ``norm_pdbs`` (for the
    natives, the matching decoys are passed as the second argument).  Two
    panels are then drawn for every PDB common to all four data sets:

    * column 0 -- the normalized scores after intersecting PDBs and scores;
    * column 1 -- the normalized scores after ``filter_norm_pdbs`` and a
      score intersection.

    Args:
        args: object exposing ``input_dir_1``, ``input_dir_2``,
            ``scoretype1``, ``scoretype2`` and ``rmsd_cutoff`` attributes.
    """
    #read in and rename arguments
    title1 = os.path.basename(args.input_dir_1)
    title2 = os.path.basename(args.input_dir_2)

    d1, n1 = scorefileparse.read_dec_nat(args.input_dir_1, [], args.scoretype1, True)
    d2, n2 = scorefileparse.read_dec_nat(args.input_dir_2, [], args.scoretype2, True)

    dec1 = scorefileparse.filter_pdbs_by_rmsd(d1, args.rmsd_cutoff)
    nat1 = scorefileparse.filter_pdbs_by_rmsd(n1, args.rmsd_cutoff)
    dec2 = scorefileparse.filter_pdbs_by_rmsd(d2, args.rmsd_cutoff)
    nat2 = scorefileparse.filter_pdbs_by_rmsd(n2, args.rmsd_cutoff)

    dec_norm1 = scorefileparse.norm_pdbs(dec1)
    nat_norm1 = scorefileparse.norm_pdbs(nat1, dec1)
    dec_norm2 = scorefileparse.norm_pdbs(dec2)
    nat_norm2 = scorefileparse.norm_pdbs(nat2, dec2)

    # Keep only PDBs present in all four sets, then only the scores present
    # in both members of each decoy/native pair.
    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = scorefileparse.pdbs_intersect(
        [dec_norm1, nat_norm1, dec_norm2, nat_norm2])
    [dec_inter1, dec_inter2] = scorefileparse.pdbs_scores_intersect([dec_inter1, dec_inter2])
    [nat_inter1, nat_inter2] = scorefileparse.pdbs_scores_intersect([nat_inter1, nat_inter2])

    # The filtered variants start from the normalized (not the intersected)
    # data, exactly as in the original code.
    dec_filt1 = scorefileparse.filter_norm_pdbs(dec_norm1)
    nat_filt1 = scorefileparse.filter_norm_pdbs(nat_norm1)
    dec_filt2 = scorefileparse.filter_norm_pdbs(dec_norm2)
    nat_filt2 = scorefileparse.filter_norm_pdbs(nat_norm2)

    [dec_finter1, dec_finter2] = scorefileparse.pdbs_scores_intersect([dec_filt1, dec_filt2])
    [nat_finter1, nat_finter2] = scorefileparse.pdbs_scores_intersect([nat_filt1, nat_filt2])

    fig, axarr = conv.create_ax(2, len(dec_inter1))

    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):
        # Left panel: unfiltered intersected scores.
        ax = axarr[x_ind, 0]
        plot(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2)

        # Right panel: scores that passed filter_norm_pdbs.
        ax = axarr[x_ind, 1]
        plot(dec_finter1, dec_finter2, nat_finter1, nat_finter2, ax, pdb, title1, title2)

Example #2

0

Show file

File: plot_energy_gaps.py Project: kmb413/DecoyDiscrimination

def main(args):
    """Scatter the per-PDB energy gap against the discrimination score.

    For every PDB shared by the decoy, native and discrimination data, the
    energy gap is the lowest native score (restricted to natives whose second
    field is below 2.0 -- presumably an RMSD cutoff; confirm against
    ``scorefileparse``) minus the lowest decoy score.  One panel per label is
    drawn, with a regression line, and saved through ``conv.save_fig``.

    Args:
        args: indexable argument list; ``args[1]`` is the input directory and
            ``args[2]`` the score type.

    Raises:
        ValueError: if a PDB has no decoy scores, or no native with a second
            field below 2.0 (``min`` of an empty sequence).
    """
    #read in and rename arguments
    inp_dir = args[1]
    scoretype = args[2]

    dec, nat = scorefileparse.read_dec_nat(inp_dir, [], scoretype)

    disc = discparse.read_dir(inp_dir)

    dec_norm = scorefileparse.norm_pdbs(dec)
    nat_norm = scorefileparse.norm_pdbs(nat, dec)

    [dec_inter, nat_inter, disc_inter] = scorefileparse.pdbs_intersect([dec_norm, nat_norm, disc])

    #labels = ["Average","1.0","1.5","2.0","2.5","3.0","4.0","6.0"]
    labels = ["Average"]
    energy_gap = [[] for _ in labels]
    avg_disc = [[] for _ in labels]

    for pdb in dec_inter.keys():
        # These values do not depend on the label, so compute them once per
        # PDB rather than once per (PDB, label) pair.
        lowest_dec = min(e[0] for e in dec_inter[pdb].values())
        lowest_nat = min(n[0] for n in nat_inter[pdb].values() if n[1] < 2.0)
        gap = lowest_nat - lowest_dec
        pdb_disc = disc_inter[pdb][0]

        # range() instead of xrange() keeps this runnable on Python 2 and 3.
        for ind in range(len(labels)):
            energy_gap[ind].append(gap)
            avg_disc[ind].append(pdb_disc)

    fig, axarr = conv.create_ax(len(labels), 1)

    for x_ind, l in enumerate(labels):
        ax = axarr[0, x_ind]

        scatterplot.draw_actual_plot(ax, avg_disc[x_ind], energy_gap[x_ind], [], l, "Disc", "Energy Gap")

        scatterplot.plot_regression(ax, avg_disc[x_ind], energy_gap[x_ind], False, False)

    filename = inp_dir + "/test.txt"

    conv.save_fig(fig, filename, "disc_v_egap", len(labels)*3, 4)

Example #3

0

Show file

File: find_best_solution.py Project: kmb413/DecoyDiscrimination

def main(input_dir_1, scoretype1, input_dir_2, scoretype2, rmsd_cutoff, output_pre ):
    """Compare two score sets per PDB and save line, distribution and scatter plots.

    Scores from both input directories are read, RMSD-filtered, normalized
    and intersected.  For every shared PDB, ``plot_r_v_r`` and ``plot_pareto``
    are drawn; ``plot_pareto`` returns a mapping of selection label to
    ``(rank1, rank2, rmsd)``, whose RMSD values are collected per label and
    per PDB.  Three figures are then saved with ``conv.save_fig``:

    * ``_line_R`` / ``_line_A`` -- cumulative line plots of RMSD per PDB;
    * ``_distdeltas`` -- pairwise comparison plots from ``gen_dist_plot``;
    * ``_scattdeltas`` -- scatter plots from ``gen_scatterplot``.

    Args:
        input_dir_1: first input directory; its basename is used as a title.
        scoretype1: score type passed to ``read_dec_nat`` for the first input.
        input_dir_2: second input directory; its basename is used as a title.
        scoretype2: score type passed to ``read_dec_nat`` for the second input.
        rmsd_cutoff: cutoff handed to ``filter_pdbs_by_rmsd``.
        output_pre: output directory prefix for the saved figures.

    Raises:
        KeyError: if ``plot_pareto`` never reports a label this function
            looks up ("All", "Amber", "Rosetta", "ParetoR1".."ParetoR10",
            "ParetoA1".."ParetoA10").
    """
    #read in and rename arguments
    title1 = os.path.basename(input_dir_1)
    title2 = os.path.basename(input_dir_2)

    # NOTE(review): read_dec_nat is called here without a column-list
    # argument -- confirm this matches the signature of the version in use.
    d1, n1 = scorefileparse.read_dec_nat(input_dir_1, scoretype1, repl_orig=False)
    d2, n2 = scorefileparse.read_dec_nat(input_dir_2, scoretype2, repl_orig=False)

    dec1 = scorefileparse.filter_pdbs_by_rmsd(d1, rmsd_cutoff)
    nat1 = scorefileparse.filter_pdbs_by_rmsd(n1, rmsd_cutoff)
    dec2 = scorefileparse.filter_pdbs_by_rmsd(d2, rmsd_cutoff)
    nat2 = scorefileparse.filter_pdbs_by_rmsd(n2, rmsd_cutoff)

    dec_norm1 = scorefileparse.norm_pdbs(dec1)
    nat_norm1 = scorefileparse.norm_pdbs(nat1, dec1)
    dec_norm2 = scorefileparse.norm_pdbs(dec2)
    nat_norm2 = scorefileparse.norm_pdbs(nat2, dec2)

    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = scorefileparse.pdbs_intersect(
        [dec_norm1, nat_norm1, dec_norm2, nat_norm2])
    [dec_inter1, dec_inter2] = scorefileparse.pdbs_scores_intersect([dec_inter1, dec_inter2])
    [nat_inter1, nat_inter2] = scorefileparse.pdbs_scores_intersect([nat_inter1, nat_inter2])

    dec_filt1 = scorefileparse.filter_norm_pdbs(dec_norm1)
    nat_filt1 = scorefileparse.filter_norm_pdbs(nat_norm1)
    dec_filt2 = scorefileparse.filter_norm_pdbs(dec_norm2)
    nat_filt2 = scorefileparse.filter_norm_pdbs(nat_norm2)

    [dec_finter1, dec_finter2] = scorefileparse.pdbs_scores_intersect([dec_filt1, dec_filt2])
    [nat_finter1, nat_finter2] = scorefileparse.pdbs_scores_intersect([nat_filt1, nat_filt2])

    fig, axarr = conv.create_ax(2, len(dec_inter1))

    # label -> ([pdb, ...], [rmsd, ...]), filled in sorted-PDB order.
    line_plot_data = {}

    # pdb -> {label: rmsd}
    min_naive_by_pdb = {}

    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):

        ax = axarr[x_ind, 0]

        plot_r_v_r(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2)

        ax = axarr[x_ind, 1]

        min_naive = plot_pareto(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2)
        for key, (rank1, rank2, rmsd) in min_naive.items():
            series = line_plot_data.setdefault(key, ([], []))
            series[0].append(pdb)
            series[1].append(rmsd)
            min_naive_by_pdb.setdefault(pdb, {})[key] = rmsd

    #organize data
    # Rank PDBs by their "All" RMSD so every series is plotted in that order.
    all_pdbs, all_rmsds = line_plot_data["All"]
    indices = list(range(len(all_rmsds)))
    indices.sort(key=lambda x: all_rmsds[x])

    ranked_pdbs_by_rmsd_all = {}

    for i, x in enumerate(indices):
        ranked_pdbs_by_rmsd_all[all_pdbs[x]] = i

    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for label, (pdbs, rmsds) in line_plot_data.items():
        line_plot_data[label] = tuple(zip(*sorted(zip(pdbs, rmsds), key=lambda x: ranked_pdbs_by_rmsd_all[x[0]])))

    filename = output_pre + "/" + title1 + "_" + title2 + ".txt"

    # The per-PDB figure (fig) is drawn above but its save call is disabled:
    #suffix="rmsd_v_rmsd_{0}".format(rmsd_cutoff)
    #conv.save_fig(fig, filename, suffix, 7, len(dec_inter1)*3)

    #plot line plot
    for initial in ["R", "A"]:
        ordered_labels = ["All", "Amber", "Rosetta"]
        for i in range(1, 11):
            ordered_labels.append("Pareto{0}{1}".format(initial, i))

        lines = [ (line_plot_data[label][0], line_plot_data[label][1], label) for label in ordered_labels ]

        fig2, axarr2 = conv.create_ax(1, len(ordered_labels), shx=True, shy=True)

        for i, label in enumerate(ordered_labels):
            # Panel i shows the first i+1 series, so series accumulate
            # from one row to the next.
            line.plot_series(axarr2[i,0], lines[0:i+1], "RMSD vs. pdb", "PDB", "RMSD", linestyle='')

            conv.add_legend(axarr2[i,0])
        conv.save_fig(fig2, filename, "_line_{0}".format(initial), 10, len(ordered_labels)*5)

    #plot histogram plot

    hist_comp = [ ("Amber","All"), ("Rosetta", "All"), ("ParetoR10", "All"), ("ParetoA10", "All")]

    hist_comp.extend([ ("ParetoR{0}".format(ind),"Rosetta") for ind in range(1,11) ])
    hist_comp.extend([ ("ParetoR{0}".format(ind),"Amber") for ind in range(1,11) ])
    hist_comp.extend([ ("ParetoA{0}".format(ind),"Rosetta") for ind in range(1,11) ])
    hist_comp.extend([ ("ParetoA{0}".format(ind), "Amber") for ind in range(1,11) ])

    fig3, axarr3 = conv.create_ax(2, len(hist_comp), shx=False, shy=False)

    for ind, (top, bottom) in enumerate(hist_comp):
        gen_dist_plot(axarr3[ind,0], axarr3[ind,1], top, bottom, min_naive_by_pdb)

    conv.save_fig(fig3, filename, "_distdeltas", 7, len(hist_comp)*5, tight=False)

    #plot scatterplot
    fig4, axarr4 = conv.create_ax(10, 2)
    for i in range(1,11):
        gen_scatterplot(axarr4[0,i-1], "ParetoR{0}".format(i), "Rosetta", "Amber", min_naive_by_pdb)
        gen_scatterplot(axarr4[1,i-1], "ParetoA{0}".format(i), "Rosetta", "Amber", min_naive_by_pdb)

    conv.save_fig(fig4, filename, "_scattdeltas", 30, 6)

Example #4

0

Show file

File: weight_score_terms.py Project: kmb413/DecoyDiscrimination

def main(list_input_dirs, energies_names, output_pre):
    """Scan pairs of score weights and plot the resulting discrimination values.

    For every PDB shared by the two inputs, both decoy score sets are weighted
    by each pair of powers of two ``2**w`` with ``w`` in ``range(-10, 10, 2)``,
    merged, and scored with ``disc.given_data_run_disc``.  Each PDB gets a
    log2-log2 scatter panel of (weight 1, weight 2) coloured by that value;
    a final panel shows the per-weight-pair mean over all PDBs.  The figure is
    saved through ``conv.save_fig`` with the suffix ``_weights_v_disc``.

    Args:
        list_input_dirs: sequence whose first two entries are
            ``(input_dir, scoretype)`` pairs.
        energies_names: iterable of rows ``[scoretype, column, ...]``; it is
            also indexed directly by score type when that works (see below).
        output_pre: output path prefix handed to ``conv.save_fig``.
    """
    #read in and rename arguments
    inp_dir1 = list_input_dirs[0][0]
    scoretype1 = list_input_dirs[0][1]
    inp_dir2 = list_input_dirs[1][0]
    scoretype2 = list_input_dirs[1][1]

    # Score type (first entry of each row) -> remaining entries of that row.
    column_dict = {}
    for c in energies_names:
        column_dict[c[0]] = c[1:]

    def columns_for(scoretype):
        # The original code indexed energies_names directly, which raises
        # TypeError when it is a list of rows and scoretype is a string.
        # Try that lookup first so previously working inputs are unchanged,
        # then fall back to the per-scoretype table built above.
        try:
            return energies_names[scoretype]
        except (TypeError, KeyError, IndexError):
            return column_dict[scoretype]

    dec1, nat1 = scorefileparse.read_dec_nat(inp_dir1, columns_for(scoretype1), scoretype1)
    dec2, nat2 = scorefileparse.read_dec_nat(inp_dir2, columns_for(scoretype2), scoretype2)

    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = scorefileparse.pdbs_intersect([dec1, nat1, dec2, nat2])

    # (weight_1, weight_2) -> sum of the per-PDB values.
    sum_discs = Counter()

    # One row per PDB plus a final row for the all-PDB average.
    fig, axarr = conv.create_ax(1, len(dec_inter1)+1, True, True)

    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):

        discs_per_pdb = {}

        # range() instead of xrange() keeps this runnable on Python 2 and 3.
        for w_1 in range(-10, 10, 2):
            for w_2 in range(-10, 10, 2):
                weight_1 = 2 ** w_1
                weight_2 = 2 ** w_2
                weighted_1 = scorefileparse.weight_dict(dec_inter1[pdb], weight_1)
                weighted_2 = scorefileparse.weight_dict(dec_inter2[pdb], weight_2)
                merged = scorefileparse.merge_dicts([weighted_1, weighted_2])
                ddata1 = scorefileparse.convert_disc(merged)

                disc_divs = [1.0,1.5,2.0,2.5,3.0,4.0,6.0]

                # Only the first returned value is used.
                disc1, _, _ = disc.given_data_run_disc(ddata1, True, disc_divs)
                discs_per_pdb[(weight_1, weight_2)] = disc1

        ax = axarr[x_ind, 0]

        # NOTE(review): newer Matplotlib releases renamed basex/basey to
        # base; kept as-is for the Matplotlib version this file targets.
        ax.set_xscale('log', basex=2)
        ax.set_yscale('log', basey=2)

        # Sort once; the weight pairs are unique, so sorting the items
        # gives the same order as sorting the keys.
        ordered = sorted(discs_per_pdb.items())
        x = [ w1 for (w1, w2), v in ordered ]
        y = [ w2 for (w1, w2), v in ordered ]
        d = [ v for k, v in ordered ]

        s = scatterplot.draw_actual_plot(ax, x, y, d, pdb, scoretype1, scoretype2, 'bwr')
        fig.colorbar(s, ax=ax)
        scatterplot.add_x_y_line(ax, 0, 600)

        # Counter.update adds the values for matching weight pairs.
        sum_discs.update(discs_per_pdb)

    ax = axarr[len(dec_inter1), 0]

    ordered_sums = sorted(sum_discs.items())
    x = [ w1 for (w1, w2), v in ordered_sums ]
    y = [ w2 for (w1, w2), v in ordered_sums ]
    # Mean over PDBs for each weight pair.
    d = [ v/len(dec_inter1) for k, v in ordered_sums ]
    #fix titles of axes

    ax.set_xscale('log', basex=2)
    ax.set_yscale('log', basey=2)

    s = scatterplot.draw_actual_plot(ax, x, y, d, "All", scoretype1, scoretype2, cm='bwr')
    fig.colorbar(s, ax=ax)
    scatterplot.add_x_y_line(ax, 0, 600)

    conv.save_fig(fig, output_pre, "_weights_v_disc", 3, len(dec_inter1)*3)