def main(args):
    """Plot the native/decoy energy gap against the disc value per PDB.

    ``args`` is indexed positionally: ``args[1]`` is the input directory and
    ``args[2]`` is the score type handed to ``scorefileparse.read_dec_nat``.

    For every PDB shared by the normalised decoys, normalised natives and
    the disc data, the gap ``lowest native score - lowest decoy score`` is
    collected together with the first disc entry of that PDB. Only natives
    whose second field (presumably RMSD -- confirm against scorefileparse)
    is below 2.0 are considered. One scatter plot with a regression line is
    drawn per entry in ``labels`` and the figure is written through
    ``conv.save_fig``.

    Raises:
        ValueError: from ``min`` when a PDB has no decoy scores or no native
            passing the ``< 2.0`` filter.
    """
    #read in and rename arguments
    inp_dir = args[1]
    scoretype = args[2]

    dec, nat = scorefileparse.read_dec_nat(inp_dir, [], scoretype)

    disc = discparse.read_dir(inp_dir)

    dec_norm = scorefileparse.norm_pdbs(dec)
    nat_norm = scorefileparse.norm_pdbs(nat, dec)

    [dec_inter, nat_inter, disc_inter] = scorefileparse.pdbs_intersect([dec_norm, nat_norm, disc])

    #labels = ["Average","1.0","1.5","2.0","2.5","3.0","4.0","6.0"]
    labels = ["Average"]
    energy_gap = [[] for _ in labels]
    avg_disc = [[] for _ in labels]

    for pdb in dec_inter.keys():
        # None of these values depend on the label, so compute them once per
        # PDB instead of once per (PDB, label) pair.
        lowest_dec = min(e[0] for e in dec_inter[pdb].values())
        lowest_nat = min(n[0] for n in nat_inter[pdb].values() if n[1] < 2.0)
        gap = lowest_nat - lowest_dec
        disc_value = disc_inter[pdb][0]

        # range() instead of xrange() so the loop runs on Python 2 and 3.
        for ind in range(len(labels)):
            energy_gap[ind].append(gap)
            avg_disc[ind].append(disc_value)

    fig, axarr = conv.create_ax(len(labels), 1)

    for x_ind, l in enumerate(labels):
        ax = axarr[0, x_ind]

        scatterplot.draw_actual_plot(ax, avg_disc[x_ind], energy_gap[x_ind], [], l, "Disc", "Energy Gap")

        scatterplot.plot_regression(ax, avg_disc[x_ind], energy_gap[x_ind], False, False)

    filename = inp_dir + "/test.txt"

    conv.save_fig(fig, filename, "disc_v_egap", len(labels)*3, 4)
def main(args):
    """Draw per-PDB comparison plots for two score directories.

    ``args`` is read by attribute: ``input_dir_1``, ``input_dir_2``,
    ``scoretype1``, ``scoretype2`` and ``rmsd_cutoff``.

    Decoy and native scores are read from both directories, filtered by
    ``rmsd_cutoff`` and normalised (natives are normalised against the
    decoys of the same directory). Two plot columns are produced per PDB:
    column 0 uses the PDB/score intersection of the normalised data, and
    column 1 uses the intersection of the data after
    ``scorefileparse.filter_norm_pdbs``.

    NOTE(review): the figure is neither saved nor returned in this
    function -- confirm whether a save step is missing.
    """
    #read in and rename arguments
    title1 = os.path.basename(args.input_dir_1)
    title2 = os.path.basename(args.input_dir_2)

    d1, n1 = scorefileparse.read_dec_nat(args.input_dir_1, [], args.scoretype1, True)
    d2, n2 = scorefileparse.read_dec_nat(args.input_dir_2, [], args.scoretype2, True)

    dec1 = scorefileparse.filter_pdbs_by_rmsd(d1, args.rmsd_cutoff)
    nat1 = scorefileparse.filter_pdbs_by_rmsd(n1, args.rmsd_cutoff)
    dec2 = scorefileparse.filter_pdbs_by_rmsd(d2, args.rmsd_cutoff)
    nat2 = scorefileparse.filter_pdbs_by_rmsd(n2, args.rmsd_cutoff)

    dec_norm1 = scorefileparse.norm_pdbs(dec1)
    nat_norm1 = scorefileparse.norm_pdbs(nat1, dec1)
    dec_norm2 = scorefileparse.norm_pdbs(dec2)
    nat_norm2 = scorefileparse.norm_pdbs(nat2, dec2)

    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = scorefileparse.pdbs_intersect([dec_norm1, nat_norm1, dec_norm2, nat_norm2])
    [dec_inter1, dec_inter2] = scorefileparse.pdbs_scores_intersect([dec_inter1, dec_inter2])
    [nat_inter1, nat_inter2] = scorefileparse.pdbs_scores_intersect([nat_inter1, nat_inter2])

    dec_filt1 = scorefileparse.filter_norm_pdbs(dec_norm1)
    nat_filt1 = scorefileparse.filter_norm_pdbs(nat_norm1)
    dec_filt2 = scorefileparse.filter_norm_pdbs(dec_norm2)
    nat_filt2 = scorefileparse.filter_norm_pdbs(nat_norm2)

    [dec_finter1, dec_finter2] = scorefileparse.pdbs_scores_intersect([dec_filt1, dec_filt2])
    [nat_finter1, nat_finter2] = scorefileparse.pdbs_scores_intersect([nat_filt1, nat_filt2])

    fig, axarr = conv.create_ax(2, len(dec_inter1))

    # The loop body previously mixed tabs and spaces, which made the
    # statements after the first one an indentation error; everything below
    # is now uniformly indented with spaces.
    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):

        # column 0: intersected, unfiltered data
        ax = axarr[x_ind, 0]

        plot(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2)

        # column 1: data after filter_norm_pdbs
        ax = axarr[x_ind, 1]

        plot(dec_finter1, dec_finter2, nat_finter1, nat_finter2, ax, pdb, title1, title2)
def main(input_dir_rosetta_sf, input_dir_amber_sf, input_dir_rosetta_pdb, input_dir_amber_pdb, output_dir, n_results):
    """Copy the ``n_results`` best structures per PDB by three selections.

    Score files are read from ``input_dir_rosetta_sf`` and
    ``input_dir_amber_sf``, normalised and intersected. For every PDB and
    every rank ``i`` in ``1..n_results`` three files are copied to
    ``<output_dir>/<selection>/<pdb>_<i>.pdb``:

    * ``rosetta``  -- entry chosen by ``find_lowest_energy`` on the rosetta
      scores, read from ``input_dir_rosetta_pdb``;
    * ``amber``    -- entry chosen by ``find_lowest_energy`` on the amber
      scores, read from ``input_dir_amber_pdb`` with a ``min_NoH_`` prefix;
    * ``combined`` -- entry chosen by ``find_pareto`` on independent copies
      of both score sets, read from ``input_dir_rosetta_pdb``.

    After each rank the selected entries are removed so the next rank picks
    the next-best candidate.
    """
    d1 = scorefileparse.read_dir(input_dir_rosetta_sf, 'rosetta', repl_orig=True)
    d2 = scorefileparse.read_dir(input_dir_amber_sf, 'amber', repl_orig=True)

    dec_norm1 = scorefileparse.norm_pdbs(d1)
    dec_norm2 = scorefileparse.norm_pdbs(d2)

    [dec_inter1, dec_inter2] = scorefileparse.pdbs_scores_intersect([dec_norm1, dec_norm2])

    # The pareto selection pops from its own copies so it is not affected by
    # the entries removed for the single-score selections.
    dec1_cp = copy.deepcopy(dec_inter1)
    dec2_cp = copy.deepcopy(dec_inter2)

    # copyfile does not create missing directories, so make sure the three
    # destination folders exist before copying anything.
    for subdir in ("rosetta", "amber", "combined"):
        target = os.path.join(output_dir, subdir)
        if not os.path.isdir(target):
            os.mkdir(target)

    for pdb in sorted(dec_inter1.keys()):
        for i in range(1, n_results + 1):
            rosetta_lowest_energy = find_lowest_energy(dec_inter1[pdb])
            amber_lowest_energy = find_lowest_energy(dec_inter2[pdb])
            pareto_lowest_energy = find_pareto(dec1_cp, dec2_cp, pdb)

            #copy rosetta file
            # os.path.join: the os module itself has no join() function.
            src = os.path.join(input_dir_rosetta_pdb, pdb, rosetta_lowest_energy + "_0001.pdb")
            dst = os.path.join(output_dir, "rosetta", "{0}_{1}.pdb".format(pdb, i))
            copyfile(src, dst)
            #copy amber file
            # Read from the amber structure directory; this previously used
            # the rosetta directory and left input_dir_amber_pdb unused.
            src = os.path.join(input_dir_amber_pdb, pdb, "min_NoH_" + amber_lowest_energy + ".pdb")
            dst = os.path.join(output_dir, "amber", "{0}_{1}.pdb".format(pdb, i))
            copyfile(src, dst)
            #copy pareto file
            src = os.path.join(input_dir_rosetta_pdb, pdb, pareto_lowest_energy + "_0001.pdb")
            dst = os.path.join(output_dir, "combined", "{0}_{1}.pdb".format(pdb, i))
            copyfile(src, dst)

            #delete from original scoredict so that next round will get the next-lowest
            dec_inter1[pdb].pop(rosetta_lowest_energy)
            dec_inter2[pdb].pop(amber_lowest_energy)
            dec1_cp[pdb].pop(pareto_lowest_energy)
            dec2_cp[pdb].pop(pareto_lowest_energy)
# Example no. 4
# 0
def main(input_dir_rosetta_sf, input_dir_amber_sf, input_dir_rosetta_pdb, input_dir_amber_pdb, output_dir, n_results, alternate_rmsd, start_ind_desc_amber, start_ind_desc_rosetta, end_ind_desc_amber, end_ind_desc_rosetta, add_pdb_to_path):
    """Copy the ``n_results`` best structures per PDB by three selections.

    Score files are read with ``scorefileparse.read_dir`` (passing
    ``rmsd='total_score'`` / ``rmsd='tot'`` only when ``alternate_rmsd is
    True``), normalised and intersected. For every PDB and every rank ``i``
    in ``1..n_results`` three files are copied to
    ``<output_dir>/{rosetta,amber,combined}/<pdb>_<i>.pdb``; the selected
    entries are then removed so the next rank picks the next candidate.

    Source file naming, as selected below:

    * ``add_pdb_to_path == "True"`` (the string) inserts the PDB name as a
      sub-directory of the structure directories;
    * rosetta files end in ``.pdb`` when ``end_ind_desc_rosetta == 1000``,
      otherwise in ``_0001.pdb``;
    * amber files start with ``min_NoH_`` when ``start_ind_desc_amber`` is
      None, otherwise with ``min_``;
    * amber files end in ``.pdb`` when ``end_ind_desc_amber`` is None,
      otherwise in ``.pdb.pdb``.

    Raises:
        OSError: when an output sub-directory cannot be created and does
            not already exist.
    """
    if alternate_rmsd is True:
        d1 = scorefileparse.read_dir(input_dir_rosetta_sf, 'rosetta', repl_orig=False, rmsd='total_score', start_ind_desc=start_ind_desc_rosetta, end_ind_desc=end_ind_desc_rosetta)
        d2 = scorefileparse.read_dir(input_dir_amber_sf, 'amber', repl_orig=False, rmsd='tot', start_ind_desc=start_ind_desc_amber, end_ind_desc=end_ind_desc_amber)
    else:
        d1 = scorefileparse.read_dir(input_dir_rosetta_sf, 'rosetta', repl_orig=False, start_ind_desc=start_ind_desc_rosetta, end_ind_desc=end_ind_desc_rosetta)
        d2 = scorefileparse.read_dir(input_dir_amber_sf, 'amber', repl_orig=False, start_ind_desc=start_ind_desc_amber, end_ind_desc=end_ind_desc_amber)

    dec_norm1 = scorefileparse.norm_pdbs(d1)
    dec_norm2 = scorefileparse.norm_pdbs(d2)

    [dec_inter1, dec_inter2] = scorefileparse.pdbs_scores_intersect([dec_norm1, dec_norm2])

    # The pareto selection pops from its own copies so it is not affected by
    # the entries removed for the single-score selections.
    dec1_cp = copy.deepcopy(dec_inter1)
    dec2_cp = copy.deepcopy(dec_inter2)

    for subdir in ("rosetta", "amber", "combined"):
        target = os.path.join(output_dir, subdir)
        try:
            os.mkdir(target)
        except OSError:
            # An already-existing directory is the expected case; any other
            # failure (permissions, missing parent) should not be hidden.
            if not os.path.isdir(target):
                raise

    # File-name decorations depend only on the arguments, so pick them once
    # instead of on every loop iteration.
    r_suffix = ".pdb" if end_ind_desc_rosetta == 1000 else "_0001.pdb"
    a_prefix = "min_NoH_" if start_ind_desc_amber is None else "min_"
    a_suffix = ".pdb" if end_ind_desc_amber is None else ".pdb.pdb"

    for pdb in sorted(dec_inter1.keys()):
        # The flag is compared against the string "True", not the boolean.
        path_p = pdb if add_pdb_to_path == "True" else ""

        for i in range(1, n_results + 1):
            rosetta_lowest_energy = find_lowest_energy(dec_inter1[pdb])
            amber_lowest_energy = find_lowest_energy(dec_inter2[pdb])
            pareto_lowest_energy = find_pareto(dec1_cp, dec2_cp, pdb)

            #copy rosetta file
            src = os.path.join(input_dir_rosetta_pdb, path_p, rosetta_lowest_energy + r_suffix)
            dst = os.path.join(output_dir, "rosetta", "{0}_{1}.pdb".format(pdb, i))
            copyfile(src, dst)
            #copy amber file
            src = os.path.join(input_dir_amber_pdb, path_p, a_prefix + amber_lowest_energy + a_suffix)
            dst = os.path.join(output_dir, "amber", "{0}_{1}.pdb".format(pdb, i))
            copyfile(src, dst)
            #copy pareto file
            src = os.path.join(input_dir_rosetta_pdb, path_p, pareto_lowest_energy + r_suffix)
            dst = os.path.join(output_dir, "combined", "{0}_{1}.pdb".format(pdb, i))
            copyfile(src, dst)

            #delete from original scoredict so that next round will get the next-lowest
            dec_inter1[pdb].pop(rosetta_lowest_energy)
            dec_inter2[pdb].pop(amber_lowest_energy)
            dec1_cp[pdb].pop(pareto_lowest_energy)
            dec2_cp[pdb].pop(pareto_lowest_energy)
# Example no. 5
# 0
def main(input_dir_rosetta_sf, input_dir_amber_sf, input_dir_rosetta_pdb,
         input_dir_amber_pdb, output_dir, n_results, alternate_rmsd,
         start_ind_desc_amber, start_ind_desc_rosetta, end_ind_desc_amber,
         end_ind_desc_rosetta, add_pdb_to_path):
    """Export the top ``n_results`` structures per PDB for three rankings.

    Rosetta and amber score files are loaded via ``scorefileparse.read_dir``
    (the ``rmsd`` keyword is supplied only when ``alternate_rmsd is True``),
    normalised and intersected. For each PDB and each rank ``i`` from 1 to
    ``n_results`` the structure picked by ``find_lowest_energy`` on the
    rosetta scores, the one picked on the amber scores, and the one picked
    by ``find_pareto`` are copied into the ``rosetta``, ``amber`` and
    ``combined`` sub-directories of ``output_dir`` as ``<pdb>_<i>.pdb``.
    Picked entries are removed afterwards so the following rank yields the
    next candidate.
    """
    # Keyword arguments shared by both branches; rmsd is added on demand.
    rosetta_opts = {
        "repl_orig": False,
        "start_ind_desc": start_ind_desc_rosetta,
        "end_ind_desc": end_ind_desc_rosetta,
    }
    amber_opts = {
        "repl_orig": False,
        "start_ind_desc": start_ind_desc_amber,
        "end_ind_desc": end_ind_desc_amber,
    }
    if alternate_rmsd is True:
        rosetta_opts["rmsd"] = 'total_score'
        amber_opts["rmsd"] = 'tot'

    rosetta_raw = scorefileparse.read_dir(input_dir_rosetta_sf, 'rosetta',
                                          **rosetta_opts)
    amber_raw = scorefileparse.read_dir(input_dir_amber_sf, 'amber',
                                        **amber_opts)

    rosetta_norm = scorefileparse.norm_pdbs(rosetta_raw)
    amber_norm = scorefileparse.norm_pdbs(amber_raw)

    rosetta_scores, amber_scores = scorefileparse.pdbs_scores_intersect(
        [rosetta_norm, amber_norm])

    # Separate copies for the pareto ranking, which removes its own picks.
    pareto_rosetta = copy.deepcopy(rosetta_scores)
    pareto_amber = copy.deepcopy(amber_scores)

    # Best-effort creation of the destination folders; an OSError (e.g. the
    # folder already exists) is ignored.
    for folder in ("rosetta", "amber", "combined"):
        try:
            os.mkdir(os.path.join(output_dir, folder))
        except OSError:
            pass

    # File-name pieces that depend only on the arguments.
    r_suffix = ".pdb" if end_ind_desc_rosetta == 1000 else "_0001.pdb"
    a_prefix = "min_NoH_" if start_ind_desc_amber is None else "min_"
    a_suffix = ".pdb" if end_ind_desc_amber is None else ".pdb.pdb"

    for pdb in sorted(rosetta_scores.keys()):
        # Optional per-PDB sub-directory; the flag is the string "True".
        path_p = pdb if add_pdb_to_path == "True" else ""

        for rank in range(1, n_results + 1):
            best_rosetta = find_lowest_energy(rosetta_scores[pdb])
            best_amber = find_lowest_energy(amber_scores[pdb])
            best_pareto = find_pareto(pareto_rosetta, pareto_amber, pdb)

            out_name = "{0}_{1}.pdb".format(pdb, rank)

            # (source file, destination folder) in copy order:
            # rosetta, amber, pareto.
            jobs = (
                (os.path.join(input_dir_rosetta_pdb, path_p,
                              best_rosetta + r_suffix), "rosetta"),
                (os.path.join(input_dir_amber_pdb, path_p,
                              a_prefix + best_amber + a_suffix), "amber"),
                (os.path.join(input_dir_rosetta_pdb, path_p,
                              best_pareto + r_suffix), "combined"),
            )
            for src, folder in jobs:
                copyfile(src, os.path.join(output_dir, folder, out_name))

            # Drop the picks so the next rank returns the next-lowest entry.
            rosetta_scores[pdb].pop(best_rosetta)
            amber_scores[pdb].pop(best_amber)
            pareto_rosetta[pdb].pop(best_pareto)
            pareto_amber[pdb].pop(best_pareto)
def main(input_dir_1, scoretype1, input_dir_2, scoretype2, rmsd_cutoff, output_pre):
    """Compare two scoring directories and save summary figures.

    Decoy and native scores are read from both input directories, filtered
    by ``rmsd_cutoff``, normalised (natives against the decoys of the same
    directory) and intersected. For every shared PDB, ``plot_r_v_r`` and
    ``plot_pareto`` are drawn side by side; the dict returned by
    ``plot_pareto`` (key -> ``(rank1, rank2, rmsd)``) is accumulated per key
    and per PDB.

    Three figure families are then saved through ``conv.save_fig`` using the
    base name ``<output_pre>/<title1>_<title2>.txt``:

    * ``_line_R`` / ``_line_A`` -- cumulative line plots of RMSD per PDB,
      with PDBs ordered by the RMSD of the ``"All"`` key;
    * ``_distdeltas`` -- one ``gen_dist_plot`` row per pair in ``hist_comp``;
    * ``_scattdeltas`` -- ``gen_scatterplot`` for ParetoR1..10 (row 0) and
      ParetoA1..10 (row 1) against Rosetta and Amber.

    The per-PDB figure created first is drawn into but not saved; its
    ``save_fig`` call is commented out below.

    Raises:
        KeyError: when ``plot_pareto`` never produced one of the keys looked
            up here ("All", "Amber", "Rosetta", "ParetoR1".."ParetoR10",
            "ParetoA1".."ParetoA10").
    """
    #read in and rename arguments
    title1 = os.path.basename(input_dir_1)
    title2 = os.path.basename(input_dir_2)

    d1, n1 = scorefileparse.read_dec_nat(input_dir_1, scoretype1, repl_orig=False)
    d2, n2 = scorefileparse.read_dec_nat(input_dir_2, scoretype2, repl_orig=False)

    dec1 = scorefileparse.filter_pdbs_by_rmsd(d1, rmsd_cutoff)
    nat1 = scorefileparse.filter_pdbs_by_rmsd(n1, rmsd_cutoff)
    dec2 = scorefileparse.filter_pdbs_by_rmsd(d2, rmsd_cutoff)
    nat2 = scorefileparse.filter_pdbs_by_rmsd(n2, rmsd_cutoff)

    dec_norm1 = scorefileparse.norm_pdbs(dec1)
    nat_norm1 = scorefileparse.norm_pdbs(nat1, dec1)
    dec_norm2 = scorefileparse.norm_pdbs(dec2)
    nat_norm2 = scorefileparse.norm_pdbs(nat2, dec2)

    [dec_inter1, nat_inter1, dec_inter2, nat_inter2] = scorefileparse.pdbs_intersect([dec_norm1, nat_norm1, dec_norm2, nat_norm2])
    [dec_inter1, dec_inter2] = scorefileparse.pdbs_scores_intersect([dec_inter1, dec_inter2])
    [nat_inter1, nat_inter2] = scorefileparse.pdbs_scores_intersect([nat_inter1, nat_inter2])

    dec_filt1 = scorefileparse.filter_norm_pdbs(dec_norm1)
    nat_filt1 = scorefileparse.filter_norm_pdbs(nat_norm1)
    dec_filt2 = scorefileparse.filter_norm_pdbs(dec_norm2)
    nat_filt2 = scorefileparse.filter_norm_pdbs(nat_norm2)

    [dec_finter1, dec_finter2] = scorefileparse.pdbs_scores_intersect([dec_filt1, dec_filt2])
    [nat_finter1, nat_finter2] = scorefileparse.pdbs_scores_intersect([nat_filt1, nat_filt2])

    fig, axarr = conv.create_ax(2, len(dec_inter1))

    # key -> ([pdb, ...], [rmsd, ...]), filled in PDB order
    line_plot_data = {}

    # pdb -> {key: rmsd}
    min_naive_by_pdb = {}

    for x_ind, pdb in enumerate(sorted(dec_inter1.keys())):

        ax = axarr[x_ind, 0]

        plot_r_v_r(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2)

        ax = axarr[x_ind, 1]

        min_naive = plot_pareto(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2)
        # This loop used to be indented with a tab/space mix; it is now
        # space-only so the file parses on Python 3 as well.
        for key, (rank1, rank2, rmsd) in min_naive.items():
            pdb_list, rmsd_list = line_plot_data.setdefault(key, ([], []))
            pdb_list.append(pdb)
            rmsd_list.append(rmsd)
            min_naive_by_pdb.setdefault(pdb, {})[key] = rmsd

    #organize data
    # Rank every PDB by the RMSD recorded under "All" (stable sort).
    all_pdbs, all_rmsds = line_plot_data["All"]
    indices = sorted(range(len(all_rmsds)), key=lambda x: all_rmsds[x])

    ranked_pdbs_by_rmsd_all = {}

    for i, x in enumerate(indices):
        ranked_pdbs_by_rmsd_all[all_pdbs[x]] = i

    # Re-order every series to follow the "All" ranking. Iterate over a
    # snapshot because the dict values are replaced inside the loop.
    for label, (pdbs, rmsds) in list(line_plot_data.items()):
        line_plot_data[label] = tuple(zip(*sorted(zip(pdbs, rmsds), key=lambda x: ranked_pdbs_by_rmsd_all[x[0]])))

    filename = output_pre + "/" + title1 + "_" + title2 + ".txt"

    #suffix="rmsd_v_rmsd_{0}".format(rmsd_cutoff)

    #conv.save_fig(fig, filename, suffix, 7, len(dec_inter1)*3)

    #plot line plot
    for initial in ["R", "A"]:
        ordered_labels = ["All", "Amber", "Rosetta"]
        for i in range(1, 11):
            ordered_labels.append("Pareto{0}{1}".format(initial, i))

        lines = [(line_plot_data[label][0], line_plot_data[label][1], label) for label in ordered_labels]

        fig2, axarr2 = conv.create_ax(1, len(ordered_labels), shx=True, shy=True)

        # Row i shows the first i+1 series, so each row adds one more label.
        for i in range(len(ordered_labels)):

            line.plot_series(axarr2[i, 0], lines[0:i+1], "RMSD vs. pdb", "PDB", "RMSD", linestyle='')

            conv.add_legend(axarr2[i, 0])
        conv.save_fig(fig2, filename, "_line_{0}".format(initial), 10, len(ordered_labels)*5)

    #plot histogram plot

    hist_comp = [("Amber", "All"), ("Rosetta", "All"), ("ParetoR10", "All"), ("ParetoA10", "All")]

    hist_comp.extend([("ParetoR{0}".format(ind), "Rosetta") for ind in range(1, 11)])
    hist_comp.extend([("ParetoR{0}".format(ind), "Amber") for ind in range(1, 11)])
    hist_comp.extend([("ParetoA{0}".format(ind), "Rosetta") for ind in range(1, 11)])
    hist_comp.extend([("ParetoA{0}".format(ind), "Amber") for ind in range(1, 11)])

    fig3, axarr3 = conv.create_ax(2, len(hist_comp), shx=False, shy=False)

    for ind, (top, bottom) in enumerate(hist_comp):
        gen_dist_plot(axarr3[ind, 0], axarr3[ind, 1], top, bottom, min_naive_by_pdb)

    conv.save_fig(fig3, filename, "_distdeltas", 7, len(hist_comp)*5, tight=False)

    #plot scatterplot
    fig4, axarr4 = conv.create_ax(10, 2)
    for i in range(1, 11):
        gen_scatterplot(axarr4[0, i-1], "ParetoR{0}".format(i), "Rosetta", "Amber", min_naive_by_pdb)
        gen_scatterplot(axarr4[1, i-1], "ParetoA{0}".format(i), "Rosetta", "Amber", min_naive_by_pdb)

    conv.save_fig(fig4, filename, "_scattdeltas", 30, 6)