def plot(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2):
    """Draw the decoy energy scatter for one structure and add a regression.

    The energies of ``dec_inter1[pdb]`` and ``dec_inter2[pdb]`` are handed to
    ``scatterplot.draw_actual_plot`` together with the RMSD values read from
    ``dec_inter1[pdb]``. ``scatterplot.plot_regression`` then receives, for
    each score set, the decoy energies joined (``+``) with the energies of the
    matching ``nat_inter*`` entry.

    Args:
        dec_inter1: Mapping indexed by ``pdb``; each value is accepted by
            ``scorefileparse.get_energies`` and ``scorefileparse.get_rmsd``.
        dec_inter2: Mapping indexed by ``pdb``; each value is accepted by
            ``scorefileparse.get_energies``.
        nat_inter1: Counterpart of ``dec_inter1`` (presumably the native
            structures -- confirm against the caller).
        nat_inter2: Counterpart of ``dec_inter2`` (same assumption).
        ax: Forwarded unchanged to both ``scatterplot`` calls.
        pdb: Key selecting the structure in all four mappings.
        title1: Forwarded to ``scatterplot.draw_actual_plot``.
        title2: Forwarded to ``scatterplot.draw_actual_plot``.
    """
    d1e = scorefileparse.get_energies(dec_inter1[pdb])
    d2e = scorefileparse.get_energies(dec_inter2[pdb])
    n1e = scorefileparse.get_energies(nat_inter1[pdb])
    n2e = scorefileparse.get_energies(nat_inter2[pdb])
    r = scorefileparse.get_rmsd(dec_inter1[pdb])

    # Build the regression inputs before drawing so they cannot be affected by
    # anything the draw call might do with d1e / d2e.
    combined1 = d1e + n1e
    combined2 = d2e + n2e

    scatterplot.draw_actual_plot(ax, d1e, d2e, r, pdb, title1, title2)
    # NOTE(review): the meaning of the trailing False is defined by
    # scatterplot.plot_regression and is not visible from here.
    scatterplot.plot_regression(ax, combined1, combined2, False)
def find_pareto(dec_inter1, dec_inter2, pdb):
    """Pick one decoy of ``pdb`` from the rank-space front returned by ``cull``.

    Both energy lists are converted to ranks with ``gen_ranks`` and paired up.
    ``cull(pts, dominates)`` splits the pairs into a kept and a dominated
    group. The smallest rank sum among the kept pairs is determined, every
    decoy whose rank sum equals it is collected, and the collection is handed
    to ``find_lowest_point``.

    Args:
        dec_inter1: Mapping indexed by ``pdb``; the value is a dict whose
            sorted keys are attached to the points as identifiers.
        dec_inter2: Mapping indexed by ``pdb`` providing the second energies.
        pdb: Key selecting the structure.

    Returns:
        Whatever ``find_lowest_point`` returns for the candidate
        ``(rank1, rank2, rmsd, key)`` tuples.
    """
    # NOTE(review): this file defines find_pareto a second time right after
    # this definition; the later one is the one that stays bound to the name.
    decoys = dec_inter1[pdb]
    energies1 = scorefileparse.get_energies(decoys)
    energies2 = scorefileparse.get_energies(dec_inter2[pdb])
    rmsds = scorefileparse.get_rmsd(decoys)

    ranks1 = gen_ranks(energies1)
    ranks2 = gen_ranks(energies2)

    rank_pairs = [list(pair) for pair in zip(ranks1, ranks2)]
    kept, _dominated = cull(rank_pairs, dominates)
    kept_by_rank1 = dict(kept)

    # Assumes the energies/RMSDs come back in sorted-key order, since the
    # identifiers are attached positionally -- TODO confirm in scorefileparse.
    labelled = zip(ranks1, ranks2, rmsds, sorted(decoys.keys()))

    best_sum = min(rank1 + rank2 for rank1, rank2 in kept_by_rank1.items())

    candidates = []
    for rosetta, amber, rmsd, name in labelled:
        if amber + rosetta == best_sum:
            candidates.append((rosetta, amber, rmsd, name))

    return find_lowest_point(candidates)
def find_pareto(dec_inter1, dec_inter2, pdb):
    """Select a decoy of ``pdb`` whose rank sum matches the best kept point.

    Steps:
      1. Read both energy lists and the RMSDs (from ``dec_inter1``).
      2. Rank each energy list with ``gen_ranks`` and pair the ranks.
      3. Run ``cull(pts, dominates)`` and keep its first result.
      4. Find the minimum ``rank1 + rank2`` over the kept pairs.
      5. Pass every decoy with that rank sum to ``find_lowest_point``.

    Args:
        dec_inter1: Mapping indexed by ``pdb`` to a dict of decoy entries; its
            sorted keys label the points.
        dec_inter2: Mapping indexed by ``pdb`` for the second score set.
        pdb: Key selecting the structure.

    Returns:
        The result of ``find_lowest_point`` on the matching
        ``(rank1, rank2, rmsd, key)`` tuples.
    """
    # NOTE(review): an identical find_pareto is defined immediately before
    # this one in the file; this later definition overrides it.
    first_set = dec_inter1[pdb]
    first_energies = scorefileparse.get_energies(first_set)
    second_energies = scorefileparse.get_energies(dec_inter2[pdb])
    rmsd_values = scorefileparse.get_rmsd(first_set)

    first_ranks = gen_ranks(first_energies)
    second_ranks = gen_ranks(second_energies)

    points = []
    for rank_pair in zip(first_ranks, second_ranks):
        points.append(list(rank_pair))

    survivors, _rest = cull(points, dominates)
    survivor_map = dict(survivors)

    # Identifiers are matched to points by position; this presumes the parsed
    # values are ordered by sorted key -- verify against scorefileparse.
    rows = zip(first_ranks, second_ranks, rmsd_values, sorted(first_set.keys()))

    target = min([a + b for a, b in survivor_map.items()])

    # row = (rosetta_rank, amber_rank, rmsd, key)
    matching = [row for row in rows if row[1] + row[0] == target]
    return find_lowest_point(matching)
def _evenly_spaced_indices(n_items, n_picks):
    """Return up to ``n_picks`` distinct, increasing indices into ``range(n_items)``."""
    if n_items <= 0:
        return []
    indices = []
    for i in range(n_picks):
        # ceil() can land on n_items when n_items < n_picks, so clamp to the
        # last valid index instead of running off the end of the list.
        idx = min(int(ceil(i * float(n_items) / n_picks)), n_items - 1)
        # idx never decreases, so comparing with the last entry removes repeats.
        if not indices or indices[-1] != idx:
            indices.append(idx)
    return indices


def main(path_prefix):
    """Copy a spread of low-energy structures to the top of ``path_prefix``.

    Reads ``<path_prefix>/final_score.sc`` through ``scorefileparse.read_vals``,
    keeps the entries whose energy is among the lowest fifth of the sorted
    energies, orders them by the second element of their value (the RMSD
    column requested as ``rmsALL``), and picks up to 50 entries at evenly
    spaced positions in that ordering. Each picked ``<name>.pdb`` is copied
    from its parent sub-directory to ``<path_prefix>/<name>.pdb``.

    When fewer than 50 entries qualify, each one is copied once; when none
    qualify, nothing is copied.

    Args:
        path_prefix: Directory holding ``final_score.sc`` and the
            sub-directories with the ``.pdb`` files.

    Raises:
        IndexError: If ``path_prefix`` contains no sub-directory.
    """
    scores_dict = scorefileparse.read_vals(
        path_prefix + "/final_score.sc", "rosetta", rmsd="rmsALL", trim=False)

    sorted_energies = sorted(scorefileparse.get_energies(scores_dict))
    # Floor division keeps the slice bound an integer on Python 2 and 3 alike;
    # a set makes the membership test below constant-time.
    lowest_energies = set(sorted_energies[:len(sorted_energies) // 5])
    scores_bottom_20 = dict(
        (key, (e, r)) for key, (e, r) in scores_dict.items()
        if e in lowest_energies)

    sorted_keys_by_r = sorted(
        scores_bottom_20, key=lambda key: scores_bottom_20[key][1])
    sel_pdbs = [sorted_keys_by_r[idx]
                for idx in _evenly_spaced_indices(len(sorted_keys_by_r), 50)]
    if not sel_pdbs:
        return

    # The number of "_"-separated fields in a sub-directory name varies
    # (dependent on NMR or xtal), so measure it on the first sub-directory.
    test_dir = next(os.walk(path_prefix + "/"))[1][0]
    length_dir = len(test_dir.split("_"))

    for pdb in sel_pdbs:
        # The parent directory name is the leading fields of the entry name.
        parent_dir = "_".join(pdb.split("_")[0:length_dir])
        src = "{0}/{1}/{2}.pdb".format(path_prefix, parent_dir, pdb)
        dst = "{0}/{1}.pdb".format(path_prefix, pdb)
        copyfile(src, dst)
def _pareto_point_color(on_front, top1, top2):
    """Return the 0-255 RGB colour for a point from its three membership flags."""
    if on_front:
        if top1 and top2:
            return (0, 0, 0)  # black
        if top1:
            return (161, 8, 8)  # dark red
        if top2:
            return (0, 153, 153)  # dark cyan
    if top1:
        return (255, 51, 51)  # red
    if top2:
        return (51, 255, 255)  # cyan
    if on_front:
        return (128, 128, 128)  # gray
    return (255, 255, 51)  # yellow


def plot_pareto(dec_inter1, dec_inter2, nat_inter1, nat_inter2, ax, pdb, title1, title2):
    """Plot the energy ranks of one structure, coloured by front membership.

    Both decoy energy lists are converted to ranks with ``gen_ranks``; the
    rank pairs are split by ``cull(pts, dominates)`` and the first group it
    returns is treated as the front. Every point is coloured according to
    whether it is in that group and whether each rank is at most 10, and the
    ranks are drawn with ``scatterplot.draw_actual_plot``.

    A set of selected points is gathered alongside and returned:

    * ``"All"``: first point with the smallest RMSD, as ``(rank1, rank2,
      rmsd)``; stays ``(1000, 1000, 60)`` if no RMSD is below 60.
    * ``"Rosetta"`` / ``"Amber"``: first point holding the smallest first /
      second rank.
    * ``"ParetoR<i>"`` / ``"ParetoA<i>"`` for ``i`` in 1..10: the result of
      ``find_lowest_point`` on all points whose score equals the front
      minimum, where the first (``R``) or second (``A``) rank is multiplied
      by ``i * 0.1`` before the two ranks are added.

    One tab-separated line with the RMSD of the ``All``, ``Amber``,
    ``Rosetta`` and ``ParetoR10`` entries is printed.

    Args:
        dec_inter1: Mapping indexed by ``pdb``; source of the first energies
            and of the RMSD values.
        dec_inter2: Mapping indexed by ``pdb``; source of the second energies.
        nat_inter1: Unused; kept for signature compatibility.
        nat_inter2: Unused; kept for signature compatibility.
        ax: Forwarded to ``scatterplot.draw_actual_plot``.
        pdb: Key selecting the structure; also printed in the summary line.
        title1: Forwarded to ``scatterplot.draw_actual_plot``.
        title2: Forwarded to ``scatterplot.draw_actual_plot``.

    Returns:
        dict: The selected points keyed as described above.

    Raises:
        ValueError: If no point has a rank of at most 10 in either score set,
            or if the front is empty.
    """
    top_rank = 10

    d1e = scorefileparse.get_energies(dec_inter1[pdb])
    d2e = scorefileparse.get_energies(dec_inter2[pdb])
    r1 = scorefileparse.get_rmsd(dec_inter1[pdb])

    d1e_ranks = gen_ranks(d1e)
    d2e_ranks = gen_ranks(d2e)

    # Real lists (not lazy iterators) because both are traversed repeatedly.
    pts = [list(pair) for pair in zip(d1e_ranks, d2e_ranks)]
    pts_r = list(zip(d1e_ranks, d2e_ranks, r1))

    cleared, _dominated = cull(pts, dominates)
    # rank1 -> rank2 for the kept points; a point is on the front when its
    # second rank matches the entry stored under its first rank.
    cleared_d = dict(cleared)

    # Sentinel start value: only points with an RMSD below 60 replace it.
    best_overall = (1000, 1000, 60)
    colors = []
    for (e1, e2), r in zip(pts, r1):
        if r < best_overall[2]:
            best_overall = (e1, e2, r)
        rgb = _pareto_point_color(
            cleared_d.get(e1) == e2, e1 <= top_rank, e2 <= top_rank)
        colors.append(tuple(channel / 255.0 for channel in rgb))

    rosetta_min_e1 = min(e1 for e1, e2 in pts if e1 <= top_rank)
    amber_min_e2 = min(e2 for e1, e2 in pts if e2 <= top_rank)

    min_naive = {
        "All": best_overall,
        "Rosetta": [point for point in pts_r if point[0] == rosetta_min_e1][0],
        "Amber": [point for point in pts_r if point[1] == amber_min_e2][0],
    }

    for i in range(1, 11):
        w = i * 0.1

        # The == comparisons below are exact: each side applies the same
        # multiply and add to the same values, so matching points produce
        # bit-identical floats.
        best = min(e1 * w + e2 for e1, e2 in cleared_d.items())
        tied = [(rosetta, amber, r) for rosetta, amber, r in pts_r
                if amber + rosetta * w == best]
        min_naive["ParetoR{0}".format(i)] = find_lowest_point(tied)

        best = min(e1 + e2 * w for e1, e2 in cleared_d.items())
        tied = [(rosetta, amber, r) for rosetta, amber, r in pts_r
                if amber * w + rosetta == best]
        min_naive["ParetoA{0}".format(i)] = find_lowest_point(tied)

    scatterplot.draw_actual_plot(
        ax, d1e_ranks, d2e_ranks, colors, pdb, title1, title2,
        cm="summer", size=20)

    keys_to_include = ["All", "Amber", "Rosetta", "ParetoR10"]
    fields = ["{0}\t2".format(pdb)]
    # Every stored value is unpacked as a 3-item sequence ending in the RMSD.
    for k, (_e1, _e2, r) in min_naive.items():
        if k in keys_to_include:
            fields.append("{0} {1:.1f}".format(k, r))
    # Single-argument call form prints the same text on Python 2 and Python 3.
    print("\t".join(fields))

    return min_naive