def gen_scatterplot(ax, x_axis, y_axis, z_axis, min_naive_by_pdb):
    """Draw two delta series against the ``x_axis`` deltas on ``ax``.

    The deltas for ``x_axis``, ``y_axis`` and ``z_axis`` are each obtained
    from ``get_dist_deltas(<name>, "All", min_naive_by_pdb)``.  The
    ``y_axis`` deltas are drawn in black ('k') and the ``z_axis`` deltas in
    red ('r'), both plotted against the ``x_axis`` deltas and labelled with
    their own name.  Afterwards ``scatterplot.add_x_y_line`` and
    ``conv.add_legend`` are applied to the axis.

    Args:
        ax: axis object handed to the ``scatterplot`` / ``conv`` helpers.
        x_axis: key whose deltas go on the horizontal axis; also used as
            the plot title and as the prefix of the x-axis label.
        y_axis: key of the first overlaid series (black).
        z_axis: key of the second overlaid series (red).
        min_naive_by_pdb: mapping passed through to ``get_dist_deltas``.
    """
    x_label = x_axis + " Delta to Min RMSD (A)"
    y_label = "Delta to Min RMSD (A)"
    overlays = ((y_axis, 'k'), (z_axis, 'r'))

    # All deltas are computed up front (x first, then each overlay) before
    # anything is drawn.
    x_deltas = get_dist_deltas(x_axis, "All", min_naive_by_pdb)
    overlay_deltas = [
        get_dist_deltas(name, "All", min_naive_by_pdb)
        for name, _colour in overlays
    ]

    for (name, colour), deltas in zip(overlays, overlay_deltas):
        scatterplot.draw_actual_plot(ax, x_deltas, deltas, colour, x_axis,
                                     x_label, y_label, size=15, label=name)

    scatterplot.add_x_y_line(ax)
    conv.add_legend(ax)
def main(seq_file, canonical_file, output_prefix):
    """Plot the per-step fraction curve for every canonical sequence.

    For each sequence returned by ``seq_IO.read_sequences(canonical_file)``:

    1. The lines of ``seq_file`` are indexed in file order.
    2. If the canonical sequence is not one of those lines, the results of
       ``gsconv.gen_hamdist_one(canonical)`` (minus the canonical itself)
       followed by the canonical are appended, continuing the numbering.
    3. Every pair of entries with ``gsconv.hamdist(...) < 2`` becomes an
       edge, and ``trans_matrix`` builds a matrix from entries and edges.
    4. Step 0 is recorded as 0, step 1 as ``find_frac`` of that matrix, and
       steps 2..22 as the fraction returned by ``square_matrix``.

    All curves are drawn on one axis ("Number of Steps" against "Fraction
    Cleaved Variants Reached") and saved through ``conv.save_fig`` with the
    suffix ``"fraction_func"``.  Timestamped progress messages are printed.

    Args:
        seq_file: path of a text file with one sequence per line.
        canonical_file: path handed to ``seq_IO.read_sequences``.
        output_prefix: prefix handed to ``conv.save_fig``.
    """
    # Function-scope import: only the print-threshold setting needs it.
    import sys

    max_steps = 22  # last step plotted; also bounds the x ticks

    canonical_list_seq = seq_IO.read_sequences(canonical_file)
    # print(...) with one argument is valid both as a Python 2 statement
    # and as a Python 3 call, and prints the same text in both.
    print("Beginning Script: {0}".format(datetime.datetime.now()))

    # The sequence file does not depend on the canonical sequence, so it is
    # read once instead of once per loop iteration.
    with open(seq_file) as strings:
        seq_list = strings.read().splitlines()
    orig_len = len(seq_list)

    # threshold must be numeric: the former 'nan' string relied on
    # Python 2 mixed-type comparison and is rejected by current NumPy.
    # sys.maxsize is the documented way to disable summarisation.
    numpy.set_printoptions(threshold=sys.maxsize)

    series = []
    for canonical in canonical_list_seq:
        seq_ind_list = [(seq, ind) for ind, seq in enumerate(seq_list)]

        if canonical not in seq_list:
            one_away = [o for o in gsconv.gen_hamdist_one(canonical)
                        if o != canonical]
            one_away.append(canonical)
            # Added entries are numbered after the sequences from the file.
            seq_ind_list.extend(
                (o, ind) for ind, o in enumerate(one_away, orig_len))

        edges = [(seq2, seq)
                 for seq, seq2 in itertools.combinations(seq_ind_list, 2)
                 if gsconv.hamdist(seq2[0], seq[0]) < 2]
        print(len(seq_ind_list))
        print("Generated Edges: {0}".format(datetime.datetime.now()))

        canon_ind = [i for (s, i) in seq_ind_list if s == canonical][0]
        T_mat = trans_matrix(seq_ind_list, edges)
        print("Transformed Matrix: {0}".format(datetime.datetime.now()))

        x = [0, 1]
        y = [0, find_frac(T_mat, canon_ind, orig_len)]

        T_mat_new = T_mat
        for i in range(2, max_steps + 1):
            T_mat_new, frac = square_matrix(T_mat_new, T_mat, canon_ind,
                                            orig_len)
            x.append(i)
            y.append(frac)
            print("Raised Matrix {0}: {1}".format(i, datetime.datetime.now()))

        series.append([x, y, canonical])

    fig, ax = conv.create_ax(1, 1)
    color = ['orange', 'palevioletred', 'mediumaquamarine', 'deepskyblue']

    scatterplot.plot_series(ax[0, 0], series, title="",
                            x_axis="Number of Steps", colors=color,
                            y_axis="Fraction Cleaved Variants Reached",
                            alpha=0.85, connect_dots=True, size=15,
                            edgecolors='k', linewidth=0)
    ax[0, 0].set_xlim(xmin=1)
    ax[0, 0].set_ylim(ymin=0.0, ymax=1.0)
    ax[0, 0].set_xticks(range(1, max_steps + 1, 3))
    lgd = conv.add_legend(ax[0, 0], location='upper center',
                          bbox_to_anchor=(0.5, 1.05), ncol=2, size=8)
    conv.save_fig(fig, output_prefix, "fraction_func", 2.5, 3, size=9.5,
                  extra_artists=lgd)

    print("Outputted Figure: {0}".format(datetime.datetime.now()))
def main(input_dir_1, scoretype1, input_dir_2, scoretype2, rmsd_cutoff, output_pre):
    """Compare two score sets per PDB and save line, histogram and scatter plots.

    Decoy and native data are read from both input directories with
    ``scorefileparse.read_dec_nat``, filtered by ``rmsd_cutoff``,
    normalised and intersected.  For every PDB key (sorted) the helpers
    ``plot_r_v_r`` and ``plot_pareto`` are called; the mapping returned by
    ``plot_pareto`` (key -> (rank1, rank2, rmsd)) is collected both per key
    and per PDB.  Three figures are then handed to ``conv.save_fig``:

    * one line figure per Pareto prefix ("R", "A"), suffix ``_line_<prefix>``
    * a distribution figure, suffix ``_distdeltas``
    * a scatter figure, suffix ``_scattdeltas``

    All of them use the base name ``output_pre + "/" + title1 + "_" +
    title2 + ".txt"``, where the titles are the basenames of the two input
    directories.

    Args:
        input_dir_1: first input directory.
        scoretype1: score type passed to ``read_dec_nat`` for the first set.
        input_dir_2: second input directory.
        scoretype2: score type passed to ``read_dec_nat`` for the second set.
        rmsd_cutoff: cutoff passed to ``filter_pdbs_by_rmsd``.
        output_pre: output directory prefix for the saved figures.
    """
    # --- read and prepare inputs -----------------------------------------
    title1 = os.path.basename(input_dir_1)
    title2 = os.path.basename(input_dir_2)

    raw_dec1, raw_nat1 = scorefileparse.read_dec_nat(input_dir_1, scoretype1, repl_orig=False)
    raw_dec2, raw_nat2 = scorefileparse.read_dec_nat(input_dir_2, scoretype2, repl_orig=False)

    dec1 = scorefileparse.filter_pdbs_by_rmsd(raw_dec1, rmsd_cutoff)
    nat1 = scorefileparse.filter_pdbs_by_rmsd(raw_nat1, rmsd_cutoff)
    dec2 = scorefileparse.filter_pdbs_by_rmsd(raw_dec2, rmsd_cutoff)
    nat2 = scorefileparse.filter_pdbs_by_rmsd(raw_nat2, rmsd_cutoff)

    # Natives are normalised with the matching decoy set as second argument.
    dec_norm1 = scorefileparse.norm_pdbs(dec1)
    nat_norm1 = scorefileparse.norm_pdbs(nat1, dec1)
    dec_norm2 = scorefileparse.norm_pdbs(dec2)
    nat_norm2 = scorefileparse.norm_pdbs(nat2, dec2)

    dec_inter1, nat_inter1, dec_inter2, nat_inter2 = scorefileparse.pdbs_intersect(
        [dec_norm1, nat_norm1, dec_norm2, nat_norm2])
    dec_inter1, dec_inter2 = scorefileparse.pdbs_scores_intersect([dec_inter1, dec_inter2])
    nat_inter1, nat_inter2 = scorefileparse.pdbs_scores_intersect([nat_inter1, nat_inter2])

    # NOTE(review): the filtered variants below are computed but not read
    # again in this function; the calls are kept in case the helpers have
    # side effects -- confirm before removing.
    dec_filt1 = scorefileparse.filter_norm_pdbs(dec_norm1)
    nat_filt1 = scorefileparse.filter_norm_pdbs(nat_norm1)
    dec_filt2 = scorefileparse.filter_norm_pdbs(dec_norm2)
    nat_filt2 = scorefileparse.filter_norm_pdbs(nat_norm2)

    dec_finter1, dec_finter2 = scorefileparse.pdbs_scores_intersect([dec_filt1, dec_filt2])
    nat_finter1, nat_finter2 = scorefileparse.pdbs_scores_intersect([nat_filt1, nat_filt2])

    # --- per-PDB plots ----------------------------------------------------
    # This figure is drawn into but not saved anywhere in this function.
    fig, axarr = conv.create_ax(2, len(dec_inter1))

    line_plot_data = {}      # key -> ([pdb, ...], [rmsd, ...])
    min_naive_by_pdb = {}    # pdb -> {key: rmsd}
    keys_to_include = ["Amber", "Rosetta", "All", "Pareto10"]  # currently unused

    for row, pdb in enumerate(sorted(dec_inter1.keys())):
        plot_r_v_r(dec_inter1, dec_inter2, nat_inter1, nat_inter2,
                   axarr[row, 0], pdb, title1, title2)
        min_naive = plot_pareto(dec_inter1, dec_inter2, nat_inter1, nat_inter2,
                                axarr[row, 1], pdb, title1, title2)

        for key, (_rank1, _rank2, rmsd) in min_naive.items():
            key_pdbs, key_rmsds = line_plot_data.setdefault(key, ([], []))
            key_pdbs.append(pdb)
            key_rmsds.append(rmsd)
            min_naive_by_pdb.setdefault(pdb, {})[key] = rmsd

    # --- order every series by the rank of each PDB's "All" rmsd ----------
    all_pdbs, all_rmsds = line_plot_data["All"]
    order = sorted(range(len(all_rmsds)), key=lambda idx: all_rmsds[idx])
    rank_of_pdb = {}
    for rank, idx in enumerate(order):
        rank_of_pdb[all_pdbs[idx]] = rank

    for label in list(line_plot_data):
        pdbs, rmsds = line_plot_data[label]
        ranked_pairs = sorted(zip(pdbs, rmsds),
                              key=lambda pair: rank_of_pdb[pair[0]])
        line_plot_data[label] = tuple(zip(*ranked_pairs))

    filename = output_pre + "/" + title1 + "_" + title2 + ".txt"

    # --- line plots: one figure per Pareto prefix -------------------------
    all_pareto_labels = []  # accumulated but not read in this function

    for initial in ["R", "A"]:
        pareto_labels = ["Pareto{0}{1}".format(initial, i) for i in range(1, 11)]
        ordered_labels = ["All", "Amber", "Rosetta"] + pareto_labels
        all_pareto_labels.extend(pareto_labels)

        lines = []
        for label in ordered_labels:
            series_pdbs, series_rmsds = line_plot_data[label][0], line_plot_data[label][1]
            lines.append((series_pdbs, series_rmsds, label))

        fig2, axarr2 = conv.create_ax(1, len(ordered_labels), shx=True, shy=True)

        # Row k shows the first k + 1 series.
        for row, _label in enumerate(ordered_labels):
            line.plot_series(axarr2[row, 0], lines[0:row + 1], "RMSD vs. pdb",
                             "PDB", "RMSD", linestyle='')
            conv.add_legend(axarr2[row, 0])

        conv.save_fig(fig2, filename, "_line_{0}".format(initial), 10,
                      len(ordered_labels) * 5)

    # --- histogram / distribution plots ------------------------------------
    hist_comp = [("Amber", "All"), ("Rosetta", "All"),
                 ("ParetoR10", "All"), ("ParetoA10", "All")]
    for prefix in ("R", "A"):
        for baseline in ("Rosetta", "Amber"):
            hist_comp.extend(
                ("Pareto{0}{1}".format(prefix, ind), baseline)
                for ind in range(1, 11))

    fig3, axarr3 = conv.create_ax(2, len(hist_comp), shx=False, shy=False)

    for row, (top, bottom) in enumerate(hist_comp):
        gen_dist_plot(axarr3[row, 0], axarr3[row, 1], top, bottom, min_naive_by_pdb)

    conv.save_fig(fig3, filename, "_distdeltas", 7, len(hist_comp) * 5, tight=False)

    # --- scatter plots: row 0 = ParetoR1..10, row 1 = ParetoA1..10 --------
    fig4, axarr4 = conv.create_ax(10, 2)

    for col in range(10):
        level = col + 1
        gen_scatterplot(axarr4[0, col], "ParetoR{0}".format(level),
                        "Rosetta", "Amber", min_naive_by_pdb)
        gen_scatterplot(axarr4[1, col], "ParetoA{0}".format(level),
                        "Rosetta", "Amber", min_naive_by_pdb)

    conv.save_fig(fig4, filename, "_scattdeltas", 30, 6)
def main(seq_file, canonical_file, output_prefix):
    """Plot the per-step fraction curve for the hard-coded canonical sequences.

    The lines of ``seq_file`` are indexed in file order.  An edge is
    created between two listed sequences when one appears in
    ``gsconv.gen_hamdist_one`` of the other; each unordered pair is kept
    once and self-pairs are dropped.  ``trans_matrix`` builds a matrix from
    the entries and edges.  For every canonical sequence, step 0 is
    recorded as 0.0, step 1 as ``find_frac`` of that matrix, and steps
    2..22 as ``find_frac`` of the successive results of ``square_matrix``.

    The curves are drawn on one axis ("Number of Steps" against "Fraction
    Cleaved Variants Reached") and saved through ``conv.save_fig`` with the
    suffix ``"fraction_func"``.  Timestamped progress messages are printed.

    Args:
        seq_file: path of a text file with one sequence per line.
        canonical_file: currently unused; the canonical sequences are
            hard-coded below (reading them from this file is disabled).
        output_prefix: prefix handed to ``conv.save_fig``.

    Raises:
        IndexError: if a canonical sequence is not a line of ``seq_file``.
    """
    # Function-scope import: only the print-threshold setting needs it.
    import sys

    max_steps = 22  # last step plotted; also bounds the x ticks

    #canonical_list_seq = seq_IO.read_sequences(canonical_file)
    canonical_list_seq = ["DEMEE", "DEMED"]

    # print(...) with one argument is valid both as a Python 2 statement
    # and as a Python 3 call, and prints the same text in both.
    print("Beginning Script: {0}".format(datetime.datetime.now()))

    with open(seq_file) as strings:
        seq_list = strings.read().splitlines()
    seq_ind_list = [(seq, ind) for ind, seq in enumerate(seq_list)]
    seq_ind_dict = {seq: ind for seq, ind in seq_ind_list}
    orig_len = len(seq_ind_list)

    edges = []
    edges_set = set()
    print("Read in Data: {0}".format(datetime.datetime.now()))

    for seq, seq_ind in seq_ind_dict.items():
        # Neighbours that are listed sequences; filtered once, used twice.
        present = [n for n in gsconv.gen_hamdist_one(seq)
                   if n in seq_ind_dict]
        # Register (seq, n) BEFORE filtering: this drops self-pairs and the
        # reverse of any pair already emitted from an earlier sequence.
        edges_set.update((seq, n) for n in present)
        edges.extend(((seq, seq_ind), (n, seq_ind_dict[n]))
                     for n in present if (n, seq) not in edges_set)

    print(len(seq_ind_list))
    print("Generated Edges: {0}".format(datetime.datetime.now()))

    # threshold must be numeric: the former 'nan' string relied on
    # Python 2 mixed-type comparison and is rejected by current NumPy.
    # sys.maxsize is the documented way to disable summarisation.
    numpy.set_printoptions(threshold=sys.maxsize)

    # First index at which each canonical sequence occurs in the file.
    canon_ind_dict = {
        canonical: [i for (s, i) in seq_ind_list if s == canonical][0]
        for canonical in canonical_list_seq
    }

    T_mat = trans_matrix(seq_ind_list, edges)
    print("Transformed Matrix: {0}".format(datetime.datetime.now()))

    canon_x = {can: [0, 1] for can in canonical_list_seq}
    canon_y = {
        can: [0.0, find_frac(T_mat, canon_ind_dict[can], orig_len)]
        for can in canonical_list_seq
    }
    print("Made x and y dicts: {0}".format(datetime.datetime.now()))

    T_mat_new = T_mat
    for i in range(2, max_steps + 1):
        T_mat_new = square_matrix(T_mat_new, T_mat)
        for can in canonical_list_seq:
            canon_x[can].append(i)
            canon_y[can].append(
                find_frac(T_mat_new, canon_ind_dict[can], orig_len))
        print("Raised Matrix {0}: {1}".format(i, datetime.datetime.now()))

    series = [[canon_x[can], canon_y[can], can] for can in canonical_list_seq]

    fig, ax = conv.create_ax(1, 1)
    color = ['orange', 'palevioletred', 'mediumaquamarine', 'deepskyblue']

    scatterplot.plot_series(ax[0, 0], series, title="",
                            x_axis="Number of Steps", colors=color,
                            y_axis="Fraction Cleaved Variants Reached",
                            alpha=0.85, connect_dots=True, size=15,
                            edgecolors='k', linewidth=0)
    ax[0, 0].set_xlim(xmin=1)
    ax[0, 0].set_ylim(ymin=0.0, ymax=1.0)
    ax[0, 0].set_xticks(range(1, max_steps + 1, 3))
    lgd = conv.add_legend(ax[0, 0], location='upper center',
                          bbox_to_anchor=(0.5, 1.05), ncol=2, size=8)
    conv.save_fig(fig, output_prefix, "fraction_func", 2.5, 3, size=9.5,
                  extra_artists=lgd)

    print("Outputted Figure: {0}".format(datetime.datetime.now()))
def main(seq_file, canonical_file, output_prefix):
    """Plot fraction-versus-steps curves for the hard-coded canonical sequences.

    Steps performed:

    1. Read ``seq_file`` (one sequence per line) and index the lines.
    2. Build edges between listed sequences that appear in each other's
       ``gsconv.gen_hamdist_one`` result, keeping each unordered pair once
       and dropping self-pairs.
    3. Build a matrix with ``trans_matrix`` and, for every canonical
       sequence, record 0.0 for step 0, ``find_frac`` of the matrix for
       step 1, and ``find_frac`` of each successive ``square_matrix``
       result for steps 2..22.
    4. Draw all curves on one axis and save through ``conv.save_fig`` with
       the suffix ``"fraction_func"``.

    Timestamped progress messages are printed along the way.

    Args:
        seq_file: path of a text file with one sequence per line.
        canonical_file: currently unused; the canonical sequences are
            hard-coded below (reading them from this file is disabled).
        output_prefix: prefix handed to ``conv.save_fig``.

    Raises:
        IndexError: if a canonical sequence is not a line of ``seq_file``.
    """
    # Function-scope import: only the print-threshold setting needs it.
    import sys

    last_step = 22  # highest step number plotted

    #canonical_list_seq = seq_IO.read_sequences(canonical_file)
    canonical_list_seq = ["DEMEE", "DEMED"]

    # Single-argument print(...) behaves the same as a Python 2 statement
    # and as a Python 3 function call.
    print("Beginning Script: {0}".format(datetime.datetime.now()))

    with open(seq_file) as strings:
        seq_list = strings.read().splitlines()

    seq_ind_list = [(seq, ind) for ind, seq in enumerate(seq_list)]
    seq_ind_dict = {seq: ind for seq, ind in seq_ind_list}
    orig_len = len(seq_ind_list)

    edges = []
    edges_set = set()

    print("Read in Data: {0}".format(datetime.datetime.now()))

    for seq, seq_ind in seq_ind_dict.items():
        # Keep only neighbours that are themselves listed sequences; this
        # filter used to be evaluated twice per sequence.
        listed_neighbors = [n for n in gsconv.gen_hamdist_one(seq)
                            if n in seq_ind_dict]
        # The set must be updated before the edge filter runs: it is what
        # excludes (seq, seq) and pairs already emitted in reverse.
        edges_set.update((seq, n) for n in listed_neighbors)
        for n in listed_neighbors:
            if (n, seq) not in edges_set:
                edges.append(((seq, seq_ind), (n, seq_ind_dict[n])))

    print(len(seq_ind_list))
    print("Generated Edges: {0}".format(datetime.datetime.now()))

    # A string threshold ('nan') only worked through Python 2 mixed-type
    # comparison and is rejected by current NumPy; sys.maxsize is the
    # documented value for "never summarise".
    numpy.set_printoptions(threshold=sys.maxsize)

    # First index at which each canonical sequence occurs in the file.
    canon_ind_dict = {
        canonical: [i for (s, i) in seq_ind_list if s == canonical][0]
        for canonical in canonical_list_seq
    }

    T_mat = trans_matrix(seq_ind_list, edges)

    print("Transformed Matrix: {0}".format(datetime.datetime.now()))

    canon_x = {can: [0, 1] for can in canonical_list_seq}
    canon_y = {
        can: [0.0, find_frac(T_mat, canon_ind_dict[can], orig_len)]
        for can in canonical_list_seq
    }

    print("Made x and y dicts: {0}".format(datetime.datetime.now()))

    T_mat_new = T_mat
    for i in range(2, last_step + 1):
        T_mat_new = square_matrix(T_mat_new, T_mat)
        for can in canonical_list_seq:
            canon_x[can].append(i)
            canon_y[can].append(
                find_frac(T_mat_new, canon_ind_dict[can], orig_len))
        print("Raised Matrix {0}: {1}".format(i, datetime.datetime.now()))

    series = [[canon_x[can], canon_y[can], can] for can in canonical_list_seq]

    fig, ax = conv.create_ax(1, 1)

    color = ['orange', 'palevioletred', 'mediumaquamarine', 'deepskyblue']

    scatterplot.plot_series(ax[0, 0], series, title="",
                            x_axis="Number of Steps", colors=color,
                            y_axis="Fraction Cleaved Variants Reached",
                            alpha=0.85, connect_dots=True, size=15,
                            edgecolors='k', linewidth=0)
    ax[0, 0].set_xlim(xmin=1)
    ax[0, 0].set_ylim(ymin=0.0, ymax=1.0)
    ax[0, 0].set_xticks(range(1, last_step + 1, 3))
    lgd = conv.add_legend(ax[0, 0], location='upper center',
                          bbox_to_anchor=(0.5, 1.05), ncol=2, size=8)
    conv.save_fig(fig, output_prefix, "fraction_func", 2.5, 3, size=9.5,
                  extra_artists=lgd)

    print("Outputted Figure: {0}".format(datetime.datetime.now()))
def main(seq_file, canonical_file, output_prefix):
    """Plot fraction-versus-steps curves, one per canonical sequence.

    Canonical sequences come from ``seq_IO.read_sequences(canonical_file)``.
    For each of them:

    * the lines of ``seq_file`` are indexed in file order;
    * if the canonical sequence is not one of those lines, the output of
      ``gsconv.gen_hamdist_one(canonical)`` (without the canonical itself)
      and then the canonical are appended with continuing indices;
    * all pairs of entries with ``gsconv.hamdist(...) < 2`` become edges
      and ``trans_matrix`` builds a matrix from entries and edges;
    * step 0 is recorded as 0, step 1 as ``find_frac`` of the matrix, and
      steps 2..22 as the fraction returned by ``square_matrix``.

    The curves share one axis ("Number of Steps" against "Fraction Cleaved
    Variants Reached") and the figure is saved through ``conv.save_fig``
    with the suffix ``"fraction_func"``.  Timestamped progress messages are
    printed along the way.

    Args:
        seq_file: path of a text file with one sequence per line.
        canonical_file: path handed to ``seq_IO.read_sequences``.
        output_prefix: prefix handed to ``conv.save_fig``.
    """
    # Function-scope import: only the print-threshold setting needs it.
    import sys

    last_step = 22  # highest step number plotted

    series = []
    canonical_list_seq = seq_IO.read_sequences(canonical_file)

    # Single-argument print(...) behaves the same as a Python 2 statement
    # and as a Python 3 function call.
    print("Beginning Script: {0}".format(datetime.datetime.now()))

    # Loop-invariant: the file content is the same for every canonical
    # sequence, so read it a single time.
    with open(seq_file) as strings:
        seq_list = strings.read().splitlines()
    orig_len = len(seq_list)

    # A string threshold ('nan') only worked through Python 2 mixed-type
    # comparison and is rejected by current NumPy; sys.maxsize is the
    # documented value for "never summarise".
    numpy.set_printoptions(threshold=sys.maxsize)

    for canonical in canonical_list_seq:
        # Fresh list per canonical: it may be extended below.
        seq_ind_list = [(seq, ind) for ind, seq in enumerate(seq_list)]

        if canonical not in seq_list:
            one_away = gsconv.gen_hamdist_one(canonical)
            one_away = [o for o in one_away if o != canonical] + [canonical]
            # Added entries are numbered after the sequences from the file.
            seq_ind_list += [
                (o, ind) for (ind, o) in enumerate(one_away, orig_len)
            ]

        edges = [
            (seq2, seq)
            for seq, seq2 in itertools.combinations(seq_ind_list, 2)
            if gsconv.hamdist(seq2[0], seq[0]) < 2
        ]

        print(len(seq_ind_list))
        print("Generated Edges: {0}".format(datetime.datetime.now()))

        canon_ind = [i for (s, i) in seq_ind_list if s == canonical][0]

        T_mat = trans_matrix(seq_ind_list, edges)

        print("Transformed Matrix: {0}".format(datetime.datetime.now()))

        x = [0, 1]
        y = [0, find_frac(T_mat, canon_ind, orig_len)]

        T_mat_new = T_mat
        for i in range(2, last_step + 1):
            x.append(i)
            T_mat_new, frac = square_matrix(T_mat_new, T_mat, canon_ind,
                                            orig_len)
            y.append(frac)
            print("Raised Matrix {0}: {1}".format(i, datetime.datetime.now()))

        series.append([x, y, canonical])

    fig, ax = conv.create_ax(1, 1)

    color = ['orange', 'palevioletred', 'mediumaquamarine', 'deepskyblue']

    scatterplot.plot_series(ax[0, 0], series, title="",
                            x_axis="Number of Steps", colors=color,
                            y_axis="Fraction Cleaved Variants Reached",
                            alpha=0.85, connect_dots=True, size=15,
                            edgecolors='k', linewidth=0)
    ax[0, 0].set_xlim(xmin=1)
    ax[0, 0].set_ylim(ymin=0.0, ymax=1.0)
    ax[0, 0].set_xticks(range(1, last_step + 1, 3))
    lgd = conv.add_legend(ax[0, 0], location='upper center',
                          bbox_to_anchor=(0.5, 1.05), ncol=2, size=8)
    conv.save_fig(fig, output_prefix, "fraction_func", 2.5, 3, size=9.5,
                  extra_artists=lgd)

    print("Outputted Figure: {0}".format(datetime.datetime.now()))