def gen_scatterplot(ax, x_axis, y_axis, z_axis, min_naive_by_pdb):
    """Draw two delta series against a shared x series on ``ax``.

    The deltas for ``x_axis``, ``y_axis`` and ``z_axis`` are each obtained
    from ``get_dist_deltas(name, "All", min_naive_by_pdb)``.  The ``y_axis``
    deltas are drawn with ``'k'`` and the ``z_axis`` deltas with ``'r'``
    (presumably matplotlib colour codes -- confirm against
    ``scatterplot.draw_actual_plot``), both against the ``x_axis`` deltas.
    Afterwards ``scatterplot.add_x_y_line`` and ``conv.add_legend`` are
    applied to the same axis.

    Args:
        ax: Axis object handed through to the plotting helpers.
        x_axis: Key whose deltas go on the horizontal axis; also used to
            build the strings passed to ``draw_actual_plot``.
        y_axis: Key of the first plotted series (used as its label).
        z_axis: Key of the second plotted series (used as its label).
        min_naive_by_pdb: Mapping forwarded unchanged to ``get_dist_deltas``.
    """
    baseline = "All"

    # All three delta lists are computed up front, in x, y, z order, before
    # anything is drawn.
    x_deltas, y_deltas, z_deltas = [
        get_dist_deltas(name, baseline, min_naive_by_pdb)
        for name in (x_axis, y_axis, z_axis)
    ]

    x_text = x_axis + " Delta to Min RMSD (A)"
    y_text = "Delta to Min RMSD (A)"

    for deltas, colour, series_label in ((y_deltas, 'k', y_axis),
                                         (z_deltas, 'r', z_axis)):
        scatterplot.draw_actual_plot(ax, x_deltas, deltas, colour, x_axis,
                                     x_text, y_text, size=15,
                                     label=series_label)

    scatterplot.add_x_y_line(ax)

    conv.add_legend(ax)
def main(seq_file, canonical_file, output_prefix):
    """Plot one step-versus-fraction series per canonical sequence.

    For every sequence returned by ``seq_IO.read_sequences(canonical_file)``
    the lines of ``seq_file`` are paired with their line index, every pair of
    entries with ``gsconv.hamdist`` below 2 becomes an edge, and
    ``trans_matrix`` turns entries and edges into a matrix.  The y value for
    step 1 comes from ``find_frac``; the values for steps 2..22 come from
    repeated ``square_matrix`` calls.  All series are drawn on a single axis
    and written through ``conv.save_fig`` with the suffix ``"fraction_func"``.

    Args:
        seq_file: Path of a text file with one sequence per line.
        canonical_file: Path handed to ``seq_IO.read_sequences``.
        output_prefix: Prefix handed to ``conv.save_fig``.

    Note:
        ``seq_file`` is read once before the loop rather than once per
        canonical sequence, so it is opened even when the canonical list is
        empty.
    """
    # Function-scope import: only needed for the print-threshold fix below.
    import sys

    series = []

    canonical_list_seq = seq_IO.read_sequences(canonical_file)

    print("Beginning Script: {0}".format(datetime.datetime.now()))

    # Loop-invariant work: the sequence file does not change between
    # canonical sequences, so read and index it a single time.
    with open(seq_file) as strings:
        seq_list = strings.read().splitlines()
    base_seq_ind_list = [(seq, ind) for ind, seq in enumerate(seq_list)]
    seq_set = set(seq_list)
    orig_len = len(base_seq_ind_list)

    # threshold='nan' is rejected by current NumPy releases; sys.maxsize is
    # the documented way to request untruncated array printing.
    numpy.set_printoptions(threshold=sys.maxsize)

    for canonical in canonical_list_seq:

        seq_ind_list = list(base_seq_ind_list)
        if canonical not in seq_set:
            # The canonical sequence is absent from the file: append the
            # sequences from gen_hamdist_one and then the canonical itself,
            # numbering them after the file entries.
            one_away = [o for o in gsconv.gen_hamdist_one(canonical)
                        if o != canonical]
            one_away.append(canonical)
            seq_ind_list.extend(
                (o, ind) for ind, o in enumerate(one_away, orig_len))

        edges = [(second, first)
                 for first, second in itertools.combinations(seq_ind_list, 2)
                 if gsconv.hamdist(second[0], first[0]) < 2]
        print(len(seq_ind_list))
        print("Generated Edges: {0}".format(datetime.datetime.now()))

        # The canonical sequence is always present at this point (either it
        # was in the file or it was appended above).
        canon_ind = next(i for s, i in seq_ind_list if s == canonical)

        T_mat = trans_matrix(seq_ind_list, edges)

        print("Transformed Matrix: {0}".format(datetime.datetime.now()))

        # Step 0 is pinned to 0; step 1 uses the matrix as built.
        x = [0, 1]
        y = [0, find_frac(T_mat, canon_ind, orig_len)]

        T_mat_new = T_mat

        for i in range(2, 23):
            # square_matrix returns the updated matrix and the y value for
            # this step (presumably T_mat_new combined with T_mat once more
            # -- confirm against its definition).
            T_mat_new, frac = square_matrix(T_mat_new, T_mat, canon_ind,
                                            orig_len)
            x.append(i)
            y.append(frac)

            print("Raised Matrix {0}: {1}".format(i, datetime.datetime.now()))

        series.append([x, y, canonical])

    fig, ax = conv.create_ax(1, 1)

    color = ['orange', 'palevioletred', 'mediumaquamarine', 'deepskyblue']

    scatterplot.plot_series(ax[0, 0], series, title="",
                            x_axis="Number of Steps", colors=color,
                            y_axis="Fraction Cleaved Variants Reached",
                            alpha=0.85, connect_dots=True, size=15,
                            edgecolors='k', linewidth=0)
    ax[0, 0].set_xlim(xmin=1)
    ax[0, 0].set_ylim(ymin=0.0, ymax=1.0)
    # range() yields the same tick positions as xrange() and exists on both
    # Python 2 and Python 3.
    ax[0, 0].set_xticks(range(1, 23, 3))
    lgd = conv.add_legend(ax[0, 0], location='upper center',
                          bbox_to_anchor=(0.5, 1.05), ncol=2, size=8)
    conv.save_fig(fig, output_prefix, "fraction_func", 2.5, 3, size=9.5,
                  extra_artists=lgd)

    print("Outputted Figure: {0}".format(datetime.datetime.now()))
def main(input_dir_1, scoretype1, input_dir_2, scoretype2, rmsd_cutoff, output_pre):
    """Compare two score sets per PDB and save line, histogram and scatter figures.

    Steps, in order:

    1. Read decoy/native data for both directories with
       ``scorefileparse.read_dec_nat``, filter with ``filter_pdbs_by_rmsd``
       and normalise with ``norm_pdbs``.
    2. Intersect the four normalised sets (``pdbs_intersect`` and
       ``pdbs_scores_intersect``).
    3. For every PDB key (sorted) call ``plot_r_v_r`` and ``plot_pareto`` on
       one row of a two-column axis grid, and collect the third element of
       each ``plot_pareto`` result entry per key and per PDB.
    4. Order every collected series by the rank of each PDB in the ``"All"``
       series.
    5. Save three figures through ``conv.save_fig`` with the suffixes
       ``"_line_R"`` / ``"_line_A"``, ``"_distdeltas"`` and
       ``"_scattdeltas"``.

    Args:
        input_dir_1: First input directory; its basename is used as a title.
        scoretype1: Score type passed to ``read_dec_nat`` for the first set.
        input_dir_2: Second input directory; its basename is used as a title.
        scoretype2: Score type passed to ``read_dec_nat`` for the second set.
        rmsd_cutoff: Cutoff passed to ``filter_pdbs_by_rmsd``.
        output_pre: Output location; the file name is built as
            ``output_pre + "/" + title1 + "_" + title2 + ".txt"``.

    Raises:
        KeyError: If no ``"All"`` series was collected, or if one of the
            expected labels (``"Amber"``, ``"Rosetta"``, ``"ParetoR1"`` ...)
            is missing from the collected data.
    """
    # Read in and rename arguments.
    title1 = os.path.basename(input_dir_1)
    title2 = os.path.basename(input_dir_2)

    d1, n1 = scorefileparse.read_dec_nat(input_dir_1, scoretype1, repl_orig=False)
    d2, n2 = scorefileparse.read_dec_nat(input_dir_2, scoretype2, repl_orig=False)

    dec1 = scorefileparse.filter_pdbs_by_rmsd(d1, rmsd_cutoff)
    nat1 = scorefileparse.filter_pdbs_by_rmsd(n1, rmsd_cutoff)
    dec2 = scorefileparse.filter_pdbs_by_rmsd(d2, rmsd_cutoff)
    nat2 = scorefileparse.filter_pdbs_by_rmsd(n2, rmsd_cutoff)

    # The native sets are normalised with the matching decoy set as a second
    # argument.
    dec_norm1 = scorefileparse.norm_pdbs(dec1)
    nat_norm1 = scorefileparse.norm_pdbs(nat1, dec1)
    dec_norm2 = scorefileparse.norm_pdbs(dec2)
    nat_norm2 = scorefileparse.norm_pdbs(nat2, dec2)

    dec_inter1, nat_inter1, dec_inter2, nat_inter2 = scorefileparse.pdbs_intersect(
        [dec_norm1, nat_norm1, dec_norm2, nat_norm2])
    dec_inter1, dec_inter2 = scorefileparse.pdbs_scores_intersect([dec_inter1, dec_inter2])
    nat_inter1, nat_inter2 = scorefileparse.pdbs_scores_intersect([nat_inter1, nat_inter2])

    # NOTE(review): the filtered variants below are computed but never read
    # afterwards.  The calls are kept because it is not visible here whether
    # the helpers have side effects -- confirm before removing.
    dec_filt1 = scorefileparse.filter_norm_pdbs(dec_norm1)
    nat_filt1 = scorefileparse.filter_norm_pdbs(nat_norm1)
    dec_filt2 = scorefileparse.filter_norm_pdbs(dec_norm2)
    nat_filt2 = scorefileparse.filter_norm_pdbs(nat_norm2)

    dec_finter1, dec_finter2 = scorefileparse.pdbs_scores_intersect([dec_filt1, dec_filt2])
    nat_finter1, nat_finter2 = scorefileparse.pdbs_scores_intersect([nat_filt1, nat_filt2])

    # One row per PDB, two columns.  NOTE: this figure is drawn but not
    # saved; the save call (suffix "rmsd_v_rmsd_<rmsd_cutoff>") is disabled.
    fig, axarr = conv.create_ax(2, len(dec_inter1))

    # label -> ([pdb, ...], [value, ...]), filled in PDB order.
    line_plot_data = {}
    # pdb -> {label: value}
    min_naive_by_pdb = {}

    for row, pdb in enumerate(sorted(dec_inter1.keys())):

        plot_r_v_r(dec_inter1, dec_inter2, nat_inter1, nat_inter2,
                   axarr[row, 0], pdb, title1, title2)

        min_naive = plot_pareto(dec_inter1, dec_inter2, nat_inter1, nat_inter2,
                                axarr[row, 1], pdb, title1, title2)

        for key, (rank1, rank2, rmsd) in min_naive.items():
            pdbs, rmsds = line_plot_data.setdefault(key, ([], []))
            pdbs.append(pdb)
            rmsds.append(rmsd)
            min_naive_by_pdb.setdefault(pdb, {})[key] = rmsd

    # Organize data: rank every PDB by its value in the "All" series
    # (ascending, stable), then sort every series by that rank.
    all_pdbs, all_rmsds = line_plot_data["All"]
    order = sorted(range(len(all_rmsds)), key=lambda idx: all_rmsds[idx])
    ranked_pdbs_by_rmsd_all = {all_pdbs[idx]: position
                               for position, idx in enumerate(order)}

    for label in list(line_plot_data):
        pdbs, rmsds = line_plot_data[label]
        ranked_pairs = sorted(zip(pdbs, rmsds),
                              key=lambda pair: ranked_pdbs_by_rmsd_all[pair[0]])
        line_plot_data[label] = tuple(zip(*ranked_pairs))

    filename = output_pre + "/" + title1 + "_" + title2 + ".txt"

    # Plot line plot: one figure per Pareto family ("R" and "A"); row i shows
    # the first i+1 series of ordered_labels.
    for initial in ["R", "A"]:
        ordered_labels = ["All", "Amber", "Rosetta"]
        ordered_labels.extend("Pareto{0}{1}".format(initial, i)
                              for i in range(1, 11))

        lines = [(line_plot_data[label][0], line_plot_data[label][1], label)
                 for label in ordered_labels]

        fig2, axarr2 = conv.create_ax(1, len(ordered_labels), shx=True, shy=True)

        for row, _label in enumerate(ordered_labels):
            line.plot_series(axarr2[row, 0], lines[0:row + 1], "RMSD vs. pdb",
                             "PDB", "RMSD", linestyle='')

            conv.add_legend(axarr2[row, 0])
        conv.save_fig(fig2, filename, "_line_{0}".format(initial), 10,
                      len(ordered_labels) * 5)

    # Plot histogram plot: (top, bottom) label pairs, one grid row per pair.
    hist_comp = [("Amber", "All"), ("Rosetta", "All"),
                 ("ParetoR10", "All"), ("ParetoA10", "All")]

    # Appended in the order R/Rosetta, R/Amber, A/Rosetta, A/Amber.
    for family in ("ParetoR", "ParetoA"):
        for reference in ("Rosetta", "Amber"):
            hist_comp.extend(("{0}{1}".format(family, ind), reference)
                             for ind in range(1, 11))

    fig3, axarr3 = conv.create_ax(2, len(hist_comp), shx=False, shy=False)

    for ind, (top, bottom) in enumerate(hist_comp):
        gen_dist_plot(axarr3[ind, 0], axarr3[ind, 1], top, bottom, min_naive_by_pdb)

    conv.save_fig(fig3, filename, "_distdeltas", 7, len(hist_comp) * 5, tight=False)

    # Plot scatterplot: row 0 holds ParetoR1..10, row 1 holds ParetoA1..10.
    fig4, axarr4 = conv.create_ax(10, 2)
    for i in range(1, 11):
        gen_scatterplot(axarr4[0, i - 1], "ParetoR{0}".format(i), "Rosetta",
                        "Amber", min_naive_by_pdb)
        gen_scatterplot(axarr4[1, i - 1], "ParetoA{0}".format(i), "Rosetta",
                        "Amber", min_naive_by_pdb)

    conv.save_fig(fig4, filename, "_scattdeltas", 30, 6)
# Example #4
# 0
def main(seq_file, canonical_file, output_prefix):
    """Plot one step-versus-fraction series for each hard-coded canonical sequence.

    The lines of ``seq_file`` are indexed, every sequence is linked to those
    of its ``gsconv.gen_hamdist_one`` results that also occur in the file,
    and ``trans_matrix`` builds a single matrix from entries and edges.  For
    steps 1..22 ``find_frac`` is evaluated on the current matrix for every
    canonical sequence; the matrix is advanced between steps with
    ``square_matrix``.  The series are drawn on one axis and written through
    ``conv.save_fig`` with the suffix ``"fraction_func"``.

    Args:
        seq_file: Path of a text file with one sequence per line.
        canonical_file: Currently unused -- reading it is disabled and the
            canonical sequences are hard-coded below.
        output_prefix: Prefix handed to ``conv.save_fig``.

    Raises:
        IndexError: If a canonical sequence does not occur in ``seq_file``.
    """
    # Function-scope import: only needed for the print-threshold fix below.
    import sys

    # NOTE: the read from canonical_file is disabled; these two sequences are
    # used instead.
    canonical_list_seq = ["DEMEE", "DEMED"]
    print("Beginning Script: {0}".format(datetime.datetime.now()))

    with open(seq_file) as strings:
        seq_list = strings.read().splitlines()
    seq_ind_list = [(seq, ind) for ind, seq in enumerate(seq_list)]

    seq_ind_dict = {seq: ind for seq, ind in seq_ind_list}

    orig_len = len(seq_ind_list)

    edges = []
    edges_set = set()
    print("Read in Data: {0}".format(datetime.datetime.now()))

    for seq, seq_ind in seq_ind_dict.items():
        # Materialised once so both passes below see the same neighbours,
        # whether gen_hamdist_one returns a list or a one-shot iterator.
        present = [n for n in gsconv.gen_hamdist_one(seq)
                   if n in seq_ind_dict]
        # Register every (seq, neighbour) pair first; an edge is emitted only
        # when the reverse pair has not been registered yet, so each
        # undirected edge appears once and (seq, seq) is never emitted.
        edges_set.update((seq, n) for n in present)
        edges.extend(((seq, seq_ind), (n, seq_ind_dict[n]))
                     for n in present if (n, seq) not in edges_set)

    print(len(seq_ind_list))
    print("Generated Edges: {0}".format(datetime.datetime.now()))

    # threshold='nan' is rejected by current NumPy releases; sys.maxsize is
    # the documented way to request untruncated array printing.
    numpy.set_printoptions(threshold=sys.maxsize)

    # First index at which each canonical sequence occurs in the file.
    canon_ind_dict = {
        canonical: [i for (s, i) in seq_ind_list if s == canonical][0]
        for canonical in canonical_list_seq
    }

    T_mat = trans_matrix(seq_ind_list, edges)
    print("Transformed Matrix: {0}".format(datetime.datetime.now()))

    # Step 0 is pinned to 0.0; step 1 uses the matrix as built.
    canon_x = {can: [0, 1] for can in canonical_list_seq}
    canon_y = {
        can: [0.0, find_frac(T_mat, canon_ind_dict[can], orig_len)]
        for can in canonical_list_seq
    }

    print("Made x and y dicts: {0}".format(datetime.datetime.now()))

    T_mat_new = T_mat

    for i in range(2, 23):

        # The matrix is advanced once per step and shared by all canonical
        # sequences.
        T_mat_new = square_matrix(T_mat_new, T_mat)

        for can in canonical_list_seq:
            canon_x[can].append(i)
            canon_y[can].append(
                find_frac(T_mat_new, canon_ind_dict[can], orig_len))

        print("Raised Matrix {0}: {1}".format(i, datetime.datetime.now()))

    series = [[canon_x[can], canon_y[can], can] for can in canonical_list_seq]

    fig, ax = conv.create_ax(1, 1)

    color = ['orange', 'palevioletred', 'mediumaquamarine', 'deepskyblue']

    scatterplot.plot_series(ax[0, 0],
                            series,
                            title="",
                            x_axis="Number of Steps",
                            colors=color,
                            y_axis="Fraction Cleaved Variants Reached",
                            alpha=0.85,
                            connect_dots=True,
                            size=15,
                            edgecolors='k',
                            linewidth=0)
    ax[0, 0].set_xlim(xmin=1)
    ax[0, 0].set_ylim(ymin=0.0, ymax=1.0)
    # range() yields the same tick positions as xrange() and exists on both
    # Python 2 and Python 3.
    ax[0, 0].set_xticks(range(1, 23, 3))
    lgd = conv.add_legend(ax[0, 0],
                          location='upper center',
                          bbox_to_anchor=(0.5, 1.05),
                          ncol=2,
                          size=8)
    conv.save_fig(fig,
                  output_prefix,
                  "fraction_func",
                  2.5,
                  3,
                  size=9.5,
                  extra_artists=lgd)

    print("Outputted Figure: {0}".format(datetime.datetime.now()))
def main(seq_file, canonical_file, output_prefix):
    """Build one matrix from ``seq_file`` and plot a series per canonical sequence.

    Every line of ``seq_file`` is a sequence.  Sequences are connected to the
    results of ``gsconv.gen_hamdist_one`` that are also present in the file,
    ``trans_matrix`` converts entries and edges into a matrix, and
    ``find_frac`` is sampled for each canonical sequence at steps 1..22 while
    ``square_matrix`` advances the matrix between steps.  The resulting
    series are plotted together and saved via ``conv.save_fig`` under the
    suffix ``"fraction_func"``.

    Args:
        seq_file: Path of a text file with one sequence per line.
        canonical_file: Not read at the moment; the canonical sequences are
            hard-coded in the body.
        output_prefix: Prefix handed to ``conv.save_fig``.

    Raises:
        IndexError: If a canonical sequence does not occur in ``seq_file``.
    """
    # Function-scope import: only needed for the print-threshold fix below.
    import sys

    # NOTE: seq_IO.read_sequences(canonical_file) is disabled in favour of
    # this fixed list.
    canonical_list_seq = ["DEMEE", "DEMED"]
    print("Beginning Script: {0}".format(datetime.datetime.now()))

    with open(seq_file) as strings:
        seq_list = strings.read().splitlines()
    seq_ind_list = list(zip(seq_list, range(len(seq_list))))

    seq_ind_dict = dict(seq_ind_list)

    orig_len = len(seq_ind_list)

    edges = []
    edges_set = set()
    print("Read in Data: {0}".format(datetime.datetime.now()))

    for seq, seq_ind in seq_ind_dict.items():
        # Keep only neighbours that exist in the file.  Building the list
        # once means the two passes below agree even if gen_hamdist_one
        # returns a one-shot iterator.
        known_neighbors = [n for n in gsconv.gen_hamdist_one(seq)
                           if n in seq_ind_dict]
        # All (seq, n) pairs are recorded before filtering, so an edge is
        # added only when its reverse was not recorded earlier; this also
        # excludes (seq, seq).
        for n in known_neighbors:
            edges_set.add((seq, n))
        for n in known_neighbors:
            if (n, seq) not in edges_set:
                edges.append(((seq, seq_ind), (n, seq_ind_dict[n])))

    print(len(seq_ind_list))
    print("Generated Edges: {0}".format(datetime.datetime.now()))

    # threshold='nan' is rejected by current NumPy releases; sys.maxsize is
    # the documented way to request untruncated array printing.
    numpy.set_printoptions(threshold=sys.maxsize)

    # First index at which each canonical sequence occurs in the file.
    canon_ind_dict = {}
    for canonical in canonical_list_seq:
        canon_ind_dict[canonical] = [
            i for (s, i) in seq_ind_list if s == canonical
        ][0]

    T_mat = trans_matrix(seq_ind_list, edges)
    print("Transformed Matrix: {0}".format(datetime.datetime.now()))

    # Step 0 is pinned to 0.0; step 1 uses the matrix as built.
    canon_x = {}
    canon_y = {}
    for can in canonical_list_seq:
        canon_x[can] = [0, 1]
    for can in canonical_list_seq:
        canon_y[can] = [0.0, find_frac(T_mat, canon_ind_dict[can], orig_len)]

    print("Made x and y dicts: {0}".format(datetime.datetime.now()))

    T_mat_new = T_mat

    for i in range(2, 23):

        # One matrix update per step, shared by every canonical sequence.
        T_mat_new = square_matrix(T_mat_new, T_mat)

        for can in canonical_list_seq:
            canon_x[can].append(i)
            canon_y[can].append(
                find_frac(T_mat_new, canon_ind_dict[can], orig_len))

        print("Raised Matrix {0}: {1}".format(i, datetime.datetime.now()))

    series = [[canon_x[can], canon_y[can], can] for can in canonical_list_seq]

    fig, ax = conv.create_ax(1, 1)

    color = ['orange', 'palevioletred', 'mediumaquamarine', 'deepskyblue']

    scatterplot.plot_series(ax[0, 0], series, title="",
                            x_axis="Number of Steps", colors=color,
                            y_axis="Fraction Cleaved Variants Reached",
                            alpha=0.85, connect_dots=True, size=15,
                            edgecolors='k', linewidth=0)
    ax[0, 0].set_xlim(xmin=1)
    ax[0, 0].set_ylim(ymin=0.0, ymax=1.0)
    # range() yields the same tick positions as xrange() and exists on both
    # Python 2 and Python 3.
    ax[0, 0].set_xticks(range(1, 23, 3))
    lgd = conv.add_legend(ax[0, 0], location='upper center',
                          bbox_to_anchor=(0.5, 1.05), ncol=2, size=8)
    conv.save_fig(fig, output_prefix, "fraction_func", 2.5, 3, size=9.5,
                  extra_artists=lgd)

    print("Outputted Figure: {0}".format(datetime.datetime.now()))
# Example #6
# 0
def main(seq_file, canonical_file, output_prefix):
    """Plot, per canonical sequence, the value recorded at each of 22 steps.

    The canonical sequences come from
    ``seq_IO.read_sequences(canonical_file)``.  For each one, the lines of
    ``seq_file`` (paired with their line index) form the entry list; when the
    canonical sequence is not among them, the results of
    ``gsconv.gen_hamdist_one`` and the canonical sequence itself are appended.
    Entries whose ``gsconv.hamdist`` is below 2 are linked, ``trans_matrix``
    builds the matrix, ``find_frac`` gives the step-1 value and
    ``square_matrix`` gives the values for steps 2..22.  The figure is saved
    via ``conv.save_fig`` with the suffix ``"fraction_func"``.

    Args:
        seq_file: Path of a text file with one sequence per line.
        canonical_file: Path handed to ``seq_IO.read_sequences``.
        output_prefix: Prefix handed to ``conv.save_fig``.

    Note:
        ``seq_file`` is read once up front instead of once per canonical
        sequence, so it is opened even when the canonical list is empty.
    """
    # Function-scope import: only needed for the print-threshold fix below.
    import sys

    series = []

    canonical_list_seq = seq_IO.read_sequences(canonical_file)

    print("Beginning Script: {0}".format(datetime.datetime.now()))

    # The file content is the same for every canonical sequence, so it is
    # loaded and indexed a single time.
    with open(seq_file) as strings:
        seq_list = strings.read().splitlines()
    file_entries = [(seq, ind) for ind, seq in enumerate(seq_list)]
    file_sequences = set(seq_list)
    orig_len = len(file_entries)

    # threshold='nan' is rejected by current NumPy releases; sys.maxsize is
    # the documented way to request untruncated array printing.
    numpy.set_printoptions(threshold=sys.maxsize)

    for canonical in canonical_list_seq:

        seq_ind_list = list(file_entries)
        if canonical not in file_sequences:
            # Append the gen_hamdist_one results (without the canonical
            # itself) followed by the canonical, indexed from orig_len on.
            one_away = [
                o for o in gsconv.gen_hamdist_one(canonical) if o != canonical
            ]
            one_away.append(canonical)
            for ind, o in enumerate(one_away, orig_len):
                seq_ind_list.append((o, ind))

        edges = [(seq2, seq)
                 for seq, seq2 in itertools.combinations(seq_ind_list, 2)
                 if gsconv.hamdist(seq2[0], seq[0]) < 2]
        print(len(seq_ind_list))
        print("Generated Edges: {0}".format(datetime.datetime.now()))

        # Guaranteed to exist: the canonical sequence is either in the file
        # or was appended above.
        canon_ind = next(i for (s, i) in seq_ind_list if s == canonical)

        T_mat = trans_matrix(seq_ind_list, edges)

        print("Transformed Matrix: {0}".format(datetime.datetime.now()))

        # Step 0 is pinned to 0; step 1 uses the matrix as built.
        x = [0, 1]
        y = [0, find_frac(T_mat, canon_ind, orig_len)]

        T_mat_new = T_mat

        for i in range(2, 23):
            # square_matrix hands back the updated matrix together with the
            # value to record for this step.
            T_mat_new, frac = square_matrix(T_mat_new, T_mat, canon_ind,
                                            orig_len)
            x.append(i)
            y.append(frac)

            print("Raised Matrix {0}: {1}".format(i, datetime.datetime.now()))

        series.append([x, y, canonical])

    fig, ax = conv.create_ax(1, 1)

    color = ['orange', 'palevioletred', 'mediumaquamarine', 'deepskyblue']

    scatterplot.plot_series(ax[0, 0],
                            series,
                            title="",
                            x_axis="Number of Steps",
                            colors=color,
                            y_axis="Fraction Cleaved Variants Reached",
                            alpha=0.85,
                            connect_dots=True,
                            size=15,
                            edgecolors='k',
                            linewidth=0)
    ax[0, 0].set_xlim(xmin=1)
    ax[0, 0].set_ylim(ymin=0.0, ymax=1.0)
    # range() yields the same tick positions as xrange() and exists on both
    # Python 2 and Python 3.
    ax[0, 0].set_xticks(range(1, 23, 3))
    lgd = conv.add_legend(ax[0, 0],
                          location='upper center',
                          bbox_to_anchor=(0.5, 1.05),
                          ncol=2,
                          size=8)
    conv.save_fig(fig,
                  output_prefix,
                  "fraction_func",
                  2.5,
                  3,
                  size=9.5,
                  extra_artists=lgd)

    print("Outputted Figure: {0}".format(datetime.datetime.now()))