예제 #1
0
def make_plot(data):
    """Plot one scatter panel per taxon on a 2x3 grid and save the figure.

    For every taxon a scatter plot of its points is drawn, coloured by
    position code, with a non-parametric regression curve (spatial
    averaging) overlaid.  The Pearson correlation of the x/y values is
    printed for each taxon.  The finished figure is written to
    ``figure.png`` in the current working directory at 300 dpi.

    Args:
        data: Mapping keyed by taxon name (a string; only the part after
            the last ``";"`` is used as the panel title).  Each value is
            indexable, with item 0 the x values, item 1 the y values and
            item 2 the per-point position codes (``"T"``, ``"M"`` or
            ``"B"``).  Judging by the axis labels, x is presumably the
            distance to the surface in cm and y a standardized abundance.

    Raises:
        IndexError: If ``data`` holds more than six taxa, because the
            grid only has six panels.
    """
    # Position code -> marker colour.  Unknown codes fall back to grey so
    # the colour list always stays in step with the points; otherwise
    # scatter() rejects the mismatched lengths.
    position_colors = {"T": "red", "M": "magenta", "B": "cyan"}
    fallback_color = "gray"

    fig, axs = plt.subplots(2, 3, figsize=(8, 6), sharey=True)
    for i, taxon in enumerate(data):
        # Fill the grid row by row: panels 0-2 on top, 3-5 below.
        row, col = divmod(i, 3)
        ax = axs[row][col]

        xs = data[taxon][0]
        ys = data[taxon][1]
        positions = data[taxon][2]
        colors = [position_colors.get(position, fallback_color)
                  for position in positions]
        ax.scatter(xs, ys, alpha=0.6, c=colors, edgecolor='k')

        # Smoothed trend line, evaluated on 512 points between 0.5 and 5.
        grid = np.r_[0.5:5:512j]
        k0 = smooth.NonParamRegression(xs,
                                       ys,
                                       method=npr_methods.SpatialAverage())
        k0.fit()
        ax.plot(grid, k0(grid), "k", linewidth=2)

        test = stats.pearsonr(xs, ys)
        # Formatted explicitly so the output is the same on Python 2 and 3.
        print("{} {}".format(taxon, test))

        ax.set_title(taxon.split(";")[-1], fontsize=12)
        ax.grid(ls="--")
        ax.set_xticks([0, 1, 2, 3, 4, 5])

    # Invisible axes spanning the whole figure, used only to carry the
    # shared x/y labels for all panels.
    fig.add_subplot(111, frameon=False)
    plt.tick_params(labelcolor='none',
                    top=False,
                    bottom=False,
                    left=False,
                    right=False)
    plt.xlabel("Distance to surface (cm)")
    plt.ylabel("Standardized abundance of taxon")

    plt.tight_layout()
    plt.savefig("figure.png", dpi=300)
예제 #2
0
def aseErr_expression(genes_fpkm, called_ase_file, true_ase_file):
    """Scatter-plot the error of called ASE values against gene expression.

    For every gene in the called-ASE table, both the called and the true
    ASE ratio are converted to log2 odds (``log2(p / (1 - p))``) and the
    absolute difference is plotted against ``log2(FPKM)``.  The Spearman
    correlation is shown in the x-axis label and a non-parametric
    regression curve (spatial averaging) is drawn on top.  The plot is
    displayed with ``plt.show()``; nothing is returned.

    Genes whose log transforms are undefined (an ASE ratio outside the
    open interval (0, 1), or a non-positive FPKM) are skipped and
    reported through a single warning instead of aborting the whole plot.

    Args:
        genes_fpkm: Passed to ``loadFPKM``; the result is indexed by gene
            id to obtain an FPKM value.
        called_ase_file: Passed to ``loadASE_list`` to get the called ASE
            ratio per gene id.
        true_ase_file: Passed to ``loadASE_list`` to get the true ASE
            ratio per gene id.

    Raises:
        KeyError: If a called gene id is missing from the true-ASE table
            or from the expression table.
    """
    import warnings

    called_ase_dict = loadASE_list(called_ase_file)
    true_ase_dict = loadASE_list(true_ase_file)
    gene_expression = loadFPKM(genes_fpkm)

    def log2_odds(ratio):
        # log2 of the odds ratio/(1 - ratio); only defined for 0 < ratio < 1.
        return math.log(ratio / (1 - ratio), 2)

    # Parallel lists: ase_errors[k] belongs to expression_values[k].
    ase_errors = []
    expression_values = []
    skipped = 0

    # The called-ASE genes are expected to be a subset of the true-ASE genes.
    for ensid, called_ase in called_ase_dict.items():
        true_ase = true_ase_dict[ensid]
        fpkm = gene_expression[ensid]

        if not (0 < true_ase < 1 and 0 < called_ase < 1 and fpkm > 0):
            skipped += 1
            continue

        expression_values.append(math.log(fpkm, 2))
        ase_errors.append(
            math.fabs(log2_odds(true_ase) - log2_odds(called_ase)))

    if skipped:
        warnings.warn(
            "aseErr_expression: skipped %d gene(s) with an ASE ratio "
            "outside (0, 1) or a non-positive FPKM" % skipped)

    plt.scatter(expression_values, ase_errors, alpha=.3, marker='o')
    r, pval = stats.spearmanr(expression_values, ase_errors)
    plt.xlabel('log2(FPKM)\nr=%f' % r)
    plt.ylabel('log2 ASE_errors')
    plt.ylim([0, 2.5])
    plt.xlim([-1, 12])

    k0 = smooth.NonParamRegression(expression_values,
                                   ase_errors,
                                   method=npr_methods.SpatialAverage())
    k0.fit()

    # Evaluate the smoothed curve across the visible x-range.
    xs = np.arange(-1, 12, .01)
    plt.plot(xs, k0(xs), linewidth=2)

    plt.show()
예제 #3
0
def results(dataset, metadata_path, w2v, rescale=None):
    """Evaluate the siamese model on test and train data and plot the results.

    The model is evaluated on ``dataset.test`` and ``dataset.train``; the
    average loss and Pearson correlation of both are printed.  A
    non-parametric regression (spatial averaging) is then fitted from the
    train predictions to the train ground truth and applied to the test
    predictions, and the post-regression Pearson correlation and MSE are
    printed.  Two figures are saved in ``siamese_model.exp_dir``:

    * ``results_test_sim.jpg`` - regressed test predictions vs. ground truth
    * ``results_line_fit.jpg`` - raw test predictions vs. ground truth with
      the fitted regression curve

    Args:
        dataset: Object exposing ``test`` and ``train`` members, each with
            ``open()`` and ``close()``, accepted by ``evaluate``.
        metadata_path: Forwarded to ``initialize_tf_graph``.
        w2v: Forwarded to ``initialize_tf_graph``.
        rescale: Optional target range.  When given, the ground-truth
            similarities of both splits are mapped from ``[0.0, 1.0]`` to
            this range with ``datasets.rescale``.

    Returns:
        None.
    """

    def label_axes():
        # Both figures share the same title and axis labels.
        plt.title('Regression Plot for Test Set Similarities')
        plt.ylabel('Ground Truth Similarities')
        plt.xlabel('Predicted  Similarities')

    print("Configuring Tensorflow Graph")
    with tf.Graph().as_default():
        sess, siamese_model = initialize_tf_graph(metadata_path, w2v)
        dataset.test.open()
        dataset.train.open()
        try:
            avg_test_loss, avg_test_pco, test_result_set = evaluate(
                sess=sess,
                dataset=dataset.test,
                model=siamese_model,
                step=-1,
                max_dev_itr=0,
                mode='test')
            avg_train_loss, avg_train_pco, train_result_set = evaluate(
                sess=sess,
                dataset=dataset.train,
                model=siamese_model,
                max_dev_itr=0,
                step=-1,
                mode='train')
        finally:
            # Release both splits even when evaluation fails.
            dataset.test.close()
            dataset.train.close()
        print('TEST RESULTS:\nMSE: {}\t Pearson Correlation: {}\n\n'
              'TRAIN RESULTS:\nMSE: {}\t Pearson Correlation: {}'.format(
                  avg_test_loss, avg_test_pco, avg_train_loss, avg_train_pco))

        # Only the last two entries of each result set are used here:
        # the predicted similarities and their ground truth.
        _, _, train_sims, train_gt = train_result_set
        _, _, test_sims, test_gt = test_result_set
        grid = np.r_[0:1:1000j]

        if rescale is not None:
            train_gt = datasets.rescale(train_gt,
                                        new_range=rescale,
                                        original_range=[0.0, 1.0])
            test_gt = datasets.rescale(test_gt,
                                       new_range=rescale,
                                       original_range=[0.0, 1.0])
            # NOTE(review): grid stays on [0, 1] because it is fed to the
            # regression as predicted similarities, which are not rescaled
            # here - presumably they lie in [0, 1]; confirm.

        figure_path = os.path.join(siamese_model.exp_dir,
                                   'results_test_sim.jpg')
        reg_fig_path = os.path.join(siamese_model.exp_dir,
                                    'results_line_fit.jpg')
        label_axes()

        print("Performing Non Parametric Regression")
        non_param_reg = non_parametric_regression(
            train_sims, train_gt, method=npr_methods.SpatialAverage())

        reg_test_sim = non_param_reg(test_sims)
        reg_pco = pearsonr(reg_test_sim, test_gt)
        reg_mse = mean_squared_error(test_gt, reg_test_sim)
        print("Post Regression Test Results:\nPCO: {}\nMSE: {}".format(
            reg_pco, reg_mse))

        # Figure 1: regressed test predictions against the ground truth.
        plt.scatter(reg_test_sim, test_gt, label='Similarities', s=0.2)
        plt.savefig(figure_path)

        # Start from a clean figure for the second plot.
        plt.clf()

        # Figure 2: raw test predictions plus the fitted regression curve.
        label_axes()
        plt.scatter(test_sims, test_gt, label='Similarities', s=0.2)
        plt.plot(grid,
                 non_param_reg(grid),
                 label="Local Linear Smoothing",
                 linewidth=2.0,
                 color='r')
        plt.savefig(reg_fig_path)

        print("saved similarity plot at {}".format(figure_path))
        print("saved regression plot at {}".format(reg_fig_path))