def make_plot(data):
    """Plot one scatter panel per taxon with a smoothed trend and save it.

    Each taxon gets its own axes in a 2 x 3 grid with a shared y axis. Points
    are coloured by their position code, a non-parametric regression curve
    (spatial averaging) is drawn over them, and the Pearson correlation of
    the raw points is printed to stdout. The finished figure is written to
    ``figure.png`` at 300 dpi.

    Args:
        data: Mapping from taxon name to an indexable record where item 0
            holds the x values, item 1 the y values and item 2 the position
            codes ("T", "M" or "B") for each point. Only the part of the
            taxon name after the last ";" is used as the panel title.

    Raises:
        ValueError: If a position code is not one of "T", "M" or "B".
        IndexError: If ``data`` holds more than six taxa (the grid is 2 x 3).
    """
    position_colors = {"T": "red", "M": "magenta", "B": "cyan"}

    fig, axs = plt.subplots(2, 3, figsize=(8, 6), sharey=True)

    # Evaluation points for the trend line: 512 samples from 0.5 to 5.
    # Identical for every panel, so it is built once.
    grid = np.r_[0.5:5:512j]

    for i, taxon in enumerate(data):
        # Fill the grid row by row: panels 0-2 on top, 3-5 below.
        ax = axs[i // 3][i % 3]

        xs = data[taxon][0]
        ys = data[taxon][1]
        positions = data[taxon][2]

        # Every point needs exactly one colour; an unrecognised code would
        # otherwise leave the colour list shorter than the data.
        try:
            colors = [position_colors[position] for position in positions]
        except KeyError as err:
            raise ValueError(
                "unknown position code %r for taxon %r; expected one of "
                "B, M, T" % (err.args[0], taxon))

        ax.scatter(xs, ys, alpha=0.6, c=colors, edgecolor='k')

        # line of best fit
        k0 = smooth.NonParamRegression(
            xs, ys, method=npr_methods.SpatialAverage())
        k0.fit()
        ax.plot(grid, k0(grid), "k", linewidth=2)

        test = stats.pearsonr(xs, ys)
        # %-formatting prints the same "taxon result" text on Python 2 and 3.
        print("%s %s" % (taxon, test))

        ax.set_title(taxon.split(";")[-1], fontsize=12)
        ax.grid(ls="--")
        ax.set_xticks([0, 1, 2, 3, 4, 5])

    # An invisible full-figure axes carries the axis labels shared by all
    # panels; its own ticks and tick labels are hidden.
    fig.add_subplot(111, frameon=False)
    plt.tick_params(labelcolor='none', top=False, bottom=False,
                    left=False, right=False)
    plt.xlabel("Distance to surface (cm)")
    plt.ylabel("Standardized abundance of taxon")
    plt.tight_layout()
    plt.savefig("figure.png", dpi=300)
def aseErr_expression(genes_fpkm,called_ase_file,true_ase_file):
    """Scatter the ASE calling error of each gene against its expression.

    For every gene in the called-ASE table, the called and true ASE values
    are converted to log2 odds (``log2(p / (1 - p))``) and the absolute
    difference is taken as the error. That error is plotted against
    log2(FPKM), the Spearman correlation is written into the x-axis label,
    a spatial-average regression curve is overlaid, and the figure is shown.

    Args:
        genes_fpkm: Passed to ``loadFPKM`` to obtain per-gene FPKM values.
        called_ase_file: Passed to ``loadASE_list`` for the called values.
        true_ase_file: Passed to ``loadASE_list`` for the true values.
    """
    def log2_odds(ratio):
        # log2 of the odds ratio/(1 - ratio).
        return math.log((ratio / (1 - ratio)), 2)

    called_by_gene = loadASE_list(called_ase_file)
    true_by_gene = loadASE_list(true_ase_file)
    fpkm_by_gene = loadFPKM(genes_fpkm)

    # Two parallel lists, one entry per gene: x holds log2(FPKM), y holds
    # the absolute log-odds error. Gene ids are not kept.
    log_fpkms = []
    abs_errors = []

    # Genes with a called value are expected to be a subset of the genes
    # that have a true value, so lookups in the true table are unguarded.
    for gene_id in called_by_gene.keys():
        true_ratio = true_by_gene[gene_id]
        called_ratio = called_by_gene[gene_id]
        fpkm = fpkm_by_gene[gene_id]

        true_logit = log2_odds(true_ratio)
        called_logit = log2_odds(called_ratio)

        log_fpkms.append(math.log(fpkm, 2))
        abs_errors.append(math.fabs(true_logit - called_logit))

    plt.scatter(log_fpkms, abs_errors, alpha=.3, marker='o')

    correlation = stats.spearmanr(log_fpkms, abs_errors)
    r = correlation[0]
    plt.xlabel('log2(FPKM)\nr=%f' % r)
    plt.ylabel('log2 ASE_errors')
    plt.ylim([0, 2.5])
    plt.xlim([-1, 12])

    # Smoothed trend over the same x range as the axis limits.
    trend = smooth.NonParamRegression(
        log_fpkms, abs_errors, method=npr_methods.SpatialAverage())
    trend.fit()
    curve_x = np.arange(-1, 12, .01)
    plt.plot(curve_x, trend(curve_x), linewidth=2)

    plt.show()
def results(dataset, metadata_path, w2v, rescale=None):
    """Evaluate the model on both splits, then fit and plot a regression.

    Steps, in order:
      1. Build the graph/session/model via ``initialize_tf_graph``.
      2. Run ``evaluate`` on ``dataset.test`` and ``dataset.train`` and print
         the loss and Pearson correlation of each.
      3. Optionally rescale the ground-truth similarities.
      4. Fit a non-parametric regression from train predictions to train
         ground truth, apply it to the test predictions, and print the
         resulting Pearson correlation and mean squared error.
      5. Save two scatter plots under ``siamese_model.exp_dir``:
         ``results_test_sim.jpg`` (regressed test predictions vs ground
         truth) and ``results_line_fit.jpg`` (raw test predictions vs ground
         truth with the fitted curve overlaid).

    Args:
        dataset: Object exposing ``test`` and ``train`` splits, each with
            ``open()`` and ``close()``.
        metadata_path: Forwarded to ``initialize_tf_graph``.
        w2v: Forwarded to ``initialize_tf_graph``.
        rescale: If not None, passed as ``new_range`` to ``datasets.rescale``
            to map ground truth out of its original ``[0.0, 1.0]`` range.
    """
    print("Configuring Tensorflow Graph")
    with tf.Graph().as_default():
        sess, siamese_model = initialize_tf_graph(metadata_path, w2v)
        # NOTE(review): sess is never closed in this function -- confirm
        # whether the caller or initialize_tf_graph owns its lifetime.

        # Close each split even if evaluation fails, so a crash does not
        # leave the dataset handles open.
        dataset.test.open()
        try:
            dataset.train.open()
            try:
                avg_test_loss, avg_test_pco, test_result_set = evaluate(
                    sess=sess, dataset=dataset.test, model=siamese_model,
                    step=-1, max_dev_itr=0, mode='test')
                avg_train_loss, avg_train_pco, train_result_set = evaluate(
                    sess=sess, dataset=dataset.train, model=siamese_model,
                    max_dev_itr=0, step=-1, mode='train')
            finally:
                dataset.train.close()
        finally:
            dataset.test.close()

        print('TEST RESULTS:\nMSE: {}\t Pearson Correlation: {}\n\n'
              'TRAIN RESULTS:\nMSE: {}\t Pearson Correlation: {}'.format(
                  avg_test_loss, avg_test_pco,
                  avg_train_loss, avg_train_pco))

        # Only the last two fields of each result set are used here.
        _, _, train_sims, train_gt = train_result_set
        _, _, test_sims, test_gt = test_result_set

        # Points at which the fitted curve is drawn. The grid lies on the
        # predicted-similarity axis, which is not rescaled below, so it
        # stays on [0, 1] regardless of ``rescale``.
        grid = np.r_[0:1:1000j]
        if rescale is not None:
            train_gt = datasets.rescale(train_gt, new_range=rescale,
                                        original_range=[0.0, 1.0])
            test_gt = datasets.rescale(test_gt, new_range=rescale,
                                       original_range=[0.0, 1.0])

        figure_path = os.path.join(siamese_model.exp_dir,
                                   'results_test_sim.jpg')
        reg_fig_path = os.path.join(siamese_model.exp_dir,
                                    'results_line_fit.jpg')

        # First figure: regressed test predictions against ground truth.
        plt.title('Regression Plot for Test Set Similarities')
        plt.ylabel('Ground Truth Similarities')
        plt.xlabel('Predicted Similarities')

        print("Performing Non Parametric Regression")
        # The regression is fitted on the train split only and then applied
        # to the test predictions.
        non_param_reg = non_parametric_regression(
            train_sims, train_gt, method=npr_methods.SpatialAverage())
        reg_test_sim = non_param_reg(test_sims)

        # NOTE(review): if pearsonr is scipy.stats.pearsonr, reg_pco is an
        # (r, p-value) pair and is printed as such -- confirm.
        reg_pco = pearsonr(reg_test_sim, test_gt)
        reg_mse = mean_squared_error(test_gt, reg_test_sim)
        print("Post Regression Test Results:\nPCO: {}\nMSE: {}".format(
            reg_pco, reg_mse))

        plt.scatter(reg_test_sim, test_gt, label='Similarities', s=0.2)
        plt.savefig(figure_path)

        # Second figure: raw test predictions with the fitted curve on top.
        plt.clf()
        plt.title('Regression Plot for Test Set Similarities')
        plt.ylabel('Ground Truth Similarities')
        plt.xlabel('Predicted Similarities')
        plt.scatter(test_sims, test_gt, label='Similarities', s=0.2)
        # NOTE(review): the label says "Local Linear Smoothing" while the
        # fit above is requested with SpatialAverage; no legend is drawn
        # here, so the label is not shown -- confirm which is intended.
        plt.plot(grid, non_param_reg(grid), label="Local Linear Smoothing",
                 linewidth=2.0, color='r')
        plt.savefig(reg_fig_path)

        print("saved similarity plot at {}".format(figure_path))
        print("saved regression plot at {}".format(reg_fig_path))