def shift_model(sample_N=None):
    """Build the 'Nucleosome shift' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with
        ``incl_shift=True`` and promoter, gene and antisense switched off.
    """
    model = GP('Nucleosome shift', sample_N=sample_N)
    model.design_matrix(
        incl_shift=True,
        incl_prom=False,
        incl_gene=False,
        incl_antisense=False,
    )
    return model
def small_promoter_model(sample_N=None):
    """Build the 'Promoter occupancy' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with the
        promoter, occupancy and small flags on and the gene, cc, nuc and
        antisense flags off.
    """
    flags = dict(
        incl_prom=True,
        incl_gene=False,
        incl_cc=False,
        incl_occ=True,
        incl_small=True,
        incl_nuc=False,
        incl_antisense=False,
    )
    model = GP('Promoter occupancy', sample_N=sample_N)
    model.design_matrix(**flags)
    return model
def gene_disorg_model(sample_N=None):
    """Build the 'Nucleosome disorganization' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with the
        gene, cc and nuc flags on and the promoter, occupancy, small and
        antisense flags off.
    """
    flags = dict(
        incl_prom=False,
        incl_gene=True,
        incl_cc=True,
        incl_occ=False,
        incl_small=False,
        incl_nuc=True,
        incl_antisense=False,
    )
    model = GP('Nucleosome disorganization', sample_N=sample_N)
    model.design_matrix(**flags)
    return model
def combined_model(sample_N=None):
    """Build the 'Combined chromatin' GP from two sub-model design matrices.

    The combined GP never calls ``design_matrix`` itself. Its ``X`` is
    assembled from the ``X`` of ``small_promoter_model`` and
    ``gene_disorg_model``: any column label present in both is removed from
    the promoter matrix (so the gene-disorganization copy is the one kept),
    and the two frames are then combined with ``DataFrame.join``.

    Args:
        sample_N: Forwarded unchanged to ``GP`` and to both sub-model builders.

    Returns:
        The ``GP`` instance with its ``X`` attribute set to the joined matrix.
    """
    model = GP('Combined chromatin', sample_N=sample_N)

    promoter_X = small_promoter_model(sample_N).X
    disorg_X = gene_disorg_model(sample_N).X

    # Drop the overlap from the left frame in one call so the join sees no
    # duplicated column labels.
    shared = set(promoter_X.columns) & set(disorg_X.columns)
    promoter_X = promoter_X.drop(list(shared), axis=1)

    model.X = promoter_X.join(disorg_X)
    return model
def regression_plots():
    """Write the GP model-comparison and 'Full' model prediction figures.

    All files are saved as PDFs under ``<OUTPUT_DIR>/gp``:

    * ``compare_gp_r2.pdf`` and ``compare_gp_r2_legend.pdf`` from
      ``plot_compare_r2`` (without and with ``show_legend=True``).
    * ``Full_predictions.pdf`` from ``plot_res_distribution``.
    * ``Full_<t>.pdf`` for ``t`` in 7.5, 30 and 120 from
      ``plot_res_distribution_time``.

    Relies on the module-level names ``OUTPUT_DIR``, ``mkdirs_safe``, ``plt``
    and ``selected_genes``.
    """
    from src.regression_compare import plot_compare_r2, load_results
    from src.gp import GP, plot_res_distribution, plot_res_distribution_time

    gp_dir = "%s/gp" % OUTPUT_DIR
    mkdirs_safe([gp_dir])

    # plot comparison
    plot_compare_r2(gp_dir)
    plt.savefig('%s/compare_gp_r2.pdf' % gp_dir, transparent=True)

    plot_compare_r2(gp_dir, show_legend=True)
    plt.savefig('%s/compare_gp_r2_legend.pdf' % gp_dir, transparent=True)

    # The return value is not used below; the call is kept so that any error
    # it raises still surfaces at the same point as before.
    # TODO(review): remove if load_results has no required side effects.
    load_results(gp_dir)

    for name in ['Full']:
        cur = GP(name, results_path='%s/%s_results.csv' % (gp_dir, name))

        plot_res_distribution(cur, selected_genes=selected_genes)
        plt.savefig('%s/%s_predictions.pdf' % (gp_dir, name),
                    transparent=True)

        # `timepoint` rather than `time`, to avoid shadowing the stdlib
        # module name.
        for timepoint in [7.5, 30, 120]:
            plot_res_distribution_time(cur, timepoint,
                                       selected_genes=selected_genes)
            plt.savefig('%s/%s_%s.pdf' % (gp_dir, name, timepoint),
                        transparent=True, dpi=100)
def rna_only_model(sample_N=None):
    """Build the 'RNA only' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with the
        promoter, gene and antisense flags all switched off.
    """
    model = GP('RNA only', sample_N=sample_N)
    model.design_matrix(
        incl_prom=False,
        incl_gene=False,
        incl_antisense=False,
    )
    return model
def prom_model(sample_N=None):
    """Build the 'Promoter' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with
        ``incl_prom=True`` and the gene and antisense flags off.
    """
    model = GP('Promoter', sample_N=sample_N)
    model.design_matrix(
        incl_prom=True,
        incl_gene=False,
        incl_antisense=False,
    )
    return model
def body_model(sample_N=None):
    """Build the 'Gene body' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with
        ``incl_gene=True`` and the promoter and antisense flags off.
    """
    model = GP('Gene body', sample_N=sample_N)
    model.design_matrix(
        incl_prom=False,
        incl_gene=True,
        incl_antisense=False,
    )
    return model
def full_model(sample_N=None):
    """Build the 'Full' GP and configure its design matrix.

    Only ``incl_shift=True`` is passed explicitly; every other
    ``design_matrix`` option is left at its default.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The configured ``GP`` instance.
    """
    model = GP('Full', sample_N=sample_N)
    model.design_matrix(incl_shift=True)
    return model
def nuc_model(sample_N=None):
    """Build the 'Nucleosome fragments' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with
        ``incl_nuc=False`` and ``incl_antisense=False``.
    """
    model = GP('Nucleosome fragments', sample_N=sample_N)
    # NOTE(review): this model is labelled 'Nucleosome fragments' yet passes
    # incl_nuc=False. Confirm against GP.design_matrix whether that is
    # intended or whether incl_small=False was meant.
    model.design_matrix(
        incl_nuc=False,
        incl_antisense=False,
    )
    return model
def sm_model(sample_N=None):
    """Build the 'Small fragments' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with
        ``incl_small=False`` and ``incl_antisense=False``.
    """
    model = GP('Small fragments', sample_N=sample_N)
    # NOTE(review): this model is labelled 'Small fragments' yet passes
    # incl_small=False. Confirm against GP.design_matrix whether that is
    # intended or whether incl_nuc=False was meant.
    model.design_matrix(
        incl_small=False,
        incl_antisense=False,
    )
    return model
def occ_model(sample_N=None):
    """Build the 'Occupancy' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with
        ``incl_occ=True`` and the cc and antisense flags off.
    """
    model = GP('Occupancy', sample_N=sample_N)
    model.design_matrix(
        incl_cc=False,
        incl_occ=True,
        incl_antisense=False,
    )
    return model
def cc_model(sample_N=None):
    """Build the 'Cross correlation' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with
        ``incl_cc=True`` and the occupancy and antisense flags off.
    """
    model = GP('Cross correlation', sample_N=sample_N)
    model.design_matrix(
        incl_cc=True,
        incl_occ=False,
        incl_antisense=False,
    )
    return model
def sense_model(sample_N=None):
    """Build the 'Sense' GP and configure its design matrix.

    Only ``incl_antisense=False`` is passed explicitly; every other
    ``design_matrix`` option is left at its default.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The configured ``GP`` instance.
    """
    model = GP('Sense', sample_N=sample_N)
    model.design_matrix(incl_antisense=False)
    return model
def antisense_model(sample_N=None):
    """Build the 'Antisense' GP and configure its design matrix.

    Args:
        sample_N: Forwarded unchanged to ``GP`` as ``sample_N``.

    Returns:
        The ``GP`` instance after ``design_matrix`` has been called with
        ``incl_antisense=True`` and the shift and sense flags off.
    """
    model = GP('Antisense', sample_N=sample_N)
    model.design_matrix(
        incl_shift=False,
        incl_antisense=True,
        incl_sense=False,
    )
    return model
optimizer.zero_grad() output = model(train_x) loss = -mll(output, train_y) loss.backward() print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iter, loss.item())) optimizer.step() return model train_x = torch.linspace(0, 1, 15) train_y = torch.sin(train_x * (2 * math.pi)) #model = test_sm(train_x, train_y) kern = gpytorch.kernels.SpectralMixtureKernel(num_mixtures=4) gp = GP(train_x, train_y, kern) """ likelihood = gpytorch.likelihoods.GaussianLikelihood() model = SpectralMixtureGPModel(train_x, train_y, likelihood) model.train() likelihood.train() # Use the adam optimizer optimizer = torch.optim.Adam(model.parameters(), lr=0.1) # "Loss" for GPs - the marginal log likelihood mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model) #SM works from package but not when wrapped in GP training_iter = 100
def scatters():
    """Write the chromatin-measure vs. transcript-level scatter figures.

    Saves, under ``save_dir``:

    * one PDF per chromatin measure plotted against the 120 min column of
      ``datastore.sense_log2_TPM`` (``small_occ_vs_TPM_120``,
      ``disorg_vs_TPM_120``, ``nuc_occ_vs_TPM_120``, ``combined_vs_TPM_120``);
    * ``combined_vs_TPM.pdf``, ``disorg_vs_TPM.pdf`` and
      ``small_occ_vs_TPM.pdf`` from the corresponding ``plot_*_vs_TPM``
      helpers;
    * ``gp_120.pdf`` from ``plot_res_distribution_time`` for the 'Full' GP
      results at time 120.

    Relies on the module-level names ``datastore``, ``selected_genes``,
    ``save_dir``, ``scatter_dpi``, ``OUTPUT_DIR``, ``plt``, ``print_fl`` and
    the ``plot_combined_vs_TPM`` / ``plot_disorg_vs_TPM`` /
    ``plot_occ_vs_TPM`` helpers.
    """
    from src.chromatin_summary_plots import plot_distribution
    # Unused in this function; import retained in case the module has
    # import-time side effects. TODO(review): confirm and remove.
    from src.chromatin_metrics_data import ChromatinDataStore  # noqa: F401

    ind_names = [
        "Average $\\Delta$ promoter small fragment\noccupancy ",
        "Average $\\Delta$ gene body disorganization\n",
        "Average $\\Delta$ gene body nucleosome\noccupancy ",
        "Combined chromatin score\n",
    ]

    save_names = [
        'small_occ_vs_TPM_120',
        'disorg_vs_TPM_120',
        'nuc_occ_vs_TPM_120',
        'combined_vs_TPM_120',
    ]

    xs = [
        # mean measures
        datastore.promoter_sm_occupancy_delta.mean(axis=1),
        datastore.gene_body_disorganization_delta.mean(axis=1),
        datastore.gene_body_nuc_occ_delta.mean(axis=1),
        datastore.combined_chromatin_score,
    ]

    y = datastore.sense_log2_TPM[120]

    # The three lists are parallel, so walk them in lockstep.
    for ind_name, save_name, x in zip(ind_names, save_names, xs):
        # The axis label is single-line; the title keeps the embedded newline.
        xlabel_name = ind_name.replace('\n', ' ')

        plot_distribution(
            x, y.loc[x.index], xlabel_name,
            'True log$_2$ transcript level, TPM',
            highlight=selected_genes,
            xlim=(-3, 3), ylim=(0, 16),
            title=('%svs transcript level @ 120 min' % ind_name),
            tight_layout=[0.1, 0.075, 0.9, 0.85],
            xticks=(-4, 4, 2),
            yticks=(0, 16, 5),
            plot_aux='cross')

        save_path = '%s/%s.pdf' % (save_dir, save_name)
        plt.savefig(save_path, transparent=True, dpi=scatter_dpi)
        print_fl("Wrote %s" % save_path)

    save_path = '%s/combined_vs_TPM.pdf' % (save_dir)
    plot_combined_vs_TPM(datastore, selected_genes)
    plt.savefig(save_path, transparent=True, dpi=scatter_dpi)

    save_path = '%s/disorg_vs_TPM.pdf' % (save_dir)
    plot_disorg_vs_TPM(datastore, selected_genes)
    plt.savefig(save_path, transparent=True, dpi=scatter_dpi)

    save_path = '%s/small_occ_vs_TPM.pdf' % (save_dir)
    plot_occ_vs_TPM(datastore, selected_genes)
    plt.savefig(save_path, transparent=True, dpi=scatter_dpi)

    from src.gp import GP, plot_res_distribution_time
    from src.regression_compare import load_results

    gp_dir = "%s/gp" % OUTPUT_DIR

    # The return value is not used below; the call is kept so that any error
    # it raises still surfaces at the same point as before.
    # TODO(review): remove if load_results has no required side effects.
    load_results(gp_dir)

    name = 'Full'
    save_path = '%s/gp_120.pdf' % (save_dir)
    timepoint = 120  # named to avoid shadowing the stdlib `time` module

    cur = GP(name, results_path='%s/%s_results.csv' % (gp_dir, name))
    plot_res_distribution_time(cur, timepoint,
                               selected_genes=selected_genes,
                               show_pearsonr=True,
                               plot_aux='none',
                               show_r2=False,
                               tight_layout=[0.1, 0.075, 0.9, 0.85])
    plt.savefig(save_path, transparent=True, dpi=scatter_dpi)