def get_data_from_args(dataset, pathway, from_age, scaling, shuffle):
    """Load a dataset and apply the requested restriction/scaling/shuffle steps.

    Args:
        dataset: Identifier handed to ``GeneData.load``.
        pathway: Identifier handed to ``restrict_pathway``.
        from_age: Key into ``dct_ages`` naming the lower age bound, or
            ``None`` to leave the ages unrestricted.
        scaling: Specification handed to ``build_scaler`` together with the
            loaded data.
        shuffle: When truthy, ``shuffle()`` is called on the data.

    Returns:
        The object produced by ``GeneData.load(dataset).restrict_pathway(pathway)``
        after the optional steps have been applied to it.

    Raises:
        Whatever ``dct_ages[from_age]`` raises for an unknown age name
        (a ``KeyError`` if ``dct_ages`` is a plain dict).
    """
    gene_data = GeneData.load(dataset).restrict_pathway(pathway)

    if from_age is not None:
        # The age name doubles as the restriction's name; its numeric value
        # comes from the module-level ``dct_ages`` table.
        age_name = from_age
        age_value = dct_ages[age_name]
        gene_data.restrict_ages(age_name, from_age=age_value)

    scaler = build_scaler(scaling, gene_data)
    if scaler is not None:
        # NOTE(review): the return values of restrict_ages/scale_ages/shuffle
        # are discarded here, so they are presumably in-place operations --
        # confirm against GeneData.
        gene_data.scale_ages(scaler)

    if shuffle:
        gene_data.shuffle()

    return gene_data
def get_data_from_args(dataset, pathway, from_age, scaling, shuffle):
    """Build the data object described by a set of run options.

    The dataset is loaded and restricted to ``pathway``; then, in this order,
    an optional age restriction, optional age scaling and an optional shuffle
    are applied.

    Args:
        dataset: Passed to ``GeneData.load``.
        pathway: Passed to ``restrict_pathway``.
        from_age: Name of the starting age (a key of ``dct_ages``), or
            ``None`` for no age restriction.
        scaling: Passed to ``build_scaler`` along with the data.
        shuffle: Truthy to call ``shuffle()`` on the data.

    Returns:
        The loaded, pathway-restricted data object after the optional steps.
    """
    loaded = GeneData.load(dataset)
    result = loaded.restrict_pathway(pathway)

    if from_age is not None:
        # Look the age value up first so an unknown name fails before any
        # restriction is attempted.
        start_age = dct_ages[from_age]
        result.restrict_ages(from_age, from_age=start_age)

    # build_scaler may return None, in which case the ages are left as-is.
    age_scaler = build_scaler(scaling, result)
    if age_scaler is not None:
        result.scale_ages(age_scaler)

    if shuffle:
        result.shuffle()

    return result
# Script: fit a prior-free sigmoid to every series in the serotonin pathway
# and collect the (gene, region) pairs whose first fitted parameter is large
# in magnitude.

# NOTE(review): `setup` is imported first in the original and is never
# referenced by name, so it presumably exists for its import-time side
# effects -- keep it ahead of the project imports.
import setup
import config as cfg
from load_data import GeneData
from shapes.sigmoid import Sigmoid
from fitter import Fitter
from all_fits import get_all_fits, iterate_fits
from scalers import LogScaler

cfg.verbosity = 1

age_scaler = LogScaler()
pathway = 'serotonin'
data = (
    GeneData.load('both')
    .restrict_pathway(pathway)
    .scale_ages(age_scaler)
)

# priors=None: the sigmoid is fitted without priors.
fitter = Fitter(Sigmoid(priors=None))
fits = get_all_fits(data, fitter)

# Keep the (gene, region) key of every fit yielded by iterate_fits whose
# theta[0] exceeds 100 in absolute value.
extreme = []
for dsname, g, r, fit in iterate_fits(fits, R2_threshold=0.5, return_keys=True):
    if abs(fit.theta[0]) > 100:
        extreme.append((g, r))
ax.set_xticks([stage.central_age for stage in stages]) ax.set_xticklabels([stage.short_name for stage in stages], fontsize=xtick_fontsize, fontstretch='condensed', rotation=90) # set y ticks (first and last only) ax.set_ylabel('expression level', fontsize=fontsize) ticks = ax.get_yticks() ticks = np.array([ticks[0], ticks[-1]]) ax.set_yticks(ticks) ax.set_yticklabels(['{:g}'.format(t) for t in ticks], fontsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() data = GeneData.load('both').scale_ages(age_scaler) shapes = [Sigmoid('sigmoid_wide'), Poly(1,'poly1'), Poly(3,'poly3'), Spline()] GRs = [ ('ADRB1','A1C', (5, 8)), ('GLRA2','STC', (5, 12)), ('TUBA1A','V1C', (10, 14)), ] for g,r,yrange in GRs: print 'Doing {}@{}...'.format(g,r) thetas = [] for shape in shapes: series = data.get_one_series(g,r) sigma_prior = 'normal' if not isinstance(shape,Spline) else None fitter = Fitter(shape, sigma_prior=sigma_prior)
rotation=90) # set y ticks (first and last only) ax.set_ylabel('expression level', fontsize=fontsize) ticks = ax.get_yticks() ticks = np.array([ticks[0], ticks[-1]]) ax.set_yticks(ticks) ax.set_yticklabels(['{:g}'.format(t) for t in ticks], fontsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() data = GeneData.load('both').scale_ages(age_scaler) shapes = [ Sigmoid('sigmoid_wide'), Poly(1, 'poly1'), Poly(3, 'poly3'), Spline() ] GRs = [ ('ADRB1', 'A1C', (5, 8)), ('GLRA2', 'STC', (5, 12)), ('TUBA1A', 'V1C', (10, 14)), ] for g, r, yrange in GRs: print 'Doing {}@{}...'.format(g, r)
# Script: list the serotonin-pathway (gene, region) pairs whose prior-free
# sigmoid fit has |theta[0]| > 100.

# Import order is preserved from the original; `setup` comes first and is not
# referenced afterwards (NOTE(review): presumably imported for side effects --
# confirm before reordering).
import setup
import config as cfg
from load_data import GeneData
from shapes.sigmoid import Sigmoid
from fitter import Fitter
from all_fits import get_all_fits, iterate_fits
from scalers import LogScaler

cfg.verbosity = 1
age_scaler = LogScaler()
pathway = 'serotonin'

# Load both datasets, keep only the chosen pathway, then scale the ages.
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)

# The sigmoid shape is built with priors=None, i.e. no priors are supplied.
fitter = Fitter(Sigmoid(priors=None))
fits = get_all_fits(data, fitter)

# iterate_fits is asked for keys as well (return_keys=True), so each item
# unpacks to (dataset name, gene, region, fit); only gene and region are kept.
extreme = [
    (g, r)
    for dsname, g, r, fit in iterate_fits(
        fits,
        R2_threshold=0.5,
        return_keys=True,
    )
    if abs(fit.theta[0]) > 100
]
ttl='Fit for genes with top t-test scores', top_text=top_text, filename='gradual-maturation-t-test' + filename_suffix, ) cfg.verbosity = 1 age_scaler = LogScaler() lst_pathways = [ 'serotonin', 'dopamine', ] for pathway in lst_pathways: data = GeneData.load('both').restrict_pathway(pathway).restrict_ages( 'EF3', PCW(10)).scale_ages(age_scaler) shape = Sigmoid(priors='sigmoid_wide') fitter = Fitter(shape, sigma_prior='normal') fits = get_all_fits(data, fitter, allow_new_computation=False) ds_fits = fits['kang2011'] for b_reversed in [False, True]: regions = ['V1C', 'OFC'] if b_reversed: regions = regions[::-1] scores = [] for i, g in enumerate(data.gene_names): mu1 = ds_fits[(g, regions[0])].theta_samples[2, :] mu2 = ds_fits[(g, regions[1])].theta_samples[2, :] t, pval = ttest_ind(mu1, mu2)
counts, bin_edges = np.histogram(z_scores, 50) probs = counts / float(sum(counts)) width = bin_edges[1] - bin_edges[0] ax.bar(bin_edges[:-1], probs, width=width, color='b') ax.set_xlabel('z score', fontsize=fontsize) ax.set_ylabel('probability', fontsize=fontsize) ax.tick_params(axis='both', labelsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() pathway = '17full' data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) data_shuffled = GeneData.load('both').restrict_pathway(pathway).scale_ages( age_scaler).shuffle() shape = Sigmoid('sigmoid_wide') fitter = Fitter(shape, sigma_prior='normal') fits = get_all_fits(data, fitter, allow_new_computation=False) fits_shuffled = get_all_fits(data_shuffled, fitter, allow_new_computation=False) R2_pairs = [(fit.LOO_score, fit2.LOO_score) for fit, fit2 in iterate_fits(fits, fits_shuffled)] R2 = np.array([r for r, r_shuffled in R2_pairs]) R2_shuffled = np.array([r_shuffled for r, r_shuffled in R2_pairs]) name = '{}-{}'.format(data.pathway, shape.cache_name())
b_R2_dist = False, ttl = 'Fit for genes with top t-test scores', top_text = top_text, filename = 'gradual-maturation-t-test' + filename_suffix, ) cfg.verbosity = 1 age_scaler = LogScaler() lst_pathways = [ 'serotonin', 'dopamine', ] for pathway in lst_pathways: data = GeneData.load('both').restrict_pathway(pathway).restrict_ages('EF3',PCW(10)).scale_ages(age_scaler) shape = Sigmoid(priors='sigmoid_wide') fitter = Fitter(shape, sigma_prior='normal') fits = get_all_fits(data, fitter, allow_new_computation=False) ds_fits = fits['kang2011'] for b_reversed in [False,True]: regions = ['V1C', 'OFC'] if b_reversed: regions = regions[::-1] scores = [] for i,g in enumerate(data.gene_names): mu1 = ds_fits[(g,regions[0])].theta_samples[2,:] mu2 = ds_fits[(g,regions[1])].theta_samples[2,:] t,pval = ttest_ind(mu1,mu2)
[stage.short_name for stage in stages], fontsize=xtick_fontsize, fontstretch="condensed", rotation=90 ) # set y ticks (first and last only) ax.set_ylabel("expression level", fontsize=fontsize) ticks = ax.get_yticks() ticks = np.array([ticks[0], ticks[-1]]) ax.set_yticks(ticks) ax.set_yticklabels(["{:g}".format(t) for t in ticks], fontsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() data = GeneData.load("both").scale_ages(age_scaler) # 0) GABRA2@HIP, delta-R2=0.669 # 1) GPR50@DFC, delta-R2=0.573 # 2) UCHL1@V1C, delta-R2=0.571 # 3) GRIN2B@HIP, delta-R2=0.548 # 4) SSTR1@A1C, delta-R2=0.533 # 5) GRM7@DFC, delta-R2=0.531 # 6) GRM7@STC, delta-R2=0.525 # 7) TOMM40L@VFC, delta-R2=0.514 # 8) CREB5@DFC, delta-R2=0.496 # 9) GRM7@S1C, delta-R2=0.474 GRs = [ ("HTR5A", "S1C", (6, 10)), # example of extreme parameter values without priors ("GABRA2", "HIP", (4, 12)), # delta-R2=0.669 (not actual trend in data)
def get_fits():
    """Return the sigmoid fits for the '17pathways' gene set.

    Loads the 'both' dataset, restricts it to the '17pathways' pathway and
    scales its ages with the module-level ``age_scaler``, then passes the
    data to ``get_all_fits`` with a ``Fitter`` built from a
    ``Sigmoid(priors='sigmoid_wide')`` shape and ``sigma_prior='normal'``.

    Returns:
        Whatever ``get_all_fits(data, fitter)`` returns.
    """
    # Data is prepared before the fitter is built, as in the original.
    pathway_data = GeneData.load('both').restrict_pathway('17pathways')
    scaled_data = pathway_data.scale_ages(age_scaler)

    sigmoid_fitter = Fitter(
        Sigmoid(priors='sigmoid_wide'),
        sigma_prior='normal',
    )
    return get_all_fits(scaled_data, sigmoid_fitter)
ax = fig.add_subplot(111) counts,bin_edges = np.histogram(z_scores,50) probs = counts / float(sum(counts)) width = bin_edges[1] - bin_edges[0] ax.bar(bin_edges[:-1], probs, width=width, color='b') ax.set_xlabel('z score', fontsize=fontsize) ax.set_ylabel('probability', fontsize=fontsize) ax.tick_params(axis='both', labelsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() pathway = '17full' data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) data_shuffled = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler).shuffle() shape = Sigmoid('sigmoid_wide') fitter = Fitter(shape,sigma_prior='normal') fits = get_all_fits(data,fitter,allow_new_computation=False) fits_shuffled = get_all_fits(data_shuffled,fitter,allow_new_computation=False) R2_pairs = [(fit.LOO_score,fit2.LOO_score) for fit,fit2 in iterate_fits(fits,fits_shuffled)] R2 = np.array([r for r,r_shuffled in R2_pairs]) R2_shuffled = np.array([r_shuffled for r,r_shuffled in R2_pairs]) name = '{}-{}'.format(data.pathway,shape.cache_name()) fig = plot_score_distribution(R2,R2_shuffled) save_figure(fig,'RP/R2-distribution-{}.png'.format(name), under_results=True, b_close=True) mu_shuffled = np.mean(R2_shuffled)