# Script: fit a sigmoid (constructed with priors=None) to every series in the
# 'serotonin' pathway and collect the (gene, region) pairs whose first fitted
# parameter has an absolute value above 100.
import setup  # kept first, exactly as in the original import order
import config as cfg
from load_data import GeneData
from shapes.sigmoid import Sigmoid
from fitter import Fitter
from all_fits import get_all_fits, iterate_fits
from scalers import LogScaler

cfg.verbosity = 1

# Data: both datasets, restricted to one pathway, ages passed through LogScaler.
age_scaler = LogScaler()
pathway = 'serotonin'
data = (
    GeneData.load('both')
    .restrict_pathway(pathway)
    .scale_ages(age_scaler)
)

# Fits for every series in `data`, using a sigmoid shape without priors.
fitter = Fitter(Sigmoid(priors=None))
fits = get_all_fits(data, fitter)

# Keep only fits passing the R2 threshold handed to iterate_fits, then select
# those whose theta[0] magnitude exceeds 100.
extreme = [
    (g, r)
    for dsname, g, r, fit in iterate_fits(
        fits, R2_threshold=0.5, return_keys=True)
    if abs(fit.theta[0]) > 100
]
ax.set_ylabel('expression level', fontsize=fontsize) ticks = ax.get_yticks() ticks = np.array([ticks[0], ticks[-1]]) ax.set_yticks(ticks) ax.set_yticklabels(['{:g}'.format(t) for t in ticks], fontsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() data = GeneData.load('both').scale_ages(age_scaler) shapes = [ Sigmoid('sigmoid_wide'), Poly(1, 'poly1'), Poly(3, 'poly3'), Spline() ] GRs = [ ('ADRB1', 'A1C', (5, 8)), ('GLRA2', 'STC', (5, 12)), ('TUBA1A', 'V1C', (10, 14)), ] for g, r, yrange in GRs: print 'Doing {}@{}...'.format(g, r) thetas = [] for shape in shapes: series = data.get_one_series(g, r)
ax.bar(bin_edges[:-1], probs, width=width, color='b') ax.set_xlabel('z score', fontsize=fontsize) ax.set_ylabel('probability', fontsize=fontsize) ax.tick_params(axis='both', labelsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() pathway = '17full' data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) data_shuffled = GeneData.load('both').restrict_pathway(pathway).scale_ages( age_scaler).shuffle() shape = Sigmoid('sigmoid_wide') fitter = Fitter(shape, sigma_prior='normal') fits = get_all_fits(data, fitter, allow_new_computation=False) fits_shuffled = get_all_fits(data_shuffled, fitter, allow_new_computation=False) R2_pairs = [(fit.LOO_score, fit2.LOO_score) for fit, fit2 in iterate_fits(fits, fits_shuffled)] R2 = np.array([r for r, r_shuffled in R2_pairs]) R2_shuffled = np.array([r_shuffled for r, r_shuffled in R2_pairs]) name = '{}-{}'.format(data.pathway, shape.cache_name()) fig = plot_score_distribution(R2, R2_shuffled) save_figure(fig, 'RP/R2-distribution-{}.png'.format(name), under_results=True,
# mark birth time with a vertical line ymin, ymax = ax.get_ylim() birth_age = age_scaler.scale(0) ax.plot([birth_age, birth_age], [ymin, ymax], '--', color='0.85') return fig cfg.verbosity = 1 age_scaler = LogScaler() all_data = GeneData.load('both').scale_ages(age_scaler) pathway = '17full' data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) shape = Sigmoid(priors='sigmoid_wide') fitter = Fitter(shape, sigma_prior='normal') fits = get_all_fits(data, fitter) R2_threshold = 0.5 for b_unique in [False, True]: dct_pathways = load_17_pathways_breakdown(b_unique) dct_pathways['17 pathways'] = None for name, genes in dct_pathways.iteritems(): fig = plot_onset_times(all_data, data, fitter, fits, {name: genes}, R2_threshold, b_unique) str_dir = 'unique' if b_unique else 'overlapping' str_unique = ' (unique)' if b_unique else '' filename = 'RP/{}/change-distributions-{}{}.png'.format( str_dir, name, str_unique) save_figure(fig, filename, under_results=True)
zoom_max = max(max(zoom_data), zoom_max) ax.plot(xpos, h, linewidth=3, label=shape.cache_name()) ax.set_xlim(*zoom) ax.set_ylim(0, zoom_max * 1.1) ax.legend(loc='best', fontsize=fontsize, frameon=False) ax.set_xlabel('test $R^2$ score', fontsize=fontsize) ax.set_ylabel("probability density", fontsize=fontsize) ax.tick_params(axis='both', labelsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() data = GeneData.load('both').scale_ages(age_scaler) sigmoid = Sigmoid(priors='sigmoid_wide') sigslope = Sigslope(priors='sigslope80') shapes = [sigmoid, sigslope] fitters = [Fitter(shape, sigma_prior='normal') for shape in shapes] ##################################################### # Example fits ##################################################### GRs = [ ('ABHD4', 'STC', (5, 8)), ] for g, r, yrange in GRs: for fitter in fitters: print 'Doing {}@{}...'.format(g, r) series = data.get_one_series(g, r) theta, _, _, _ = fitter.fit(series.ages, series.single_expression)
rv = stats.norm(loc, sigma) x = np.linspace(xmin, xmax, 100) prob = rv.pdf(x) plt.plot(x, prob, 'k', linewidth=3) ttl_fit = r'Normal fit: $loc$={:.3f}, $\sigma$={:.3f}'.format( loc, sigma) ttl = '\n'.join([ttl, ttl_fit]) plt.title(ttl) return vals cfg.verbosity = 1 age_scaler = LogScaler() pathway = 'serotonin' data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) shape = Sigmoid() fitter = Fitter(shape) fits = get_all_fits(data, fitter, allow_new_computation=False) def translate(g, r, fit): series = data.get_one_series(g, r) theta, sigma = fitter.translate_parameters_to_priors_scale( series.ages, series.single_expression, fit.theta, fit.sigma) a, h, mu, w = theta if h < 0: theta = (a + h, -h, mu, -w ) # this is an equivalent sigmoid, with h now positive return Bunch( theta=theta, sigma=sigma,
def get_fits():
    """Return the fits for the '17pathways' data with a 'sigmoid_wide' sigmoid.

    Loads the 'both' dataset, restricts it to the '17pathways' pathway and
    scales ages with the module-level ``age_scaler``. The result of
    ``get_all_fits`` is returned unchanged; the fitter wraps
    ``Sigmoid(priors='sigmoid_wide')`` with ``sigma_prior='normal'``.
    """
    # Data is loaded before the fitter is built, matching the original order.
    dataset = (
        GeneData.load('both')
        .restrict_pathway('17pathways')
        .scale_ages(age_scaler)
    )
    sigmoid_fitter = Fitter(Sigmoid(priors='sigmoid_wide'), sigma_prior='normal')
    return get_all_fits(dataset, sigmoid_fitter)
#1) GPR50@DFC, delta-R2=0.573
#2) UCHL1@V1C, delta-R2=0.571
#3) GRIN2B@HIP, delta-R2=0.548
#4) SSTR1@A1C, delta-R2=0.533
#5) GRM7@DFC, delta-R2=0.531
#6) GRM7@STC, delta-R2=0.525
#7) TOMM40L@VFC, delta-R2=0.514
#8) CREB5@DFC, delta-R2=0.496
#9) GRM7@S1C, delta-R2=0.474

# (gene, region, yrange) triples; yrange is forwarded to plot_one_series.
GRs = [
    ('HTR5A', 'S1C', (6, 10)),  # example of extreme parameter values without priors
    ('GABRA2', 'HIP', (4, 12)),  # delta-R2=0.669 (not actual trend in data)
]

# One fitter per entry in `labels`, in the same order.
fitters = [
    Fitter(Sigmoid()),
    Fitter(Sigmoid('sigmoid_wide'), sigma_prior='normal'),
]
labels = ['no priors', 'semi-informative']

for g, r, yrange in GRs:
    # A single parenthesised argument prints the same text as the bare
    # Python 2 print statement.
    print('Doing {}@{}...'.format(g, r))
    series = data.get_one_series(g, r)

    # Fit the same series once per fitter, keeping only the first returned
    # value (theta) of each fit.
    thetas = []
    for fitter in fitters:
        theta, _, _, _ = fitter.fit(series.ages, series.single_expression)
        thetas.append(theta)

    fig = plot_one_series(series, fitters, thetas, labels, yrange)
    save_figure(
        fig,
        'RP/fit-examples-{}-{}.png'.format(g, r),
        under_results=True)

    # Report the fitted parameters next to the label of the fitter used.
    for theta, label in zip(thetas, labels):
        print('{}: {}'.format(label, theta))
probs = counts / float(sum(counts)) width = bin_edges[1] - bin_edges[0] ax.bar(bin_edges[:-1], probs, width=width, color='b') ax.set_xlabel('z score', fontsize=fontsize) ax.set_ylabel('probability', fontsize=fontsize) ax.tick_params(axis='both', labelsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() pathway = '17full' data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) data_shuffled = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler).shuffle() shape = Sigmoid('sigmoid_wide') fitter = Fitter(shape,sigma_prior='normal') fits = get_all_fits(data,fitter,allow_new_computation=False) fits_shuffled = get_all_fits(data_shuffled,fitter,allow_new_computation=False) R2_pairs = [(fit.LOO_score,fit2.LOO_score) for fit,fit2 in iterate_fits(fits,fits_shuffled)] R2 = np.array([r for r,r_shuffled in R2_pairs]) R2_shuffled = np.array([r_shuffled for r,r_shuffled in R2_pairs]) name = '{}-{}'.format(data.pathway,shape.cache_name()) fig = plot_score_distribution(R2,R2_shuffled) save_figure(fig,'RP/R2-distribution-{}.png'.format(name), under_results=True, b_close=True) mu_shuffled = np.mean(R2_shuffled) std_shuffled = np.std(R2_shuffled) z_scores = (R2-mu_shuffled)/std_shuffled fig = plot_z_scores(z_scores)