def show_loo_prediction():
    """Plot the fit obtained when one sample is excluded from training.

    Uses the free names ``x``, ``y``, ``fitter``, ``series``, ``shape`` and
    ``yrange`` defined outside this function. The sample at index ``iLOO``
    is masked out, the remaining samples are fitted, and the figure returned
    by ``plot_one_series`` is saved as ``RP/methods-3-LOO-prediction.png``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Drawing LOO prediction and error...')
    iLOO = 18  # index of the sample that is left out
    train_mask = np.arange(len(x)) != iLOO
    x_train = x[train_mask]
    y_train = y[train_mask]
    # Only the first of the four values returned by fit() is needed here.
    theta, _, _, _ = fitter.fit(x_train, y_train)
    fig = plot_one_series(series, shape, theta, yrange, train_mask=train_mask)
    save_figure(fig, 'RP/methods-3-LOO-prediction.png', under_results=True)
def show_loo_prediction():
    """Plot the fit obtained when one sample is excluded from training.

    Uses the free names ``x``, ``y``, ``fitter``, ``series``, ``shape`` and
    ``yrange`` defined outside this function. The sample at index ``iLOO``
    is masked out, the remaining samples are fitted, and the figure returned
    by ``plot_one_series`` is saved as ``RP/methods-3-LOO-prediction.png``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Drawing LOO prediction and error...')
    iLOO = 18  # index of the sample that is left out
    train_mask = np.arange(len(x)) != iLOO
    x_train = x[train_mask]
    y_train = y[train_mask]
    # Only the first of the four values returned by fit() is needed here.
    theta, _, _, _ = fitter.fit(x_train, y_train)
    fig = plot_one_series(series, shape, theta, yrange, train_mask=train_mask)
    save_figure(fig, 'RP/methods-3-LOO-prediction.png', under_results=True)
def show_loo_score():
    """Plot the predictions returned by a ``loo=True`` fit for every point.

    Fits ``x``/``y`` (free names defined outside this function) with
    ``loo=True`` and passes the third returned value to ``plot_one_series``
    as ``test_preds``. No fitted curve is drawn (``theta=None``). The figure
    is saved as ``RP/methods-4-R2-score.png``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Drawing LOO prediction for all points and R2 score...')
    # The fitted theta is deliberately discarded: the plot receives theta=None.
    _, _, test_preds, _ = fitter.fit(x, y, loo=True)
    fig = plot_one_series(series, shape, theta=None, yrange=yrange,
                          test_preds=test_preds)
    save_figure(fig, 'RP/methods-4-R2-score.png', under_results=True)
def do_gene_fits(data, gene, fitter, filename, b_show):
    """Plot one gene with ``plot_gene`` and save the figure.

    Args:
        data: forwarded to ``plot_gene``.
        gene: forwarded to ``plot_gene``.
        fitter: not used by this function; kept so callers need not change.
        filename: output path. ``None`` selects ``fit.png`` inside
            ``results_dir()``, creating that directory first.
        b_show: when true, show the figure with ``plt.show(block=True)``.
    """
    fig = plot_gene(data, gene)
    if filename is None:
        ensure_dir(results_dir())
        filename = join(results_dir(), 'fit.png')
    # NOTE(review): the original indentation was lost; the message is assumed
    # to be printed for every save, not only for the default filename.
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Saving figure to {}'.format(filename))
    save_figure(fig, filename)
    if b_show:
        plt.show(block=True)
def do_one_fit(series, fitter, loo, filename, b_show):
    """Plot a single series, with a fit when a fitter is supplied, and save it.

    Args:
        series: object exposing ``ages`` and ``single_expression``.
        fitter: object with ``fit`` and ``shape``, or ``None`` to plot the
            raw series only.
        loo: forwarded to ``fitter.fit`` as the ``loo`` keyword.
        filename: output path. ``None`` selects ``fit.png`` inside
            ``results_dir()``, creating that directory first.
        b_show: when true, show the figure with ``plt.show(block=True)``.
    """
    if fitter is None:
        fig = plot_one_series(series)
    else:
        fit_result = fitter.fit(series.ages, series.single_expression, loo=loo)
        # fit() yields four values; the second and fourth are not needed here.
        theta, _sigma, loo_predictions, _ = fit_result
        fig = plot_one_series(series, fitter.shape, theta, loo_predictions)

    if filename is None:
        ensure_dir(results_dir())
        filename = join(results_dir(), 'fit.png')

    save_figure(fig, filename, print_filename=True)

    if b_show:
        plt.show(block=True)
def analyze_pathway(pathway, data, fitter, fits, html_only=False):
    """Produce the correlation figures for one pathway.

    For every region in ``data.region_names`` a heat map is saved and the
    region's tuples are merged into one dictionary. After that a scatter
    plot and two bar plots covering all regions are saved.

    Args:
        pathway: name used in the printed banner and in the output filenames.
        data: object exposing ``region_names`` and ``gene_names``.
        fitter: forwarded to ``analyze_one_region``.
        fits: forwarded to ``analyze_one_region``.
        html_only: accepted for interface compatibility; not used here.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    banner = 80 * '='
    print(banner)
    print('Analyzing pathway {}'.format(pathway))
    print(banner)

    dct_tuples = {}
    for region in data.region_names:
        dct_region_tuples, region_correlations = analyze_one_region(
            data, fitter, fits, region)
        fig = plot_gene_correlations_single_region(
            region_correlations, region, data.gene_names)
        filename = join(
            'RP', 'correlation-heat-map-{}-{}.png'.format(region, pathway))
        save_figure(fig, filename, b_close=True, under_results=True)
        dct_tuples.update(dct_region_tuples)
    print_best_improvements(dct_tuples)

    # Materialise once: the values are consumed three times below, and on
    # Python 3 dict.values() is a view rather than a list.
    tuples = list(dct_tuples.values())
    pairs = [(t[0], t[1]) for t in tuples]

    fig = plot_comparison_scatter(pairs, pathway)
    filename = join('RP', 'correlation-diff-scatter-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)

    fig = plot_comparison_bar(tuples)
    filename = join('RP', 'correlation-diff-bar-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)

    fig = plot_comparison_bar(tuples, several_levels=True)
    filename = join(
        'RP', 'correlation-diff-bar-several-levels-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)
def fit_serveral_genes(series, fitter, loo, filename, b_show):
    """Plot a series with ``plot_series``, with a fit when a fitter is given.

    Args:
        series: object exposing ``ages`` and ``expression``.
        fitter: object with ``fit`` and ``shape``, or ``None`` to plot the
            raw series only.
        loo: forwarded to ``fitter.fit`` as the ``loo`` keyword.
        filename: output path. ``None`` selects ``fits.png`` inside
            ``results_dir()``, creating that directory first.
        b_show: when true, show the figure with ``plt.show(block=True)``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    if fitter is not None:
        theta, L, LOO_predictions, _ = fitter.fit(
            series.ages, series.expression, loo=loo)
        print('L = {}'.format(L))
        fig = plot_series(series, fitter.shape, theta, LOO_predictions)
    else:
        fig = plot_series(series)
    if filename is None:
        ensure_dir(results_dir())
        filename = join(results_dir(), 'fits.png')
    # NOTE(review): the original indentation was lost; the message is assumed
    # to be printed for every save, not only for the default filename.
    print('Saving figure to {}'.format(filename))
    save_figure(fig, filename)
    if b_show:
        plt.show(block=True)
def analyze_pathway(pathway, data, fitter, fits, html_only=False):
    """Produce the correlation figures for one pathway.

    For every region in ``data.region_names`` a heat map is saved and the
    region's tuples are merged into one dictionary. After that a scatter
    plot and two bar plots covering all regions are saved.

    Args:
        pathway: name used in the printed banner and in the output filenames.
        data: object exposing ``region_names`` and ``gene_names``.
        fitter: forwarded to ``analyze_one_region``.
        fits: forwarded to ``analyze_one_region``.
        html_only: accepted for interface compatibility; not used here.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    banner = 80 * '='
    print(banner)
    print('Analyzing pathway {}'.format(pathway))
    print(banner)

    dct_tuples = {}
    for region in data.region_names:
        dct_region_tuples, region_correlations = analyze_one_region(
            data, fitter, fits, region)
        fig = plot_gene_correlations_single_region(
            region_correlations, region, data.gene_names)
        filename = join(
            'RP', 'correlation-heat-map-{}-{}.png'.format(region, pathway))
        save_figure(fig, filename, b_close=True, under_results=True)
        dct_tuples.update(dct_region_tuples)
    print_best_improvements(dct_tuples)

    # Materialise once: the values are consumed three times below, and on
    # Python 3 dict.values() is a view rather than a list.
    tuples = list(dct_tuples.values())
    pairs = [(t[0], t[1]) for t in tuples]

    fig = plot_comparison_scatter(pairs, pathway)
    filename = join('RP', 'correlation-diff-scatter-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)

    fig = plot_comparison_bar(tuples)
    filename = join('RP', 'correlation-diff-bar-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)

    fig = plot_comparison_bar(tuples, several_levels=True)
    filename = join(
        'RP', 'correlation-diff-bar-several-levels-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)
def plot_means(dataset):
    """Plot the mean expression per age together with a +/- one std band.

    For each position ``i`` along ``dataset.ages`` all values of
    ``dataset.expression[i, :, :]`` are pooled and their NaN-aware mean and
    standard deviation computed. The figure also shows the overall minimum
    and maximum expression as dashed horizontal lines and a dashed vertical
    line at ``scaler.scale(0)``. The result is saved as
    ``mean-expression-<dataset.name>.png``.

    Relies on the free names ``cfg``, ``scaler`` and ``dev_stages``.
    """
    ages = dataset.ages
    expression = dataset.expression

    age_lo, age_hi = min(ages), max(ages)
    expr_lo = np.nanmin(expression.flat)
    expr_hi = np.nanmax(expression.flat)

    center = np.empty(ages.shape)
    std_plus = np.empty(ages.shape)
    std_minus = np.empty(ages.shape)
    for idx in range(len(ages)):
        values = expression[idx, :, :].flat
        mu = nanmean(values)
        sd = nanstd(values)
        center[idx] = mu
        std_plus[idx] = mu + sd
        std_minus[idx] = mu - sd

    fig = plt.figure()
    ax = fig.add_axes([0.08, 0.15, 0.85, 0.8])
    ax.set_ylabel('expression level', fontsize=cfg.fontsize)
    ax.set_xlabel('age', fontsize=cfg.fontsize)
    title = 'Mean expression across all genes - {}'.format(dataset.name)
    ax.set_title(title, fontsize=cfg.fontsize)

    # Development stages provide the x tick positions and labels.
    stages = [stage.scaled(scaler) for stage in dev_stages]
    tick_positions = [stage.central_age for stage in stages]
    tick_labels = [stage.short_name for stage in stages]
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, fontsize=cfg.xtick_fontsize,
                       fontstretch='condensed', rotation=90)
    ax.set_xlim([age_lo, age_hi])

    # Vertical line at the scaled age 0 (birth), spanning the current y range.
    ymin, ymax = ax.get_ylim()
    birth_age = scaler.scale(0)
    ax.plot([birth_age, birth_age], [ymin, ymax], '--', color='0.85')

    # Horizontal lines at the global minimum and maximum expression.
    age_span = [age_lo, age_hi]
    for level in (expr_lo, expr_hi):
        ax.plot(age_span, [level, level], '--g')

    ax.plot(ages, center, 'bx')
    for band_edge in (std_plus, std_minus):
        ax.plot(ages, band_edge, 'g-')

    save_figure(fig, 'mean-expression-{}.png'.format(dataset.name),
                under_results=True)
def fit_serveral_genes(series, fitter, loo, filename, b_show):
    """Plot a series with ``plot_series``, with a fit when a fitter is given.

    Args:
        series: object exposing ``ages`` and ``expression``.
        fitter: object with ``fit`` and ``shape``, or ``None`` to plot the
            raw series only.
        loo: forwarded to ``fitter.fit`` as the ``loo`` keyword.
        filename: output path. ``None`` selects ``fits.png`` inside
            ``results_dir()``, creating that directory first.
        b_show: when true, show the figure with ``plt.show(block=True)``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    if fitter is not None:
        theta, L, LOO_predictions, _ = fitter.fit(
            series.ages, series.expression, loo=loo)
        print('L = {}'.format(L))
        fig = plot_series(series, fitter.shape, theta, LOO_predictions)
    else:
        fig = plot_series(series)
    if filename is None:
        ensure_dir(results_dir())
        filename = join(results_dir(), 'fits.png')
    # NOTE(review): the original indentation was lost; the message is assumed
    # to be printed for every save, not only for the default filename.
    print('Saving figure to {}'.format(filename))
    save_figure(fig, filename)
    if b_show:
        plt.show(block=True)
def plot_means(dataset):
    """Plot the mean expression per age together with a +/- one std band.

    For each position ``i`` along ``dataset.ages`` all values of
    ``dataset.expression[i, :, :]`` are pooled and their NaN-aware mean and
    standard deviation computed. The figure also shows the overall minimum
    and maximum expression as dashed horizontal lines and a dashed vertical
    line at ``scaler.scale(0)``. The result is saved as
    ``mean-expression-<dataset.name>.png``.

    Relies on the free names ``cfg``, ``scaler`` and ``dev_stages``.
    """
    ages = dataset.ages
    expression = dataset.expression

    age_lo, age_hi = min(ages), max(ages)
    expr_lo = np.nanmin(expression.flat)
    expr_hi = np.nanmax(expression.flat)

    center = np.empty(ages.shape)
    std_plus = np.empty(ages.shape)
    std_minus = np.empty(ages.shape)
    for idx in range(len(ages)):
        values = expression[idx, :, :].flat
        mu = nanmean(values)
        sd = nanstd(values)
        center[idx] = mu
        std_plus[idx] = mu + sd
        std_minus[idx] = mu - sd

    fig = plt.figure()
    ax = fig.add_axes([0.08, 0.15, 0.85, 0.8])
    ax.set_ylabel("expression level", fontsize=cfg.fontsize)
    ax.set_xlabel("age", fontsize=cfg.fontsize)
    title = "Mean expression across all genes - {}".format(dataset.name)
    ax.set_title(title, fontsize=cfg.fontsize)

    # Development stages provide the x tick positions and labels.
    stages = [stage.scaled(scaler) for stage in dev_stages]
    tick_positions = [stage.central_age for stage in stages]
    tick_labels = [stage.short_name for stage in stages]
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, fontsize=cfg.xtick_fontsize,
                       fontstretch="condensed", rotation=90)
    ax.set_xlim([age_lo, age_hi])

    # Vertical line at the scaled age 0 (birth), spanning the current y range.
    ymin, ymax = ax.get_ylim()
    birth_age = scaler.scale(0)
    ax.plot([birth_age, birth_age], [ymin, ymax], "--", color="0.85")

    # Horizontal lines at the global minimum and maximum expression.
    age_span = [age_lo, age_hi]
    for level in (expr_lo, expr_hi):
        ax.plot(age_span, [level, level], "--g")

    ax.plot(ages, center, "bx")
    for band_edge in (std_plus, std_minus):
        ax.plot(ages, band_edge, "g-")

    save_figure(fig, "mean-expression-{}.png".format(dataset.name),
                under_results=True)
def run(self, duration):
    """Drive the agent through the environment until it becomes inactive.

    The environment state is initialised from ``self.child.run()`` and the
    agent is switched on. While ``self.agent.active`` is true, the agent
    perceives the state and acts on it. Once ``duration`` seconds have
    elapsed the agent is told to go home and given one further
    perceive/act cycle.

    Args:
        duration: time budget in seconds.

    Returns:
        Tuple ``(reward, cleaned)`` read from the agent before it is reset.
    """
    def render():
        # Snapshot of the environment with the agent's current position.
        save_figure(self.env.x, self.env.y, self.env.state,
                    [self.agent.position[0]], [self.agent.position[1]])

    def step():
        # One perceive/act cycle, then either a plot or a short pause.
        self.agent.precepts(self.env.state)
        self.env.state = self.agent.effectors(self.env.state)
        if self.plot:
            render()
        else:
            sleep(0.5)

    start = datetime.now()
    self.env.state = self.child.run()
    self.agent.on()
    if self.plot:
        render()

    # NOTE(review): the original indentation was lost; the go-home branch is
    # assumed to sit inside the loop and to include the extra cycle.
    while self.agent.active:
        step()
        # total_seconds() gives the full elapsed time; .seconds is only the
        # truncated seconds component and wraps around after one day.
        if (datetime.now() - start).total_seconds() >= duration:
            self.agent.gohome()
            step()

    rwd, clnd = self.agent.reward, self.agent.cleaned
    self.agent.reset()
    return rwd, clnd
fitters = [Fitter(shape, sigma_prior='normal') for shape in shapes] ##################################################### # Example fits ##################################################### GRs = [ ('ABHD4', 'STC', (5, 8)), ] for g, r, yrange in GRs: for fitter in fitters: print 'Doing {}@{}...'.format(g, r) series = data.get_one_series(g, r) theta, _, _, _ = fitter.fit(series.ages, series.single_expression) fig = plot_one_series(series, fitter.shape, theta, yrange) save_figure(fig, 'RP/fit-examples-{}-{}-{}.png'.format( fitter.shape.cache_name(), g, r), under_results=True) ##################################################### # Comparison for whole pathway ##################################################### pathway = '17full' data = data.restrict_pathway(pathway) fits = [ get_all_fits(data, fitter, allow_new_computation=False) for fitter in fitters ] fig = plot_comparison_bar(data, shapes, fits) save_figure(fig, 'RP/sigslope-comparison-bar-{}.png'.format(data.pathway),
('HTR1A', 'MFC'), ] n_bins = 50 n_samples = 10 disable_all_warnings() cfg.verbosity = 1 age_scaler = LogScaler() data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) shape = Sigmoid(priors='sigmoid_wide') fitter = Fitter(shape, sigma_prior='normal') fits = get_all_fits(data, fitter, allow_new_computation=False) dirname = 'bootstrap' fits = add_change_distributions(data, fitter, fits, n_bins=n_bins) fig = plot_bootstrap_onset_variance(data, fits) save_figure(fig, '{}/onset-variance-{}.png'.format(dirname, pathway), under_results=True, b_close=True) fig = plot_change_width_scatter(data, fitter, fits) save_figure(fig, '{}/width-scatter-{}.png'.format(dirname, pathway), under_results=True, b_close=True) for g,r in gene_regions: ds_name = data.region_to_dataset()[r] fit = fits[ds_name][(g,r)] fig = plot_bootstrap_fits(data, fit, n_bins=n_bins, n_samples=n_samples) save_figure(fig, '{}/fits-{}-{}.png'.format(dirname,g,r), under_results=True, b_close=True) fig = plot_bootstrap_histograms(data, fit, n_bins=n_bins, n_samples=n_samples) save_figure(fig, '{}/transition-distribution-{}-{}.png'.format(dirname,g,r), under_results=True, b_close=True)
def basic_fit():
    """Fit ``x``/``y`` with ``fitter`` and save the plot of the fitted curve.

    Uses the free names ``x``, ``y``, ``fitter``, ``series``, ``shape`` and
    ``yrange`` defined outside this function. The figure is saved as
    ``RP/methods-1-basic-fit.png``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Drawing basic fit...')
    # Only the first of the four values returned by fit() is needed here.
    theta, _, _, _ = fitter.fit(x, y)
    fig = plot_one_series(series, shape, theta, yrange)
    save_figure(fig, 'RP/methods-1-basic-fit.png', under_results=True)
r,pval = spearmanr(onset_times, range(len(regions))) return r,pval,lst_R2 lst_pathways = [ 'serotonin', 'dopamine', ] for pathway in lst_pathways: data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) shape = Sigmoid(priors='sigmoid_wide') fitter = Fitter(shape, sigma_prior='normal') fits = get_all_fits(data, fitter, allow_new_computation=False) # R2_threshold = 0.5 YYY problem - we might be using bad fits. regions = ['OFC', 'M1C', 'S1C', 'IPC', 'V1C'] scores = [] for g in data.gene_names: r,pval,lst_R2 = get_gene_correlation(fits,g,regions) scores.append( (g,r,pval,lst_R2) ) fig = plot_correlation_histogram(scores,pathway) save_figure(fig,'{}/gradual-maturation-hist.png'.format(pathway,pathway), under_results=True, b_close=True) for fR2 in [np.mean]: #[min,max,np.mean]: fig = plot_scatter(scores, pathway, fR2) save_figure(fig,'{}/gradual-maturation-scatter-{}.png'.format(pathway,fR2.__name__), under_results=True, b_close=True) create_top_correlations_html(data,fitter,fits,scores,regions)
rho, pval = paired_spearman(pathway_mu) scores.append( (-np.log10(pval), pval, rho, pathway) ) scores.sort(reverse=True) save_scores(singles, scores, order) ############################################################## # main ############################################################## if __name__ == '__main__': cfg.verbosity = 1 parser = argparse.ArgumentParser() parser.add_argument('--list', help='Pathways list name. Default=brain_go_num_genes_min_15', default='brain_go_num_genes_min_15', choices=['all'] + pathway_lists.all_pathway_lists()) parser.add_argument('--cortex_only', help='Use only cortical regions', action='store_true') parser.add_argument('--draw', help='Draw plot for this pathway and exit') args = parser.parse_args() if args.cortex_only: order = 'V1C A1C S1C M1C DFC MFC OFC'.split() else: order = 'MD STR V1C OFC'.split() singles = SingleRegion(args.list) if args.draw is None: timing_vs_region_order(singles, order) else: pathway = args.draw fig = plot_pathway(singles, pathway, order) filename = 'spearman-{}.png'.format(pathway) save_figure(fig, filename, under_results=True)
cfg.verbosity = 1
age_scaler = LogScaler()
data = GeneData.load('both').scale_ages(age_scaler)

# Shapes whose fits are drawn together on each example figure.
shapes = [
    Sigmoid('sigmoid_wide'),
    Poly(1, 'poly1'),
    Poly(3, 'poly3'),
    Spline()
]

# (gene, region, yrange) triples; yrange is forwarded to plot_one_series.
GRs = [
    ('ADRB1', 'A1C', (5, 8)),
    ('GLRA2', 'STC', (5, 12)),
    ('TUBA1A', 'V1C', (10, 14)),
]

for g, r, yrange in GRs:
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Doing {}@{}...'.format(g, r))
    thetas = []
    for shape in shapes:
        series = data.get_one_series(g, r)
        # Spline is fitted without a sigma prior; other shapes use 'normal'.
        sigma_prior = 'normal' if not isinstance(shape, Spline) else None
        fitter = Fitter(shape, sigma_prior=sigma_prior)
        theta, _, _, _ = fitter.fit(series.ages, series.single_expression)
        thetas.append(theta)
    # One figure per gene/region showing the fits of all shapes.
    fig = plot_one_series(series, shapes, thetas, yrange)
    save_figure(fig, 'RP/fit-examples-{}-{}.png'.format(g, r),
                under_results=True)
age_scaler = LogScaler()
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)

sigmoid = Sigslope(priors='sigslope80')
spline = Spline()
poly1 = Poly(1, priors='poly1')
poly2 = Poly(2, priors='poly2')
poly3 = Poly(3, priors='poly3')
shapes = [sigmoid, spline, poly1, poly2, poly3]

# Shapes reporting has_special_fitting() are fitted without a sigma prior.
fitters = [
    Fitter(shape,
           sigma_prior='normal' if not shape.has_special_fitting() else None)
    for shape in shapes
]
# Previously computed fits only: new computation is disallowed.
fits = [get_all_fits(data, fitter, allow_new_computation=False)
        for fitter in fitters]

fig = plot_comparison_bar(data, shapes, fits)
save_figure(fig, 'RP/shape-comparison-bar-{}.png'.format(data.pathway),
            under_results=True)

#fig = plot_comparison_bar(data, shapes, fits, threshold_percentile=50)
#save_figure(fig,'RP/shape-comparison-bar-{}-top-half.png'.format(data.pathway), under_results=True)

fig = plot_comparison_over_R2_score(data, shapes, fits)
save_figure(fig, 'RP/shape-comparison-vs-R2-{}.png'.format(data.pathway),
            under_results=True)

fig = plot_comparison_over_R2_score(data, shapes, fits, zoom=(0.3, 1))
save_figure(fig, 'RP/shape-comparison-vs-R2-{}-zoom.png'.format(data.pathway),
            under_results=True)

# Scatter of the first shape against each of the others.
# range() replaces the Python-2-only xrange(); the list is tiny either way.
for i in range(1, len(shapes)):
    fig = plot_comparison_scatter(data, shapes[0], fits[0], shapes[i], fits[i])
    save_figure(fig,
                'RP/scatter-{}-{}-{}.png'.format(shapes[0], shapes[i], pathway),
                under_results=True)

# NOTE(review): the original indentation was lost; closing all figures is
# assumed to happen once, after the loop.
plt.close('all')
def basic_fit():
    """Fit ``x``/``y`` with ``fitter`` and save the plot of the fitted curve.

    Uses the free names ``x``, ``y``, ``fitter``, ``series``, ``shape`` and
    ``yrange`` defined outside this function. The figure is saved as
    ``RP/methods-1-basic-fit.png``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Drawing basic fit...')
    # Only the first of the four values returned by fit() is needed here.
    theta, _, _, _ = fitter.fit(x, y)
    fig = plot_one_series(series, shape, theta, yrange)
    save_figure(fig, 'RP/methods-1-basic-fit.png', under_results=True)
def annotate_parameters():
    """Fit ``x``/``y`` and save the plot with ``b_annotate=True``.

    Uses the free names ``x``, ``y``, ``fitter``, ``series``, ``shape`` and
    ``yrange`` defined outside this function. The figure is saved as
    ``RP/methods-2-sigmoid-params.png``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Drawing fit with parameters...')
    # Only the first of the four values returned by fit() is needed here.
    theta, _, _, _ = fitter.fit(x, y)
    fig = plot_one_series(series, shape, theta, yrange, b_annotate=True)
    save_figure(fig, 'RP/methods-2-sigmoid-params.png', under_results=True)
def annotate_parameters():
    """Fit ``x``/``y`` and save the plot with ``b_annotate=True``.

    Uses the free names ``x``, ``y``, ``fitter``, ``series``, ``shape`` and
    ``yrange`` defined outside this function. The figure is saved as
    ``RP/methods-2-sigmoid-params.png``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Drawing fit with parameters...')
    # Only the first of the four values returned by fit() is needed here.
    theta, _, _, _ = fitter.fit(x, y)
    fig = plot_one_series(series, shape, theta, yrange, b_annotate=True)
    save_figure(fig, 'RP/methods-2-sigmoid-params.png', under_results=True)
disable_all_warnings() cfg.verbosity = 1 age_scaler = LogScaler() data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) shape = Sigmoid(priors='sigmoid_wide') fitter = Fitter(shape, sigma_prior='normal') fits = get_all_fits(data, fitter, allow_new_computation=False) dirname = 'bootstrap' fits = add_change_distributions(data, fitter, fits, n_bins=n_bins) fig = plot_bootstrap_onset_variance(data, fits) save_figure(fig, '{}/onset-variance-{}.png'.format(dirname, pathway), under_results=True, b_close=True) fig = plot_change_width_scatter(data, fitter, fits) save_figure(fig, '{}/width-scatter-{}.png'.format(dirname, pathway), under_results=True, b_close=True) for g, r in gene_regions: ds_name = data.region_to_dataset()[r] fit = fits[ds_name][(g, r)] fig = plot_bootstrap_fits(data, fit, n_bins=n_bins, n_samples=n_samples) save_figure(fig, '{}/fits-{}-{}.png'.format(dirname, g, r), under_results=True,
# Compare LOO scores of fits on the real data against fits on shuffled data.
# `data` and `data_shuffled` are defined before this fragment.
shape = Sigmoid('sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
# Previously computed fits only: new computation is disallowed.
fits = get_all_fits(data, fitter, allow_new_computation=False)
fits_shuffled = get_all_fits(data_shuffled, fitter, allow_new_computation=False)
# Pair each fit with its shuffled counterpart and keep both LOO scores.
R2_pairs = [(fit.LOO_score, fit2.LOO_score) for fit, fit2 in iterate_fits(fits, fits_shuffled)]
R2 = np.array([r for r, r_shuffled in R2_pairs])
R2_shuffled = np.array([r_shuffled for r, r_shuffled in R2_pairs])
# Suffix shared by the output files of this analysis.
name = '{}-{}'.format(data.pathway, shape.cache_name())
fig = plot_score_distribution(R2, R2_shuffled)
save_figure(fig, 'RP/R2-distribution-{}.png'.format(name), under_results=True, b_close=True)
# z-score of every real score relative to the shuffled score distribution.
mu_shuffled = np.mean(R2_shuffled)
std_shuffled = np.std(R2_shuffled)
z_scores = (R2 - mu_shuffled) / std_shuffled
fig = plot_z_scores(z_scores)
save_figure(fig, 'RP/R2-z-scores-{}.png'.format(name), under_results=True, b_close=True)
# Paired test between real and shuffled scores.
T, signed_rank_p_value = wilcoxon(R2, R2_shuffled)
# Count the real scores that exceed the best shuffled score.
maxShuffled = R2_shuffled.max()
nAbove = np.count_nonzero(R2 > maxShuffled)
nTotal = len(R2)
# Compare LOO scores of fits on the real data against fits on shuffled data.
age_scaler = LogScaler()
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
# Same loading pipeline with .shuffle() applied at the end.
data_shuffled = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler).shuffle()
shape = Sigmoid('sigmoid_wide')
fitter = Fitter(shape,sigma_prior='normal')
# Previously computed fits only: new computation is disallowed.
fits = get_all_fits(data,fitter,allow_new_computation=False)
fits_shuffled = get_all_fits(data_shuffled,fitter,allow_new_computation=False)
# Pair each fit with its shuffled counterpart and keep both LOO scores.
R2_pairs = [(fit.LOO_score,fit2.LOO_score) for fit,fit2 in iterate_fits(fits,fits_shuffled)]
R2 = np.array([r for r,r_shuffled in R2_pairs])
R2_shuffled = np.array([r_shuffled for r,r_shuffled in R2_pairs])
# Suffix shared by the output files of this analysis.
name = '{}-{}'.format(data.pathway,shape.cache_name())
fig = plot_score_distribution(R2,R2_shuffled)
save_figure(fig,'RP/R2-distribution-{}.png'.format(name), under_results=True, b_close=True)
# z-score of every real score relative to the shuffled score distribution.
mu_shuffled = np.mean(R2_shuffled)
std_shuffled = np.std(R2_shuffled)
z_scores = (R2-mu_shuffled)/std_shuffled
fig = plot_z_scores(z_scores)
save_figure(fig,'RP/R2-z-scores-{}.png'.format(name), under_results=True, b_close=True)
# Paired test between real and shuffled scores.
T, signed_rank_p_value = wilcoxon(R2, R2_shuffled)
# Percentage of real scores that exceed the best shuffled score.
maxShuffled = R2_shuffled.max()
nAbove = np.count_nonzero(R2 > maxShuffled)
nTotal = len(R2)
pct = 100.0 * nAbove/nTotal
# Text summary written next to the figures.
filename = join(results_dir(),'RP/R2-distribution-{}.txt'.format(name))
with open(filename,'w') as f:
    print('shuffled = {:.2g} +/- {:.2g}'.format(mu_shuffled,std_shuffled), file=f)
# For every pathway: score each gene across the listed regions, then save a
# histogram and a scatter plot of the scores.
for pathway in lst_pathways:
    data = GeneData.load('both').restrict_pathway(pathway).scale_ages(
        age_scaler)
    shape = Sigmoid(priors='sigmoid_wide')
    fitter = Fitter(shape, sigma_prior='normal')
    # Previously computed fits only: new computation is disallowed.
    fits = get_all_fits(data, fitter, allow_new_computation=False)
    # R2_threshold = 0.5 YYY problem - we might be using bad fits.
    regions = ['OFC', 'M1C', 'S1C', 'IPC', 'V1C']
    scores = []
    for g in data.gene_names:
        r, pval, lst_R2 = get_gene_correlation(fits, g, regions)
        scores.append((g, r, pval, lst_R2))
    fig = plot_correlation_histogram(scores, pathway)
    # The template has one placeholder, so only one argument is passed.
    save_figure(fig, '{}/gradual-maturation-hist.png'.format(pathway),
                under_results=True, b_close=True)
    for fR2 in [np.mean]:  # [min,max,np.mean]:
        fig = plot_scatter(scores, pathway, fR2)
        save_figure(fig,
                    '{}/gradual-maturation-scatter-{}.png'.format(
                        pathway, fR2.__name__),
                    under_results=True, b_close=True)
    # NOTE(review): the original indentation was lost; the HTML report is
    # assumed to be generated per pathway, since its inputs are per pathway.
    create_top_correlations_html(data, fitter, fits, scores, regions)
nScores, yScores = zip(*score_pairs) _, pval = scipy.stats.wilcoxon(nScores, yScores) pval = pval/2 # one sided p-value print '*** wilcoxon signed rank p-value (one sided) = {:.3g}'.format(pval) # find examples of best improvements diffs = [(f2.LOO_score-f1.LOO_score, f1.LOO_score, f2.LOO_score, g, r) for dsname,g,r,f1,f2 in iterate_fits(nFits, yFits, R2_threshold=-1, return_keys=True)] diffs.sort(reverse=True) print 'Gene/Regions for which priors produce best R2 improvement:' for i,(delta,R2_without, R2_with, g,r) in enumerate(diffs[:10]): print '{i}) {g}@{r}, delta-R2={delta:.3g}. R2_without={R2_without:.3g}, R2_with={R2_with:.3g}'.format(**locals()) cfg.verbosity = 1 age_scaler = LogScaler() pathway = '17full' data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) priors_name = 'sigslope80' analyze_paired_scores_with_and_without_priors() variations = [analyze_variant(t,s) for t,s in product([False,True],[False,True])] fig = plot_bar(variations) save_figure(fig,'RP/prior-variations-bar.png', under_results=True) fig = plot_pctiles(variations, min_q=5) save_figure(fig,'RP/prior-variations-percentiles.png', under_results=True) #fig = plot_theta_diff_scatter() #save_figure(fig,'RP/prior-variations-scatter.png', under_results=True)
# Plot onset-time distributions per pathway, once for the overlapping and
# once for the unique gene breakdown. `data` and `all_data` are defined
# before this fragment.
shape = Sigmoid(priors='sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter)
R2_threshold = 0.5
for b_unique in [False, True]:
    dct_pathways = load_17_pathways_breakdown(b_unique)
    # Extra entry whose gene list is None.
    dct_pathways['17 pathways'] = None
    # items() replaces the Python-2-only iteritems(); the dictionary is not
    # modified while it is being iterated.
    for name, genes in dct_pathways.items():
        fig = plot_onset_times(all_data, data, fitter, fits, {name: genes},
                               R2_threshold, b_unique)
        str_dir = 'unique' if b_unique else 'overlapping'
        str_unique = ' (unique)' if b_unique else ''
        filename = 'RP/{}/change-distributions-{}{}.png'.format(
            str_dir, name, str_unique)
        save_figure(fig, filename, under_results=True)

    # selected plots
    # NOTE(review): the original indentation was lost; this part is assumed
    # to run inside the b_unique loop because it reads b_unique.
    lst_pathways = [
        '17 pathways',
        'Amphetamine addiction',
        'Cholinergic synapse',
        'Cocaine addiction',
        'Glutamatergic synapse'
    ]
    dct_pathways = {k: dct_pathways[k] for k in lst_pathways}
    fig = plot_onset_times(all_data, data, fitter, fits, dct_pathways,
                           R2_threshold, b_unique)
    str_dir = 'unique' if b_unique else 'overlapping'
    str_unique = ' (unique)' if b_unique else ''
    filename = 'RP/{}/selected-change-distributions{}.png'.format(
        str_dir, str_unique)
    save_figure(fig, filename, under_results=True)
# `sigmoid`, `sigslope` and `data` are defined before this fragment.
shapes = [sigmoid, sigslope]
fitters = [Fitter(shape, sigma_prior='normal') for shape in shapes]

#####################################################
# Example fits
#####################################################
# (gene, region, yrange) triples; yrange is forwarded to plot_one_series.
GRs = [
    ('ABHD4', 'STC', (5, 8)),
]
for g, r, yrange in GRs:
    for fitter in fitters:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and 3.
        print('Doing {}@{}...'.format(g, r))
        series = data.get_one_series(g, r)
        theta, _, _, _ = fitter.fit(series.ages, series.single_expression)
        fig = plot_one_series(series, fitter.shape, theta, yrange)
        save_figure(fig,
                    'RP/fit-examples-{}-{}-{}.png'.format(
                        fitter.shape.cache_name(), g, r),
                    under_results=True)

#####################################################
# Comparison for whole pathway
#####################################################
pathway = '17full'
# From here on `data` is restricted to the chosen pathway.
data = data.restrict_pathway(pathway)
# Previously computed fits only: new computation is disallowed.
fits = [get_all_fits(data, fitter, allow_new_computation=False)
        for fitter in fitters]

fig = plot_comparison_bar(data, shapes, fits)
save_figure(fig, 'RP/sigslope-comparison-bar-{}.png'.format(data.pathway),
            under_results=True)

fig = plot_comparison_over_R2_score(data, shapes, fits)
save_figure(fig, 'RP/sigslope-comparison-vs-R2-{}.png'.format(data.pathway),
            under_results=True)
if __name__ == '__main__':
    # Command-line entry point: fit the same data with two shape
    # configurations, print the largest differences and save a scatter plot
    # comparing them.
    disable_all_warnings()
    parser = get_common_parser()
    # Options describing the second configuration; the first one comes from
    # the common parser.
    parser.add_argument('--shape2', required=True,
                        help='The shape to compare against',
                        choices=allowed_shape_names())
    parser.add_argument('--scaling2',
                        help='The scaling used when fitting shape2. Default: none',
                        choices=allowed_scaler_names())
    parser.add_argument('--sigma_prior2',
                        help='Prior to use for 1/sigma when fitting shape2. Default: None',
                        choices=get_allowed_priors(is_sigma=True))
    parser.add_argument('--priors2',
                        help='The priors used for theta when fitting shape2. Default: None',
                        choices=get_allowed_priors())
    # The help text names the file that is actually written by default
    # (it previously advertised results/comparison.png).
    parser.add_argument('--filename',
                        help='Where to save the figure. '
                             'Default: shape_comparison.png in the results directory')
    parser.add_argument('--show',
                        help='Show figure and wait before exiting',
                        action='store_true')
    parser.add_argument('--ndiffs', type=int, default=5,
                        help='Number of top diffs to show. Default=5.')
    args = parser.parse_args()

    data1, fitter1 = process_common_inputs(args)
    # Second data set: same dataset/pathway/from_age/shuffle, own scaling.
    data2 = get_data_from_args(args.dataset, args.pathway, args.from_age,
                               args.scaling2, args.shuffle)
    fitter2 = get_fitter_from_args(args.shape2, args.priors2,
                                   args.sigma_prior2)
    fits1 = get_all_fits(data1, fitter1)
    fits2 = get_all_fits(data2, fitter2)

    print_diff_points(data1, fitter1, fits1, data2, fitter2, fits2,
                      args.ndiffs)
    fig = plot_comparison_scatter(data1, fitter1, fits1,
                                  data2, fitter2, fits2)

    filename = args.filename
    if filename is None:
        ensure_dir(results_dir())
        filename = join(results_dir(), 'shape_comparison.png')
    save_figure(fig, filename)

    if args.show:
        plt.show(block=True)
def show_loo_score():
    """Plot the predictions returned by a ``loo=True`` fit for every point.

    Fits ``x``/``y`` (free names defined outside this function) with
    ``loo=True`` and passes the third returned value to ``plot_one_series``
    as ``test_preds``. No fitted curve is drawn (``theta=None``). The figure
    is saved as ``RP/methods-4-R2-score.png``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Drawing LOO prediction for all points and R2 score...')
    # The fitted theta is deliberately discarded: the plot receives theta=None.
    _, _, test_preds, _ = fitter.fit(x, y, loo=True)
    fig = plot_one_series(series, shape, theta=None, yrange=yrange,
                          test_preds=test_preds)
    save_figure(fig, 'RP/methods-4-R2-score.png', under_results=True)
ticks = ax.get_yticks() ticks = np.array([ticks[0], ticks[-1]]) ax.set_yticks(ticks) ax.set_yticklabels(['{:g}'.format(t) for t in ticks], fontsize=fontsize) return fig cfg.verbosity = 1 age_scaler = LogScaler() data = GeneData.load('both').scale_ages(age_scaler) shapes = [Sigmoid('sigmoid_wide'), Poly(1,'poly1'), Poly(3,'poly3'), Spline()] GRs = [ ('ADRB1','A1C', (5, 8)), ('GLRA2','STC', (5, 12)), ('TUBA1A','V1C', (10, 14)), ] for g,r,yrange in GRs: print 'Doing {}@{}...'.format(g,r) thetas = [] for shape in shapes: series = data.get_one_series(g,r) sigma_prior = 'normal' if not isinstance(shape,Spline) else None fitter = Fitter(shape, sigma_prior=sigma_prior) theta,_,_,_ = fitter.fit(series.ages, series.single_expression) thetas.append(theta) fig = plot_one_series(series,shapes,thetas,yrange) save_figure(fig,'RP/fit-examples-{}-{}.png'.format(g,r), under_results=True)
print '*** wilcoxon signed rank p-value (one sided) = {:.3g}'.format(pval) # find examples of best improvements diffs = [(f2.LOO_score - f1.LOO_score, f1.LOO_score, f2.LOO_score, g, r) for dsname, g, r, f1, f2 in iterate_fits( nFits, yFits, R2_threshold=-1, return_keys=True)] diffs.sort(reverse=True) print 'Gene/Regions for which priors produce best R2 improvement:' for i, (delta, R2_without, R2_with, g, r) in enumerate(diffs[:10]): print '{i}) {g}@{r}, delta-R2={delta:.3g}. R2_without={R2_without:.3g}, R2_with={R2_with:.3g}'.format( **locals()) cfg.verbosity = 1 age_scaler = LogScaler() pathway = '17full' data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler) priors_name = 'sigslope80' analyze_paired_scores_with_and_without_priors() variations = [ analyze_variant(t, s) for t, s in product([False, True], [False, True]) ] fig = plot_bar(variations) save_figure(fig, 'RP/prior-variations-bar.png', under_results=True) fig = plot_pctiles(variations, min_q=5) save_figure(fig, 'RP/prior-variations-percentiles.png', under_results=True) #fig = plot_theta_diff_scatter() #save_figure(fig,'RP/prior-variations-scatter.png', under_results=True)