Example #1
0
def show_loo_prediction():
    """Fit with one sample held out and save a plot of the result.

    x, y, fitter, series, shape and yrange are not parameters; they are
    looked up from the surrounding scope.
    """
    print('Drawing LOO prediction and error...')
    held_out = 18  # index of the single sample excluded from the fit
    keep = np.arange(len(x)) != held_out
    fit_result = fitter.fit(x[keep], y[keep])
    theta, _, _, _ = fit_result
    figure = plot_one_series(series, shape, theta, yrange, train_mask=keep)
    save_figure(figure,
                'RP/methods-3-LOO-prediction.png',
                under_results=True)
Example #2
0
def show_loo_prediction():
    """Plot a fit computed on every sample except one.

    Uses x, y, fitter, series, shape and yrange from the enclosing scope.
    """
    print('Drawing LOO prediction and error...')
    left_out_index = 18
    # Boolean mask that is False only at the left-out position.
    mask = np.arange(len(x)) != left_out_index
    training_x, training_y = x[mask], y[mask]
    theta, _, _, _ = fitter.fit(training_x, training_y)
    out_path = 'RP/methods-3-LOO-prediction.png'
    save_figure(
        plot_one_series(series, shape, theta, yrange, train_mask=mask),
        out_path,
        under_results=True,
    )
Example #3
0
def show_loo_score():
    """Run the fit with loo=True and plot only the returned test predictions.

    Uses x, y, fitter, series, shape and yrange from the enclosing scope.
    The fitted theta is not plotted (theta=None is passed to the plotter).
    """
    print('Drawing LOO prediction for all points and R2 score...')
    _, _, loo_preds, _ = fitter.fit(x, y, loo=True)
    plot_options = dict(theta=None, yrange=yrange, test_preds=loo_preds)
    figure = plot_one_series(series, shape, **plot_options)
    save_figure(figure, 'RP/methods-4-R2-score.png', under_results=True)
Example #4
0
def do_gene_fits(data, gene, fitter, filename, b_show):
    """Plot a gene, save the figure and optionally display it.

    fitter is accepted but not used in this function.
    When filename is None the figure goes to 'fit.png' inside results_dir(),
    after ensure_dir() has been called on that directory.
    When b_show is true, plt.show(block=True) is called after saving.
    """
    figure = plot_gene(data, gene)
    target = filename
    if target is None:
        ensure_dir(results_dir())
        target = join(results_dir(), 'fit.png')
    print('Saving figure to {}'.format(target))
    save_figure(figure, target)
    if not b_show:
        return
    plt.show(block=True)
Example #5
0
def do_one_fit(series, fitter, loo, filename, b_show):
    """Plot one series, with a fit when a fitter is given, and save it.

    With fitter None the raw series is plotted. Otherwise the fitter is run
    on series.ages / series.single_expression and its theta and LOO
    predictions are handed to the plotter.
    When filename is None the figure goes to 'fit.png' inside results_dir().
    When b_show is true, plt.show(block=True) is called after saving.
    """
    if fitter is None:
        figure = plot_one_series(series)
    else:
        theta, _sigma, loo_preds, _ = fitter.fit(series.ages,
                                                 series.single_expression,
                                                 loo=loo)
        # NOTE(review): the LOO predictions are passed as the 4th positional
        # argument; confirm that matches plot_one_series' signature.
        figure = plot_one_series(series, fitter.shape, theta, loo_preds)

    target = filename
    if target is None:
        ensure_dir(results_dir())
        target = join(results_dir(), 'fit.png')
    save_figure(figure, target, print_filename=True)

    if not b_show:
        return
    plt.show(block=True)
Example #6
0
def analyze_pathway(pathway, data, fitter, fits, html_only=False):
    """Analyze every region of a pathway and save the summary figures.

    For each name in data.region_names a correlation heat map is saved, and
    the per-region tuples are merged into one dict. After the loop the best
    improvements are printed and a scatter plot plus two bar plots are saved.
    All figures go under 'RP' with b_close=True and under_results=True.
    html_only is accepted but not used in this function.
    """
    banner = 80 * '='
    print(banner)
    print('Analyzing pathway {}'.format(pathway))
    print(banner)

    def _save(figure, basename):
        # Shared save options for every figure produced here.
        save_figure(figure,
                    join('RP', basename),
                    b_close=True,
                    under_results=True)

    merged = {}
    for region in data.region_names:
        region_tuples, correlations = analyze_one_region(
            data, fitter, fits, region)
        heat_map = plot_gene_correlations_single_region(
            correlations, region, data.gene_names)
        _save(heat_map,
              'correlation-heat-map-{}-{}.png'.format(region, pathway))
        merged.update(region_tuples)

    print_best_improvements(merged)

    tuples = merged.values()
    # Only the first two entries of each tuple feed the scatter plot.
    pairs = [(entry[0], entry[1]) for entry in tuples]
    _save(plot_comparison_scatter(pairs, pathway),
          'correlation-diff-scatter-{}.png'.format(pathway))
    _save(plot_comparison_bar(tuples),
          'correlation-diff-bar-{}.png'.format(pathway))
    _save(plot_comparison_bar(tuples, several_levels=True),
          'correlation-diff-bar-several-levels-{}.png'.format(pathway))
Example #7
0
def fit_serveral_genes(series, fitter, loo, filename, b_show):
    """Plot a multi-gene series, with a fit when a fitter is given.

    With a fitter, it is run on series.ages / series.expression, the second
    element of its result is printed as L, and the fit is plotted.
    When filename is None the figure goes to 'fits.png' inside results_dir().
    When b_show is true, plt.show(block=True) is called after saving.
    """
    if fitter is None:
        figure = plot_series(series)
    else:
        theta, L, loo_preds, _ = fitter.fit(series.ages,
                                            series.expression,
                                            loo=loo)
        print('L = {}'.format(L))
        figure = plot_series(series, fitter.shape, theta, loo_preds)

    target = filename
    if target is None:
        ensure_dir(results_dir())
        target = join(results_dir(), 'fits.png')
    print('Saving figure to {}'.format(target))
    save_figure(figure, target)

    if not b_show:
        return
    plt.show(block=True)
def analyze_pathway(pathway, data, fitter, fits, html_only=False):
    """Produce per-region heat maps and pathway-level comparison plots.

    Each region in data.region_names is analyzed and its heat map saved.
    The per-region tuples are accumulated, the best improvements printed,
    and a scatter plot and two bar plots saved under 'RP'.
    html_only is accepted but not used in this function.
    """
    separator = 80 * '='
    for line in (separator, 'Analyzing pathway {}'.format(pathway), separator):
        print(line)

    # Every figure is saved with the same two options.
    save_opts = dict(b_close=True, under_results=True)

    collected = {}
    for region in data.region_names:
        per_region, correlations = analyze_one_region(data, fitter, fits, region)
        figure = plot_gene_correlations_single_region(correlations, region, data.gene_names)
        heat_map_name = 'correlation-heat-map-{}-{}.png'.format(region, pathway)
        save_figure(figure, join('RP', heat_map_name), **save_opts)
        collected.update(per_region)

    print_best_improvements(collected)

    tuples = collected.values()
    pairs = [(item[0], item[1]) for item in tuples]

    figure = plot_comparison_scatter(pairs, pathway)
    scatter_name = 'correlation-diff-scatter-{}.png'.format(pathway)
    save_figure(figure, join('RP', scatter_name), **save_opts)

    figure = plot_comparison_bar(tuples)
    bar_name = 'correlation-diff-bar-{}.png'.format(pathway)
    save_figure(figure, join('RP', bar_name), **save_opts)

    figure = plot_comparison_bar(tuples, several_levels=True)
    levels_name = 'correlation-diff-bar-several-levels-{}.png'.format(pathway)
    save_figure(figure, join('RP', levels_name), **save_opts)
Example #9
0
def plot_means(dataset):
    """Plot the per-age mean expression with a +/- one-std band and save it.

    For each index along dataset.ages, the nan-aware mean and std of
    dataset.expression[i, :, :] are computed. The figure also shows the
    overall min/max expression as dashed lines and a vertical line at
    scaler.scale(0). cfg, scaler and dev_stages come from the enclosing scope.
    """
    ages = dataset.ages
    expression = dataset.expression
    min_age = min(ages)
    max_age = max(ages)
    min_expression = np.nanmin(expression.flat)
    max_expression = np.nanmax(expression.flat)

    center = np.empty(ages.shape)
    std_plus = np.empty(ages.shape)
    std_minus = np.empty(ages.shape)
    for idx, _ in enumerate(ages):
        values = expression[idx, :, :].flat
        mean = nanmean(values)
        spread = nanstd(values)
        center[idx] = mean
        std_plus[idx] = mean + spread
        std_minus[idx] = mean - spread

    fig = plt.figure()
    ax = fig.add_axes([0.08, 0.15, 0.85, 0.8])

    ax.set_ylabel('expression level', fontsize=cfg.fontsize)
    ax.set_xlabel('age', fontsize=cfg.fontsize)
    title = 'Mean expression across all genes - {}'.format(dataset.name)
    ax.set_title(title, fontsize=cfg.fontsize)

    # Development stages become the x tick positions and labels.
    stages = [stage.scaled(scaler) for stage in dev_stages]
    tick_positions = [stage.central_age for stage in stages]
    tick_labels = [stage.short_name for stage in stages]
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels,
                       fontsize=cfg.xtick_fontsize,
                       fontstretch='condensed',
                       rotation=90)
    ax.set_xlim([min_age, max_age])

    # Vertical line at the scaled age 0.
    # NOTE(review): the y-limits are read before any data is drawn; confirm
    # that the resulting line extent is the intended one.
    ymin, ymax = ax.get_ylim()
    birth_age = scaler.scale(0)
    ax.plot([birth_age, birth_age], [ymin, ymax], '--', color='0.85')

    for level in (min_expression, max_expression):
        ax.plot([min_age, max_age], [level, level], '--g')
    ax.plot(ages, center, 'bx')
    for band in (std_plus, std_minus):
        ax.plot(ages, band, 'g-')

    out_name = 'mean-expression-{}.png'.format(dataset.name)
    save_figure(fig, out_name, under_results=True)
Example #10
0
def fit_serveral_genes(series, fitter, loo, filename, b_show):
    """Save a plot of the series, overlaying a fit if a fitter is supplied.

    The fitter (when not None) is run on series.ages and series.expression
    with the given loo flag; the second element of its result is printed.
    A None filename falls back to 'fits.png' inside results_dir().
    When b_show is true, plt.show(block=True) is called after saving.
    """
    have_fitter = fitter is not None
    if have_fitter:
        fit_result = fitter.fit(series.ages, series.expression, loo=loo)
        theta, L, loo_preds, _ = fit_result
        print('L = {}'.format(L))
        figure = plot_series(series, fitter.shape, theta, loo_preds)
    else:
        figure = plot_series(series)

    out_path = filename
    if out_path is None:
        ensure_dir(results_dir())
        out_path = join(results_dir(), 'fits.png')
    print('Saving figure to {}'.format(out_path))
    save_figure(figure, out_path)

    if b_show:
        plt.show(block=True)
Example #11
0
def plot_means(dataset):
    """Save a figure of mean expression per age with a one-std envelope.

    The mean and std are the nan-aware statistics of
    dataset.expression[i, :, :] for each index i along dataset.ages.
    cfg, scaler and dev_stages are read from the enclosing scope.
    """
    age_axis = dataset.ages
    lo_age, hi_age = min(age_axis), max(age_axis)
    lo_expr = np.nanmin(dataset.expression.flat)
    hi_expr = np.nanmax(dataset.expression.flat)

    center = np.empty(age_axis.shape)
    upper = np.empty(age_axis.shape)
    lower = np.empty(age_axis.shape)
    for row, _age in enumerate(age_axis):
        flat_values = dataset.expression[row, :, :].flat
        mu, sd = nanmean(flat_values), nanstd(flat_values)
        center[row] = mu
        upper[row] = mu + sd
        lower[row] = mu - sd

    fig = plt.figure()
    ax = fig.add_axes([0.08, 0.15, 0.85, 0.8])

    label_font = cfg.fontsize
    ax.set_ylabel("expression level", fontsize=label_font)
    ax.set_xlabel("age", fontsize=label_font)
    ax.set_title(
        "Mean expression across all genes - {}".format(dataset.name),
        fontsize=cfg.fontsize,
    )

    # x ticks are placed at, and labelled with, the scaled development stages
    scaled_stages = [stage.scaled(scaler) for stage in dev_stages]
    ax.set_xticks([s.central_age for s in scaled_stages])
    ax.set_xticklabels(
        [s.short_name for s in scaled_stages],
        fontsize=cfg.xtick_fontsize,
        fontstretch="condensed",
        rotation=90,
    )
    ax.set_xlim([lo_age, hi_age])

    # vertical marker at the scaled age 0
    # NOTE(review): get_ylim() runs before the data is plotted; confirm the
    # marker's vertical extent is what is wanted.
    y_low, y_high = ax.get_ylim()
    birth_age = scaler.scale(0)
    ax.plot([birth_age, birth_age], [y_low, y_high], "--", color="0.85")

    age_span = [lo_age, hi_age]
    ax.plot(age_span, [lo_expr, lo_expr], "--g")
    ax.plot(age_span, [hi_expr, hi_expr], "--g")
    ax.plot(age_axis, center, "bx")
    ax.plot(age_axis, upper, "g-")
    ax.plot(age_axis, lower, "g-")

    save_figure(
        fig,
        "mean-expression-{}.png".format(dataset.name),
        under_results=True,
    )
Example #12
0
 def run(self, duration):
     """Drive the agent until it deactivates and return its totals.

     The environment state is initialised from self.child.run() and the
     agent is switched on. While the agent reports itself active it
     perceives the state and acts on it once per iteration. Once the
     elapsed wall-clock time reaches ``duration`` seconds, the agent is
     told to go home and performs one extra perceive/act step in that
     same iteration.

     The elapsed time is measured with timedelta.total_seconds(), so the
     limit also works for runs longer than one day (timedelta.seconds
     only holds the sub-day remainder).

     Args:
         duration: time limit in seconds before the agent is sent home.

     Returns:
         (reward, cleaned) as read from the agent just before it is reset.
     """
     def _render():
         # Draw the current state together with the agent's position.
         save_figure(self.env.x, self.env.y, self.env.state,
                     [self.agent.position[0]], [self.agent.position[1]])

     def _step():
         # One perceive/act cycle, then plot the frame or pause for 0.5 s.
         self.agent.precepts(self.env.state)
         self.env.state = self.agent.effectors(self.env.state)
         if self.plot:
             _render()
         else:
             sleep(0.5)

     start = datetime.now()
     self.env.state = self.child.run()
     self.agent.on()
     if self.plot:
         _render()
     while self.agent.active:
         _step()
         if (datetime.now() - start).total_seconds() >= duration:
             self.agent.gohome()
             _step()
     # Read the totals before reset() is called on the agent.
     rwd, clnd = self.agent.reward, self.agent.cleaned
     self.agent.reset()
     return rwd, clnd
Example #13
0
# One Fitter per entry of `shapes`, each created with sigma_prior='normal'.
fitters = [Fitter(shape, sigma_prior='normal') for shape in shapes]

#####################################################
# Example fits
#####################################################
# Each entry is (gene, region, yrange); yrange is forwarded to plot_one_series.
GRs = [
    ('ABHD4', 'STC', (5, 8)),
]
for g, r, yrange in GRs:
    for fitter in fitters:
        print 'Doing {}@{}...'.format(g, r)
        series = data.get_one_series(g, r)
        # Only the first element of the fit result (theta) is used here.
        theta, _, _, _ = fitter.fit(series.ages, series.single_expression)
        fig = plot_one_series(series, fitter.shape, theta, yrange)
        # One output file per (shape, gene, region) combination.
        save_figure(fig,
                    'RP/fit-examples-{}-{}-{}.png'.format(
                        fitter.shape.cache_name(), g, r),
                    under_results=True)

#####################################################
# Comparison for whole pathway
#####################################################
pathway = '17full'
# From here on `data` refers to the pathway-restricted dataset.
data = data.restrict_pathway(pathway)
# One fits collection per fitter, in the same order as `fitters`.
fits = [
    get_all_fits(data, fitter, allow_new_computation=False)
    for fitter in fitters
]

fig = plot_comparison_bar(data, shapes, fits)
save_figure(fig,
            'RP/sigslope-comparison-bar-{}.png'.format(data.pathway),
Example #14
0
    ('HTR1A', 'MFC'),
]
# Passed as n_bins / n_samples to the bootstrap helpers below.
n_bins = 50
n_samples = 10

disable_all_warnings()   
cfg.verbosity = 1
age_scaler = LogScaler()

# `pathway` and `gene_regions` are defined before this point in the script.
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
shape = Sigmoid(priors='sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter, allow_new_computation=False)

# Sub-directory used as the prefix of every output path below.
dirname = 'bootstrap'
# `fits` is rebound to the value returned by add_change_distributions.
fits = add_change_distributions(data, fitter, fits, n_bins=n_bins)

fig = plot_bootstrap_onset_variance(data, fits)
save_figure(fig, '{}/onset-variance-{}.png'.format(dirname, pathway), under_results=True, b_close=True)

fig = plot_change_width_scatter(data, fitter, fits)
save_figure(fig, '{}/width-scatter-{}.png'.format(dirname, pathway), under_results=True, b_close=True)

# For each (gene, region): look up its fit through the region's dataset name
# and save two bootstrap figures.
for g,r in gene_regions:
    ds_name = data.region_to_dataset()[r]
    fit = fits[ds_name][(g,r)]
    fig = plot_bootstrap_fits(data, fit, n_bins=n_bins, n_samples=n_samples)
    save_figure(fig, '{}/fits-{}-{}.png'.format(dirname,g,r), under_results=True, b_close=True)
    fig = plot_bootstrap_histograms(data, fit, n_bins=n_bins, n_samples=n_samples)
    save_figure(fig, '{}/transition-distribution-{}-{}.png'.format(dirname,g,r), under_results=True, b_close=True)
Example #15
0
def basic_fit():
    """Fit the series and save a plot of the fitted curve.

    x, y, fitter, series, shape and yrange come from the enclosing scope.
    """
    print('Drawing basic fit...')
    fit_result = fitter.fit(x, y)
    theta, _, _, _ = fit_result
    figure = plot_one_series(series, shape, theta, yrange)
    save_figure(figure,
                'RP/methods-1-basic-fit.png',
                under_results=True)
Example #16
0
    r,pval = spearmanr(onset_times, range(len(regions)))
    return r,pval,lst_R2

# Pathways processed by the loop below.
lst_pathways = [
    'serotonin',
    'dopamine',
]

# Per pathway: load its fits, score every gene across the listed regions,
# then save a histogram, a scatter plot and an HTML summary.
for pathway in lst_pathways:
    data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
    shape = Sigmoid(priors='sigmoid_wide')
    fitter = Fitter(shape, sigma_prior='normal')
    fits = get_all_fits(data, fitter, allow_new_computation=False)
    # R2_threshold = 0.5 YYY problem - we might be using bad fits.
    
    regions = ['OFC', 'M1C', 'S1C', 'IPC', 'V1C']
    
    # One (gene, r, pval, lst_R2) tuple per gene.
    scores = []
    for g in data.gene_names:
        r,pval,lst_R2 = get_gene_correlation(fits,g,regions)
        scores.append( (g,r,pval,lst_R2) )
    
    fig = plot_correlation_histogram(scores,pathway)
    # The template has a single placeholder, so only one argument is passed.
    save_figure(fig,'{}/gradual-maturation-hist.png'.format(pathway), under_results=True, b_close=True)
    
    # Only np.mean is used as the R2 aggregate; the file name carries its name.
    for fR2 in [np.mean]: #[min,max,np.mean]:
        fig = plot_scatter(scores, pathway, fR2)
        save_figure(fig,'{}/gradual-maturation-scatter-{}.png'.format(pathway,fR2.__name__), under_results=True, b_close=True)
    
    create_top_correlations_html(data,fitter,fits,scores,regions)
        rho, pval = paired_spearman(pathway_mu)
        scores.append( (-np.log10(pval), pval, rho, pathway) )
    scores.sort(reverse=True) 
    save_scores(singles, scores, order)
    
##############################################################
# main
##############################################################
if __name__ == '__main__':
    # Command-line entry point: either run the timing-vs-region-order
    # analysis for a pathway list, or draw one pathway and save its figure.
    cfg.verbosity = 1

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--list',
        help='Pathways list name. Default=brain_go_num_genes_min_15',
        default='brain_go_num_genes_min_15',
        choices=['all'] + pathway_lists.all_pathway_lists(),
    )
    parser.add_argument(
        '--cortex_only',
        help='Use only cortical regions',
        action='store_true',
    )
    parser.add_argument(
        '--draw',
        help='Draw plot for this pathway and exit',
    )
    args = parser.parse_args()

    # Region order depends on whether only cortical regions were requested.
    order = ('V1C A1C S1C M1C DFC MFC OFC' if args.cortex_only
             else 'MD STR V1C OFC').split()

    singles = SingleRegion(args.list)
    if args.draw is not None:
        pathway = args.draw
        fig = plot_pathway(singles, pathway, order)
        filename = 'spearman-{}.png'.format(pathway)
        save_figure(fig, filename, under_results=True)
    else:
        timing_vs_region_order(singles, order)
Example #18
0
cfg.verbosity = 1
age_scaler = LogScaler()

data = GeneData.load('both').scale_ages(age_scaler)

# Shapes that are fitted to every example below and plotted together.
shapes = [
    Sigmoid('sigmoid_wide'),
    Poly(1, 'poly1'),
    Poly(3, 'poly3'),
    Spline()
]
# Each entry is (gene, region, yrange); yrange is forwarded to plot_one_series.
GRs = [
    ('ADRB1', 'A1C', (5, 8)),
    ('GLRA2', 'STC', (5, 12)),
    ('TUBA1A', 'V1C', (10, 14)),
]

for g, r, yrange in GRs:
    print 'Doing {}@{}...'.format(g, r)
    # Fitted theta per shape, in the same order as `shapes`.
    thetas = []
    for shape in shapes:
        series = data.get_one_series(g, r)
        # Spline shapes get no sigma prior; all other shapes use 'normal'.
        sigma_prior = 'normal' if not isinstance(shape, Spline) else None
        fitter = Fitter(shape, sigma_prior=sigma_prior)
        theta, _, _, _ = fitter.fit(series.ages, series.single_expression)
        thetas.append(theta)
    # `series` is the one fetched in the last inner iteration (same g, r).
    fig = plot_one_series(series, shapes, thetas, yrange)
    save_figure(fig,
                'RP/fit-examples-{}-{}.png'.format(g, r),
                under_results=True)
Example #19
0
age_scaler = LogScaler()
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)

# Shapes under comparison; shapes[0] is the reference for the scatter plots.
sigmoid = Sigslope(priors='sigslope80')
spline = Spline()
poly1 = Poly(1,priors='poly1')
poly2 = Poly(2,priors='poly2')
poly3 = Poly(3,priors='poly3')
shapes = [sigmoid, spline, poly1, poly2, poly3]

# Shapes reporting has_special_fitting() get no sigma prior; others use 'normal'.
fitters = [Fitter(shape, sigma_prior='normal' if not shape.has_special_fitting() else None) for shape in shapes]
# One fits collection per fitter, in the same order as `shapes`.
fits = [get_all_fits(data,fitter,allow_new_computation=False) for fitter in fitters]

fig = plot_comparison_bar(data, shapes, fits)
save_figure(fig,'RP/shape-comparison-bar-{}.png'.format(data.pathway), under_results=True)

#fig = plot_comparison_bar(data, shapes, fits, threshold_percentile=50)
#save_figure(fig,'RP/shape-comparison-bar-{}-top-half.png'.format(data.pathway), under_results=True)

fig = plot_comparison_over_R2_score(data, shapes, fits)
save_figure(fig,'RP/shape-comparison-vs-R2-{}.png'.format(data.pathway), under_results=True)

# Same comparison again with zoom=(0.3,1), saved under a '-zoom' file name.
fig = plot_comparison_over_R2_score(data, shapes, fits, zoom=(0.3,1))
save_figure(fig,'RP/shape-comparison-vs-R2-{}-zoom.png'.format(data.pathway), under_results=True)

# Scatter of the first shape against each of the others.
# The file name uses the shapes' string form.
for i in xrange(1,len(shapes)):
    fig = plot_comparison_scatter(data,shapes[0],fits[0],shapes[i],fits[i])
    save_figure(fig,'RP/scatter-{}-{}-{}.png'.format(shapes[0],shapes[i],pathway), under_results=True)

plt.close('all')
Example #20
0
def basic_fit():
    """Run the fitter on (x, y) and save the plotted fit.

    Relies on x, y, fitter, series, shape and yrange from the enclosing scope.
    """
    print('Drawing basic fit...')
    theta, _, _, _ = fitter.fit(x, y)
    out_path = 'RP/methods-1-basic-fit.png'
    save_figure(
        plot_one_series(series, shape, theta, yrange),
        out_path,
        under_results=True,
    )
Example #21
0
def annotate_parameters():
    """Fit the series and save a plot drawn with b_annotate=True.

    x, y, fitter, series, shape and yrange come from the enclosing scope.
    """
    print('Drawing fit with parameters...')
    fit_result = fitter.fit(x, y)
    theta, _, _, _ = fit_result
    figure = plot_one_series(series, shape, theta, yrange, b_annotate=True)
    save_figure(figure,
                'RP/methods-2-sigmoid-params.png',
                under_results=True)
Example #22
0
def annotate_parameters():
    """Save the fitted curve with annotation switched on in the plotter.

    Relies on x, y, fitter, series, shape and yrange from the enclosing scope.
    """
    print('Drawing fit with parameters...')
    theta, _, _, _ = fitter.fit(x, y)
    plot_options = dict(b_annotate=True)
    out_path = 'RP/methods-2-sigmoid-params.png'
    save_figure(
        plot_one_series(series, shape, theta, yrange, **plot_options),
        out_path,
        under_results=True,
    )
Example #23
0
disable_all_warnings()
cfg.verbosity = 1
age_scaler = LogScaler()

# `pathway` and `n_bins` are defined before this point in the script.
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
shape = Sigmoid(priors='sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter, allow_new_computation=False)

# Sub-directory used as the prefix of every output path below.
dirname = 'bootstrap'
# `fits` is rebound to the value returned by add_change_distributions.
fits = add_change_distributions(data, fitter, fits, n_bins=n_bins)

fig = plot_bootstrap_onset_variance(data, fits)
save_figure(fig,
            '{}/onset-variance-{}.png'.format(dirname, pathway),
            under_results=True,
            b_close=True)

fig = plot_change_width_scatter(data, fitter, fits)
save_figure(fig,
            '{}/width-scatter-{}.png'.format(dirname, pathway),
            under_results=True,
            b_close=True)

for g, r in gene_regions:
    ds_name = data.region_to_dataset()[r]
    fit = fits[ds_name][(g, r)]
    fig = plot_bootstrap_fits(data, fit, n_bins=n_bins, n_samples=n_samples)
    save_figure(fig,
                '{}/fits-{}-{}.png'.format(dirname, g, r),
                under_results=True,
Example #24
0
shape = Sigmoid('sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter, allow_new_computation=False)
fits_shuffled = get_all_fits(data_shuffled,
                             fitter,
                             allow_new_computation=False)
R2_pairs = [(fit.LOO_score, fit2.LOO_score)
            for fit, fit2 in iterate_fits(fits, fits_shuffled)]
R2 = np.array([r for r, r_shuffled in R2_pairs])
R2_shuffled = np.array([r_shuffled for r, r_shuffled in R2_pairs])

name = '{}-{}'.format(data.pathway, shape.cache_name())
fig = plot_score_distribution(R2, R2_shuffled)
save_figure(fig,
            'RP/R2-distribution-{}.png'.format(name),
            under_results=True,
            b_close=True)

mu_shuffled = np.mean(R2_shuffled)
std_shuffled = np.std(R2_shuffled)
z_scores = (R2 - mu_shuffled) / std_shuffled
fig = plot_z_scores(z_scores)
save_figure(fig,
            'RP/R2-z-scores-{}.png'.format(name),
            under_results=True,
            b_close=True)

T, signed_rank_p_value = wilcoxon(R2, R2_shuffled)
maxShuffled = R2_shuffled.max()
nAbove = np.count_nonzero(R2 > maxShuffled)
nTotal = len(R2)
Example #25
0
age_scaler = LogScaler()
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
# Same loading pipeline as `data`, with .shuffle() applied at the end.
data_shuffled = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler).shuffle()

shape = Sigmoid('sigmoid_wide')
fitter = Fitter(shape,sigma_prior='normal')
fits = get_all_fits(data,fitter,allow_new_computation=False)
fits_shuffled = get_all_fits(data_shuffled,fitter,allow_new_computation=False)
# Pair up the LOO score of each fit with that of its shuffled counterpart.
R2_pairs = [(fit.LOO_score,fit2.LOO_score) for fit,fit2 in iterate_fits(fits,fits_shuffled)]
R2 = np.array([r for r,r_shuffled in R2_pairs])
R2_shuffled = np.array([r_shuffled for r,r_shuffled in R2_pairs])

# Suffix shared by the output file names below.
name = '{}-{}'.format(data.pathway,shape.cache_name())
fig = plot_score_distribution(R2,R2_shuffled)
save_figure(fig,'RP/R2-distribution-{}.png'.format(name), under_results=True, b_close=True)

# z-score of each real R2 against the mean and std of the shuffled scores.
mu_shuffled = np.mean(R2_shuffled)
std_shuffled = np.std(R2_shuffled)
z_scores = (R2-mu_shuffled)/std_shuffled
fig = plot_z_scores(z_scores)
save_figure(fig,'RP/R2-z-scores-{}.png'.format(name), under_results=True, b_close=True)

T, signed_rank_p_value = wilcoxon(R2, R2_shuffled)
# Percentage of real scores that exceed the largest shuffled score.
maxShuffled = R2_shuffled.max()
nAbove = np.count_nonzero(R2 > maxShuffled)
nTotal = len(R2)
pct = 100.0 * nAbove/nTotal
# Text file written next to the figures, under results_dir().
filename = join(results_dir(),'RP/R2-distribution-{}.txt'.format(name))
with open(filename,'w') as f:
    print('shuffled = {:.2g} +/- {:.2g}'.format(mu_shuffled,std_shuffled), file=f)
Example #26
0
# Per pathway: load its fits, score every gene across the listed regions,
# then save a histogram, a scatter plot and an HTML summary.
for pathway in lst_pathways:
    data = GeneData.load('both').restrict_pathway(pathway).scale_ages(
        age_scaler)
    shape = Sigmoid(priors='sigmoid_wide')
    fitter = Fitter(shape, sigma_prior='normal')
    fits = get_all_fits(data, fitter, allow_new_computation=False)
    # R2_threshold = 0.5 YYY problem - we might be using bad fits.

    regions = ['OFC', 'M1C', 'S1C', 'IPC', 'V1C']

    # One (gene, r, pval, lst_R2) tuple per gene.
    scores = []
    for g in data.gene_names:
        r, pval, lst_R2 = get_gene_correlation(fits, g, regions)
        scores.append((g, r, pval, lst_R2))

    fig = plot_correlation_histogram(scores, pathway)
    # The template has a single placeholder, so only one argument is passed.
    save_figure(fig,
                '{}/gradual-maturation-hist.png'.format(pathway),
                under_results=True,
                b_close=True)

    # Only np.mean is used as the R2 aggregate; the file name carries its name.
    for fR2 in [np.mean]:  #[min,max,np.mean]:
        fig = plot_scatter(scores, pathway, fR2)
        save_figure(fig,
                    '{}/gradual-maturation-scatter-{}.png'.format(
                        pathway, fR2.__name__),
                    under_results=True,
                    b_close=True)

    create_top_correlations_html(data, fitter, fits, scores, regions)
    nScores, yScores = zip(*score_pairs)
    
    _, pval = scipy.stats.wilcoxon(nScores, yScores)
    pval = pval/2  # one sided p-value
    print '*** wilcoxon signed rank p-value (one sided) = {:.3g}'.format(pval)
    
    # find examples of best improvements
    diffs = [(f2.LOO_score-f1.LOO_score, f1.LOO_score, f2.LOO_score, g, r) for dsname,g,r,f1,f2 in iterate_fits(nFits, yFits, R2_threshold=-1, return_keys=True)]
    diffs.sort(reverse=True)
    print 'Gene/Regions for which priors produce best R2 improvement:'
    for i,(delta,R2_without, R2_with, g,r) in enumerate(diffs[:10]):
        print '{i}) {g}@{r}, delta-R2={delta:.3g}. R2_without={R2_without:.3g}, R2_with={R2_with:.3g}'.format(**locals())


# Script body: set up the module-level data, then run the analyses defined
# earlier in the file.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
priors_name = 'sigslope80'

analyze_paired_scores_with_and_without_priors()

# analyze_variant is called for all four (bool, bool) combinations.
variations = [analyze_variant(t,s) for t,s in product([False,True],[False,True])]
fig = plot_bar(variations)
save_figure(fig,'RP/prior-variations-bar.png', under_results=True)
fig = plot_pctiles(variations, min_q=5)
save_figure(fig,'RP/prior-variations-percentiles.png', under_results=True)
#fig = plot_theta_diff_scatter()
#save_figure(fig,'RP/prior-variations-scatter.png', under_results=True)

Example #28
0
# `data` and `all_data` are defined before this point in the script.
shape = Sigmoid(priors='sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter)

R2_threshold = 0.5
# The whole set of plots is produced twice: once with b_unique False, once True.
for b_unique in [False, True]:
    dct_pathways = load_17_pathways_breakdown(b_unique)
    # Extra entry whose gene list is None.
    dct_pathways['17 pathways'] = None
    # One figure per pathway, each plotted from a single-entry dict.
    for name, genes in dct_pathways.iteritems():
        fig = plot_onset_times(all_data, data, fitter, fits, {name: genes},
                               R2_threshold, b_unique)
        str_dir = 'unique' if b_unique else 'overlapping'
        str_unique = ' (unique)' if b_unique else ''
        filename = 'RP/{}/change-distributions-{}{}.png'.format(
            str_dir, name, str_unique)
        save_figure(fig, filename, under_results=True)

    # selected plots
    lst_pathways = [
        '17 pathways', 'Amphetamine addiction', 'Cholinergic synapse',
        'Cocaine addiction', 'Glutamatergic synapse'
    ]
    # Keep only the selected pathways; a name missing from the dict raises KeyError.
    dct_pathways = {k: dct_pathways[k] for k in lst_pathways}
    fig = plot_onset_times(all_data, data, fitter, fits, dct_pathways,
                           R2_threshold, b_unique)
    str_dir = 'unique' if b_unique else 'overlapping'
    str_unique = ' (unique)' if b_unique else ''
    filename = 'RP/{}/selected-change-distributions{}.png'.format(
        str_dir, str_unique)
    save_figure(fig, filename, under_results=True)
Example #29
0
# `sigmoid`, `sigslope` and `data` are defined before this point in the script.
shapes = [sigmoid, sigslope]
# One Fitter per shape, each created with sigma_prior='normal'.
fitters = [Fitter(shape, sigma_prior='normal') for shape in shapes]

#####################################################
# Example fits
#####################################################
# Each entry is (gene, region, yrange); yrange is forwarded to plot_one_series.
GRs = [
    ('ABHD4','STC', (5, 8)), 
]
for g,r,yrange in GRs:
    for fitter in fitters:
        print 'Doing {}@{}...'.format(g,r)
        series = data.get_one_series(g,r)
        # Only the first element of the fit result (theta) is used here.
        theta,_,_,_ = fitter.fit(series.ages, series.single_expression)
        fig = plot_one_series(series, fitter.shape, theta, yrange)
        # One output file per (shape, gene, region) combination.
        save_figure(fig,'RP/fit-examples-{}-{}-{}.png'.format(fitter.shape.cache_name(), g,r), under_results=True)


#####################################################
# Comparison for whole pathway
#####################################################
pathway = '17full'
# From here on `data` refers to the pathway-restricted dataset.
data = data.restrict_pathway(pathway)
# One fits collection per fitter, in the same order as `fitters`.
fits = [get_all_fits(data,fitter,allow_new_computation=False) for fitter in fitters]


fig = plot_comparison_bar(data, shapes, fits)
save_figure(fig,'RP/sigslope-comparison-bar-{}.png'.format(data.pathway), under_results=True)

fig = plot_comparison_over_R2_score(data, shapes, fits)
save_figure(fig,'RP/sigslope-comparison-vs-R2-{}.png'.format(data.pathway), under_results=True)
Example #30
0
if __name__ == '__main__':
    # Command-line entry point: fit two shape configurations, print the
    # points where they differ most, and save a comparison scatter plot.
    disable_all_warnings()

    # Options for the first fit come from the common parser; the options
    # added here describe the second shape it is compared against.
    parser = get_common_parser()
    parser.add_argument('--shape2', required=True, help='The shape to compare against', choices=allowed_shape_names())
    parser.add_argument('--scaling2', help='The scaling used when fitting shape2. Default: none', choices=allowed_scaler_names())
    parser.add_argument('--sigma_prior2', help='Prior to use for 1/sigma when fitting shape2. Default: None', choices=get_allowed_priors(is_sigma=True))
    parser.add_argument('--priors2', help='The priors used for theta when fitting shape2. Default: None', choices=get_allowed_priors())
    # The help text names the file that the fallback below actually writes.
    parser.add_argument('--filename', help='Where to save the figure. Default: results/shape_comparison.png')
    parser.add_argument('--show', help='Show figure and wait before exiting', action='store_true')
    parser.add_argument('--ndiffs', type=int, default=5, help='Number of top diffs to show. Default=5.')
    args = parser.parse_args()

    data1, fitter1 = process_common_inputs(args)
    # The second dataset reuses the first one's dataset, pathway, from_age
    # and shuffle options; only the scaling differs.
    data2 = get_data_from_args(args.dataset, args.pathway, args.from_age, args.scaling2, args.shuffle)
    fitter2 = get_fitter_from_args(args.shape2, args.priors2, args.sigma_prior2)

    fits1 = get_all_fits(data1,fitter1)
    fits2 = get_all_fits(data2,fitter2)

    print_diff_points(data1,fitter1,fits1, data2,fitter2,fits2, args.ndiffs)

    fig = plot_comparison_scatter(data1,fitter1,fits1, data2,fitter2,fits2)

    filename = args.filename
    if filename is None:
        # No --filename given: write into results_dir(), creating it first.
        ensure_dir(results_dir())
        filename = join(results_dir(), 'shape_comparison.png')
    save_figure(fig, filename)

    if args.show:
        plt.show(block=True)
Example #31
0
def show_loo_score():
    """Fit with loo=True and save a plot of the test predictions only.

    Relies on x, y, fitter, series, shape and yrange from the enclosing
    scope. theta=None is passed to the plotter, so the fitted theta is unused.
    """
    print('Drawing LOO prediction for all points and R2 score...')
    fit_result = fitter.fit(x, y, loo=True)
    _, _, loo_preds, _ = fit_result
    figure = plot_one_series(series,
                             shape,
                             theta=None,
                             yrange=yrange,
                             test_preds=loo_preds)
    save_figure(figure, 'RP/methods-4-R2-score.png', under_results=True)
Example #32
0
    ticks = ax.get_yticks()
    ticks = np.array([ticks[0], ticks[-1]])
    ax.set_yticks(ticks)
    ax.set_yticklabels(['{:g}'.format(t) for t in ticks], fontsize=fontsize)
    
    return fig

cfg.verbosity = 1
age_scaler = LogScaler()

data = GeneData.load('both').scale_ages(age_scaler)

# Shapes that are fitted to every example below and plotted together.
shapes = [Sigmoid('sigmoid_wide'), Poly(1,'poly1'), Poly(3,'poly3'), Spline()]
# Each entry is (gene, region, yrange); yrange is forwarded to plot_one_series.
GRs = [
    ('ADRB1','A1C', (5, 8)), 
    ('GLRA2','STC', (5, 12)), 
    ('TUBA1A','V1C', (10, 14)),
]

for g,r,yrange in GRs:
    print 'Doing {}@{}...'.format(g,r)
    # Fitted theta per shape, in the same order as `shapes`.
    thetas = []
    for shape in shapes:
        series = data.get_one_series(g,r)
        # Spline shapes get no sigma prior; all other shapes use 'normal'.
        sigma_prior = 'normal' if not isinstance(shape,Spline) else None
        fitter = Fitter(shape, sigma_prior=sigma_prior)
        theta,_,_,_ = fitter.fit(series.ages, series.single_expression)
        thetas.append(theta)
    # `series` is the one fetched in the last inner iteration (same g, r).
    fig = plot_one_series(series,shapes,thetas,yrange)
    save_figure(fig,'RP/fit-examples-{}-{}.png'.format(g,r), under_results=True)
Example #33
0
    print '*** wilcoxon signed rank p-value (one sided) = {:.3g}'.format(pval)

    # find examples of best improvements
    diffs = [(f2.LOO_score - f1.LOO_score, f1.LOO_score, f2.LOO_score, g, r)
             for dsname, g, r, f1, f2 in iterate_fits(
                 nFits, yFits, R2_threshold=-1, return_keys=True)]
    diffs.sort(reverse=True)
    print 'Gene/Regions for which priors produce best R2 improvement:'
    for i, (delta, R2_without, R2_with, g, r) in enumerate(diffs[:10]):
        print '{i}) {g}@{r}, delta-R2={delta:.3g}. R2_without={R2_without:.3g}, R2_with={R2_with:.3g}'.format(
            **locals())


# Script body: set up the module-level data, then run the analyses defined
# earlier in the file.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
priors_name = 'sigslope80'

analyze_paired_scores_with_and_without_priors()

# analyze_variant is called for all four (bool, bool) combinations.
variations = [
    analyze_variant(t, s) for t, s in product([False, True], [False, True])
]
fig = plot_bar(variations)
save_figure(fig, 'RP/prior-variations-bar.png', under_results=True)
fig = plot_pctiles(variations, min_q=5)
save_figure(fig, 'RP/prior-variations-percentiles.png', under_results=True)
#fig = plot_theta_diff_scatter()
#save_figure(fig,'RP/prior-variations-scatter.png', under_results=True)