def analyze_paired_scores_with_and_without_priors(n_best=10):
    """Compare LOO scores of Sigslope fits made with and without priors.

    Two fitters are built: Fitter(Sigslope()) and
    Fitter(Sigslope(priors_name), 'normal'). Their fits for the module-level
    `data` are fetched with allow_new_computation=False and paired up by
    iterate_fits. The function prints:

    * half of the two-sided Wilcoxon signed-rank p-value of the paired scores
    * the `n_best` gene/region pairs with the largest score difference
      (with priors minus without priors)

    n_best -- number of top improvements to list (default 10).

    Reads module-level `data` and `priors_name`. Returns None.
    """
    nFitter = Fitter(Sigslope())
    yFitter = Fitter(Sigslope(priors_name), 'normal')

    nFits = get_all_fits(data, nFitter, allow_new_computation=False)
    yFits = get_all_fits(data, yFitter, allow_new_computation=False)

    score_pairs = [(f1.LOO_score, f2.LOO_score) for f1, f2 in iterate_fits(nFits, yFits)]
    nScores, yScores = zip(*score_pairs)

    _, pval = scipy.stats.wilcoxon(nScores, yScores)
    pval = pval / 2  # one sided p-value
    # Single-argument parenthesised print is valid under both Python 2 and 3.
    print('*** wilcoxon signed rank p-value (one sided) = {:.3g}'.format(pval))

    # find examples of best improvements
    # NOTE(review): R2_threshold=-1 presumably keeps low-scoring fits in the
    # ranking -- confirm against iterate_fits.
    diffs = [
        (f2.LOO_score - f1.LOO_score, f1.LOO_score, f2.LOO_score, g, r)
        for dsname, g, r, f1, f2 in iterate_fits(nFits, yFits, R2_threshold=-1, return_keys=True)
    ]
    diffs.sort(reverse=True)
    print('Gene/Regions for which priors produce best R2 improvement:')
    # Honour n_best instead of the previously hard-coded 10.
    for i, (delta, R2_without, R2_with, g, r) in enumerate(diffs[:n_best]):
        print('{i}) {g}@{r}, delta-R2={delta:.3g}. R2_without={R2_without:.3g}, R2_with={R2_with:.3g}'.format(
            i=i, g=g, r=r, delta=delta, R2_without=R2_without, R2_with=R2_with))
Exemple #2
0
def do_fits(data, fitter, k_of_n, add_correlations, correlations_k_of_n):
    """Print a banner naming the fitter, then return get_all_fits(...) for `data`.

    add_correlations selects the number of correlation iterations passed to
    get_all_fits: 4 when truthy, 0 otherwise. k_of_n and correlations_k_of_n
    are forwarded unchanged.
    """
    banner = """
==============================================================================================
==============================================================================================
==== Computing Fits with {}
==============================================================================================
==============================================================================================
""".format(fitter)
    print(banner)

    if add_correlations:
        iterations = 4
    else:
        iterations = 0
    return get_all_fits(
        data,
        fitter,
        k_of_n,
        n_correlation_iterations=iterations,
        correlations_k_of_n=correlations_k_of_n,
    )
Exemple #3
0
def do_fits(data, fitter, k_of_n, add_correlations, correlations_k_of_n):
    """Announce the fitter on stdout and compute all fits for `data`.

    The fits come from get_all_fits, called with 4 correlation iterations
    when add_correlations is truthy and 0 otherwise; k_of_n and
    correlations_k_of_n are passed through as given. Returns the fits.
    """
    header = """
==============================================================================================
==============================================================================================
==== Computing Fits with {}
==============================================================================================
==============================================================================================
"""
    print(header.format(fitter))

    options = dict(
        n_correlation_iterations=(4 if add_correlations else 0),
        correlations_k_of_n=correlations_k_of_n,
    )
    return get_all_fits(data, fitter, k_of_n, **options)
def plot_theta_diff_scatter(show_title=False):
    """Scatter the score change from using priors against the no-prior score.

    Fits for the module-level `data` are fetched for two fitters:
    Fitter(Sigslope(priors_name), 'normal') and Fitter(Sigslope()). For each
    pair yielded by iterate_fits, x is the LOO score without priors and y is
    (score with priors) - (score without priors). Pairs in which either score
    is None are dropped.

    show_title -- when True, add a title to the axes.

    Returns the matplotlib figure. Reads module-level `data`, `priors_name`
    and `fontsize`.
    """
    yFitter = Fitter(Sigslope(priors_name), 'normal')
    nFitter = Fitter(Sigslope())
    yFits = get_all_fits(data, yFitter)
    nFits = get_all_fits(data, nFitter)
    pairs = [(nFit.LOO_score, yFit.LOO_score) for nFit, yFit in iterate_fits(nFits, yFits)]
    diff_pairs = [(n, y - n) for n, y in pairs if n is not None and y is not None]
    n, d = zip(*diff_pairs)

    fig = plt.figure()
    ax = fig.add_axes([0.15, 0.12, 0.8, 0.8])
    ax.scatter(n, d, alpha=0.5)
    # Draw the zero line across the current x range, then restore the limits
    # in case plotting the line changed them.
    xlims = ax.get_xlim()
    ax.plot(xlims, [0, 0], 'k--')
    ax.set_xlim(*xlims)
    if show_title:
        # Axes.title is a Text object, not a method; set_title is the setter.
        ax.set_title(r'Improvement from prior on $\theta$ vs. baseline $R^2$', fontsize=fontsize)
    ax.set_xlabel(r'$R^2$(no priors)', fontsize=fontsize)
    ax.set_ylabel(r'$R^2$($\theta$) - $R^2$(no priors)', fontsize=fontsize)
    ax.tick_params(axis='both', labelsize=fontsize)
    return fig
def analyze_variant(theta, sigma):
    """Summarise LOO scores for one combination of theta/sigma priors.

    theta -- truthy to build the Sigslope shape with `priors_name`, else None.
    sigma -- truthy to give the Fitter a 'normal' sigma prior, else None.

    Fits for the module-level `data` are fetched with
    allow_new_computation=False; scores that are None are skipped. Returns a
    Bunch holding the two flags, the list of scores, and the (mu, sem) pair
    returned by bootstrap(scores, np.mean).
    """
    if theta:
        shape = Sigslope(priors_name)
    else:
        shape = Sigslope(None)
    fitter = Fitter(shape, 'normal' if sigma else None)

    LOO_scores = []
    for fit in iterate_fits(get_all_fits(data, fitter, allow_new_computation=False)):
        if fit.LOO_score is not None:
            LOO_scores.append(fit.LOO_score)

    mu, sem = bootstrap(LOO_scores, np.mean)
    return Bunch(theta=theta, sigma=sigma, LOO_scores=LOO_scores, mu=mu, sem=sem)
Exemple #6
0
def plot_theta_diff_scatter(show_title=False):
    """Plot the change in LOO score due to priors versus the baseline score.

    Uses fits of the module-level `data` from Fitter(Sigslope()) (baseline)
    and Fitter(Sigslope(priors_name), 'normal'). Each point is one pair from
    iterate_fits: x is the baseline score, y is the score with priors minus
    the baseline. Pairs with a None score on either side are left out.

    show_title -- when True, add a title to the axes.

    Returns the matplotlib figure. Reads module-level `data`, `priors_name`
    and `fontsize`.
    """
    yFitter = Fitter(Sigslope(priors_name), 'normal')
    nFitter = Fitter(Sigslope())
    yFits = get_all_fits(data, yFitter)
    nFits = get_all_fits(data, nFitter)
    pairs = [(nFit.LOO_score, yFit.LOO_score)
             for nFit, yFit in iterate_fits(nFits, yFits)]
    diff_pairs = [(n, y - n) for n, y in pairs
                  if n is not None and y is not None]
    n, d = zip(*diff_pairs)

    fig = plt.figure()
    ax = fig.add_axes([0.15, 0.12, 0.8, 0.8])
    ax.scatter(n, d, alpha=0.5)
    # Remember the x range so the dashed zero line spans it exactly and the
    # limits can be restored afterwards.
    xlims = ax.get_xlim()
    ax.plot(xlims, [0, 0], 'k--')
    ax.set_xlim(*xlims)
    if show_title:
        # ax.title is a Text attribute and cannot be called; use set_title.
        ax.set_title(r'Improvement from prior on $\theta$ vs. baseline $R^2$',
                     fontsize=fontsize)
    ax.set_xlabel(r'$R^2$(no priors)', fontsize=fontsize)
    ax.set_ylabel(r'$R^2$($\theta$) - $R^2$(no priors)', fontsize=fontsize)
    ax.tick_params(axis='both', labelsize=fontsize)
    return fig
Exemple #7
0
def analyze_paired_scores_with_and_without_priors(n_best=10):
    """Print a paired comparison of LOO scores with and without priors.

    Fits of the module-level `data` are fetched (allow_new_computation=False)
    for Fitter(Sigslope()) and Fitter(Sigslope(priors_name), 'normal') and
    paired by iterate_fits. Output on stdout:

    * half of the two-sided Wilcoxon signed-rank p-value of the paired scores
    * the `n_best` gene/region pairs ranked by score difference
      (with priors minus without priors), largest first

    n_best -- number of top improvements to list (default 10).

    Reads module-level `data` and `priors_name`. Returns None.
    """
    nFitter = Fitter(Sigslope())
    yFitter = Fitter(Sigslope(priors_name), 'normal')

    nFits = get_all_fits(data, nFitter, allow_new_computation=False)
    yFits = get_all_fits(data, yFitter, allow_new_computation=False)

    score_pairs = [(f1.LOO_score, f2.LOO_score)
                   for f1, f2 in iterate_fits(nFits, yFits)]
    nScores, yScores = zip(*score_pairs)

    _, pval = scipy.stats.wilcoxon(nScores, yScores)
    pval = pval / 2  # one sided p-value
    # Single-argument parenthesised print is valid under both Python 2 and 3.
    print('*** wilcoxon signed rank p-value (one sided) = {:.3g}'.format(pval))

    # find examples of best improvements
    # NOTE(review): R2_threshold=-1 presumably keeps low-scoring fits in the
    # ranking -- confirm against iterate_fits.
    diffs = [(f2.LOO_score - f1.LOO_score, f1.LOO_score, f2.LOO_score, g, r)
             for dsname, g, r, f1, f2 in iterate_fits(
                 nFits, yFits, R2_threshold=-1, return_keys=True)]
    diffs.sort(reverse=True)
    print('Gene/Regions for which priors produce best R2 improvement:')
    # The slice now follows n_best; it used to be fixed at 10.
    for i, (delta, R2_without, R2_with, g, r) in enumerate(diffs[:n_best]):
        print('{i}) {g}@{r}, delta-R2={delta:.3g}. R2_without={R2_without:.3g}, R2_with={R2_with:.3g}'.format(
            i=i, g=g, r=r, delta=delta,
            R2_without=R2_without, R2_with=R2_with))
Exemple #8
0
def analyze_variant(theta, sigma):
    """Collect LOO scores and their bootstrap summary for one prior setting.

    theta -- truthy: Sigslope gets `priors_name`; falsy: it gets None.
    sigma -- truthy: Fitter gets a 'normal' sigma prior; falsy: it gets None.

    Fits of the module-level `data` come from get_all_fits with
    allow_new_computation=False. Fits whose LOO_score is None are ignored.
    Returns Bunch(theta, sigma, LOO_scores, mu, sem), where mu and sem are
    what bootstrap(LOO_scores, np.mean) returns.
    """
    fitter = Fitter(
        Sigslope(priors_name if theta else None),
        'normal' if sigma else None,
    )
    all_fits = get_all_fits(data, fitter, allow_new_computation=False)

    LOO_scores = []
    for fit in iterate_fits(all_fits):
        if fit.LOO_score is None:
            continue
        LOO_scores.append(fit.LOO_score)

    mu, sem = bootstrap(LOO_scores, np.mean)
    summary = Bunch(
        theta=theta,
        sigma=sigma,
        LOO_scores=LOO_scores,
        mu=mu,
        sem=sem,
    )
    return summary
Exemple #9
0
def save_fits_and_create_html(data, fitter, fits=None, basedir=None,
                              do_genes=True, do_series=True, do_hist=True, do_html=True, only_main_html=False,
                              k_of_n=None,
                              use_correlations=False, correlations=None,
                              show_change_distributions=False,
                              html_kw=None,
                              figure_kw=None):
    """Write the plots and HTML pages for a set of fits under one directory.

    Missing inputs are filled in first: `fits` via
    get_all_fits(data, fitter, k_of_n); `basedir` from results_dir() and
    fit_results_relative_path(data, fitter), plus a 'with-correlations'
    subdirectory when use_correlations is set; `html_kw` and `figure_kw` as
    empty dicts.

    do_genes / do_series / do_hist / do_html select the outputs.
    only_main_html suppresses every plotting step. The score distribution
    page and the main HTML are only produced when k_of_n is None. Gene
    correlation plots are produced only when use_correlations is set and
    `correlations` is not None. html_kw is forwarded to create_html and
    figure_kw to plot_and_save_all_series.
    """
    if fits is None:
        fits = get_all_fits(data, fitter, k_of_n)
    if basedir is None:
        path_parts = [results_dir(), fit_results_relative_path(data, fitter)]
        if use_correlations:
            path_parts.append('with-correlations')
        basedir = join(*path_parts)
    html_kw = {} if html_kw is None else html_kw
    figure_kw = {} if figure_kw is None else figure_kw

    print('Writing HTML under {}'.format(basedir))
    ensure_dir(basedir)

    # Sub-directory names, relative to basedir.
    gene_dir = 'gene-subplot'
    series_dir = 'gene-region-fits'
    correlations_dir = 'gene-correlations'
    scores_dir = 'score_distributions'

    with_plots = not only_main_html
    no_k_of_n = k_of_n is None

    if with_plots and do_genes:  # relies on the sharding of the fits respecting gene boundaries
        plot_and_save_all_genes(data, fitter, fits, join(basedir, gene_dir), show_change_distributions)
    if with_plots and do_series:
        plot_and_save_all_series(data, fitter, fits, join(basedir, series_dir),
                                 use_correlations, show_change_distributions, figure_kw)
    if with_plots and do_hist and no_k_of_n:
        create_score_distribution_html(fits, use_correlations, join(basedir, scores_dir))

    if not (do_html and no_k_of_n):
        return

    link_to_correlation_plots = use_correlations and correlations is not None
    if with_plots and link_to_correlation_plots:
        plot_and_save_all_gene_correlations(data, correlations, join(basedir, correlations_dir))

    pathway_genes = set.union(*load_17_pathways_breakdown().values())
    missing = pathway_genes - set(data.gene_names)
    # simple heuristic to create pathways only if we have most of the genes (currently 61 genes are missing)
    b_pathways = len(missing) < len(pathway_genes)/2
    create_html(
        data, fitter, fits, basedir, gene_dir, series_dir, scores_dir,
        correlations_dir=correlations_dir,
        use_correlations=use_correlations,
        link_to_correlation_plots=link_to_correlation_plots,
        b_pathways=b_pathways,
        **html_kw
    )
Exemple #10
0
def get_onset_times(data, fitter, R2_threshold, b_force=False):
    """Return (bin_edges, change_vals), computing them or loading them from cache.

    The cache is a pickle under cache_dir() named after
    fit_results_relative_path(data, fitter). On a cache miss the thetas of
    all fits yielded by iterate_fits(..., R2_threshold=R2_threshold) are
    passed to compute_change_distribution over the age range spanned by the
    scaled `dev_stages`, using 50 bins; the result is then written to the
    cache.

    R2_threshold -- forwarded to iterate_fits when computing.
    b_force      -- when True, ignore an existing cache file and recompute
                    (the file is then overwritten). Default False.

    Reads module-level `age_scaler` and `dev_stages`.

    NOTE(review): the cache filename does not include R2_threshold, so a
    cached result computed with one threshold is returned for any other
    threshold unless b_force is set.
    """
    filename = join(cache_dir(), fit_results_relative_path(data, fitter) + '.pkl')
    # b_force was previously accepted but never consulted.
    if isfile(filename) and not b_force:
        print('Loading onset distribution from {}'.format(filename))
        # NOTE(review): text mode is kept to stay consistent with how existing
        # cache files were written; that only suits protocol-0 pickles.
        with open(filename) as f:
            bin_edges, change_vals = pickle.load(f)
        return bin_edges, change_vals

    print('Computing...')
    fits = get_all_fits(data, fitter)
    thetas = [fit.theta for fit in iterate_fits(fits, R2_threshold=R2_threshold)]
    stages = [stage.scaled(age_scaler) for stage in dev_stages]
    low = min(stage.from_age for stage in stages)
    high = max(stage.to_age for stage in stages)
    bin_edges, change_vals = compute_change_distribution(fitter.shape, thetas, low, high, n_bins=50)

    print('Saving result to {}'.format(filename))
    ensure_dir(dirname(filename))
    with open(filename, 'w') as f:
        pickle.dump((bin_edges, change_vals), f)
    return bin_edges, change_vals
        return mu,fit.LOO_score
    lst_mu_R2 = [get_onset_time(r) for r in regions]
    onset_times, lst_R2 = zip(*lst_mu_R2)
    r,pval = spearmanr(onset_times, range(len(regions)))
    return r,pval,lst_R2

# Pathways to analyse; one set of figures is saved per pathway.
lst_pathways = [
    'serotonin',
    'dopamine',
]

for pathway in lst_pathways:
    # Load the data restricted to this pathway and fetch its fits.
    # allow_new_computation=False is passed to get_all_fits.
    data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
    shape = Sigmoid(priors='sigmoid_wide')
    fitter = Fitter(shape, sigma_prior='normal')
    fits = get_all_fits(data, fitter, allow_new_computation=False)
    # R2_threshold = 0.5 YYY problem - we might be using bad fits.

    # The position of a region in this list is the rank that
    # get_gene_correlation is given to correlate against.
    regions = ['OFC', 'M1C', 'S1C', 'IPC', 'V1C']

    # One (gene, r, pval, lst_R2) tuple per gene.
    scores = []
    for g in data.gene_names:
        r,pval,lst_R2 = get_gene_correlation(fits,g,regions)
        scores.append( (g,r,pval,lst_R2) )

    fig = plot_correlation_histogram(scores,pathway)
    # The template has a single placeholder, so one argument is enough.
    save_figure(fig,'{}/gradual-maturation-hist.png'.format(pathway), under_results=True, b_close=True)

    # Only the mean is currently used to aggregate the per-region R2 values.
    for fR2 in [np.mean]: #[min,max,np.mean]:
        fig = plot_scatter(scores, pathway, fR2)
        save_figure(fig,'{}/gradual-maturation-scatter-{}.png'.format(pathway,fR2.__name__), under_results=True, b_close=True)
Exemple #12
0
    onset_times, lst_R2 = zip(*lst_mu_R2)
    r, pval = spearmanr(onset_times, range(len(regions)))
    return r, pval, lst_R2


# Pathways to analyse; a correlation histogram is saved for each.
lst_pathways = [
    'serotonin',
    'dopamine',
]

for pathway in lst_pathways:
    # Load the data restricted to this pathway and fetch its fits.
    # allow_new_computation=False is passed to get_all_fits.
    data = GeneData.load('both').restrict_pathway(pathway).scale_ages(
        age_scaler)
    shape = Sigmoid(priors='sigmoid_wide')
    fitter = Fitter(shape, sigma_prior='normal')
    fits = get_all_fits(data, fitter, allow_new_computation=False)
    # R2_threshold = 0.5 YYY problem - we might be using bad fits.

    # The position of a region in this list is the rank that
    # get_gene_correlation is given to correlate against.
    regions = ['OFC', 'M1C', 'S1C', 'IPC', 'V1C']

    # One (gene, r, pval, lst_R2) tuple per gene.
    scores = []
    for g in data.gene_names:
        r, pval, lst_R2 = get_gene_correlation(fits, g, regions)
        scores.append((g, r, pval, lst_R2))

    fig = plot_correlation_histogram(scores, pathway)
    # The template has a single placeholder, so one argument is enough.
    save_figure(fig,
                '{}/gradual-maturation-hist.png'.format(pathway),
                under_results=True,
                b_close=True)
Exemple #13
0
    ax.set_xlabel('z score', fontsize=fontsize)
    ax.set_ylabel('probability', fontsize=fontsize)
    ax.tick_params(axis='both', labelsize=fontsize)
    return fig


# Script: compare LOO scores of fits on the real data with fits on a shuffled
# copy of the same data, and save the resulting figure.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
# Same loading pipeline as `data`, followed by .shuffle().
data_shuffled = GeneData.load('both').restrict_pathway(pathway).scale_ages(
    age_scaler).shuffle()

shape = Sigmoid('sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
# allow_new_computation=False is passed for both datasets.
fits = get_all_fits(data, fitter, allow_new_computation=False)
fits_shuffled = get_all_fits(data_shuffled,
                             fitter,
                             allow_new_computation=False)
# Pair up (real, shuffled) scores as yielded by iterate_fits, then split them
# into two parallel arrays.
R2_pairs = [(fit.LOO_score, fit2.LOO_score)
            for fit, fit2 in iterate_fits(fits, fits_shuffled)]
R2 = np.array([r for r, r_shuffled in R2_pairs])
R2_shuffled = np.array([r_shuffled for r, r_shuffled in R2_pairs])

# The figure name combines the data's pathway with the shape's cache name.
name = '{}-{}'.format(data.pathway, shape.cache_name())
fig = plot_score_distribution(R2, R2_shuffled)
save_figure(fig,
            'RP/R2-distribution-{}.png'.format(name),
            under_results=True,
            b_close=True)
Exemple #14
0
if __name__ == '__main__':
    # Command-line entry point: fit the same dataset with two shape/prior
    # configurations, print the largest differences and save a comparison
    # scatter plot.
    disable_all_warnings()
    parser = get_common_parser()
    # The "...2" options describe the second fitter; the first one comes from
    # the common parser's options.
    parser.add_argument('--shape2', required=True, help='The shape to compare against', choices=allowed_shape_names())
    parser.add_argument('--scaling2', help='The scaling used when fitting shape2. Default: none', choices=allowed_scaler_names())
    parser.add_argument('--sigma_prior2', help='Prior to use for 1/sigma when fitting shape2. Default: None', choices=get_allowed_priors(is_sigma=True))
    parser.add_argument('--priors2', help='The priors used for theta when fitting shape2. Default: None', choices=get_allowed_priors())
    # The help text now names the file that is actually written by default
    # (see the fallback below); it previously advertised "comparison.png".
    parser.add_argument('--filename', help='Where to save the figure. Default: shape_comparison.png in the results directory')
    parser.add_argument('--show', help='Show figure and wait before exiting', action='store_true')
    parser.add_argument('--ndiffs', type=int, default=5, help='Number of top diffs to show. Default=5.')
    args = parser.parse_args()
    data1, fitter1 = process_common_inputs(args)
    # The second dataset shares dataset/pathway/from_age/shuffle with the
    # first and differs only in its scaling.
    data2 = get_data_from_args(args.dataset, args.pathway, args.from_age, args.scaling2, args.shuffle)
    fitter2 = get_fitter_from_args(args.shape2, args.priors2, args.sigma_prior2)

    fits1 = get_all_fits(data1,fitter1)
    fits2 = get_all_fits(data2,fitter2)

    print_diff_points(data1,fitter1,fits1, data2,fitter2,fits2, args.ndiffs)

    fig = plot_comparison_scatter(data1,fitter1,fits1, data2,fitter2,fits2)

    filename = args.filename
    if filename is None:
        # No --filename given: fall back to the results directory.
        ensure_dir(results_dir())
        filename = join(results_dir(), 'shape_comparison.png')
    save_figure(fig, filename)

    if args.show:
        plt.show(block=True)
#####################################################
# Fit examples for selected gene/region pairs.
# Each entry is (gene, region, yrange); yrange is passed to plot_one_series.
# `data` and `fitters` are expected to be defined earlier in the script.
GRs = [
    ('ABHD4','STC', (5, 8)), 
]
for g,r,yrange in GRs:
    for fitter in fitters:
        print 'Doing {}@{}...'.format(g,r)
        series = data.get_one_series(g,r)
        # Only the first element returned by fit() (theta) is used here.
        theta,_,_,_ = fitter.fit(series.ages, series.single_expression)
        fig = plot_one_series(series, fitter.shape, theta, yrange)
        save_figure(fig,'RP/fit-examples-{}-{}-{}.png'.format(fitter.shape.cache_name(), g,r), under_results=True)


#####################################################
# Comparison for whole pathway
#####################################################
pathway = '17full'
data = data.restrict_pathway(pathway)
# One set of fits per fitter, in the same order as `fitters`;
# allow_new_computation=False is passed to get_all_fits.
fits = [get_all_fits(data,fitter,allow_new_computation=False) for fitter in fitters]


fig = plot_comparison_bar(data, shapes, fits)
save_figure(fig,'RP/sigslope-comparison-bar-{}.png'.format(data.pathway), under_results=True)

fig = plot_comparison_over_R2_score(data, shapes, fits)
save_figure(fig,'RP/sigslope-comparison-vs-R2-{}.png'.format(data.pathway), under_results=True)

# Scatter of the first two shapes only.
fig = plot_comparison_scatter(data,shapes[0],fits[0],shapes[1],fits[1])
save_figure(fig,'RP/scatter-{}-{}-{}.png'.format(shapes[0],shapes[1],pathway), under_results=True)

# These save_figure calls do not pass b_close, so the figures are closed here.
plt.close('all')
        save_figure(fig, filename, b_close=True, under_results=True)
        dct_tuples.update(dct_region_tuples)
        
    print_best_improvements(dct_tuples)
    
    tuples = dct_tuples.values()
    pairs = [(x[0],x[1]) for x in tuples]
    fig = plot_comparison_scatter(pairs,pathway)
    filename = join('RP','correlation-diff-scatter-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)

    fig = plot_comparison_bar(tuples)
    filename = join('RP','correlation-diff-bar-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)
    fig = plot_comparison_bar(tuples, several_levels=True)
    filename = join('RP','correlation-diff-bar-several-levels-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)

# Script: run analyze_pathway for each listed pathway, using Sigslope fits
# requested with 4 correlation iterations.
disable_all_warnings()
cfg.verbosity = 1
age_scaler = LogScaler()
shape = Sigslope('sigslope80')
fitter = Fitter(shape, sigma_prior='normal')

pathways = ['cannabinoids', 'serotonin']
for pathway in pathways:
    data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
    # allow_new_computation=False is passed to get_all_fits.
    fits = get_all_fits(data, fitter, n_correlation_iterations=4, allow_new_computation=False)
    analyze_pathway(pathway, data, fitter, fits)

def get_fits():
    """Return the fits of the '17pathways' data for a Sigmoid shape.

    The data is GeneData.load('both') restricted to '17pathways' and scaled
    with the module-level `age_scaler`. The fitter uses
    Sigmoid(priors='sigmoid_wide') with sigma_prior='normal'.
    """
    loaded = GeneData.load('both')
    scaled = loaded.restrict_pathway('17pathways').scale_ages(age_scaler)
    sigmoid_fitter = Fitter(Sigmoid(priors='sigmoid_wide'), sigma_prior='normal')
    return get_all_fits(scaled, sigmoid_fitter)
Exemple #18
0
    tuples = dct_tuples.values()
    pairs = [(x[0], x[1]) for x in tuples]
    fig = plot_comparison_scatter(pairs, pathway)
    filename = join('RP', 'correlation-diff-scatter-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)

    fig = plot_comparison_bar(tuples)
    filename = join('RP', 'correlation-diff-bar-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)
    fig = plot_comparison_bar(tuples, several_levels=True)
    filename = join(
        'RP', 'correlation-diff-bar-several-levels-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)


# Script: run analyze_pathway for each listed pathway, using Sigslope fits
# requested with 4 correlation iterations.
disable_all_warnings()
cfg.verbosity = 1
age_scaler = LogScaler()
shape = Sigslope('sigslope80')
fitter = Fitter(shape, sigma_prior='normal')

pathways = ['cannabinoids', 'serotonin']
for pathway in pathways:
    data = GeneData.load('both').restrict_pathway(pathway).scale_ages(
        age_scaler)
    # allow_new_computation=False is passed to get_all_fits.
    fits = get_all_fits(data,
                        fitter,
                        n_correlation_iterations=4,
                        allow_new_computation=False)
    analyze_pathway(pathway, data, fitter, fits)
import setup
import config as cfg
from load_data import GeneData
from shapes.sigmoid import Sigmoid
from fitter import Fitter
from all_fits import get_all_fits, iterate_fits
from scalers import LogScaler

# Script: list the gene/region pairs in the serotonin pathway whose fitted
# first theta component is very large in absolute value.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = 'serotonin'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
# Sigmoid fitted without priors on theta.
fitter = Fitter(Sigmoid(priors=None))
fits = get_all_fits(data, fitter)

# (gene, region) keys of fits yielded with R2_threshold=0.5 for which
# |theta[0]| exceeds 100.
extreme = [(g, r) for dsname, g, r, fit in iterate_fits(
    fits, R2_threshold=0.5, return_keys=True) if abs(fit.theta[0]) > 100]
Exemple #20
0
def save_fits_and_create_html(data,
                              fitter,
                              fits=None,
                              basedir=None,
                              do_genes=True,
                              do_series=True,
                              do_hist=True,
                              do_html=True,
                              only_main_html=False,
                              k_of_n=None,
                              use_correlations=False,
                              correlations=None,
                              show_change_distributions=False,
                              exons_layout=False,
                              html_kw=None,
                              figure_kw=None):
    """Write the plots and HTML pages for a set of fits under one directory.

    Missing inputs are filled in first: `fits` via
    get_all_fits(data, fitter, k_of_n); `basedir` from results_dir() and
    fit_results_relative_path(data, fitter), plus a 'with-correlations'
    subdirectory when use_correlations is set; `html_kw` and `figure_kw` as
    empty dicts.

    do_genes / do_series / do_hist / do_html select the outputs and
    exons_layout additionally enables the exon plots (built from the series
    output when cfg.exons_plots_from_series is set). only_main_html
    suppresses every plotting step. The score distribution page and the main
    HTML are only produced when k_of_n is None. Gene correlation plots are
    produced only when use_correlations is set and `correlations` is not
    None. html_kw is forwarded to create_html and figure_kw to
    plot_and_save_all_series.
    """
    if fits is None:
        fits = get_all_fits(data, fitter, k_of_n)
    if basedir is None:
        path_parts = [results_dir(), fit_results_relative_path(data, fitter)]
        if use_correlations:
            path_parts.append('with-correlations')
        basedir = join(*path_parts)
    html_kw = {} if html_kw is None else html_kw
    figure_kw = {} if figure_kw is None else figure_kw

    print('Writing HTML under {}'.format(basedir))
    ensure_dir(basedir)

    # Sub-directory names, relative to basedir.
    gene_dir = 'gene-subplot'
    if cfg.exons_plots_from_series:
        exons_dir = 'exons_subplot_series'
    else:
        exons_dir = 'exons_subplot'
    series_dir = 'gene-region-fits'
    correlations_dir = 'gene-correlations'
    scores_dir = 'score_distributions'

    with_plots = not only_main_html
    no_k_of_n = k_of_n is None

    if with_plots and do_genes:  # relies on the sharding of the fits respecting gene boundaries
        plot_and_save_all_genes(data, fitter, fits, join(basedir, gene_dir),
                                show_change_distributions)
    if with_plots and do_series:
        plot_and_save_all_series(data, fitter, fits, join(basedir, series_dir),
                                 use_correlations, show_change_distributions,
                                 exons_layout, figure_kw)
    if with_plots and exons_layout:
        exons_path = join(basedir, exons_dir)
        if cfg.exons_plots_from_series:
            plot_and_save_all_exons_from_series(fits, exons_path,
                                                join(basedir, series_dir))
        else:
            plot_and_save_all_exons(data, fitter, fits, exons_path)
    if with_plots and do_hist and no_k_of_n:
        create_score_distribution_html(fits, use_correlations,
                                       join(basedir, scores_dir))

    if not (do_html and no_k_of_n):
        return

    link_to_correlation_plots = use_correlations and correlations is not None
    if with_plots and link_to_correlation_plots:
        plot_and_save_all_gene_correlations(
            data, correlations, join(basedir, correlations_dir))

    pathway_genes = set.union(*load_17_pathways_breakdown().values())
    missing = pathway_genes - set(data.gene_names)
    # simple heuristic to create pathways only if we have most of the genes (currently 61 genes are missing)
    b_pathways = len(missing) < len(pathway_genes) / 2
    create_html(data,
                fitter,
                fits,
                basedir,
                gene_dir,
                exons_dir,
                series_dir,
                scores_dir,
                correlations_dir=correlations_dir,
                use_correlations=use_correlations,
                link_to_correlation_plots=link_to_correlation_plots,
                b_pathways=b_pathways,
                exons_layout=exons_layout,
                **html_kw)
import setup
import config as cfg
from load_data import GeneData
from shapes.sigmoid import Sigmoid
from fitter import Fitter
from all_fits import get_all_fits, iterate_fits
from scalers import LogScaler

# Script: list the gene/region pairs in the serotonin pathway whose fitted
# first theta component is very large in absolute value.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = 'serotonin'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
# Sigmoid fitted without priors on theta.
fitter = Fitter(Sigmoid(priors=None))
fits = get_all_fits(data,fitter)

# (gene, region) keys of fits yielded with R2_threshold=0.5 for which
# |theta[0]| exceeds 100.
extreme = [(g,r) for dsname,g,r,fit in iterate_fits(fits, R2_threshold=0.5, return_keys=True) if abs(fit.theta[0]) > 100]
Exemple #22
0
    ax.bar(bin_edges[:-1], probs, width=width, color='b')

    ax.set_xlabel('z score', fontsize=fontsize)
    ax.set_ylabel('probability', fontsize=fontsize)   
    ax.tick_params(axis='both', labelsize=fontsize)
    return fig

# Script: compare LOO scores of fits on the real data with fits on a shuffled
# copy of the same data; save the score distribution and a z-score figure.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
# Same loading pipeline as `data`, followed by .shuffle().
data_shuffled = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler).shuffle()

shape = Sigmoid('sigmoid_wide')
fitter = Fitter(shape,sigma_prior='normal')
# allow_new_computation=False is passed for both datasets.
fits = get_all_fits(data,fitter,allow_new_computation=False)
fits_shuffled = get_all_fits(data_shuffled,fitter,allow_new_computation=False)
# Pair up (real, shuffled) scores as yielded by iterate_fits, then split them
# into two parallel arrays.
R2_pairs = [(fit.LOO_score,fit2.LOO_score) for fit,fit2 in iterate_fits(fits,fits_shuffled)]
R2 = np.array([r for r,r_shuffled in R2_pairs])
R2_shuffled = np.array([r_shuffled for r,r_shuffled in R2_pairs])

# The figure name combines the data's pathway with the shape's cache name.
name = '{}-{}'.format(data.pathway,shape.cache_name())
fig = plot_score_distribution(R2,R2_shuffled)
save_figure(fig,'RP/R2-distribution-{}.png'.format(name), under_results=True, b_close=True)

# Express each real score as a z-score relative to the mean and standard
# deviation of the shuffled scores.
# NOTE(review): std_shuffled == 0 would make this a division by zero.
mu_shuffled = np.mean(R2_shuffled)
std_shuffled = np.std(R2_shuffled)
z_scores = (R2-mu_shuffled)/std_shuffled
fig = plot_z_scores(z_scores)
save_figure(fig,'RP/R2-z-scores-{}.png'.format(name), under_results=True, b_close=True)