Example #1
0
def get_change_distribution_for_whole_genome(all_data, fitter):
    """Load the cached whole-genome change distribution.

    The pickle is not computed here; it is expected to already exist
    (it should be precomputed by running onset_times_whole_genome.py).

    Returns:
        The (bin_edges, change_vals) pair stored in the pickle.
    """
    pkl_name = fit_results_relative_path(all_data, fitter) + '.pkl'
    filename = join(cache_dir(), pkl_name)
    print('Loading whole genome onset distribution from {}'.format(filename))
    with open(filename) as pkl_file:
        edges, changes = pickle.load(pkl_file)
    return edges, changes
Example #2
0
def get_change_distribution_for_whole_genome(all_data, fitter):
    """Read the precomputed whole-genome change distribution from the cache.

    NOTE: this function only loads the pickle; the distribution for all genes
    should be precomputed by running onset_times_whole_genome.py.

    Returns:
        Tuple (bin_edges, change_vals) as unpickled from the cache file.
    """
    relative_name = fit_results_relative_path(all_data, fitter) + '.pkl'
    filename = join(cache_dir(), relative_name)
    print('Loading whole genome onset distribution from {}'.format(filename))
    with open(filename) as cached:
        loaded_edges, loaded_vals = pickle.load(cached)
    return loaded_edges, loaded_vals
def export_timing_info_for_all_fits(data, fitter, fits):
    """Compute the timing info for all fits and export it as a MATLAB file.

    The output is written to '<fit results path>-change-dist.mat' under the
    cache directory and carries a README field describing each exported array.
    """
    timing = compute_timing_info_for_all_fits(data, fitter, fits)
    readme_text = """\
mu:
The mean age of the change distribution for given gene and region.
Dimensions: <n-genes> X <n-regions>

std:
The standard deviation of the change distribution for given gene and region.
Dimensions: <n-genes> X <n-regions>

genes: 
Gene names for the genes represented in other arrays

weights:
The change distributions for each gene and region.
Dimensions: <n-genes> X <n-regions> X <n-bins>

bin_centers:
The ages for the center of each bin used in calculating the histogram in "weights".
Dimensions: <n-bins> X 1

bin_edges:
The edges of the bins used in calculating the change histogram.
(centers can be calculated from the bin_edges, but it's convenient to have it pre-calculated)
Dimensions: <n-bins + 1> X 1

regions: 
Region names for the regions represented in other arrays

age_scaler: 
The scaling used for ages (i.e. 'log' means x' = log(x + 38/52))
"""
    # Entries are evaluated in the same order as the README lists the fields' sources.
    mdict = {
        'README_CHANGE_DISTRIBUTIONS': readme_text,
        'genes': list_of_strings_to_matlab_cell_array(timing.genes),
        'regions': list_of_strings_to_matlab_cell_array(timing.regions),
        'age_scaler': scalers.unify(timing.age_scaler).cache_name(),
        'mu': timing.mu,
        'std': timing.std,
        'bin_edges': timing.bin_edges,
        'bin_centers': timing.bin_centers,
        'weights': timing.weights,
    }
    mat_name = fit_results_relative_path(data, fitter) + '-change-dist.mat'
    save_matfile(mdict, join(cache_dir(), mat_name))
Example #4
0
 def save_to_mat(self):
     """Write the per-result fields of self.res to a MATLAB .mat file.

     String fields go through list_of_strings_to_matlab_cell_array; the other
     fields are stored as numpy arrays. 1-D arrays are saved as columns.
     """
     mat_name = 'dprime-all-pathways-and-regions-{}.mat'.format(self._filename_suffix)
     filename = join(cache_dir(), 'both', mat_name)

     string_fields = ('pathway', 'r1', 'r2')
     numeric_fields = ('score', 'delta', 'weighted_delta', 'mu1_years',
                       'mu2_years', 'pval', 'pathway_size')
     mdict = {}
     for field in string_fields:
         mdict[field] = list_of_strings_to_matlab_cell_array(
             [getattr(entry, field) for entry in self.res])
     for field in numeric_fields:
         mdict[field] = np.array([getattr(entry, field) for entry in self.res])

     print('Saving results to {}'.format(filename))
     savemat(filename, mdict, oned_as='column')
def export_timing_info_for_all_fits(data, fitter, fits):
    """Export the change-distribution timing info of all fits to a .mat file.

    Calls compute_timing_info_for_all_fits and saves its fields, together with
    a README string describing them, to '<fit results path>-change-dist.mat'
    in the cache directory.
    """
    dist = compute_timing_info_for_all_fits(data, fitter, fits)
    readme = """\
mu:
The mean age of the change distribution for given gene and region.
Dimensions: <n-genes> X <n-regions>

std:
The standard deviation of the change distribution for given gene and region.
Dimensions: <n-genes> X <n-regions>

genes: 
Gene names for the genes represented in other arrays

weights:
The change distributions for each gene and region.
Dimensions: <n-genes> X <n-regions> X <n-bins>

bin_centers:
The ages for the center of each bin used in calculating the histogram in "weights".
Dimensions: <n-bins> X 1

bin_edges:
The edges of the bins used in calculating the change histogram.
(centers can be calculated from the bin_edges, but it's convenient to have it pre-calculated)
Dimensions: <n-bins + 1> X 1

regions: 
Region names for the regions represented in other arrays

age_scaler: 
The scaling used for ages (i.e. 'log' means x' = log(x + 38/52))
"""
    gene_cells = list_of_strings_to_matlab_cell_array(dist.genes)
    region_cells = list_of_strings_to_matlab_cell_array(dist.regions)
    scaler_name = scalers.unify(dist.age_scaler).cache_name()
    mdict = {
        'README_CHANGE_DISTRIBUTIONS': readme,
        'genes': gene_cells,
        'regions': region_cells,
        'age_scaler': scaler_name,
        'mu': dist.mu,
        'std': dist.std,
        'bin_edges': dist.bin_edges,
        'bin_centers': dist.bin_centers,
        'weights': dist.weights,
    }
    filename = join(
        cache_dir(),
        fit_results_relative_path(data, fitter) + '-change-dist.mat',
    )
    save_matfile(mdict, filename)
Example #6
0
def get_onset_times(data, fitter, R2_threshold, b_force=False):
    """Return the change (onset) distribution over all fits, cached on disk.

    Args:
        data: Data object passed to get_all_fits / fit_results_relative_path.
        fitter: Fitter whose `shape` is used to compute the distribution.
        R2_threshold: Passed to iterate_fits to select which fits contribute.
        b_force: When True, ignore an existing cache file and recompute
            (the result then overwrites the cache).

    Returns:
        Tuple (bin_edges, change_vals).

    Note:
        The cache filename depends only on `data` and `fitter`, not on
        R2_threshold, so pass b_force=True after changing the threshold.
    """
    filename = join(cache_dir(), fit_results_relative_path(data, fitter) + '.pkl')

    # Previously b_force was accepted but never consulted, so a stale cache
    # file could not be bypassed.
    if isfile(filename) and not b_force:
        print('Loading onset distribution from {}'.format(filename))
        with open(filename) as f:
            bin_edges, change_vals = pickle.load(f)
        return bin_edges, change_vals

    print('Computing...')
    fits = get_all_fits(data, fitter)
    thetas = [fit.theta for fit in iterate_fits(fits, R2_threshold=R2_threshold)]
    # NOTE(review): age_scaler and dev_stages are not defined in this function;
    # presumably module-level names - confirm.
    stages = [stage.scaled(age_scaler) for stage in dev_stages]
    low = min(stage.from_age for stage in stages)
    high = max(stage.to_age for stage in stages)
    bin_edges, change_vals = compute_change_distribution(
        fitter.shape, thetas, low, high, n_bins=50)

    print('Saving result to {}'.format(filename))
    ensure_dir(dirname(filename))
    with open(filename, 'w') as f:
        pickle.dump((bin_edges, change_vals), f)
    return bin_edges, change_vals
Example #7
0
 def save_to_mat(self):
     """Save the fields of every entry in self.res to a .mat file in the cache.

     The file is named after self._filename_suffix; 1-D arrays are written as
     column vectors.
     """
     filename = join(
         cache_dir(), 'both',
         'dprime-all-pathways-and-regions-{}.mat'.format(self._filename_suffix))

     def as_cell_array(attr):
         # string-valued attribute -> MATLAB cell array
         return list_of_strings_to_matlab_cell_array(
             [getattr(item, attr) for item in self.res])

     def as_array(attr):
         # numeric attribute -> numpy array
         return np.array([getattr(item, attr) for item in self.res])

     mdict = {
         'pathway': as_cell_array('pathway'),
         'r1': as_cell_array('r1'),
         'r2': as_cell_array('r2'),
         'score': as_array('score'),
         'delta': as_array('delta'),
         'weighted_delta': as_array('weighted_delta'),
         'mu1_years': as_array('mu1_years'),
         'mu2_years': as_array('mu2_years'),
         'pval': as_array('pval'),
         'pathway_size': as_array('pathway_size'),
     }
     print('Saving results to {}'.format(filename))
     savemat(filename, mdict, oned_as='column')
Example #8
0
def save_theta_text_files(data, fitter, fits):
    """Write the spline parameters of every fit to one text file per dataset.

    Only spline shapes are supported (asserted). Fits whose theta is None are
    skipped. Each record lists the gene, region, knots, coefficients and degree
    taken from fit.theta[0], followed by a blank line.
    """
    assert fitter.shape.cache_name() == 'spline', "save to text is only supported for splines at the moment"
    record_template = """\
Gene symbol: {g}
Region: {r}
Spline knots: {knots}
Spline coefficients: {coeffs}
Spline degree: {degree}
"""
    for dataset in data.datasets:
        filename = join(cache_dir(), fit_results_relative_path(dataset, fitter) + '.txt')
        dataset_fits = fits[dataset.name]
        print('Saving text file to {}'.format(filename))
        with open(filename, 'w') as out:
            for (gene, region), fit in dataset_fits.iteritems():
                if fit.theta is not None:
                    knots, coeffs, degree = fit.theta[0]
                    record = record_template.format(
                        g=gene,
                        r=region,
                        knots=list(knots),
                        coeffs=list(coeffs),
                        degree=degree,
                    )
                    # trailing newline leaves a blank line between records
                    out.write(record + '\n')
Example #9
0
class SingleRegion(object):
    """Per-region timing information for gene pathways.

    Loads the pickled change distribution (all genes and regions) from
    change_dist_filename and exposes its fields as attributes, together with
    name -> index lookups for genes (g2i) and regions (r2i).
    """

    change_dist_filename = join(
        cache_dir(),
        'both',
        'fits-log-all-sigslope-theta-sigslope80-sigma-normal-change-dist.pkl',
    )

    def __init__(self, listname='all'):
        """Read the pathways of `listname` and load the change distribution."""
        self.listname = listname
        self.pathways = pathway_lists.read_all_pathways(listname)

        dist = load_pickle(
            SingleRegion.change_dist_filename,
            'change distribution for all genes and regions')
        self.change_dist = dist

        # names and their positions along the gene / region axes
        self.genes = dist.genes
        self.regions = dist.regions
        self.g2i = dict((gene, idx) for idx, gene in enumerate(self.genes))
        self.r2i = dict((region, idx) for idx, region in enumerate(self.regions))

        # remaining fields are copied over unchanged
        self.age_scaler = dist.age_scaler
        self.mu = dist.mu
        self.std = dist.std
        self.bin_edges = dist.bin_edges
        self.bin_centers = dist.bin_centers
        self.weights = dist.weights

    def region_timings_per_pathway(self):
        """Return {pathway: {region: age}} with an unscaled weighted mean age.

        For each pathway and region the mean of mu over the pathway genes is
        taken with weights 1/std, then passed through age_scaler.unscale.
        """
        def mean_age(pathway_genes, r):
            gene_rows = [self.g2i[g] for g in pathway_genes]
            col = self.r2i[r]
            mus = self.mu[gene_rows, col]
            inv_std = 1 / self.std[gene_rows, col]
            scaled_age = np.dot(inv_std, mus) / sum(inv_std)
            return self.age_scaler.unscale(scaled_age)

        # pathway -> { r -> mu }
        return {
            name: {r: mean_age(members, r) for r in self.regions}
            for name, members in self.pathways.iteritems()
        }
Example #10
0
def save_theta_text_files(data, fitter, fits):
    """Dump spline fit parameters as text, one file per dataset in `data`.

    Asserts that the fitter's shape is a spline. For every (gene, region) fit
    with a non-None theta, a record with the knots, coefficients and degree
    from fit.theta[0] is written, each record followed by an empty line.
    """
    assert fitter.shape.cache_name() == 'spline', \
        "save to text is only supported for splines at the moment"

    def describe(g, r, theta):
        # theta[0] holds (knots, coeffs, degree)
        knots, coeffs, degree = theta[0]
        knots, coeffs = list(knots), list(coeffs)
        return """\
Gene symbol: {g}
Region: {r}
Spline knots: {knots}
Spline coefficients: {coeffs}
Spline degree: {degree}
""".format(g=g, r=r, knots=knots, coeffs=coeffs, degree=degree)

    for dataset in data.datasets:
        filename = join(cache_dir(),
                        fit_results_relative_path(dataset, fitter) + '.txt')
        dataset_fits = fits[dataset.name]
        print('Saving text file to {}'.format(filename))
        with open(filename, 'w') as txt:
            for (g, r), fit in dataset_fits.iteritems():
                if fit.theta is None:
                    continue
                txt.write(describe(g, r, fit.theta) + '\n')
Example #11
0
def _cache_filename(base_filename, k_of_n):
    """Return the cache pickle path for `base_filename`.

    When `k_of_n` is a (k, n) pair, '.<k>-of-<n>' is appended to the path;
    when it is None the plain '.pkl' path is returned.
    """
    pkl_path = join(cache_dir(), base_filename + '.pkl')
    if k_of_n is None:
        return pkl_path
    part, total = k_of_n
    return '{}.{}-of-{}'.format(pkl_path, part, total)
Example #12
0
def _batch_dir(base_filename):
    """Return the '<base_filename>-batches' path under the cache directory."""
    cache_root = cache_dir()
    return join(cache_root, base_filename + '-batches')
Example #13
0
import setup
from os.path import join
import project_dirs
from all_fits import Bunch, convert_format


def f_convert(fit):
    """Return a new Bunch holding the fields copied from `fit`.

    Copied fields: fitter, seed, theta, sigma, fit_predictions, LOO_predictions.
    """
    copied_fields = (
        'fitter',
        'seed',
        'theta',
        'sigma',
        'fit_predictions',
        'LOO_predictions',
    )
    return Bunch(**dict((name, getattr(fit, name)) for name in copied_fields))


# Script body (runs when the module is executed): build the path of the cached
# fits pickle under the 'kang2011' cache folder and hand it to convert_format
# together with the f_convert callback.
filename = join(project_dirs.cache_dir(), 'kang2011',
                'fits-serotonin-poly1-t0-s0.pkl')
convert_format(filename, f_convert)
Example #14
0
import setup
from os.path import join
import project_dirs
from all_fits import Bunch, convert_format

def f_convert(fit):
    """Build a Bunch from `fit`, keeping fitter, seed, theta, sigma and both prediction fields."""
    converted = dict(
        fitter=fit.fitter,
        seed=fit.seed,
        theta=fit.theta,
        sigma=fit.sigma,
        fit_predictions=fit.fit_predictions,
        LOO_predictions=fit.LOO_predictions,
    )
    return Bunch(**converted)

# Script body (runs when the module is executed): the cached fits pickle in the
# 'kang2011' cache folder is passed to convert_format with f_convert.
filename = join(project_dirs.cache_dir(), 'kang2011', 'fits-serotonin-poly1-t0-s0.pkl')
convert_format(filename, f_convert)
Example #15
0
class RegionPairTiming(object):
    """Timing comparison between pairs of regions, per pathway.

    Loads the pickled d-prime "cube" from cube_filename. Its d_mu and std
    arrays are indexed [gene, region1, region2], and
    scores = d_mu / std is derived from them. Gene/region names, lookups and
    single-region statistics are taken from a SingleRegion instance.
    """

    cube_filename = join(
        cache_dir(), 'both',
        'fits-log-all-sigslope-theta-sigslope80-sigma-normal-dprime-cube.pkl')

    def __init__(self, listname='all'):
        """Load single-region data, the d-prime cube and the baseline distribution."""
        self.listname = listname
        self.single = SingleRegion(listname)
        self.pathways = self.single.pathways
        self.genes = self.single.genes
        self.regions = self.single.regions
        self.g2i = self.single.g2i
        self.r2i = self.single.r2i
        self.age_scaler = self.single.age_scaler
        self.mu = self.single.mu
        self.single_std = self.single.std

        cube = load_pickle(
            RegionPairTiming.cube_filename,
            name='timing d-prime info for all genes and region pairs')
        self.d_mu = cube.d_mu
        self.pair_std = cube.std
        self.scores = self.d_mu / self.pair_std

        # (r1, r2) -> (mu, sigma) of the sampled baseline score distribution
        self.baseline = self.baseline_distribution_all_pairs(100, 10000)

    @cache(lambda self: join(
        cache_dir(), 'both', 'dprime-all-pathways-and-regions-{}.pkl'.format(
            self.listname)))
    def analyze_all_pathways(self):
        """Analyze every pathway for every region pair with r1 < r2.

        Returns:
            TimingResults built from a {(pathway, r1, r2): result} dict.
        """
        res = {}  # (pathway,r1,r2) -> timing results
        for pathway in self.pathways.iterkeys():
            print('Analyzing region pairs for pathway {}'.format(pathway))
            pathway_genes = self.pathways[pathway]
            for r1 in self.regions:
                for r2 in self.regions:
                    if r2 <= r1:  # keep only results "above the diagonal" (r1 < r2 lexicographically)
                        continue
                    pathway_res = self.analyze_pathway_and_region_pair(
                        pathway_genes, r1, r2)
                    res[(pathway, r1, r2)] = pathway_res
        return TimingResults.fromResultsDct(res, self.listname, self.pathways)

    def analyze_pathway_and_region_pair(self, pathway_genes, r1, r2):
        """Compute timing statistics of one pathway for the region pair (r1, r2).

        Returns:
            Bunch with score, delta, weighted_delta, mu1_years, mu2_years,
            pval and pathway_size. All fields except pathway_size are NaN when
            more than 5% of the pathway genes have a NaN d_mu for this pair.
        """
        ir1, ir2 = self.r2i[r1], self.r2i[r2]
        pathway_ig = [self.g2i[g] for g in pathway_genes]

        all_pathway_scores = self.scores[pathway_ig, ir1, ir2]
        score = nanmean(all_pathway_scores)
        mu, sigma = self.baseline[(r1, r2)]
        # baseline sigma is stored per single gene; rescale to the pathway size
        sigma = sigma / np.sqrt(len(pathway_ig))
        z = (score - mu) / sigma
        pval = z_score_to_p_value(z)

        pathway_d_mu = self.d_mu[pathway_ig, ir1, ir2]
        pathway_pair_std = self.pair_std[pathway_ig, ir1, ir2]
        weights = 1 / pathway_pair_std
        # needed for the PFC region from colantuoni which doesn't contain all genes
        valid = ~np.isnan(pathway_d_mu)
        weights, pathway_d_mu = weights[valid], pathway_d_mu[valid]
        weighted_delta = np.dot(weights, pathway_d_mu) / sum(weights)
        delta = np.mean(pathway_d_mu)
        too_many_nans = False
        if not valid.all():
            assert r1 == 'PFC' or r2 == 'PFC', "r1={}, r2={}".format(r1, r2)
            n_genes = len(valid)
            n_non_valid = n_genes - np.count_nonzero(valid)
            if float(n_non_valid) / n_genes > 0.05:
                too_many_nans = True

        def mean_age(ir):
            # 1/std-weighted mean of mu over the pathway genes, in unscaled ages
            if too_many_nans:
                return np.nan
            ages = self.mu[pathway_ig, ir]
            weights = 1 / self.single_std[pathway_ig, ir]
            valid = ~np.isnan(weights)
            weights, ages = weights[valid], ages[valid]
            age = np.dot(weights, ages) / sum(weights)
            return self.age_scaler.unscale(age)

        return Bunch(
            score=score if not too_many_nans else np.nan,
            delta=delta if not too_many_nans else np.nan,
            weighted_delta=weighted_delta if not too_many_nans else np.nan,
            mu1_years=mean_age(ir1),
            mu2_years=mean_age(ir2),
            pval=pval if not too_many_nans else np.nan,
            pathway_size=len(pathway_genes),
        )

    @cache(filename=join(cache_dir(), 'both', 'dprime-baseline.pkl'))
    def baseline_distribution_all_pairs(self, sample_size, n_samples):
        """Return {(r1, r2): (mu, sigma)} of the baseline score for all region pairs.

        A pair whose mirror (r2, r1) was already sampled reuses that result
        with the sign of mu flipped instead of sampling again.
        """
        res = {}
        for r1 in self.regions:
            print('Sampling baseline distribution of {} vs. all other regions'.format(
                r1))
            for r2 in self.regions:
                if (r2, r1) in res:
                    mu, sigma = res[(r2, r1)]
                    res[(r1, r2)] = -mu, sigma
                else:
                    res[(r1, r2)] = self.baseline_distribution_one_pair(
                        r1, r2, sample_size, n_samples)
        return res

    def baseline_distribution_one_pair(self, r1, r2, sample_size, n_samples):
        """Sample the mean score of random gene sets for the pair (r1, r2).

        Draws `n_samples` sets of `sample_size` gene indices (with replacement)
        and takes the NaN-ignoring mean score of each set.

        Returns:
            (mu, sigma): mean of the sampled means, and their standard
            deviation multiplied by sqrt(sample_size).
        """
        ir1, ir2 = self.r2i[r1], self.r2i[r2]
        pair_scores = self.scores[:, ir1, ir2]
        x = np.empty(n_samples)
        for i in xrange(n_samples):
            # randint's upper bound is exclusive; this replaces the deprecated
            # random_integers(0, len - 1, size) with the same index range
            inds = np.random.randint(0, len(pair_scores), sample_size)
            x[i] = nanmean(pair_scores[inds])
        mu = x.mean()
        sigma = x.std() * np.sqrt(sample_size)
        return mu, sigma
Example #16
0
def save_as_mat_files(data, fitter, fits, has_change_distributions):
    """Export the fits of every dataset in `data` to a MATLAB .mat file.

    Args:
        data: Object whose `datasets` are exported one file each.
        fitter: Fitter whose `shape` evaluates the high-resolution curves.
        fits: Mapping dataset name -> {(gene, region): fit}. When
            `has_change_distributions` is True it must also provide
            `change_distribution_params.bin_centers`.
        has_change_distributions: Whether to export per-fit change
            distribution weights. When False, empty lists are exported for
            the bin centers and weights, and any weights present on the fits
            are ignored.

    Entries with no fit (or no value) stay NaN in the exported arrays.
    """
    for dataset in data.datasets:
        filename = join(cache_dir(), fit_results_relative_path(dataset, fitter) + '.mat')
        dataset_fits = fits[dataset.name]

        print('Saving mat file to {}'.format(filename))
        shape = fitter.shape

        gene_names = dataset.gene_names
        gene_idx = {g: i for i, g in enumerate(gene_names)}
        n_genes = len(gene_names)
        region_names = dataset.region_names
        region_idx = {r: i for i, r in enumerate(region_names)}
        n_regions = len(region_names)

        # theta is exported only for shapes that support it; otherwise a scalar NaN
        write_theta = shape.can_export_params_to_matlab()
        if write_theta:
            theta = init_array(np.nan, shape.n_params(), n_genes, n_regions)
        else:
            theta = np.nan

        fit_scores = init_array(np.nan, n_genes, n_regions)
        LOO_scores = init_array(np.nan, n_genes, n_regions)
        fit_predictions = init_array(np.nan, *dataset.expression.shape)
        LOO_predictions = init_array(np.nan, *dataset.expression.shape)
        high_res_predictions = init_array(np.nan, cfg.n_curve_points_to_plot, n_genes, n_regions)
        # curves are evaluated on scaled ages, but the exported ages are unscaled
        scaled_high_res_ages = np.linspace(dataset.ages.min(), dataset.ages.max(), cfg.n_curve_points_to_plot)
        original_high_res_ages = scalers.unify(dataset.age_scaler).unscale(scaled_high_res_ages)
        if has_change_distributions:
            change_distribution_bin_centers = fits.change_distribution_params.bin_centers
            n_bins = len(change_distribution_bin_centers)
            change_distribution_weights = init_array(np.nan, n_bins, n_genes, n_regions)
        else:
            change_distribution_bin_centers = []
            change_distribution_weights = []

        for (g, r), fit in dataset_fits.iteritems():
            series = dataset.get_one_series(g, r)
            ig = gene_idx[g]
            ir = region_idx[r]
            fit_scores[ig, ir] = fit.fit_score
            LOO_scores[ig, ir] = fit.LOO_score
            if write_theta and fit.theta is not None:
                theta[:, ig, ir] = fit.theta
            if fit.fit_predictions is not None:
                fit_predictions[series.original_inds, ig, ir] = fit.fit_predictions
            if fit.LOO_predictions is not None:
                LOO_predictions[series.original_inds, ig, ir] = fit.LOO_predictions
            if fit.theta is not None:
                high_res_predictions[:, ig, ir] = shape.f(fit.theta, scaled_high_res_ages)
            # Only touch the weights array when it was allocated above; without
            # this guard the placeholder list would be indexed with a tuple and
            # raise TypeError if a fit happens to carry weights.
            if has_change_distributions:
                change_weights = getattr(fit, 'change_distribution_weights', None)
                if change_weights is not None:
                    change_distribution_weights[:, ig, ir] = change_weights

        mdict = dict(
            gene_names=list_of_strings_to_matlab_cell_array(gene_names),
            region_names=list_of_strings_to_matlab_cell_array(region_names),
            theta=theta,
            fit_scores=fit_scores,
            LOO_scores=LOO_scores,
            fit_predictions=fit_predictions,
            LOO_predictions=LOO_predictions,
            high_res_predictions=high_res_predictions,
            high_res_ages=original_high_res_ages,
            change_distribution_bin_centers=change_distribution_bin_centers,
            change_distribution_weights=change_distribution_weights,
        )
        savemat(filename, mdict, oned_as='column')
Example #17
0
def _batch_dir(base_filename):
    """Return the path of the batches directory for `base_filename` in the cache dir."""
    root = cache_dir()
    batches_name = base_filename + '-batches'
    return join(root, batches_name)
Example #18
0
def _cache_filename(base_filename, k_of_n):
    """Return the '.pkl' cache path, suffixed with '.<k>-of-<n>' when k_of_n is given."""
    filename = join(cache_dir(), base_filename + '.pkl')
    if k_of_n is None:
        return filename
    k, n = k_of_n
    return '{}.{}-of-{}'.format(filename, k, n)
    for dsname,g,r,fit in iterate_fits(fits, return_keys=True):
        weights = calc_bootstrap_change_distribution(shape, fit.theta_samples, bin_edges)
        fit.change_distribution_weights = weights
        fit.change_distribution_spread = change_distribution_spread_cumsum(bin_centers, weights)
        fit.change_distribution_mean_std = change_distribution_mean_and_std(bin_centers, weights)

def calc_bootstrap_change_distribution(shape, theta_samples, bin_edges):
    """Average the change distribution over all theta samples.

    `theta_samples` is 2-D with one sample per column; the change distribution
    of each column is computed with calc_change_distribution and the results
    are averaged bin by bin.
    """
    _, n_samples = theta_samples.shape
    total = np.zeros(bin_edges_to_centers(bin_edges).shape)
    for col in xrange(n_samples):
        sample = theta_samples[:, col]
        total += calc_change_distribution(shape, sample, bin_edges)
    # now values are in fraction of total change (doesn't have to sum up to 1
    # if ages don't cover the whole transition range)
    total /= n_samples
    return total

@cache(lambda data, fitter, fits: join(cache_dir(), fit_results_relative_path(data,fitter) + '-dprime-cube.pkl'))
def compute_dprime_measures_for_all_pairs(data, fitter, fits):
    genes = data.gene_names
    regions = data.region_names 
    r2ds = data.region_to_dataset()        
    cube_shape = (len(genes), len(regions), len(regions))
    d_mu = np.empty(cube_shape) # mu2-mu1 for all genes and region pairs
    std = np.empty(cube_shape) # std (combined) for all genes and region pairs
    def get_mu_std(g,r):
        dsfits = fits[r2ds[r]]
        fit = dsfits.get((g,r))
        if fit is None:
            return np.nan, np.nan
        else:
            return fit.change_distribution_mean_std
    for ig,g in enumerate(genes):
Example #20
0
def save_as_mat_files(data, fitter, fits, has_change_distributions):
    """Write one MATLAB .mat file per dataset with the results of all its fits.

    Args:
        data: Object whose `datasets` are exported one file each.
        fitter: Fitter whose `shape` evaluates the high-resolution curves.
        fits: Mapping dataset name -> {(gene, region): fit}. When
            `has_change_distributions` is True it must also provide
            `change_distribution_params.bin_centers`.
        has_change_distributions: Whether to export per-fit change
            distribution weights. When False, empty lists are exported for
            the bin centers and weights, and any weights present on the fits
            are ignored.

    Entries with no fit (or no value) stay NaN in the exported arrays.
    """
    for dataset in data.datasets:
        filename = join(cache_dir(),
                        fit_results_relative_path(dataset, fitter) + '.mat')
        dataset_fits = fits[dataset.name]

        print('Saving mat file to {}'.format(filename))
        shape = fitter.shape

        gene_names = dataset.gene_names
        gene_idx = {g: i for i, g in enumerate(gene_names)}
        n_genes = len(gene_names)
        region_names = dataset.region_names
        region_idx = {r: i for i, r in enumerate(region_names)}
        n_regions = len(region_names)

        # theta is exported only for shapes that support it; otherwise a scalar NaN
        write_theta = shape.can_export_params_to_matlab()
        if write_theta:
            theta = init_array(np.nan, shape.n_params(), n_genes, n_regions)
        else:
            theta = np.nan

        fit_scores = init_array(np.nan, n_genes, n_regions)
        LOO_scores = init_array(np.nan, n_genes, n_regions)
        fit_predictions = init_array(np.nan, *dataset.expression.shape)
        LOO_predictions = init_array(np.nan, *dataset.expression.shape)
        high_res_predictions = init_array(np.nan, cfg.n_curve_points_to_plot,
                                          n_genes, n_regions)
        # curves are evaluated on scaled ages, but the exported ages are unscaled
        scaled_high_res_ages = np.linspace(dataset.ages.min(),
                                           dataset.ages.max(),
                                           cfg.n_curve_points_to_plot)
        original_high_res_ages = scalers.unify(
            dataset.age_scaler).unscale(scaled_high_res_ages)
        if has_change_distributions:
            change_distribution_bin_centers = fits.change_distribution_params.bin_centers
            n_bins = len(change_distribution_bin_centers)
            change_distribution_weights = init_array(np.nan, n_bins, n_genes,
                                                     n_regions)
        else:
            change_distribution_bin_centers = []
            change_distribution_weights = []

        for (g, r), fit in dataset_fits.iteritems():
            series = dataset.get_one_series(g, r)
            ig = gene_idx[g]
            ir = region_idx[r]
            fit_scores[ig, ir] = fit.fit_score
            LOO_scores[ig, ir] = fit.LOO_score
            if write_theta and fit.theta is not None:
                theta[:, ig, ir] = fit.theta
            if fit.fit_predictions is not None:
                fit_predictions[series.original_inds, ig,
                                ir] = fit.fit_predictions
            if fit.LOO_predictions is not None:
                LOO_predictions[series.original_inds, ig,
                                ir] = fit.LOO_predictions
            if fit.theta is not None:
                high_res_predictions[:, ig, ir] = shape.f(
                    fit.theta, scaled_high_res_ages)
            # Only touch the weights array when it was allocated above; without
            # this guard the placeholder list would be indexed with a tuple and
            # raise TypeError if a fit happens to carry weights.
            if has_change_distributions:
                change_weights = getattr(fit, 'change_distribution_weights',
                                         None)
                if change_weights is not None:
                    change_distribution_weights[:, ig, ir] = change_weights

        mdict = dict(
            gene_names=list_of_strings_to_matlab_cell_array(gene_names),
            region_names=list_of_strings_to_matlab_cell_array(region_names),
            theta=theta,
            fit_scores=fit_scores,
            LOO_scores=LOO_scores,
            fit_predictions=fit_predictions,
            LOO_predictions=LOO_predictions,
            high_res_predictions=high_res_predictions,
            high_res_ages=original_high_res_ages,
            change_distribution_bin_centers=change_distribution_bin_centers,
            change_distribution_weights=change_distribution_weights,
        )
        savemat(filename, mdict, oned_as='column')
            bin_centers, weights)


def calc_bootstrap_change_distribution(shape, theta_samples, bin_edges):
    """Return the mean change distribution across the columns of `theta_samples`.

    Each column of the 2-D `theta_samples` is one theta sample; its change
    distribution over `bin_edges` is accumulated and the sum is divided by the
    number of samples.
    """
    centers = bin_edges_to_centers(bin_edges)
    _, sample_count = theta_samples.shape
    accumulated = np.zeros(centers.shape)
    for k in xrange(sample_count):
        accumulated += calc_change_distribution(shape, theta_samples[:, k],
                                                bin_edges)
    # now values are in fraction of total change (doesn't have to sum up to 1
    # if ages don't cover the whole transition range)
    accumulated /= sample_count
    return accumulated


@cache(lambda data, fitter, fits: join(
    cache_dir(),
    fit_results_relative_path(data, fitter) + '-dprime-cube.pkl'))
def compute_dprime_measures_for_all_pairs(data, fitter, fits):
    genes = data.gene_names
    regions = data.region_names
    r2ds = data.region_to_dataset()
    cube_shape = (len(genes), len(regions), len(regions))
    d_mu = np.empty(cube_shape)  # mu2-mu1 for all genes and region pairs
    std = np.empty(cube_shape)  # std (combined) for all genes and region pairs

    def get_mu_std(g, r):
        dsfits = fits[r2ds[r]]
        fit = dsfits.get((g, r))
        if fit is None:
            return np.nan, np.nan
        else: