def compute_timing_info_for_all_fits(data, fitter, fits):
    """Collect the change-distribution mean, std and bin weights of every
       (gene, region) fit into dense arrays (NaN where no fit is available)
       and return them, together with the bin definitions, in a Bunch.
    """
    genes = data.gene_names
    regions = data.region_names
    r2ds = data.region_to_dataset()
    bin_edges = fits.change_distribution_params.bin_edges
    bin_centers = fits.change_distribution_params.bin_centers

    mu = init_array(np.nan, len(genes), len(regions))
    std = init_array(np.nan, len(genes), len(regions))
    weights = init_array(np.nan, len(genes), len(regions), len(bin_centers))
    for ig, g in enumerate(genes):
        for ir, r in enumerate(regions):
            dsfits = fits[r2ds[r]]
            fit = dsfits.get((g, r))
            if fit is None:
                continue
            mu[ig, ir], std[ig, ir] = fit.change_distribution_mean_std
            weights[ig, ir, :] = fit.change_distribution_weights

    return Bunch(
        bin_edges=bin_edges,
        bin_centers=bin_centers,
        weights=weights,
        mu=mu,
        std=std,
        genes=genes,
        regions=regions,
        age_scaler=data.age_scaler,
    )
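The helper `init_array` used throughout these examples is not shown on this page. Judging from how it is called (a fill value followed by the array dimensions), a minimal sketch could look like the following; the signature and dtype choice are assumptions. `Bunch` is likewise assumed to be an attribute-style dictionary from the surrounding project (similar to sklearn.utils.Bunch).

import numpy as np

def init_array(fill_value, *shape):
    # Hypothetical sketch: allocate an array of the given shape,
    # pre-filled with fill_value (typically np.nan in these examples).
    return np.full(shape, fill_value, dtype=float)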
def _add_dataset_correlation_fits_from_results_dictionary(dataset, ds_fits, dct_results):
    """Convert the flat dictionary produced by the job splitting into structures
       that are easier to use, and integrate them into the dataset fits.
    """
    region_to_ix_original_inds = {}
    for ir, r in enumerate(dataset.region_names):
        series = dataset.get_several_series(dataset.gene_names, r)
        region_to_ix_original_inds[r] = series.original_inds

    for (ir, loo_point), levels in dct_results.items():
        n_iterations = len(levels)
        r = dataset.region_names[ir]
        if loo_point is None:
            # Global fit - collect the parameters (theta, sigma, L) and compute
            # a correlation matrix for the region. The hack of using the key
            # (None, r) to store these results can be removed if/when dataset
            # fits is changed from a dictionary to a class with several fields.
            k = (None, r)
            if k not in ds_fits:
                ds_fits[k] = n_iterations * [None]
            for iLevel, level in enumerate(levels):
                ds_fits[k][iLevel] = level
                level.correlations = covariance_to_correlation(level.sigma)
        else:
            # LOO point - collect the predictions
            ix, iy = loo_point
            g = dataset.gene_names[iy]
            fit = ds_fits[(g, r)]
            if not hasattr(fit, 'with_correlations'):
                fit.with_correlations = [
                    # NOTE: predictions are placed at the original indices
                    # (before NaNs were removed by get_series)
                    Bunch(LOO_predictions=init_array(np.nan, len(dataset.ages)))
                    for _ in range(n_iterations)
                ]
            for iLevel, level_prediction in enumerate(levels):
                orig_ix = region_to_ix_original_inds[r][ix]
                fit.with_correlations[iLevel].LOO_predictions[orig_ix] = level_prediction
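`covariance_to_correlation` is called above but not defined in these examples. A minimal sketch of the standard conversion, dividing each covariance entry by the product of the corresponding standard deviations, might look like this; the exact behaviour of the project's version is an assumption based on its name and usage.

import numpy as np

def covariance_to_correlation(sigma):
    # Hypothetical sketch: normalize a covariance matrix into a correlation
    # matrix: corr[i, j] = cov[i, j] / (std[i] * std[j]).
    std = np.sqrt(np.diag(sigma))
    return sigma / np.outer(std, std)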
def save_as_mat_files(data, fitter, fits, has_change_distributions):
    """Export the fits of each dataset to a MATLAB .mat file under the cache directory."""
    for dataset in data.datasets:
        filename = join(cache_dir(), fit_results_relative_path(dataset, fitter) + '.mat')
        dataset_fits = fits[dataset.name]

        print('Saving mat file to {}'.format(filename))
        shape = fitter.shape

        gene_names = dataset.gene_names
        gene_idx = {g: i for i, g in enumerate(gene_names)}
        n_genes = len(gene_names)
        region_names = dataset.region_names
        region_idx = {r: i for i, r in enumerate(region_names)}
        n_regions = len(region_names)

        write_theta = shape.can_export_params_to_matlab()
        if write_theta:
            theta = init_array(np.nan, shape.n_params(), n_genes, n_regions)
        else:
            theta = np.nan

        fit_scores = init_array(np.nan, n_genes, n_regions)
        LOO_scores = init_array(np.nan, n_genes, n_regions)
        fit_predictions = init_array(np.nan, *dataset.expression.shape)
        LOO_predictions = init_array(np.nan, *dataset.expression.shape)
        high_res_predictions = init_array(np.nan, cfg.n_curve_points_to_plot, n_genes, n_regions)
        scaled_high_res_ages = np.linspace(dataset.ages.min(), dataset.ages.max(), cfg.n_curve_points_to_plot)
        original_high_res_ages = scalers.unify(dataset.age_scaler).unscale(scaled_high_res_ages)
        if has_change_distributions:
            change_distribution_bin_centers = fits.change_distribution_params.bin_centers
            n_bins = len(change_distribution_bin_centers)
            change_distribution_weights = init_array(np.nan, n_bins, n_genes, n_regions)
        else:
            change_distribution_bin_centers = []
            change_distribution_weights = []
        for (g, r), fit in dataset_fits.items():
            series = dataset.get_one_series(g, r)
            ig = gene_idx[g]
            ir = region_idx[r]
            fit_scores[ig, ir] = fit.fit_score
            LOO_scores[ig, ir] = fit.LOO_score
            if write_theta and fit.theta is not None:
                theta[:, ig, ir] = fit.theta
            if fit.fit_predictions is not None:
                fit_predictions[series.original_inds, ig, ir] = fit.fit_predictions
            if fit.LOO_predictions is not None:
                LOO_predictions[series.original_inds, ig, ir] = fit.LOO_predictions
            if fit.theta is not None:
                high_res_predictions[:, ig, ir] = shape.f(fit.theta, scaled_high_res_ages)
            change_weights = getattr(fit, 'change_distribution_weights', None)
            if change_weights is not None:
                change_distribution_weights[:, ig, ir] = change_weights
        mdict = dict(
            gene_names=list_of_strings_to_matlab_cell_array(gene_names),
            region_names=list_of_strings_to_matlab_cell_array(region_names),
            theta=theta,
            fit_scores=fit_scores,
            LOO_scores=LOO_scores,
            fit_predictions=fit_predictions,
            LOO_predictions=LOO_predictions,
            high_res_predictions=high_res_predictions,
            high_res_ages=original_high_res_ages,
            change_distribution_bin_centers=change_distribution_bin_centers,
            change_distribution_weights=change_distribution_weights,
        )
        savemat(filename, mdict, oned_as='column')
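An exported file can be read back for a quick sanity check with scipy.io.loadmat. The path below is a placeholder, but the field names match the mdict keys written above.

from scipy.io import loadmat

m = loadmat('fits.mat')        # hypothetical path to one exported file
fit_scores = m['fit_scores']   # genes x regions matrix, NaN where no fit was stored
LOO_scores = m['LOO_scores']   # same shape, leave-one-out scores
theta = m['theta']             # n_params x genes x regions, if the shape supported export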