def compute_timing_info_for_all_fits(data, fitter, fits):
    """Collect change-distribution timing statistics for every (gene, region).

    Returns a Bunch with dense arrays indexed [gene, region] (and
    [gene, region, bin] for the weights); cells without a fit stay NaN.
    """
    genes = data.gene_names
    regions = data.region_names
    region_to_ds = data.region_to_dataset()
    dist_params = fits.change_distribution_params
    bin_edges = dist_params.bin_edges
    bin_centers = dist_params.bin_centers

    n_genes, n_regions = len(genes), len(regions)
    mu = init_array(np.NaN, n_genes, n_regions)
    std = init_array(np.NaN, n_genes, n_regions)
    weights = init_array(np.NaN, n_genes, n_regions, len(bin_centers))

    for ig, gene in enumerate(genes):
        for ir, region in enumerate(regions):
            dataset_fits = fits[region_to_ds[region]]
            fit = dataset_fits.get((gene, region))
            if fit is None:
                continue  # no fit for this pair -- leave NaN
            mu[ig, ir], std[ig, ir] = fit.change_distribution_mean_std
            weights[ig, ir, :] = fit.change_distribution_weights

    return Bunch(
        bin_edges=bin_edges,
        bin_centers=bin_centers,
        weights=weights,
        mu=mu,
        std=std,
        genes=genes,
        regions=regions,
        age_scaler=data.age_scaler,
    )
def compute_timing_info_for_all_fits(data, fitter, fits):
    """Gather per-(gene, region) timing info from the fits into NaN-padded
    arrays: mean/std of the change distribution plus its bin weights.
    """
    gene_list = data.gene_names
    region_list = data.region_names
    r2ds = data.region_to_dataset()
    params = fits.change_distribution_params

    grid = (len(gene_list), len(region_list))
    mu = init_array(np.NaN, *grid)
    std = init_array(np.NaN, *grid)
    weights = init_array(np.NaN, grid[0], grid[1], len(params.bin_centers))

    for gi, gene in enumerate(gene_list):
        for ri, region in enumerate(region_list):
            fit = fits[r2ds[region]].get((gene, region))
            if fit is None:
                # Missing fits simply leave their NaN placeholders behind.
                continue
            mean_val, std_val = fit.change_distribution_mean_std
            mu[gi, ri] = mean_val
            std[gi, ri] = std_val
            weights[gi, ri, :] = fit.change_distribution_weights

    return Bunch(
        bin_edges=params.bin_edges,
        bin_centers=params.bin_centers,
        weights=weights,
        mu=mu,
        std=std,
        genes=gene_list,
        regions=region_list,
        age_scaler=data.age_scaler,
    )
def _add_dataset_correlation_fits_from_results_dictionary(dataset, ds_fits, dct_results):
    """This function converts the results of the job_splitting which is a flat
       dictionary to structures which are easier to use and integrated into the
       dataset fits

       Keys of dct_results are (ir, loo_point) where ir indexes
       dataset.region_names; loo_point is either None (a global fit for the
       region) or an (ix, iy) pair identifying a left-out sample point.
       Each value is a sequence of per-iteration "level" results.
       Mutates both ds_fits and the level objects in place; returns None.
    """
    # Map each region name to the original sample indexes of its series,
    # so LOO predictions can be placed back at pre-NaN-removal positions.
    region_to_ix_original_inds = {}
    for ir,r in enumerate(dataset.region_names):
        series = dataset.get_several_series(dataset.gene_names,r)
        region_to_ix_original_inds[r] = series.original_inds
    for (ir,loo_point), levels in dct_results.iteritems():
        n_iterations = len(levels)
        r = dataset.region_names[ir]
        if loo_point is None:
            # Global fit - collect the parameters (theta, sigma, L) and compute a correlation matrix for the region
            # the hack of using the key (None,r) to store these results can be removed if/when dataset fits is changed from a dictionary to a class with several fields
            k = (None,r)
            if k not in ds_fits:
                ds_fits[k] = n_iterations*[None]
            for iLevel, level in enumerate(levels):
                ds_fits[k][iLevel] = level
                # Derive a correlation matrix from the level's covariance in place.
                level.correlations = covariance_to_correlation(level.sigma)
        else:
            # LOO point - collect the predictions
            ix,iy = loo_point
            g = dataset.gene_names[iy]
            fit = ds_fits[(g,r)]
            # Lazily attach one prediction buffer per iteration level,
            # sized to the full (original) ages vector.
            if not hasattr(fit, 'with_correlations'):
                fit.with_correlations = [
                    Bunch(LOO_predictions=init_array(np.NaN, len(dataset.ages))) # NOTE: we place the predictions at the original indexes (before NaN were removed by the get_series)
                    for _ in xrange(n_iterations)
                ]
            for iLevel, level_prediction in enumerate(levels):
                orig_ix = region_to_ix_original_inds[r][ix]
                fit.with_correlations[iLevel].LOO_predictions[orig_ix] = level_prediction
def _add_dataset_correlation_fits_from_results_dictionary(
        dataset, ds_fits, dct_results):
    """Fold the flat job-splitting results dictionary into the dataset fits.

    Global (non-LOO) results are stored under the (None, region) key of
    ds_fits and get a correlations matrix derived from their sigma; LOO
    results are written into per-iteration prediction buffers attached to
    the corresponding (gene, region) fit. Everything is mutated in place.
    """
    # Remember, per region, where each series sample sat before NaN removal.
    orig_inds_by_region = {}
    for region in dataset.region_names:
        series = dataset.get_several_series(dataset.gene_names, region)
        orig_inds_by_region[region] = series.original_inds

    for (ir, loo_point), levels in dct_results.iteritems():
        region = dataset.region_names[ir]
        n_levels = len(levels)

        if loo_point is None:
            # Global fit - collect the parameters (theta, sigma, L) and compute a correlation matrix for the region
            # the hack of using the key (None,r) to store these results can be removed if/when dataset fits is changed from a dictionary to a class with several fields
            key = (None, region)
            ds_fits.setdefault(key, [None] * n_levels)
            for level_ix, level in enumerate(levels):
                level.correlations = covariance_to_correlation(level.sigma)
                ds_fits[key][level_ix] = level
        else:
            # LOO point - collect the predictions
            ix, iy = loo_point
            fit = ds_fits[(dataset.gene_names[iy], region)]
            if not hasattr(fit, 'with_correlations'):
                # NOTE: we place the predictions at the original indexes
                # (before NaN were removed by the get_series)
                fit.with_correlations = [
                    Bunch(LOO_predictions=init_array(np.NaN, len(dataset.ages)))
                    for _ in xrange(n_levels)
                ]
            orig_ix = orig_inds_by_region[region][ix]
            for level_ix, prediction in enumerate(levels):
                fit.with_correlations[level_ix].LOO_predictions[orig_ix] = prediction
def save_as_mat_files(data, fitter, fits, has_change_distributions): for dataset in data.datasets: filename = join(cache_dir(), fit_results_relative_path(dataset,fitter) + '.mat') dataset_fits = fits[dataset.name] print 'Saving mat file to {}'.format(filename) shape = fitter.shape gene_names = dataset.gene_names gene_idx = {g:i for i,g in enumerate(gene_names)} n_genes = len(gene_names) region_names = dataset.region_names region_idx = {r:i for i,r in enumerate(region_names)} n_regions = len(region_names) write_theta = shape.can_export_params_to_matlab() if write_theta: theta = init_array(np.NaN, shape.n_params(), n_genes,n_regions) else: theta = np.NaN fit_scores = init_array(np.NaN, n_genes,n_regions) LOO_scores = init_array(np.NaN, n_genes,n_regions) fit_predictions = init_array(np.NaN, *dataset.expression.shape) LOO_predictions = init_array(np.NaN, *dataset.expression.shape) high_res_predictions = init_array(np.NaN, cfg.n_curve_points_to_plot, n_genes, n_regions) scaled_high_res_ages = np.linspace(dataset.ages.min(), dataset.ages.max(), cfg.n_curve_points_to_plot) original_high_res_ages = scalers.unify(dataset.age_scaler).unscale(scaled_high_res_ages) if has_change_distributions: change_distribution_bin_centers = fits.change_distribution_params.bin_centers n_bins = len(change_distribution_bin_centers) change_distribution_weights = init_array(np.NaN, n_bins, n_genes, n_regions) else: change_distribution_bin_centers = [] change_distribution_weights = [] for (g,r),fit in dataset_fits.iteritems(): series = dataset.get_one_series(g,r) ig = gene_idx[g] ir = region_idx[r] fit_scores[ig,ir] = fit.fit_score LOO_scores[ig,ir] = fit.LOO_score if write_theta and fit.theta is not None: theta[:,ig,ir] = fit.theta if fit.fit_predictions is not None: fit_predictions[series.original_inds,ig,ir] = fit.fit_predictions if fit.LOO_predictions is not None: LOO_predictions[series.original_inds,ig,ir] = fit.LOO_predictions if fit.theta is not None: high_res_predictions[:,ig,ir] = 
shape.f(fit.theta, scaled_high_res_ages) change_weights = getattr(fit,'change_distribution_weights',None) if change_weights is not None: change_distribution_weights[:,ig,ir] = change_weights mdict = dict( gene_names = list_of_strings_to_matlab_cell_array(gene_names), region_names = list_of_strings_to_matlab_cell_array(region_names), theta = theta, fit_scores = fit_scores, LOO_scores = LOO_scores, fit_predictions = fit_predictions, LOO_predictions = LOO_predictions, high_res_predictions = high_res_predictions, high_res_ages = original_high_res_ages, change_distribution_bin_centers = change_distribution_bin_centers, change_distribution_weights = change_distribution_weights, ) savemat(filename, mdict, oned_as='column')
def save_as_mat_files(data, fitter, fits, has_change_distributions):
    """Export the fitting results of every dataset in *data* to a MATLAB
    .mat file (one file per dataset, under cache_dir()).

    Exported arrays are indexed [gene, region] (theta, high-res curves and
    change weights carry an extra leading dimension); missing fits stay NaN.
    """
    for dataset in data.datasets:
        filename = join(cache_dir(),
                        fit_results_relative_path(dataset, fitter) + '.mat')
        dataset_fits = fits[dataset.name]
        print 'Saving mat file to {}'.format(filename)

        shape = fitter.shape
        gene_names = dataset.gene_names
        gene_idx = {g: i for i, g in enumerate(gene_names)}
        n_genes = len(gene_names)
        region_names = dataset.region_names
        region_idx = {r: i for i, r in enumerate(region_names)}
        n_regions = len(region_names)

        # theta is exported only when the shape supports it; otherwise a
        # scalar NaN placeholder is written to the mat file.
        write_theta = shape.can_export_params_to_matlab()
        if write_theta:
            theta = init_array(np.NaN, shape.n_params(), n_genes, n_regions)
        else:
            theta = np.NaN

        fit_scores = init_array(np.NaN, n_genes, n_regions)
        LOO_scores = init_array(np.NaN, n_genes, n_regions)
        fit_predictions = init_array(np.NaN, *dataset.expression.shape)
        LOO_predictions = init_array(np.NaN, *dataset.expression.shape)
        high_res_predictions = init_array(np.NaN, cfg.n_curve_points_to_plot,
                                          n_genes, n_regions)
        # Dense age grid for curve evaluation; exported ages are unscaled
        # back to the original units via the dataset's age_scaler.
        scaled_high_res_ages = np.linspace(dataset.ages.min(),
                                           dataset.ages.max(),
                                           cfg.n_curve_points_to_plot)
        original_high_res_ages = scalers.unify(
            dataset.age_scaler).unscale(scaled_high_res_ages)
        if has_change_distributions:
            change_distribution_bin_centers = fits.change_distribution_params.bin_centers
            n_bins = len(change_distribution_bin_centers)
            change_distribution_weights = init_array(np.NaN, n_bins, n_genes,
                                                     n_regions)
        else:
            # NOTE(review): these stay plain lists; if a fit nevertheless
            # carries change_distribution_weights the indexing below would
            # fail -- confirm callers keep the flag and fits in sync.
            change_distribution_bin_centers = []
            change_distribution_weights = []
        for (g, r), fit in dataset_fits.iteritems():
            series = dataset.get_one_series(g, r)
            ig = gene_idx[g]
            ir = region_idx[r]
            fit_scores[ig, ir] = fit.fit_score
            LOO_scores[ig, ir] = fit.LOO_score
            if write_theta and fit.theta is not None:
                theta[:, ig, ir] = fit.theta
            # Predictions are placed at the original (pre-NaN-removal)
            # sample indexes of the series.
            if fit.fit_predictions is not None:
                fit_predictions[series.original_inds, ig,
                                ir] = fit.fit_predictions
            if fit.LOO_predictions is not None:
                LOO_predictions[series.original_inds, ig,
                                ir] = fit.LOO_predictions
            if fit.theta is not None:
                high_res_predictions[:, ig, ir] = shape.f(fit.theta,
                                                          scaled_high_res_ages)
            change_weights = getattr(fit, 'change_distribution_weights', None)
            if change_weights is not None:
                change_distribution_weights[:, ig, ir] = change_weights
        mdict = dict(
            gene_names=list_of_strings_to_matlab_cell_array(gene_names),
            region_names=list_of_strings_to_matlab_cell_array(region_names),
            theta=theta,
            fit_scores=fit_scores,
            LOO_scores=LOO_scores,
            fit_predictions=fit_predictions,
            LOO_predictions=LOO_predictions,
            high_res_predictions=high_res_predictions,
            high_res_ages=original_high_res_ages,
            change_distribution_bin_centers=change_distribution_bin_centers,
            change_distribution_weights=change_distribution_weights,
        )
        savemat(filename, mdict, oned_as='column')