def export_cube():
    """Export the pickled region-pair timing cube to a .mat file.

    Loads the cube from ``RegionPairTiming.cube_filename`` and writes its
    arrays, plus a README string describing each exported variable, to
    ``<results_dir()>/export/cube.mat`` via ``save_matfile``.
    """
    cube = load_pickle(RegionPairTiming.cube_filename)

    # The README is stored inside the mat file itself, so each entry name
    # must match the variable name used in mdict below (e.g. "scores").
    README = """\
d_mu:
    mu(r2)-mu(r1) for every gene and region pair.
    Dimensions: <n-genes> X <n-regions> X <n-regions>

combined_std:
    The combined standard deviation of the two change distributions.
    std = sqrt(0.5*(std1^2 + std2^2))
    Dimensions: <n-genes> X <n-regions> X <n-regions>

scores:
    The d' for the two change distributions. Equal to d_mu ./ combined_std.
    Dimensions: <n-genes> X <n-regions> X <n-regions>

genes:
    Gene names for the genes represented in other arrays

regions:
    Region names for the regions represented in other arrays

age_scaler:
    The scaling used for ages (i.e. 'log' means x' = log(x + 38/52))
"""
    mdict = dict(
        README_CUBE=README,
        genes=list_of_strings_to_matlab_cell_array(cube.genes),
        regions=list_of_strings_to_matlab_cell_array(cube.regions),
        age_scaler=scalers.unify(cube.age_scaler).cache_name(),
        d_mu=cube.d_mu,
        combined_std=cube.std,
        # element-wise ratio, the d' described in the README
        scores=cube.d_mu / cube.std,
    )
    save_matfile(mdict, join(results_dir(), 'export', 'cube.mat'))
def export_cube():
    """Export the pickled region-pair timing cube to a .mat file.

    Loads the cube from ``RegionPairTiming.cube_filename`` and writes its
    arrays, plus a README string describing each exported variable, to
    ``<results_dir()>/export/cube.mat`` via ``save_matfile``.
    """
    cube = load_pickle(RegionPairTiming.cube_filename)

    # The README is stored inside the mat file itself, so each entry name
    # must match the variable name used in mdict below (e.g. "scores").
    README = """\
d_mu:
    mu(r2)-mu(r1) for every gene and region pair.
    Dimensions: <n-genes> X <n-regions> X <n-regions>

combined_std:
    The combined standard deviation of the two change distributions.
    std = sqrt(0.5*(std1^2 + std2^2))
    Dimensions: <n-genes> X <n-regions> X <n-regions>

scores:
    The d' for the two change distributions. Equal to d_mu ./ combined_std.
    Dimensions: <n-genes> X <n-regions> X <n-regions>

genes:
    Gene names for the genes represented in other arrays

regions:
    Region names for the regions represented in other arrays

age_scaler:
    The scaling used for ages (i.e. 'log' means x' = log(x + 38/52))
"""
    mdict = dict(
        README_CUBE=README,
        genes=list_of_strings_to_matlab_cell_array(cube.genes),
        regions=list_of_strings_to_matlab_cell_array(cube.regions),
        age_scaler=scalers.unify(cube.age_scaler).cache_name(),
        d_mu=cube.d_mu,
        combined_std=cube.std,
        # element-wise ratio, the d' described in the README
        scores=cube.d_mu / cube.std,
    )
    save_matfile(mdict, join(results_dir(), 'export', 'cube.mat'))
def scaled(self, scaler):
    """Return a new DevStage whose age range is mapped through `scaler`.

    `scaler` is first normalized with ``scalers.unify`` (which, per the
    original comment, takes care of a None scaler). ``num``, ``name`` and
    ``short_name`` are carried over unchanged; only ``from_age`` and
    ``to_age`` are scaled.
    """
    import scalers

    unified = scalers.unify(scaler)  # handle None
    scaled_from = unified.scale(self.from_age)
    scaled_to = unified.scale(self.to_age)
    return DevStage(
        num=self.num,
        name=self.name,
        short_name=self.short_name,
        from_age=scaled_from,
        to_age=scaled_to,
    )
def scaled(self, scaler):
    """Return a new DevStage whose age range is mapped through `scaler`.

    `scaler` is first normalized with ``scalers.unify`` (which, per the
    original comment, takes care of a None scaler). ``num``, ``name`` and
    ``short_name`` are carried over unchanged; only ``from_age`` and
    ``to_age`` are scaled.
    """
    import scalers

    unified = scalers.unify(scaler)  # handle None
    scaled_from = unified.scale(self.from_age)
    scaled_to = unified.scale(self.to_age)
    return DevStage(
        num=self.num,
        name=self.name,
        short_name=self.short_name,
        from_age=scaled_from,
        to_age=scaled_to,
    )
def add_age_ticks(ax, age_scaler, fontsize=None):
    """Use the development stages as x tick labels on `ax` and mark birth.

    Args:
        ax: axes object to decorate; its x ticks and tick labels are
            replaced and a dashed vertical line is drawn on it.
        age_scaler: scaler applied to every stage in ``dev_stages`` and to
            age 0 (birth) to place them on the x axis.
        fontsize: tick label font size; ``cfg.fontsize`` is used when None.
    """
    label_size = cfg.fontsize if fontsize is None else fontsize

    # one tick per development stage, placed at the stage's central age
    tick_positions = []
    tick_labels = []
    for stage in dev_stages:
        scaled_stage = stage.scaled(age_scaler)
        tick_positions.append(scaled_stage.central_age)
        tick_labels.append(scaled_stage.short_name)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(
        tick_labels,
        fontsize=label_size,
        fontstretch='condensed',
        rotation=90,
    )

    # dashed light-gray vertical line at birth (age 0 in unscaled units),
    # spanning the y limits the axes had when this function was called
    y_low, y_high = ax.get_ylim()
    x_birth = scalers.unify(age_scaler).scale(0)
    ax.plot([x_birth, x_birth], [y_low, y_high], '--', color='0.85')
def export_timing_info_for_all_fits(data, fitter, fits):
    """Compute the change distributions for all fits and save them as .mat.

    The timing info comes from ``compute_timing_info_for_all_fits``. The
    output file lives under ``cache_dir()`` at the fit results path for
    (data, fitter) with a ``-change-dist.mat`` suffix, and carries a README
    string describing every exported variable.
    """
    change_dist = compute_timing_info_for_all_fits(data, fitter, fits)

    README = """\
mu:
    The mean age of the change distribution for given gene and region.
    Dimensions: <n-genes> X <n-regions>

std:
    The standard deviation of the change distribution for given gene and region.
    Dimensions: <n-genes> X <n-regions>

genes:
    Gene names for the genes represented in other arrays

weights:
    The change distributions for each gene and region.
    Dimensions: <n-genes> X <n-regions> X <n-bins>

bin_centers:
    The ages for the center of each bin used in calculating the histogram in "weights".
    Dimensions: <n-bins> X 1

bin_edges:
    The edges of the bins used in calculating the change histogram.
    (centers can be calculated from the bin_edges, but it's convenient to have it pre-calculated)
    Dimensions: <n-bins + 1> X 1

regions:
    Region names for the regions represented in other arrays

age_scaler:
    The scaling used for ages (i.e. 'log' means x' = log(x + 38/52))
"""

    # string-valued entries are converted before assembling the mat dict
    gene_cells = list_of_strings_to_matlab_cell_array(change_dist.genes)
    region_cells = list_of_strings_to_matlab_cell_array(change_dist.regions)
    scaler_name = scalers.unify(change_dist.age_scaler).cache_name()

    mdict = dict(
        README_CHANGE_DISTRIBUTIONS=README,
        genes=gene_cells,
        regions=region_cells,
        age_scaler=scaler_name,
        mu=change_dist.mu,
        std=change_dist.std,
        bin_edges=change_dist.bin_edges,
        bin_centers=change_dist.bin_centers,
        weights=change_dist.weights,
    )

    base_dir = cache_dir()
    relative_name = fit_results_relative_path(data, fitter) + '-change-dist.mat'
    save_matfile(mdict, join(base_dir, relative_name))
def export_timing_info_for_all_fits(data, fitter, fits):
    """Compute the change distributions for all fits and save them as .mat.

    The timing info comes from ``compute_timing_info_for_all_fits``. The
    output file lives under ``cache_dir()`` at the fit results path for
    (data, fitter) with a ``-change-dist.mat`` suffix, and carries a README
    string describing every exported variable.
    """
    change_dist = compute_timing_info_for_all_fits(data, fitter, fits)

    README = """\
mu:
    The mean age of the change distribution for given gene and region.
    Dimensions: <n-genes> X <n-regions>

std:
    The standard deviation of the change distribution for given gene and region.
    Dimensions: <n-genes> X <n-regions>

genes:
    Gene names for the genes represented in other arrays

weights:
    The change distributions for each gene and region.
    Dimensions: <n-genes> X <n-regions> X <n-bins>

bin_centers:
    The ages for the center of each bin used in calculating the histogram in "weights".
    Dimensions: <n-bins> X 1

bin_edges:
    The edges of the bins used in calculating the change histogram.
    (centers can be calculated from the bin_edges, but it's convenient to have it pre-calculated)
    Dimensions: <n-bins + 1> X 1

regions:
    Region names for the regions represented in other arrays

age_scaler:
    The scaling used for ages (i.e. 'log' means x' = log(x + 38/52))
"""

    # string-valued entries are converted before assembling the mat dict
    gene_cells = list_of_strings_to_matlab_cell_array(change_dist.genes)
    region_cells = list_of_strings_to_matlab_cell_array(change_dist.regions)
    scaler_name = scalers.unify(change_dist.age_scaler).cache_name()

    mdict = dict(
        README_CHANGE_DISTRIBUTIONS=README,
        genes=gene_cells,
        regions=region_cells,
        age_scaler=scaler_name,
        mu=change_dist.mu,
        std=change_dist.std,
        bin_edges=change_dist.bin_edges,
        bin_centers=change_dist.bin_centers,
        weights=change_dist.weights,
    )

    base_dir = cache_dir()
    relative_name = fit_results_relative_path(data, fitter) + '-change-dist.mat'
    save_matfile(mdict, join(base_dir, relative_name))
def save_as_mat_files(data, fitter, fits, has_change_distributions):
    """Save the fit results of every dataset in `data` to its own .mat file.

    For each dataset the file is written with ``savemat`` to
    ``cache_dir()`` joined with ``fit_results_relative_path(dataset, fitter)``
    plus a ``.mat`` suffix. The exported arrays are indexed by gene and
    region in the order of ``dataset.gene_names`` / ``dataset.region_names``.

    Args:
        data: object whose ``datasets`` are iterated; each dataset supplies
            ``name``, ``gene_names``, ``region_names``, ``expression``,
            ``ages``, ``age_scaler`` and ``get_one_series(g, r)``.
        fitter: its ``shape`` decides whether parameters are exported and
            evaluates the high resolution curves.
        fits: indexed by dataset name to get a mapping from (gene, region)
            to a fit object; ``fits.change_distribution_params.bin_centers``
            is read when `has_change_distributions` is true.
        has_change_distributions: when true, the per-fit change distribution
            weights are exported; when false, empty placeholders are exported
            and any weights found on the fits are ignored.
    """
    for dataset in data.datasets:
        filename = join(cache_dir(), fit_results_relative_path(dataset, fitter) + '.mat')
        dataset_fits = fits[dataset.name]

        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the print function.
        print('Saving mat file to {}'.format(filename))

        shape = fitter.shape
        gene_names = dataset.gene_names
        gene_idx = {g: i for i, g in enumerate(gene_names)}
        n_genes = len(gene_names)
        region_names = dataset.region_names
        region_idx = {r: i for i, r in enumerate(region_names)}
        n_regions = len(region_names)

        # NOTE(review): init_array presumably returns an array of the given
        # dimensions filled with the first argument, so entries without a fit
        # stay NaN - confirm against its definition.
        write_theta = shape.can_export_params_to_matlab()
        if write_theta:
            theta = init_array(np.nan, shape.n_params(), n_genes, n_regions)
        else:
            theta = np.nan

        fit_scores = init_array(np.nan, n_genes, n_regions)
        LOO_scores = init_array(np.nan, n_genes, n_regions)
        fit_predictions = init_array(np.nan, *dataset.expression.shape)
        LOO_predictions = init_array(np.nan, *dataset.expression.shape)
        high_res_predictions = init_array(
            np.nan, cfg.n_curve_points_to_plot, n_genes, n_regions)

        # Curves are evaluated on an even grid in scaled age, but the ages
        # written to the file are converted back to the original units.
        scaled_high_res_ages = np.linspace(
            dataset.ages.min(), dataset.ages.max(), cfg.n_curve_points_to_plot)
        original_high_res_ages = scalers.unify(
            dataset.age_scaler).unscale(scaled_high_res_ages)

        if has_change_distributions:
            change_distribution_bin_centers = fits.change_distribution_params.bin_centers
            n_bins = len(change_distribution_bin_centers)
            change_distribution_weights = init_array(
                np.nan, n_bins, n_genes, n_regions)
        else:
            change_distribution_bin_centers = []
            change_distribution_weights = []

        for (g, r), fit in dataset_fits.iteritems():
            series = dataset.get_one_series(g, r)
            ig = gene_idx[g]
            ir = region_idx[r]
            fit_scores[ig, ir] = fit.fit_score
            LOO_scores[ig, ir] = fit.LOO_score
            if write_theta and fit.theta is not None:
                theta[:, ig, ir] = fit.theta
            if fit.fit_predictions is not None:
                fit_predictions[series.original_inds, ig, ir] = fit.fit_predictions
            if fit.LOO_predictions is not None:
                LOO_predictions[series.original_inds, ig, ir] = fit.LOO_predictions
            if fit.theta is not None:
                high_res_predictions[:, ig, ir] = shape.f(fit.theta, scaled_high_res_ages)
            # Only touch the weights array when it was actually allocated:
            # without change distributions it is a plain empty list, and
            # indexing that with [:, ig, ir] would raise TypeError.
            if has_change_distributions:
                change_weights = getattr(fit, 'change_distribution_weights', None)
                if change_weights is not None:
                    change_distribution_weights[:, ig, ir] = change_weights

        mdict = dict(
            gene_names=list_of_strings_to_matlab_cell_array(gene_names),
            region_names=list_of_strings_to_matlab_cell_array(region_names),
            theta=theta,
            fit_scores=fit_scores,
            LOO_scores=LOO_scores,
            fit_predictions=fit_predictions,
            LOO_predictions=LOO_predictions,
            high_res_predictions=high_res_predictions,
            high_res_ages=original_high_res_ages,
            change_distribution_bin_centers=change_distribution_bin_centers,
            change_distribution_weights=change_distribution_weights,
        )
        savemat(filename, mdict, oned_as='column')
def save_as_mat_files(data, fitter, fits, has_change_distributions):
    """Save the fit results of every dataset in `data` to its own .mat file.

    For each dataset the file is written with ``savemat`` to
    ``cache_dir()`` joined with ``fit_results_relative_path(dataset, fitter)``
    plus a ``.mat`` suffix. The exported arrays are indexed by gene and
    region in the order of ``dataset.gene_names`` / ``dataset.region_names``.

    Args:
        data: object whose ``datasets`` are iterated; each dataset supplies
            ``name``, ``gene_names``, ``region_names``, ``expression``,
            ``ages``, ``age_scaler`` and ``get_one_series(g, r)``.
        fitter: its ``shape`` decides whether parameters are exported and
            evaluates the high resolution curves.
        fits: indexed by dataset name to get a mapping from (gene, region)
            to a fit object; ``fits.change_distribution_params.bin_centers``
            is read when `has_change_distributions` is true.
        has_change_distributions: when true, the per-fit change distribution
            weights are exported; when false, empty placeholders are exported
            and any weights found on the fits are ignored.
    """
    for dataset in data.datasets:
        filename = join(cache_dir(), fit_results_relative_path(dataset, fitter) + '.mat')
        dataset_fits = fits[dataset.name]

        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the print function.
        print('Saving mat file to {}'.format(filename))

        shape = fitter.shape
        gene_names = dataset.gene_names
        gene_idx = {g: i for i, g in enumerate(gene_names)}
        n_genes = len(gene_names)
        region_names = dataset.region_names
        region_idx = {r: i for i, r in enumerate(region_names)}
        n_regions = len(region_names)

        # NOTE(review): init_array presumably returns an array of the given
        # dimensions filled with the first argument, so entries without a fit
        # stay NaN - confirm against its definition.
        write_theta = shape.can_export_params_to_matlab()
        if write_theta:
            theta = init_array(np.nan, shape.n_params(), n_genes, n_regions)
        else:
            theta = np.nan

        fit_scores = init_array(np.nan, n_genes, n_regions)
        LOO_scores = init_array(np.nan, n_genes, n_regions)
        fit_predictions = init_array(np.nan, *dataset.expression.shape)
        LOO_predictions = init_array(np.nan, *dataset.expression.shape)
        high_res_predictions = init_array(
            np.nan, cfg.n_curve_points_to_plot, n_genes, n_regions)

        # Curves are evaluated on an even grid in scaled age, but the ages
        # written to the file are converted back to the original units.
        scaled_high_res_ages = np.linspace(
            dataset.ages.min(), dataset.ages.max(), cfg.n_curve_points_to_plot)
        original_high_res_ages = scalers.unify(
            dataset.age_scaler).unscale(scaled_high_res_ages)

        if has_change_distributions:
            change_distribution_bin_centers = fits.change_distribution_params.bin_centers
            n_bins = len(change_distribution_bin_centers)
            change_distribution_weights = init_array(
                np.nan, n_bins, n_genes, n_regions)
        else:
            change_distribution_bin_centers = []
            change_distribution_weights = []

        for (g, r), fit in dataset_fits.iteritems():
            series = dataset.get_one_series(g, r)
            ig = gene_idx[g]
            ir = region_idx[r]
            fit_scores[ig, ir] = fit.fit_score
            LOO_scores[ig, ir] = fit.LOO_score
            if write_theta and fit.theta is not None:
                theta[:, ig, ir] = fit.theta
            if fit.fit_predictions is not None:
                fit_predictions[series.original_inds, ig, ir] = fit.fit_predictions
            if fit.LOO_predictions is not None:
                LOO_predictions[series.original_inds, ig, ir] = fit.LOO_predictions
            if fit.theta is not None:
                high_res_predictions[:, ig, ir] = shape.f(fit.theta, scaled_high_res_ages)
            # Only touch the weights array when it was actually allocated:
            # without change distributions it is a plain empty list, and
            # indexing that with [:, ig, ir] would raise TypeError.
            if has_change_distributions:
                change_weights = getattr(fit, 'change_distribution_weights', None)
                if change_weights is not None:
                    change_distribution_weights[:, ig, ir] = change_weights

        mdict = dict(
            gene_names=list_of_strings_to_matlab_cell_array(gene_names),
            region_names=list_of_strings_to_matlab_cell_array(region_names),
            theta=theta,
            fit_scores=fit_scores,
            LOO_scores=LOO_scores,
            fit_predictions=fit_predictions,
            LOO_predictions=LOO_predictions,
            high_res_predictions=high_res_predictions,
            high_res_ages=original_high_res_ages,
            change_distribution_bin_centers=change_distribution_bin_centers,
            change_distribution_weights=change_distribution_weights,
        )
        savemat(filename, mdict, oned_as='column')