def validate_data(self, check_prior=True, check_gold_standard=True):
    """
    Check the loaded network data, building null networks where flagged.

    :param check_prior: Process the prior (create a null prior when
        ``use_no_prior`` is set and no prior is loaded)
    :type check_prior: bool
    :param check_gold_standard: Process the gold standard (create a null
        gold standard when ``use_no_gold_standard`` is set, otherwise
        require that one is loaded)
    :type check_gold_standard: bool
    :raises ValueError: If the gold standard is checked, is missing, and
        ``use_no_gold_standard`` is not set; or if both checks are enabled
        and neither network exists
    """

    if check_prior and self.use_no_prior:
        if self.priors_data is not None:
            # Loaded data wins over the flag
            warnings.warn("The use_no_prior flag will be ignored because prior data exists")
        else:
            Debug.vprint("A null prior is has been created", level=0)
            self.priors_data = self._create_null_prior(self._gene_names, self.tf_names)

    if check_gold_standard:
        if self.use_no_gold_standard:
            if self.gold_standard is not None:
                # Loaded data wins over the flag
                warnings.warn("The use_no_gold_standard flag will be ignored because gold standard data exists")
            else:
                Debug.vprint("A null gold standard has been created", level=0)
                self.gold_standard = self._create_null_prior(self._gene_names, self.tf_names)
        elif self.gold_standard is None:
            raise ValueError(
                "No gold standard found. Model scoring will be invalid. "
                "Set worker.set_network_data_flags(use_no_gold_standard=True) to explicitly continue."
            )

    # Some network information has to exist when both networks were checked
    nothing_loaded = self.priors_data is None and self.gold_standard is None
    if check_prior and check_gold_standard and nothing_loaded:
        raise ValueError("No gold standard or priors have been provided")
def validate_data(self):
    """
    Check the loaded network data, building null networks where flagged.

    A null prior / null gold standard is created from
    ``self.data.gene_names`` and ``self.tf_names`` when the matching
    ``use_no_*`` flag is set and nothing has been loaded. If data has been
    loaded, the flag is ignored with a warning.

    :raises ValueError: If neither a prior nor a gold standard exists
    """

    if self.use_no_prior:
        if self.priors_data is not None:
            warnings.warn("The use_no_prior flag will be ignored because prior data exists")
        else:
            Debug.vprint("A null prior is has been created", level=0)
            self.priors_data = self._create_null_prior(self.data.gene_names, self.tf_names)

    if self.use_no_gold_standard:
        if self.gold_standard is not None:
            warnings.warn("The use_no_gold_standard flag will be ignored because gold standard data exists")
        else:
            Debug.vprint("A null gold standard has been created", level=0)
            self.gold_standard = self._create_null_prior(self.data.gene_names, self.tf_names)

    # At least one of the two networks must be available
    if self.priors_data is None and self.gold_standard is None:
        raise ValueError("No gold standard or priors have been provided")
def load_activity(self, file=None, file_type=None):
    """
    Load a precomputed TF activity matrix into ``self.design``.

    The loaded matrix is trimmed to the features that are also in
    ``self.tf_names`` and its sample names are checked against
    ``self.response``.

    :param file: File to load. Defaults to ``self._tfa_input_file``
    :type file: str, optional
    :param file_type: File type, "h5ad" or "tsv" (case-insensitive).
        Defaults to ``self._tfa_input_file_type``
    :type file_type: str, optional
    :raises ValueError: If the file type is not "h5ad" or "tsv"
    """

    file = self._tfa_input_file if file is None else file
    file_type = self._tfa_input_file_type if file_type is None else file_type

    loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )

    # Dispatch on the resolved file_type for every branch. The TSV branch
    # used to test the expression loader type, which ignored the argument.
    normalized_type = file_type.lower() if isinstance(file_type, str) else file_type

    if normalized_type == "h5ad":
        self.design = loader.load_data_h5ad(file)
    elif normalized_type == "tsv":
        self.design = loader.load_data_tsv(file)
    else:
        # Fail loudly instead of carrying on with a stale or missing design
        raise ValueError("Invalid TFA input file type: {a}".format(a=file_type))

    Debug.vprint("Loaded {f} as design matrix {d}".format(d=self.design.shape, f=file), level=1)

    # Keep only TF columns; constant TFs are deliberately retained
    self.design.trim_genes(
        remove_constant_genes=False,
        trim_gene_list=self.design.gene_names.intersection(self.tf_names)
    )

    Debug.vprint("Trimmed to {d} for TF activity".format(d=self.design.shape), level=1)

    assert check.indexes_align([self.design.sample_names, self.response.sample_names])
def load_velocity(self, velocity_file=None, loader_type=None):
    """
    Load velocity data into ``self._velocity_data`` and name it "Velocity".

    :param velocity_file: File to load. Defaults to ``self._velocity_file_name``
    :type velocity_file: str, optional
    :param loader_type: Loader type constant. Defaults to
        ``self._velocity_file_type``; None is treated as TSV
    :type loader_type: str, optional
    :raises ValueError: If the loader type is not TSV, H5AD or HDF5
    """

    if velocity_file is None:
        velocity_file = self._velocity_file_name
    if loader_type is None:
        loader_type = self._velocity_file_type

    # Loaders take a transpose flag, which is the inverse of "columns are genes"
    transpose = not self.expression_matrix_columns_are_genes

    loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )

    Debug.vprint("Loading velocity data from {f}".format(f=velocity_file), level=1)

    if loader_type is None or loader_type == _TSV:
        velocity = loader.load_data_tsv(velocity_file, transpose_expression_data=transpose)
    elif loader_type == _H5AD:
        velocity = loader.load_data_h5ad(velocity_file, use_layer=self._velocity_h5_layer)
    elif loader_type == _HDF5:
        velocity = loader.load_data_hdf5(
            velocity_file,
            transpose_expression_data=transpose,
            use_layer=self._velocity_h5_layer
        )
    else:
        raise ValueError("Invalid velocity_file_type: {a}".format(a=loader_type))

    self._velocity_data = velocity
    self._velocity_data.name = "Velocity"
def read_priors(self, priors_file=None, gold_standard_file=None):
    """
    Load the prior network and the gold standard network from file.

    A network whose file name resolves to None is skipped. Each loaded
    network is summarized with ``loaded_file_info`` and checked for
    duplicated labels.

    :param priors_file: Prior network file. Defaults to ``self.priors_file``
    :type priors_file: str, optional
    :param gold_standard_file: Gold standard file. Defaults to
        ``self.gold_standard_file``
    :type gold_standard_file: str, optional
    """

    if priors_file is None:
        priors_file = self.priors_file
    if gold_standard_file is None:
        gold_standard_file = self.gold_standard_file

    loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )

    if priors_file is not None:
        Debug.vprint("Loading prior data from file {file}".format(file=priors_file), level=1)
        self.priors_data = loader.input_dataframe(priors_file)

        # Duplicate labels cause errors downstream, so report them now
        self.loaded_file_info("Priors data", self.priors_data)
        self._check_network_labels_unique("Priors_data", priors_file, self.priors_data)

    if gold_standard_file is not None:
        Debug.vprint("Loading gold_standard data from file {file}".format(file=gold_standard_file), level=1)
        self.gold_standard = loader.input_dataframe(gold_standard_file)

        # Duplicate labels cause errors downstream, so report them now
        self.loaded_file_info("Gold standard", self.gold_standard)
        self._check_network_labels_unique("Gold standard", gold_standard_file, self.gold_standard)
def make_data_noisy(data, random_seed=42):
    """
    Replace the expression data in a data object with simulated random data.

    ``data.expression_data`` is overwritten in place. The same object is
    also returned, so the function can be used either for its side effect
    or for its return value.

    :param data: Raw read data
    :type data: InferelatorData
    :param random_seed: Random seed for data generation
    :type random_seed: int
    :return: The data object that was passed in, now holding simulated data
    :rtype: InferelatorData
    """

    sample_counts = data.sample_counts

    # Any non-positive per-sample total is taken to mean that the data has
    # already been centered, so count-based scaling is not meaningful
    is_centered = np.any(sample_counts <= 0.)

    if data._is_integer:
        # Discrete sampling for count data
        Debug.vprint("Simulating integer count data for {n} samples".format(n=data.num_obs), level=0)

        if is_centered:
            p_vec = np.ones(data.num_genes, dtype=float)
        else:
            # Normalize to mean counts per sample and sum counts per gene
            # by matrix multiplication
            p_vec = (np.mean(sample_counts) / sample_counts).reshape(1, -1) @ data.expression_data

        # Flatten and convert counts to a probability vector
        p_vec = p_vec.flatten()
        p_vec = p_vec / p_vec.sum()

        data.expression_data = _sim_ints(p_vec, sample_counts, sparse=data.is_sparse, random_seed=random_seed)

    else:
        if is_centered:
            p_vec = np.zeros(data.num_genes, dtype=float)
        else:
            # Normalize to mean total measured value per sample, sum per
            # gene by matrix multiplication, then average over observations
            p_vec = (np.mean(sample_counts) / sample_counts).reshape(1, -1) @ data.expression_data
            p_vec /= data.num_obs

        Debug.vprint("Simulating float data for {n} samples".format(n=data.num_obs), level=0)

        data.expression_data = _sim_float(p_vec.flatten(), data.gene_stdev, data.num_obs, random_seed=random_seed)

    # The docstring has always promised a return value; provide it
    return data
def __init__(self, X, Y):
    """
    Store the predictor and response data and z-score the predictors.

    Note that ``X.zscore()`` is called on the object passed in, which is
    also the object stored as ``self.X``.

    :param X: Expression or Activity data [N x K]
    :type X: InferelatorData
    :param Y: Response expression data [N x G]
    :type Y: InferelatorData
    """

    # Predictor (TF) labels and count
    self.K, self.tfs = X.num_genes, X.gene_names

    # Response (gene) labels and count
    self.G, self.genes = Y.num_genes, Y.gene_names

    # Rescale the design expression or activity data on features
    self.X = X
    self.X.zscore()

    self.Y = Y

    _msg = "Predictor matrix {pr} and response matrix {re} ready"
    Debug.vprint(_msg.format(pr=X.shape, re=Y.shape))
def run_bootstrap(self, bootstrap_idx):
    """
    Run CLR and BBSR for one bootstrap on every task.

    :param bootstrap_idx: Index into each task's list of bootstraps
    :type bootstrap_idx: int
    :return: A list of betas and a list of rescaled betas, one entry per task
    :rtype: list, list
    """

    betas = []
    betas_resc = []

    for k in range(self._n_tasks):

        # Resample the design and response of this task with the same bootstrap
        boot = self._task_bootstraps[k][bootstrap_idx]
        X = self._task_design[k].get_bootstrap(boot)
        Y = self._task_response[k].get_bootstrap(boot)

        # Align the priors to targets (rows) and regulators (columns);
        # anything missing from the prior becomes 0
        priors_data = self._task_priors[k].reindex(labels=self._targets, axis=0)
        priors_data = priors_data.reindex(labels=self._regulators, axis=1)
        priors_data = priors_data.fillna(value=0)

        if self.clr_only:
            # Replace the prior with an all-zero frame of the same labels
            priors_data = pd.DataFrame(0, index=priors_data.index, columns=priors_data.columns)

        MPControl.sync_processes(pref="bbsr_pre")

        Debug.vprint('Calculating MI, Background MI, and CLR Matrix', level=0)
        clr_matrix, _ = self.mi_driver().run(Y, X, return_mi=False)

        Debug.vprint('Calculating task {k} betas using BBSR'.format(k=k), level=0)
        regression = BBSR(
            X,
            Y,
            clr_matrix,
            priors_data,
            prior_weight=self.prior_weight,
            no_prior_weight=self.no_prior_weight,
            nS=self.bsr_feature_num
        )
        t_beta, t_br = regression.run()

        betas.append(t_beta)
        betas_resc.append(t_br)

    return betas, betas_resc
def compute_common_data(self):
    """
    Build the design, response and half-tau response data.

    When no design-response driver is set, or the driver reports that the
    metadata cannot be used, all three are set to the expression data.
    Otherwise they are calculated by the driver. ``self.data`` is set to
    None afterwards.
    """

    if self.drd_driver is None:
        drd = None
    else:
        drd = self.drd_driver(metadata_handler=self.metadata_handler, return_half_tau=True)

    # No driver or no usable metadata: use the expression data directly
    if drd is None or not drd.validate_run(self.data.meta_data):
        self.design = self.response = self.half_tau_response = self.data

    # Otherwise calculate the design-response ODE
    # TODO: Rewrite DRD for InferelatorData
    # TODO: This is *horrifying* as is from a memory perspective
    # TODO: Really fix this soon
    else:
        Debug.vprint('Creating design and response matrix ... ')
        drd.delTmin = self.delTmin
        drd.delTmax = self.delTmax
        drd.tau = self.tau

        # The driver is handed a transposed dataframe, and its outputs are
        # transposed back before being wrapped
        drd_results = drd.run(self.data.to_df().T, self.data.meta_data)
        design, response, half_tau_response = drd_results

        self.design = InferelatorData(design.T)
        self.response = InferelatorData(response.T)
        self.half_tau_response = InferelatorData(half_tau_response.T)

    _msg = "Constructed design {d} and response {r} matrices"
    Debug.vprint(_msg.format(d=self.design.shape, r=self.response.shape), level=1)

    # Drop this reference to the expression data
    self.data = None
def _check_file_exists(self, file_name): """ Print a warning if a file doesn't exist :param file_name: str """ if file_name is not None and not os.path.isfile(self.input_path(file_name)): Debug.vprint("File {f} does not exist".format(f=file_name), level=0)
def filter_to_gene_list(self):
    """
    Restrict the expression data to ``self.gene_names`` and then restrict
    the priors to the genes that are left in the expression data.
    """

    Debug.vprint("Trimming expression matrix", level=1)
    self.data.trim_genes(trim_gene_list=self.gene_names)

    # Filter against the trimmed data, not the requested gene list
    remaining_genes = self.data.gene_names
    self.priors_data = self.prior_manager.filter_priors_to_genes(self.priors_data, remaining_genes)
def trim_genes(self, remove_constant_genes=True, trim_gene_list=None):
    """
    Remove genes (columns) that are unwanted from the data set.
    Do this in-place.

    A gene is kept only if it is in ``trim_gene_list`` (when provided), in
    ``self._adata.uns["trim_gene_list"]`` (when that key exists), and, when
    ``remove_constant_genes`` is set, its max minus min over observations
    is larger than a tolerance (0 for integer data, 10 * machine epsilon
    of the dtype otherwise).

    :param remove_constant_genes: Remove genes that do not vary
    :type remove_constant_genes: bool
    :param trim_gene_list: This is a list of genes to KEEP.
    :type trim_gene_list: list, pd.Series, pd.Index
    :raises ValueError: If NaN values are found while checking for
        constant genes, or if no genes remain after trimming
    """

    var_names = self._adata.var_names
    n_genes = len(var_names)

    keep_column_bool = np.ones((n_genes,), dtype=bool)

    if trim_gene_list is not None:
        keep_column_bool &= var_names.isin(trim_gene_list)

    if "trim_gene_list" in self._adata.uns:
        keep_column_bool &= var_names.isin(self._adata.uns["trim_gene_list"])

    list_trim = n_genes - np.sum(keep_column_bool)

    if remove_constant_genes:
        comp = 0 if self._is_integer else np.finfo(self.values.dtype).eps * 10

        # Range (max - min) per gene; zero range means a constant gene
        nz_var = self.values.max(axis=0) - self.values.min(axis=0)
        nz_var = nz_var.toarray().flatten() if self.is_sparse else nz_var

        if np.any(np.isnan(nz_var)):
            raise ValueError("NaN values are present in the expression matrix; unable to remove var=0 genes")

        nz_var = comp < nz_var
        keep_column_bool &= nz_var

        # nz_var flags the genes that DO vary, so the number removed for
        # var=0 is the count of the complement
        var_zero_trim = np.sum(~nz_var)
    else:
        var_zero_trim = 0

    n_keep = np.sum(keep_column_bool)

    if n_keep == 0:
        err_msg = "No genes remain after trimming. ({lst} removed to match list, {v} removed for var=0)"
        raise ValueError(err_msg.format(lst=list_trim, v=var_zero_trim))

    # Nothing to do if every column is being kept
    if n_keep != self._adata.shape[1]:
        Debug.vprint("Trimming {name} matrix {sh} to {n} columns".format(name=self.name,
                                                                        sh=self._adata.X.shape,
                                                                        n=n_keep),
                     level=1)

        # This explicit copy allows the original to be deallocated
        # Otherwise the GC leaves the original because the view reference keeps it alive
        # At some point it will need to copy so why not now
        self._adata = AnnData(self._adata.X[:, keep_column_bool],
                              obs=self._adata.obs.copy(),
                              var=self._adata.var.loc[keep_column_bool, :].copy(),
                              dtype=self._adata.X.dtype)

        # Make sure that there's no hanging reference to the original object
        gc.collect()
def _recalculate_design(self):
    """
    Replace the design matrix with TF activity calculated by the TFA driver
    from the priors, the current design and the half-tau response.
    """

    # Both inputs are converted to float before the activity calculation
    for _data in (self.design, self.half_tau_response):
        _data.convert_to_float()

    tfa = self.tfa_driver()
    self.design = tfa.compute_transcription_factor_activity(
        self.priors_data,
        self.design,
        self.half_tau_response
    )

    Debug.vprint("Rebuilt design matrix {d} with TF activity".format(d=self.design.shape), level=1)
def set_expression_file(self, tsv=None, hdf5=None, h5ad=None, tenx_path=None, mtx=None, mtx_barcode=None,
                        mtx_feature=None, h5_layer=None):
    """
    Set the expression data file and the loader that will read it.
    Loaders exist for TSV, hdf5, h5ad (AnnData), MTX and 10x data.
    Exactly one of ``tsv``, ``hdf5``, ``h5ad``, ``tenx_path`` and ``mtx``
    may be given; more than one raises a ValueError, and none at all only
    prints a message.

    :param tsv: A path to a TSV (or tsv.gz) file which can be loaded by pandas.read_csv()
    :type tsv: str, optional
    :param hdf5: A path to a hdf5 file which can be loaded by pandas.HDFStore
    :type hdf5: str, optional
    :param h5ad: A path to an AnnData h5 file
    :type h5ad: str, optional
    :param tenx_path: A path to the folder containing the 10x mtx, barcode, and feature files
    :type tenx_path: Path, optional
    :param mtx: A path to an mtx file
    :type mtx: str, optional
    :param mtx_barcode: A path to a list of observation names (i.e. barcodes, etc) for the mtx file
    :type mtx_barcode: str, optional
    :param mtx_feature: A path to a list of gene names for the mtx file
    :type mtx_feature: str, optional
    :param h5_layer: The layer (in an AnnData h5) or the store key (in an hdf5) file to use.
        Defaults to using the first key. Only stored for hdf5 and h5ad.
    :type h5_layer: str, optional
    :raises ValueError: If more than one type of expression file is given
    """

    # Count how many of the mutually exclusive file arguments were provided
    n_provided = sum(arg is not None for arg in (tsv, hdf5, h5ad, tenx_path, mtx))

    if n_provided == 0:
        Debug.vprint("No file provided", level=0)
    elif n_provided != 1:
        raise ValueError("Only one type of input expression file can be set")

    if tsv is not None:
        self._set_file_name("expression_matrix_file", tsv)
        self._expression_loader = _TSV

    elif hdf5 is not None:
        self._set_file_name("expression_matrix_file", hdf5)
        self._expression_loader = _HDF5
        self._h5_layer = h5_layer

    elif h5ad is not None:
        self._set_file_name("expression_matrix_file", h5ad)
        self._expression_loader = _H5AD
        self._h5_layer = h5_layer

    elif mtx is not None:
        # The three files are stored together as a tuple
        for _mtx_file in (mtx, mtx_barcode, mtx_feature):
            self._check_file_exists(_mtx_file)
        self.expression_matrix_file = (mtx, mtx_barcode, mtx_feature)
        self._expression_loader = _MTX

    elif tenx_path is not None:
        self.expression_matrix_file = tenx_path
        self._expression_loader = _TENX
def filter_to_gene_list(self):
    """
    Restrict the expression data to ``self.gene_names`` and then restrict
    the priors to the genes that are left in the expression data.
    """

    # Later operations are mostly column-wise, so switch the storage to CSC
    # before trimming columns
    Debug.vprint("Preparing to trim expression matrix", level=2)
    self.data.to_csc()

    Debug.vprint("Trimming expression matrix", level=1)
    self.data.trim_genes(trim_gene_list=self.gene_names)

    # Filter against the trimmed data, not the requested gene list
    remaining_genes = self.data.gene_names
    self.priors_data = self.prior_manager.filter_priors_to_genes(self.priors_data, remaining_genes)
def mi_make(i):
    """
    Calculate mutual information between column i of X and every column of Y.

    ``X``, ``Y``, ``bins``, ``m1``, ``m2`` and ``logtype`` are not
    parameters; they are taken from the enclosing scope.

    :param i: Column index into X
    :type i: int
    :return: One mutual information value per column of Y (``m2`` values)
    :rtype: list
    """

    # Every 1000th column is reported at a lower (more visible) level
    level = 3 if i % 1000 != 0 else 2
    _msg = "Mutual Information Calculation [{i} / {total}]"
    Debug.allprint(_msg.format(i=i, total=m1), level=level)

    # Pull column i out as a flat dense vector
    if sps.isspmatrix(X):
        x_column = X[:, i].A.flatten()
    else:
        x_column = X[:, i].flatten()

    discrete_X = _make_discrete(x_column, bins)

    mi_values = []
    for j in range(m2):
        table = _make_table(discrete_X, Y[:, j], bins)
        mi_values.append(_calc_mi(table, logtype=logtype))

    return mi_values
def _get_file_name_from_attribute(self, file_name): """ Check and see if a file name is an object attribute that holds a file namee :param file_name: str :return file_name: str """ # Check and see if file_name is actually an object attribute holding a file name. Use that if so. if file_name not in self._file_format_settings: if hasattr(self, file_name) and getattr(self, file_name) in self._file_format_settings: file_name = getattr(self, file_name) else: Debug.vprint("File {f} is unknown".format(f=file_name), level=0) return None return file_name
def run_regression(self):
    """
    Run every bootstrap and collect the results by task.

    :return: Betas and rescaled betas. Each is a list with one entry per
        task, and each entry is a list with one result per bootstrap
    :rtype: list(list), list(list)
    """

    betas, rescaled_betas = [], []
    for _ in range(self._n_tasks):
        betas.append([])
        rescaled_betas.append([])

    for idx in range(self.num_bootstraps):
        Debug.vprint('Bootstrap {} of {}'.format(idx + 1, self.num_bootstraps), level=0)
        boot_betas, boot_rescaled = self.run_bootstrap(idx)

        # run_bootstrap returns per-task lists; regroup them as task -> bootstraps
        for k in range(self._n_tasks):
            betas[k].append(boot_betas[k])
            rescaled_betas[k].append(boot_rescaled[k])

    return betas, rescaled_betas
def get_data(self):
    """
    Load the data through the parent class loader, then return it as a
    list of task objects.

    :return: The result of ``separate_tasks_by_metadata()`` when
        ``tasks_from_metadata`` is set, otherwise a list holding only this
        task (self)
    :rtype: list(TaskData)
    """

    Debug.vprint("Loading data for task {task_name}".format(task_name=self.task_name))
    super(TaskData, self).get_data()

    if not self.tasks_from_metadata:
        return [self]

    return self.separate_tasks_by_metadata()
def run_regression(self):
    """
    Run the regression once per bootstrap.

    The numpy random seed is reset to ``self.random_seed + idx`` before
    bootstrap number idx (0-based) is run.

    :return: A list of betas and a list of rescaled betas, one entry per
        bootstrap
    :rtype: list, list
    """

    betas, rescaled_betas = [], []

    for idx, bootstrap in enumerate(self.get_bootstraps()):
        _progress = 'Bootstrap {} of {}'.format(idx + 1, self.num_bootstraps)
        Debug.vprint(_progress, level=0)

        np.random.seed(self.random_seed + idx)

        boot_betas, boot_rescaled = self.run_bootstrap(bootstrap)
        betas.append(boot_betas)
        rescaled_betas.append(boot_rescaled)

    return betas, rescaled_betas
def print_file_loading_arguments(self, file_name):
    """
    Print the settings that will be used to load a given file name.

    The message is a header line followed by one ``key = value`` line per
    setting, each on its own tab-indented line.

    :param file_name: The name of the variable containing the file name (from `set_file_properties`)
    :type file_name: str
    """

    # Check and see if file_name is actually an object attribute holding a file name. Use that if so.
    file_name = self._get_file_name_from_attribute(file_name)

    # Nothing to print for an unknown file
    if file_name is None:
        return

    msg_lines = ["File {f} has the following settings:".format(f=file_name)]
    msg_lines.extend(
        str(k) + " = " + str(v)
        for k, v in self._file_format_settings[file_name].items()
    )

    # Join the header together with the settings so that the first setting
    # also starts on its own line instead of running into the header
    Debug.vprint("\n\t".join(msg_lines), level=0)
def _check_network_labels_unique(df_name, file_name, df, raise_on_duplicate=False): _msg = None if not df.columns.is_unique: _repeated = df.columns[df.columns.duplicated()] _msg = "{name} {f}: {n} TFs are duplicated ({g})" Debug.vprint(_msg.format(name=df_name, f=file_name, n=len(_repeated), g=" ".join(_repeated)), level=0) if not df.index.is_unique: _repeated = df.index[df.index.duplicated()] _msg = "{name} {f}: {n} Genes are duplicated ({g})" Debug.vprint(_msg.format(name=df_name, f=file_name, n=len(_repeated), g=" ".join(_repeated)), level=0) if _msg is not None and raise_on_duplicate: raise ValueError(_msg)
def separate_tasks_by_metadata(self, meta_data_column=None):
    """
    Split this task into one task object per unique value of a metadata
    column.

    Each new task is a deep copy of this object (made while ``self.data``
    is None) that is then given a subset copy of the data. ``self.data``
    is left as None when this returns.

    :param meta_data_column: Meta_data column which corresponds to task ID.
        Defaults to ``self.meta_data_task_column``
    :type meta_data_column: str
    :return new_task_objects: List of the TaskData objects with only one task's data each
    :rtype: list(TaskData)
    :raises ValueError: If no data is loaded, no column is set, or the
        column is not in the metadata
    """

    if self.data is None:
        raise ValueError("No data has been loaded prior to `separate_tasks_by_metadata`")

    if meta_data_column is None:
        meta_data_column = self.meta_data_task_column

    if meta_data_column is None:
        raise ValueError("tasks_from_metadata is set but meta_data_task_column is not")

    if meta_data_column not in self.data.meta_data:
        raise ValueError("meta_data_task_column is not found in task {t}".format(t=str(self)))

    tasks = self.data.meta_data[meta_data_column].unique().tolist()

    _msg = "Creating {n} tasks from metadata column {col}"
    Debug.vprint(_msg.format(n=len(tasks), col=meta_data_column), level=0)

    # Detach the data from self so that deepcopy does not copy all of it
    # for every task
    data, self.data = self.data, None

    new_task_objects = []

    for task in tasks:
        task_obj = copy.deepcopy(self)

        # Boolean selector for the rows which belong to this task
        in_task = data.meta_data[meta_data_column] == task

        task_obj.data = data.subset_copy(row_index=in_task)
        task_obj.data.name = task
        task_obj.task_name = task

        new_task_objects.append(task_obj)

    Debug.vprint("Separated data into {ntask} tasks".format(ntask=len(new_task_objects)), level=0)

    return new_task_objects
def read_tfs(self, file=None):
    """
    Read a file of TF names into ``self.tf_names``.
    Nothing happens if no file is available.

    :param file: File to read. Defaults to ``self.tf_names_file``
    :type file: str, optional
    """

    if file is None:
        file = self.tf_names_file

    if file is None:
        return

    Debug.vprint("Loading TF feature names from file {file}".format(file=file), level=1)

    # The file is read with no header and no index column
    loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )
    tfs = loader.input_dataframe(file, header=None, index_col=None)

    # Exactly one column is expected; flatten it into a list
    assert tfs.shape[1] == 1
    self.tf_names = tfs.values.flatten().tolist()
def _align_velocity(self):
    """
    Trim the velocity data and the expression data to the genes they have
    in common, then assert that gene names and sample names align.
    Constant genes are not removed.
    """

    keep_genes = self._velocity_data.gene_names.intersection(self.data.gene_names)

    _msg = "Aligning velocity and expression data on {n} genes"
    Debug.vprint(_msg.format(n=len(keep_genes)))

    for _data in (self._velocity_data, self.data):
        _data.trim_genes(remove_constant_genes=False, trim_gene_list=keep_genes)

    assert check.indexes_align((self._velocity_data.gene_names, self.data.gene_names))
    assert check.indexes_align((self._velocity_data.sample_names, self.data.sample_names))
def pileup_data(self, run_data):
    """
    Pack the completed regression results into two [G x K] DataFrames.

    :param run_data: list
        A list of regression result dicts. Each regression result should
        have `ind`, `pp`, `betas` and `betas_resc` keys with the
        appropriate data.
    :return betas, betas_rescale: (pd.DataFrame [G x K], pd.DataFrame [G x K])
    :raises RuntimeError: If any entry of run_data is None
    """

    shape = (self.G, self.K)

    # Anything a regression result does not fill in stays 0
    betas = np.zeros(shape, dtype=np.dtype(float))
    betas_rescale = np.zeros(shape, dtype=np.dtype(float))

    for result in run_data:

        # A missing result is an error
        if result is None:
            raise RuntimeError("No model produced by regression method")

        row = result['ind']  # Int (row / gene index), per the original notes
        cols = result['pp']  # Boolean array of size K, per the original notes

        betas[row, cols] = result['betas']
        betas_rescale[row, cols] = result['betas_resc']

    d_len, b_avg, null_m = self._summary_stats(betas)

    Debug.vprint("Regression complete:", end=" ", level=0)
    _summary = "{d_len} Models, {b_avg} Preds per Model ({nom} Null)"
    Debug.vprint(_summary.format(d_len=d_len, b_avg=round(b_avg, 4), nom=null_m), level=0)

    # Label rows with the response names and columns with the predictor names
    labels = {"index": self.Y.gene_names, "columns": self.X.gene_names}
    betas = pd.DataFrame(betas, **labels)
    betas_rescale = pd.DataFrame(betas_rescale, **labels)

    return betas, betas_rescale
def run_regression(self):
    """
    Run the regression once per bootstrap, synchronizing processes before
    the first bootstrap and after every bootstrap.

    The numpy random seed is reset to ``self.random_seed + idx`` before
    bootstrap number idx (0-based) is run. Results are only collected
    when ``self.is_master()`` is true.

    :return: A list of betas and a list of rescaled betas, one entry per
        bootstrap (both empty if this is not the master)
    :rtype: list, list
    """

    betas, rescaled_betas = [], []

    MPControl.sync_processes("pre_regression")

    for idx, bootstrap in enumerate(self.get_bootstraps()):
        _progress = 'Bootstrap {} of {}'.format(idx + 1, self.num_bootstraps)
        Debug.vprint(_progress, level=0)

        np.random.seed(self.random_seed + idx)

        boot_betas, boot_rescaled = self.run_bootstrap(bootstrap)

        if self.is_master():
            betas.append(boot_betas)
            rescaled_betas.append(boot_rescaled)

        MPControl.sync_processes("post_bootstrap")

    return betas, rescaled_betas
def read_genes(self, file=None):
    """
    Read a file of gene names into ``self.gene_names``.

    If ``self.gene_names`` is still None afterwards and data is loaded, a
    copy of the gene names from the data is used instead.

    :param file: File to read. Defaults to ``self.gene_names_file``
    :type file: str, optional
    """

    if file is None:
        file = self.gene_names_file

    if file is not None:
        Debug.vprint("Loading Gene feature names from file {file}".format(file=file), level=1)

        # The file is read with no header and no index column
        loader = InferelatorDataLoader(
            input_dir=self.input_dir,
            file_format_settings=self._file_format_settings
        )
        genes = loader.input_dataframe(file, header=None, index_col=None)

        # Exactly one column is expected; flatten it into a list
        assert genes.shape[1] == 1
        self.gene_names = genes.values.flatten().tolist()

    # Without a restrictive list, fall back to every gene in the data
    no_gene_list = self.gene_names is None
    if no_gene_list and self.data is not None:
        self.gene_names = self.data.gene_names.copy()
def set_mkl(cls, mkl=True):
    """
    Choose the function that is stored as ``cls._dot_func``.

    :param mkl: True to use ``dot_product_mkl`` from the sparse_dot_mkl
        package (falling back to ``dot_product`` if it cannot be imported),
        False to use ``dot_product``, None to leave the current setting
        untouched
    :type mkl: bool, None
    """

    # If the MKL flag is None, don't change anything.
    # This has to return; falling through would reach the non-MKL branch
    # below and overwrite the current setting.
    if mkl is None:
        return

    # If the MKL flag is True, use the dot_product_mkl function
    if mkl:
        try:
            from sparse_dot_mkl import get_version_string, dot_product_mkl as dp

            msg = "Matrix multiplication will use sparse_dot_mkl package with MKL: {m}"
            vstring = get_version_string()
            Debug.vprint(msg.format(m=vstring if vstring is not None else "Install mkl-service for details"),
                         level=2)

            cls._dot_func = dp

        # If it isn't available, use the fallback dot_product function instead
        except ImportError as err:
            Debug.vprint("Unable to load MKL with sparse_dot_mkl:\n" + str(err), level=0)
            cls._dot_func = dot_product

    # If the MKL flag is False, use the non-MKL dot_product function
    else:
        Debug.vprint("Matrix multiplication will use Numpy; this is not advised for sparse data", level=2)
        cls._dot_func = dot_product
def _combine_expression_velocity(self, expression, velocity):
    """
    Calculate dX/dt + lambda * X

    The decay constants (lambda) come from the first of these that applies:
    ``self._decay_constants``; ``1 / self.tau`` for every gene; a
    "decay_constants" column in the velocity gene data; a
    "decay_constants" column in the expression gene data. The last two are
    only used if ``self._use_precalculated_decay_constants`` is set.
    Without any decay information the velocity object is returned as-is.

    :param expression: Expression data (X)
    :param velocity: Velocity data (dX/dt); genes and samples must align
        with the expression data
    :return: A new InferelatorData with the expression data's gene names,
        sample names and metadata, or the velocity object itself
    """

    assert check.indexes_align((expression.gene_names, velocity.gene_names))
    assert check.indexes_align((expression.sample_names, velocity.sample_names))

    if self._decay_constants is not None:
        Debug.vprint("Using preloaded decay constants in _decay_constants")
        decay_constants = self._decay_constants

    elif self.tau is not None:
        Debug.vprint("Calculating decay constants for tau {t}".format(t=self.tau))
        decay_constants = np.repeat(1 / self.tau, expression.num_genes)

    else:
        # Look for per-gene constants; velocity takes priority over expression
        for source in (velocity, expression):
            if "decay_constants" in source.gene_data.columns and self._use_precalculated_decay_constants:
                Debug.vprint("Extracting decay constants from {n}".format(n=source.name))
                decay_constants = source.gene_data["decay_constants"].values
                break
        else:
            Debug.vprint("No decay information found. Solving dX/dt = AB for Betas")
            return velocity

    # Scale each gene (column) of X by its own decay constant
    decay_term = np.multiply(expression.values, decay_constants[None, :])

    return InferelatorData(
        np.add(velocity.values, decay_term),
        gene_names=expression.gene_names,
        sample_names=expression.sample_names,
        meta_data=expression.meta_data
    )