def load_velocity(self, velocity_file=None, loader_type=None):
    """
    Load a velocity data file and store it in ``self._velocity_data``.

    :param velocity_file: File to load. When None, ``self._velocity_file_name`` is used.
    :param loader_type: Loader identifier. When None, ``self._velocity_file_type`` is used;
        if that is also None the TSV loader is used.
    :raises ValueError: If the loader type matches none of the TSV, H5AD or HDF5 identifiers
    """
    if velocity_file is None:
        velocity_file = self._velocity_file_name
    if loader_type is None:
        loader_type = self._velocity_file_type

    # The TSV and HDF5 loaders are asked to transpose when columns are not genes
    needs_transpose = not self.expression_matrix_columns_are_genes

    data_loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )

    Debug.vprint("Loading velocity data from {f}".format(f=velocity_file), level=1)

    if loader_type is None or loader_type == _TSV:
        velocity = data_loader.load_data_tsv(
            velocity_file,
            transpose_expression_data=needs_transpose
        )
    elif loader_type == _H5AD:
        velocity = data_loader.load_data_h5ad(
            velocity_file,
            use_layer=self._velocity_h5_layer
        )
    elif loader_type == _HDF5:
        velocity = data_loader.load_data_hdf5(
            velocity_file,
            transpose_expression_data=needs_transpose,
            use_layer=self._velocity_h5_layer
        )
    else:
        raise ValueError("Invalid velocity_file_type: {a}".format(a=loader_type))

    self._velocity_data = velocity
    self._velocity_data.name = "Velocity"
def read_priors(self, priors_file=None, gold_standard_file=None):
    """
    Load the priors and gold standard files into ``self.priors_data`` and ``self.gold_standard``.

    :param priors_file: Priors file to load. When None, ``self.priors_file`` is used.
        Nothing is loaded if that is also None.
    :param gold_standard_file: Gold standard file to load. When None,
        ``self.gold_standard_file`` is used. Nothing is loaded if that is also None.
    """
    if priors_file is None:
        priors_file = self.priors_file
    if gold_standard_file is None:
        gold_standard_file = self.gold_standard_file

    frame_loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )

    if priors_file is not None:
        Debug.vprint("Loading prior data from file {file}".format(file=priors_file), level=1)
        self.priors_data = frame_loader.input_dataframe(priors_file)

        # Report what was loaded, then check the labels for duplicates
        # (duplicate indices would raise errors later)
        self.loaded_file_info("Priors data", self.priors_data)
        self._check_network_labels_unique("Priors_data", priors_file, self.priors_data)

    if gold_standard_file is not None:
        Debug.vprint("Loading gold_standard data from file {file}".format(file=gold_standard_file), level=1)
        self.gold_standard = frame_loader.input_dataframe(gold_standard_file)

        # Report what was loaded, then check the labels for duplicates
        # (duplicate indices would raise errors later)
        self.loaded_file_info("Gold standard", self.gold_standard)
        self._check_network_labels_unique("Gold standard", gold_standard_file, self.gold_standard)
def load_activity(self, file=None, file_type=None):
    """
    Load a TF activity file as the design matrix and trim it to the TF names.

    The loaded data is stored in ``self.design``, trimmed to the genes that are
    also in ``self.tf_names`` (constant genes are kept), and its sample names
    are asserted to align with those of ``self.response``.

    :param file: Activity file to load. When None, ``self._tfa_input_file`` is used.
    :param file_type: File type, "h5ad" or "tsv" (case-insensitive). When None,
        ``self._tfa_input_file_type`` is used.
    :raises ValueError: If the file type is neither "h5ad" nor "tsv"
    """
    file = self._tfa_input_file if file is None else file
    file_type = self._tfa_input_file_type if file_type is None else file_type

    loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )

    # Dispatch on the activity file's own type, not on the expression loader.
    # Non-string values (including None) fall through to the ValueError below.
    normalized_type = file_type.lower() if isinstance(file_type, str) else None

    if normalized_type == "h5ad":
        self.design = loader.load_data_h5ad(file)
    elif normalized_type == "tsv":
        self.design = loader.load_data_tsv(file)
    else:
        # Fail here instead of leaving self.design unset or stale
        raise ValueError(
            "Invalid tfa_input_file_type: {a}".format(a=file_type)
        )

    Debug.vprint(
        "Loaded {f} as design matrix {d}".format(d=self.design.shape, f=file),
        level=1
    )

    # Keep only the features that are TFs; constant features are not removed
    self.design.trim_genes(
        remove_constant_genes=False,
        trim_gene_list=self.design.gene_names.intersection(self.tf_names)
    )

    Debug.vprint(
        "Trimmed to {d} for TF activity".format(d=self.design.shape),
        level=1
    )

    assert check.indexes_align(
        [self.design.sample_names, self.response.sample_names]
    )
def create_output_dir(self):
    """
    Set a default output_dir if nothing is set. Create the path if it doesn't exist.

    When ``self.output_dir`` is None, it is set to a directory inside
    ``self.input_dir`` named after the current local time
    (``%Y-%m-%d_%H-%M-%S``). In both cases the path is passed through
    ``InferelatorDataLoader.make_path_safe`` before being stored.

    :raises FileExistsError: If the output path exists and is not a directory
    """
    if self.output_dir is None:
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        self.output_dir = InferelatorDataLoader.make_path_safe(
            os.path.join(self.input_dir, timestamp)
        )
    else:
        self.output_dir = InferelatorDataLoader.make_path_safe(self.output_dir)

    # exist_ok tolerates an already-existing directory but, unlike a blanket
    # `except FileExistsError: pass`, still reports a path that is a regular file
    os.makedirs(os.path.expanduser(self.output_dir), exist_ok=True)
def read_tfs(self, file=None):
    """
    Load TF names from a file into ``self.tf_names``.

    :param file: File to read. When None, ``self.tf_names_file`` is used.
        If that is also None, nothing is loaded and ``self.tf_names`` is left unchanged.
    """
    # Fall back to the instance setting when no file is given
    tf_file = self.tf_names_file if file is None else file
    if tf_file is None:
        return

    Debug.vprint("Loading TF feature names from file {file}".format(file=tf_file), level=1)

    # The file is read without a header row or an index column
    frame_loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )
    tf_frame = frame_loader.input_dataframe(tf_file, header=None, index_col=None)

    # Exactly one column is expected; it is flattened into a plain list
    assert tf_frame.shape[1] == 1
    self.tf_names = tf_frame.values.flatten().tolist()
def output_path(self, filename):
    """
    Attach a file name to the output directory.

    :param filename: Path to some file that needs to be attached to the output path
    :type filename: str
    :return: Result of joining ``filename`` to the ``output_dir`` instance variable
    :rtype: str
    """
    joined_path = InferelatorDataLoader.filename_path_join(self.output_dir, filename)
    return joined_path
def read_genes(self, file=None):
    """
    Load gene names from a file into ``self.gene_names``.

    :param file: File to read. When None, ``self.gene_names_file`` is used.
        If that is also None, no file is read.

    If ``self.gene_names`` is still None afterwards and ``self.data`` is set,
    a copy of ``self.data.gene_names`` is used instead.
    """
    # Fall back to the instance setting when no file is given
    gene_file = self.gene_names_file if file is None else file

    if gene_file is not None:
        Debug.vprint("Loading Gene feature names from file {file}".format(file=gene_file), level=1)

        # The file is read without a header row or an index column
        frame_loader = InferelatorDataLoader(
            input_dir=self.input_dir,
            file_format_settings=self._file_format_settings
        )
        gene_frame = frame_loader.input_dataframe(gene_file, header=None, index_col=None)

        # Exactly one column is expected; it is flattened into a plain list
        assert gene_frame.shape[1] == 1
        self.gene_names = gene_frame.values.flatten().tolist()

    # With no restrictive list, take the gene names from the loaded data
    if self.gene_names is None:
        if self.data is not None:
            self.gene_names = self.data.gene_names.copy()
def read_expression(self, expression_matrix_file=None, meta_data_file=None, gene_data_file=None):
    """
    Load the expression data into ``self.data`` using the loader selected by
    ``self._expression_loader``, then name it "Expression".

    :param expression_matrix_file: Expression file. When None, ``self.expression_matrix_file``
        is used. The MTX loader indexes this value: element 0 is passed as the matrix file,
        element 1 as ``mtx_obs`` and element 2 as ``mtx_feature``.
    :param meta_data_file: Metadata file. When None, ``self.meta_data_file`` is used.
    :param gene_data_file: Gene metadata file. When None, ``self.gene_metadata_file`` is used.

    If the loader matches none of the known identifiers nothing is loaded; only
    the name of the existing ``self.data`` is set.
    """
    if expression_matrix_file is None:
        expression_file = self.expression_matrix_file
    else:
        expression_file = expression_matrix_file
    if meta_data_file is None:
        meta_data_file = self.meta_data_file
    if gene_data_file is None:
        gene_data_file = self.gene_metadata_file

    loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )

    # Keyword arguments that every loader receives
    shared_kwargs = dict(
        meta_data_file=meta_data_file,
        meta_data_handler=self.metadata_handler,
        gene_data_file=gene_data_file,
        gene_name_column=self.gene_list_index
    )

    loader_kind = self._expression_loader

    if loader_kind == _H5AD:
        self.data = loader.load_data_h5ad(
            expression_file,
            use_layer=self._h5_layer,
            **shared_kwargs
        )
    elif loader_kind == _TSV:
        self.data = loader.load_data_tsv(
            expression_file,
            transpose_expression_data=not self.expression_matrix_columns_are_genes,
            expression_matrix_metadata=self.expression_matrix_metadata,
            **shared_kwargs
        )
    elif loader_kind == _MTX:
        self.data = loader.load_data_mtx(
            expression_file[0],
            mtx_feature=expression_file[2],
            mtx_obs=expression_file[1],
            **shared_kwargs
        )
    elif loader_kind == _TENX:
        self.data = loader.load_data_tenx(
            expression_file,
            **shared_kwargs
        )
    elif loader_kind == _HDF5:
        self.data = loader.load_data_hdf5(
            expression_file,
            transpose_expression_data=not self.expression_matrix_columns_are_genes,
            use_layer=self._h5_layer,
            **shared_kwargs
        )

    self.data.name = "Expression"
def read_priors(self, priors_file=None, gold_standard_file=None):
    """
    Load the priors and gold standard files into ``self.priors_data`` and ``self.gold_standard``.

    :param priors_file: Priors file to load. When None, ``self.priors_file`` is used.
        Nothing is loaded if that is also None.
    :param gold_standard_file: Gold standard file to load. When None,
        ``self.gold_standard_file`` is used. Nothing is loaded if that is also None.
    """
    if priors_file is None:
        priors_file = self.priors_file
    if gold_standard_file is None:
        gold_standard_file = self.gold_standard_file

    frame_loader = InferelatorDataLoader(
        input_dir=self.input_dir,
        file_format_settings=self._file_format_settings
    )

    if priors_file is not None:
        prior_message = "Loading prior data from file {file}".format(file=priors_file)
        Debug.vprint(prior_message, level=1)
        self.priors_data = frame_loader.input_dataframe(priors_file)
        # Report what was loaded
        self.loaded_file_info("Priors data", self.priors_data)

    if gold_standard_file is not None:
        gold_message = "Loading gold_standard data from file {file}".format(file=gold_standard_file)
        Debug.vprint(gold_message, level=1)
        self.gold_standard = frame_loader.input_dataframe(gold_standard_file)
        # Report what was loaded
        self.loaded_file_info("Gold standard", self.gold_standard)