def load_data_h5ad(self, h5ad_file, meta_data_file=None, meta_data_handler=DEFAULT_METADATA,
                   gene_data_file=None, gene_name_column=None, use_layer=None):
    """
    Read an h5ad file with anndata and wrap its contents in an InferelatorData object.

    :param h5ad_file: Name of the h5ad file; resolved through self.input_path
    :param meta_data_file: Sample metadata file handed to self.load_metadata_tsv.
        When this is None and the h5ad ``obs`` table already has at least one column,
        no external sample metadata is loaded.
    :param meta_data_handler: Forwarded to self.load_metadata_tsv
    :param gene_data_file: Gene metadata file handed to self.load_gene_metadata_tsv
    :param gene_name_column: Forwarded to self.load_gene_metadata_tsv
    :param use_layer: Key into the h5ad ``layers``; when None the whole anndata
        object is passed to InferelatorData instead of a single layer
    :raises ValueError: If use_layer is given but is not present in the file's layers
    :return: The InferelatorData object that was built
    """

    adata = anndata.read_h5ad(self.input_path(h5ad_file))

    # Only skip the external metadata when the file already carries obs columns
    obs_is_sufficient = meta_data_file is None and adata.obs.shape[1] > 0
    if obs_is_sufficient:
        meta_data = None
    else:
        meta_data = self.load_metadata_tsv(meta_data_file, adata.obs_names,
                                           meta_data_handler=meta_data_handler)

    gene_metadata = self.load_gene_metadata_tsv(gene_data_file, gene_name_column)

    if use_layer is None:
        # No layer requested: hand the entire anndata object over
        loaded = InferelatorData(adata, meta_data=meta_data, gene_data=gene_metadata)

    elif use_layer in adata.layers:
        # Build from the requested layer, joining the embedded obs/var tables
        # column-wise with whatever metadata was loaded from files
        sample_meta = pd.concat((adata.obs, meta_data), axis=1)
        gene_meta = pd.concat((adata.var, gene_metadata), axis=1)

        loaded = InferelatorData(adata.layers[use_layer].copy(),
                                 gene_names=adata.var_names.copy(),
                                 sample_names=adata.obs_names.copy(),
                                 meta_data=sample_meta,
                                 gene_data=gene_meta)

    else:
        msg = "Layer {lay} is not in {f}".format(lay=use_layer, f=h5ad_file)
        raise ValueError(msg)

    # Make sure bytestrings are decoded
    _safe_dataframe_decoder(loaded.gene_data)
    _safe_dataframe_decoder(loaded.meta_data)

    self._check_loaded_data(loaded, filename=h5ad_file)

    return loaded
def load_data_mtx(self, mtx_file, mtx_obs=None, mtx_feature=None, meta_data_file=None,
                  meta_data_handler=DEFAULT_METADATA, gene_data_file=None, gene_name_column=None):
    """
    Read a matrix file with anndata.read_mtx and wrap it in an InferelatorData object.

    :param mtx_file: Name of the mtx file; resolved through self.input_path
    :param mtx_obs: Optional file read with self._load_list_from_file; the result is
        passed to InferelatorData as ``sample_names``
    :param mtx_feature: Optional file read with self._load_list_from_file; the result is
        passed to InferelatorData as ``gene_names``
    :param meta_data_file: Sample metadata file handed to self.load_metadata_tsv
    :param meta_data_handler: Forwarded to self.load_metadata_tsv
    :param gene_data_file: Gene metadata file handed to self.load_gene_metadata_tsv
    :param gene_name_column: Forwarded to self.load_gene_metadata_tsv
    :return: The InferelatorData object that was built
    """

    matrix = anndata.read_mtx(self.input_path(mtx_file))

    # Label files are optional; None is passed through when one is absent
    sample_labels = None
    if mtx_obs is not None:
        sample_labels = self._load_list_from_file(self.input_path(mtx_obs))

    gene_labels = None
    if mtx_feature is not None:
        gene_labels = self._load_list_from_file(self.input_path(mtx_feature))

    # Metadata is loaded against the obs_names of the matrix as read from disk
    meta_data = self.load_metadata_tsv(meta_data_file, matrix.obs_names,
                                       meta_data_handler=meta_data_handler)
    gene_metadata = self.load_gene_metadata_tsv(gene_data_file, gene_name_column)

    return InferelatorData(matrix,
                           meta_data=meta_data,
                           gene_data=gene_metadata,
                           sample_names=sample_labels,
                           gene_names=gene_labels)
def load_data_hdf5(self, hdf5_file, use_layer=None, meta_data_file=None, meta_data_handler=DEFAULT_METADATA,
                   gene_data_file=None, gene_name_column=None, transpose_expression_data=False):
    """
    Read one table from an HDF5 store with pandas and wrap it in an InferelatorData object.

    :param hdf5_file: Name of the HDF5 file; resolved through self.input_path
    :param use_layer: Key of the table to read from the store. When None, the first
        key reported by the store is used.
    :param meta_data_file: Sample metadata file handed to self.load_metadata_tsv
    :param meta_data_handler: Forwarded to self.load_metadata_tsv
    :param gene_data_file: Gene metadata file handed to self.load_gene_metadata_tsv
    :param gene_name_column: Forwarded to self.load_gene_metadata_tsv
    :param transpose_expression_data: Transpose the table before it is used
    :return: The InferelatorData object that was built
    """

    # Read inside a context manager so the store (and its file handle) is closed
    # as soon as the table is in memory, even if the key lookup raises
    with pd.HDFStore(self.input_path(hdf5_file), mode='r') as store:
        table_key = store.keys()[0] if use_layer is None else use_layer
        data = store[table_key]

    # Orient the table first so that data.index holds the row labels of the frame
    # that is actually handed to InferelatorData. load_data_tsv likewise passes the
    # column labels to load_metadata_tsv when the expression data is transposed.
    if transpose_expression_data:
        data = data.transpose()

    meta_data = self.load_metadata_tsv(meta_data_file, data.index, meta_data_handler=meta_data_handler)
    gene_metadata = self.load_gene_metadata_tsv(gene_data_file, gene_name_column)

    data = InferelatorData(data, meta_data=meta_data, gene_data=gene_metadata)

    # Make sure bytestrings are decoded
    _safe_dataframe_decoder(data.gene_data)
    _safe_dataframe_decoder(data.meta_data)

    return data
def load_data_tsv(self, expression_matrix_file, transpose_expression_data=False, meta_data_file=None,
                  meta_data_handler=DEFAULT_METADATA, expression_matrix_metadata=None,
                  gene_data_file=None, gene_name_column=None):
    """
    Read a delimited expression file and wrap it in an InferelatorData object.

    :param expression_matrix_file: File read with self.input_dataframe
    :param transpose_expression_data: Forwarded to InferelatorData as ``transpose_expression``;
        also selects whether the column labels (True) or the index (False) are passed
        to self.load_metadata_tsv as sample labels
    :param meta_data_file: Sample metadata file handed to self.load_metadata_tsv
    :param meta_data_handler: Forwarded to self.load_metadata_tsv
    :param expression_matrix_metadata: Column labels of the expression file that hold
        metadata; matching columns are removed from the expression data and kept as metadata
    :param gene_data_file: Gene metadata file handed to self.load_gene_metadata_tsv
    :param gene_name_column: Forwarded to self.load_gene_metadata_tsv
    :return: The InferelatorData object that was built
    """

    Debug.vprint(
        "Loading expression data file {file}".format(file=expression_matrix_file),
        level=0
    )

    expression = self.input_dataframe(expression_matrix_file)

    # Split metadata columns embedded in the expression file away from the data
    embedded_meta = None
    if expression_matrix_metadata is not None:
        embedded_cols = expression.columns.intersection(expression_matrix_metadata)
        embedded_meta = expression.loc[:, embedded_cols].copy()
        expression = expression.drop(embedded_cols, axis=1)

    # Skip load_metadata_tsv only when no file was named and embedded metadata exists
    file_meta = None
    if meta_data_file is not None or embedded_meta is None:
        if transpose_expression_data:
            sample_labels = expression.columns
        else:
            sample_labels = expression.index

        file_meta = self.load_metadata_tsv(meta_data_file, sample_labels,
                                           meta_data_handler=meta_data_handler)

    # Join file-based and embedded metadata column-wise
    combined_meta = pd.concat((file_meta, embedded_meta), axis=1)

    gene_metadata = self.load_gene_metadata_tsv(gene_data_file, gene_name_column)

    # Bundle everything into a single InferelatorData object
    loaded = InferelatorData(expression,
                             transpose_expression=transpose_expression_data,
                             meta_data=combined_meta,
                             gene_data=gene_metadata)

    self._check_loaded_data(loaded, filename=expression_matrix_file)

    return loaded
from inferelator.utils.data import InferelatorData
from numpy.random import default_rng

# Sample labels "0" .. "999" shared by the fake metadata and the fake data object
fake_obsnames = [str(i) for i in range(1000)]

# Two metadata columns: one constant, one with six groups of uneven size
fake_metadata = pd.DataFrame(
    {
        "CONST": ["A"] * 1000,
        "VAR": [
            label
            for label, count in (("A", 100), ("B", 200), ("C", 1), ("D", 99), ("E", 500), ("F", 100))
            for _ in range(count)
        ],
    },
    index=fake_obsnames
)

# A 1000 x 1 matrix of seeded random values wrapped with the fake metadata
fake_data_object = InferelatorData(
    default_rng(12345).random(size=1000).reshape(-1, 1),
    meta_data=fake_metadata,
    sample_names=fake_obsnames
)

TEMP_DIR = tempfile.gettempdir()
TEMP_DIR_1 = os.path.join(TEMP_DIR, "test1")


class FakeResult:
    """Minimal stand-in result exposing a single name/score pair as class attributes."""

    score = 1
    name = "NAME"
    all_names = [name]
    all_scores = {name: score}