예제 #1
0
    def load_data_h5ad(self,
                       h5ad_file,
                       meta_data_file=None,
                       meta_data_handler=DEFAULT_METADATA,
                       gene_data_file=None,
                       gene_name_column=None,
                       use_layer=None):
        """
        Read an h5ad file and wrap its contents in an InferelatorData object.

        :param h5ad_file: Name of the h5ad file; resolved through
            ``self.input_path`` before reading
        :param meta_data_file: Sample metadata file handed to
            ``self.load_metadata_tsv``. When this is None and the h5ad
            ``obs`` table already has at least one column, the metadata
            loader is not called at all
        :param meta_data_handler: Forwarded to ``self.load_metadata_tsv``
        :param gene_data_file: Forwarded to ``self.load_gene_metadata_tsv``
        :param gene_name_column: Forwarded to ``self.load_gene_metadata_tsv``
        :param use_layer: Name of a layer to use as the expression data in
            place of the whole AnnData object. None uses the whole object
        :raises ValueError: If ``use_layer`` is given but is not a layer in
            the file
        :return: The constructed InferelatorData object
        """

        adata = anndata.read_h5ad(self.input_path(h5ad_file))

        # The metadata loader is skipped only when no file was given AND the
        # h5ad obs table already carries columns
        sample_meta = None
        if meta_data_file is not None or adata.obs.shape[1] <= 0:
            sample_meta = self.load_metadata_tsv(
                meta_data_file,
                adata.obs_names,
                meta_data_handler=meta_data_handler)

        gene_meta = self.load_gene_metadata_tsv(gene_data_file,
                                                gene_name_column)

        layer_requested = use_layer is not None

        # Refuse a layer name that the file does not contain
        if layer_requested and use_layer not in adata.layers:
            raise ValueError(
                "Layer {lay} is not in {f}".format(lay=use_layer,
                                                   f=h5ad_file))

        if layer_requested:
            # Build from the layer alone; obs/var tables from the file are
            # joined column-wise with whatever was loaded separately
            loaded = InferelatorData(
                adata.layers[use_layer].copy(),
                gene_names=adata.var_names.copy(),
                sample_names=adata.obs_names.copy(),
                meta_data=pd.concat([adata.obs, sample_meta], axis=1),
                gene_data=pd.concat([adata.var, gene_meta], axis=1))
        else:
            # Build from the whole AnnData object
            loaded = InferelatorData(adata,
                                     meta_data=sample_meta,
                                     gene_data=gene_meta)

        # Run the bytestring decoder over both annotation tables
        for frame in (loaded.gene_data, loaded.meta_data):
            _safe_dataframe_decoder(frame)

        self._check_loaded_data(loaded, filename=h5ad_file)
        return loaded
예제 #2
0
    def load_data_mtx(self,
                      mtx_file,
                      mtx_obs=None,
                      mtx_feature=None,
                      meta_data_file=None,
                      meta_data_handler=DEFAULT_METADATA,
                      gene_data_file=None,
                      gene_name_column=None):
        """
        Read a matrix-market file and wrap it in an InferelatorData object.

        :param mtx_file: Name of the mtx file; resolved through
            ``self.input_path`` before reading
        :param mtx_obs: Optional file read with ``self._load_list_from_file``
            and passed on as ``sample_names``. None passes None
        :param mtx_feature: Optional file read with
            ``self._load_list_from_file`` and passed on as ``gene_names``.
            None passes None
        :param meta_data_file: Forwarded to ``self.load_metadata_tsv``
        :param meta_data_handler: Forwarded to ``self.load_metadata_tsv``
        :param gene_data_file: Forwarded to ``self.load_gene_metadata_tsv``
        :param gene_name_column: Forwarded to ``self.load_gene_metadata_tsv``
        :return: The constructed InferelatorData object
        """

        mtx_data = anndata.read_mtx(self.input_path(mtx_file))

        # Row / column labels are only read when a file was supplied
        row_names = None
        if mtx_obs is not None:
            row_names = self._load_list_from_file(self.input_path(mtx_obs))

        col_names = None
        if mtx_feature is not None:
            col_names = self._load_list_from_file(
                self.input_path(mtx_feature))

        # Metadata is loaded against the obs names of the matrix as read
        sample_meta = self.load_metadata_tsv(
            meta_data_file,
            mtx_data.obs_names,
            meta_data_handler=meta_data_handler)
        gene_meta = self.load_gene_metadata_tsv(gene_data_file,
                                                gene_name_column)

        return InferelatorData(mtx_data,
                               meta_data=sample_meta,
                               gene_data=gene_meta,
                               sample_names=row_names,
                               gene_names=col_names)
예제 #3
0
    def load_data_hdf5(self,
                       hdf5_file,
                       use_layer=None,
                       meta_data_file=None,
                       meta_data_handler=DEFAULT_METADATA,
                       gene_data_file=None,
                       gene_name_column=None,
                       transpose_expression_data=False):
        """
        Read a table from an HDF5 store and wrap it in an InferelatorData
        object.

        :param hdf5_file: Name of the HDF5 file; resolved through
            ``self.input_path`` and opened read-only
        :param use_layer: Key of the table to read from the store. None
            reads the first key the store reports
        :param meta_data_file: Forwarded to ``self.load_metadata_tsv``
        :param meta_data_handler: Forwarded to ``self.load_metadata_tsv``
        :param gene_data_file: Forwarded to ``self.load_gene_metadata_tsv``
        :param gene_name_column: Forwarded to ``self.load_gene_metadata_tsv``
        :param transpose_expression_data: Transpose the table before it is
            used, defaults to False
        :return: The constructed InferelatorData object
        """

        # HDFStore keeps the file open until it is closed; the context
        # manager releases the handle as soon as the table is in memory
        with pd.HDFStore(self.input_path(hdf5_file), mode='r') as store:
            key = store.keys()[0] if use_layer is None else use_layer
            data = store[key]

        # Orient the table BEFORE loading metadata, so that the labels handed
        # to load_metadata_tsv are the row labels of the frame that is
        # actually passed to InferelatorData below
        if transpose_expression_data:
            data = data.transpose()

        meta_data = self.load_metadata_tsv(meta_data_file,
                                           data.index,
                                           meta_data_handler=meta_data_handler)
        gene_metadata = self.load_gene_metadata_tsv(gene_data_file,
                                                    gene_name_column)

        data = InferelatorData(data,
                               meta_data=meta_data,
                               gene_data=gene_metadata)

        # Make sure bytestrings are decoded
        _safe_dataframe_decoder(data.gene_data)
        _safe_dataframe_decoder(data.meta_data)

        return data
예제 #4
0
    def load_data_tsv(self,
                      expression_matrix_file,
                      transpose_expression_data=False,
                      meta_data_file=None,
                      meta_data_handler=DEFAULT_METADATA,
                      expression_matrix_metadata=None,
                      gene_data_file=None,
                      gene_name_column=None):
        """
        Read a delimited expression file and wrap it in an InferelatorData
        object.

        :param expression_matrix_file: File read with
            ``self.input_dataframe``
        :param transpose_expression_data: Passed to InferelatorData as
            ``transpose_expression``. Also selects whether the columns (True)
            or the index (False) of the table are used as sample labels when
            loading metadata
        :param meta_data_file: Forwarded to ``self.load_metadata_tsv``. When
            this is None and metadata columns were split out of the
            expression table, the metadata loader is not called
        :param meta_data_handler: Forwarded to ``self.load_metadata_tsv``
        :param expression_matrix_metadata: Column labels to split out of the
            expression table and treat as sample metadata. Labels that are
            not columns of the table are ignored
        :param gene_data_file: Forwarded to ``self.load_gene_metadata_tsv``
        :param gene_name_column: Forwarded to ``self.load_gene_metadata_tsv``
        :return: The constructed InferelatorData object
        """

        Debug.vprint(
            "Loading expression data file {file}".format(
                file=expression_matrix_file),
            level=0)

        expr = self.input_dataframe(expression_matrix_file)

        # Move any requested metadata columns out of the expression table
        slice_meta_data = None
        if expression_matrix_metadata is not None:
            meta_cols = expr.columns.intersection(expression_matrix_metadata)
            slice_meta_data = expr.loc[:, meta_cols].copy()
            expr = expr.drop(meta_cols, axis=1)

        # Call the metadata loader unless no file was given AND metadata
        # columns were already split out of the table
        file_meta_data = None
        if meta_data_file is not None or slice_meta_data is None:
            if transpose_expression_data:
                sample_labels = expr.columns
            else:
                sample_labels = expr.index

            file_meta_data = self.load_metadata_tsv(
                meta_data_file,
                sample_labels,
                meta_data_handler=meta_data_handler)

        # Join loaded and split-out metadata column-wise
        combined_meta = pd.concat([file_meta_data, slice_meta_data], axis=1)

        gene_meta = self.load_gene_metadata_tsv(gene_data_file,
                                                gene_name_column)

        loaded = InferelatorData(
            expr,
            transpose_expression=transpose_expression_data,
            meta_data=combined_meta,
            gene_data=gene_meta)

        self._check_loaded_data(loaded, filename=expression_matrix_file)
        return loaded
from inferelator.utils.data import InferelatorData

from numpy.random import default_rng

# Sample labels "0" .. "999" as strings
fake_obsnames = [str(i) for i in range(1000)]

# Metadata for the 1000 samples: CONST is one value throughout, VAR is six
# consecutive groups of sizes 100 / 200 / 1 / 99 / 500 / 100
fake_metadata = pd.DataFrame(
    {
        "CONST": ["A" for _ in range(1000)],
        "VAR": [
            label
            for label, count in (("A", 100), ("B", 200), ("C", 1),
                                 ("D", 99), ("E", 500), ("F", 100))
            for _ in range(count)
        ],
    },
    index=fake_obsnames)

# 1000 x 1 array of seeded random floats, wrapped together with the
# metadata and sample labels above
fake_data_object = InferelatorData(
    default_rng(12345).random(size=1000).reshape(-1, 1),
    meta_data=fake_metadata,
    sample_names=fake_obsnames)

# Scratch paths under the system temporary directory
TEMP_DIR = tempfile.gettempdir()
TEMP_DIR_1 = os.path.join(TEMP_DIR, "test1")


class FakeResult:
    """
    Minimal object exposing fixed, class-level result attributes.

    NOTE(review): presumably a stand-in for a real result object in tests;
    confirm against the code that consumes it.
    """

    # One fixed score and the name it belongs to
    score = 1
    name = "NAME"

    # The same name / score expressed as a list and as a name -> score map
    all_names = [name]
    all_scores = {name: score}