def load_velocity(self, velocity_file=None, loader_type=None):
        """
        Load velocity data into ``self._velocity_data`` and name it "Velocity".

        :param velocity_file: File to load. Falls back to
            ``self._velocity_file_name`` when None.
        :param loader_type: Loader type constant. Falls back to
            ``self._velocity_file_type`` when None; if that is also None the
            file is loaded as TSV.
        :raises ValueError: If the loader type is not one of the TSV, H5AD or
            HDF5 constants (or None).
        """
        if velocity_file is None:
            velocity_file = self._velocity_file_name
        if loader_type is None:
            loader_type = self._velocity_file_type

        # The loaders are asked to transpose unless columns are flagged as genes
        needs_transpose = not self.expression_matrix_columns_are_genes

        data_loader = InferelatorDataLoader(
            input_dir=self.input_dir,
            file_format_settings=self._file_format_settings)

        Debug.vprint("Loading velocity data from {f}".format(f=velocity_file),
                     level=1)

        if loader_type in (_TSV, None):
            velocity = data_loader.load_data_tsv(
                velocity_file,
                transpose_expression_data=needs_transpose)
        elif loader_type == _H5AD:
            # No transpose flag is passed for H5AD files
            velocity = data_loader.load_data_h5ad(
                velocity_file,
                use_layer=self._velocity_h5_layer)
        elif loader_type == _HDF5:
            velocity = data_loader.load_data_hdf5(
                velocity_file,
                transpose_expression_data=needs_transpose,
                use_layer=self._velocity_h5_layer)
        else:
            raise ValueError(
                "Invalid velocity_file_type: {a}".format(a=loader_type))

        self._velocity_data = velocity
        self._velocity_data.name = "Velocity"
예제 #2
0
    def read_priors(self, priors_file=None, gold_standard_file=None):
        """
        Load the priors and gold standard network files.

        Each file is optional. A file that is passed as None falls back to the
        matching instance attribute (``self.priors_file`` /
        ``self.gold_standard_file``); if that is also None, nothing is loaded
        for it. Loaded data is stored on ``self.priors_data`` and
        ``self.gold_standard``.

        :param priors_file: Priors file to load, or None for the default
        :param gold_standard_file: Gold standard file to load, or None for the default
        """
        if priors_file is None:
            priors_file = self.priors_file
        if gold_standard_file is None:
            gold_standard_file = self.gold_standard_file

        data_loader = InferelatorDataLoader(
            input_dir=self.input_dir,
            file_format_settings=self._file_format_settings,
        )

        if priors_file is not None:
            prior_message = "Loading prior data from file {file}".format(file=priors_file)
            Debug.vprint(prior_message, level=1)
            self.priors_data = data_loader.input_dataframe(priors_file)

            # Report what was loaded, then check the labels for duplicates
            # (duplicate indices will raise errors later)
            self.loaded_file_info("Priors data", self.priors_data)
            self._check_network_labels_unique("Priors_data", priors_file, self.priors_data)

        if gold_standard_file is not None:
            gold_message = "Loading gold_standard data from file {file}".format(file=gold_standard_file)
            Debug.vprint(gold_message, level=1)
            self.gold_standard = data_loader.input_dataframe(gold_standard_file)

            # Same reporting and duplicate-label check for the gold standard
            self.loaded_file_info("Gold standard", self.gold_standard)
            self._check_network_labels_unique("Gold standard", gold_standard_file, self.gold_standard)
예제 #3
0
    def load_activity(self, file=None, file_type=None):
        """
        Load a TF activity file into ``self.design`` and trim it to TF features.

        After loading, the design data is trimmed to the features that are also
        in ``self.tf_names``, and its sample names are checked against those of
        ``self.response``.

        :param file: Activity file to load. Falls back to
            ``self._tfa_input_file`` when None.
        :param file_type: File type, "h5ad" or "tsv" (case-insensitive). Falls
            back to ``self._tfa_input_file_type`` when None.
        :raises ValueError: If the resolved file type is neither "h5ad" nor "tsv"
        """
        file = self._tfa_input_file if file is None else file
        file_type = self._tfa_input_file_type if file_type is None else file_type

        # Resolve the type up front so an unsupported value fails with a clear
        # message instead of leaving self.design unset (or stale)
        activity_type = file_type.lower() if file_type is not None else None
        if activity_type not in ("h5ad", "tsv"):
            raise ValueError(
                "Invalid file_type: {a}".format(a=file_type))

        loader = InferelatorDataLoader(
            input_dir=self.input_dir,
            file_format_settings=self._file_format_settings)

        # Dispatch on the activity file's own type, not on the loader used for
        # the expression data
        if activity_type == "h5ad":
            self.design = loader.load_data_h5ad(file)
        else:
            self.design = loader.load_data_tsv(file)

        Debug.vprint("Loaded {f} as design matrix {d}".format(
            d=self.design.shape, f=file),
                     level=1)

        # Keep only features that are listed as TFs; constant features are retained
        self.design.trim_genes(
            remove_constant_genes=False,
            trim_gene_list=self.design.gene_names.intersection(self.tf_names))

        Debug.vprint("Trimmed to {d} for TF activity".format(
            d=self.design.shape),
                     level=1)

        # NOTE(review): presumably verifies that the design and response
        # samples line up; confirm against check.indexes_align
        assert check.indexes_align(
            [self.design.sample_names, self.response.sample_names])
예제 #4
0
    def create_output_dir(self):
        """
        Resolve ``self.output_dir`` and create the directory if needed.

        When no output_dir is set, a directory named with the current timestamp
        (``YYYY-mm-dd_HH-MM-SS``) under ``self.input_dir`` is used. The chosen
        path is passed through ``InferelatorDataLoader.make_path_safe`` and
        stored back on ``self.output_dir``.
        """
        if self.output_dir is not None:
            requested_path = self.output_dir
        else:
            timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
            requested_path = os.path.join(self.input_dir, timestamp)

        self.output_dir = InferelatorDataLoader.make_path_safe(requested_path)

        expanded_path = os.path.expanduser(self.output_dir)
        try:
            os.makedirs(expanded_path)
        except FileExistsError:
            # The path is already there; nothing to create
            pass
예제 #5
0
    def read_tfs(self, file=None):
        """
        Load the TF names file into ``self.tf_names`` as a list.

        :param file: File to read. Falls back to ``self.tf_names_file`` when
            None; if that is also None, nothing is loaded.
        """
        if file is None:
            file = self.tf_names_file

        # No file passed and no default configured: leave tf_names untouched
        if file is None:
            return

        Debug.vprint("Loading TF feature names from file {file}".format(file=file), level=1)

        # The file is read without a header row or an index column
        data_loader = InferelatorDataLoader(
            input_dir=self.input_dir,
            file_format_settings=self._file_format_settings,
        )
        tf_frame = data_loader.input_dataframe(file, header=None, index_col=None)

        # Exactly one column is expected; flatten it into a plain list
        assert tf_frame.shape[1] == 1
        flat_values = tf_frame.values.flatten()
        self.tf_names = flat_values.tolist()
예제 #6
0
    def output_path(self, filename):
        """
        Build the path of a file inside the output directory.

        :param filename: Path to some file that should be placed under the output directory
        :type filename: str
        :return: Result of joining ``filename`` onto the ``output_dir`` instance variable
        :rtype: str
        """
        join_path = InferelatorDataLoader.filename_path_join
        return join_path(self.output_dir, filename)
예제 #7
0
    def read_genes(self, file=None):
        """
        Load the gene names file into ``self.gene_names`` as a list.

        If no file is available and ``self.gene_names`` is still None, the gene
        names of the loaded data (``self.data.gene_names``) are copied instead.

        :param file: File to read. Falls back to ``self.gene_names_file`` when None.
        """
        if file is None:
            file = self.gene_names_file

        if file is not None:
            Debug.vprint("Loading Gene feature names from file {file}".format(file=file), level=1)

            # The file is read without a header row or an index column
            data_loader = InferelatorDataLoader(
                input_dir=self.input_dir,
                file_format_settings=self._file_format_settings,
            )
            gene_frame = data_loader.input_dataframe(file, header=None, index_col=None)

            # Exactly one column is expected; flatten it into a plain list
            assert gene_frame.shape[1] == 1
            flat_values = gene_frame.values.flatten()
            self.gene_names = flat_values.tolist()

        # A restrictive list is already set, or there is no data to fall back on
        if self.gene_names is not None or self.data is None:
            return

        self.gene_names = self.data.gene_names.copy()
예제 #8
0
    def read_expression(self, expression_matrix_file=None, meta_data_file=None, gene_data_file=None):
        """
        Read the expression data into ``self.data`` and name it "Expression".

        The loader is selected by ``self._expression_loader``. Any argument
        passed as None falls back to the matching instance attribute.

        :param expression_matrix_file: Expression file to load, or None to use
            ``self.expression_matrix_file``. For the MTX loader this is indexed
            as a sequence: ``[0]`` is passed as the matrix file, ``[1]`` as
            ``mtx_obs`` and ``[2]`` as ``mtx_feature``.
        :param meta_data_file: Metadata file, or None to use ``self.meta_data_file``
        :param gene_data_file: Gene metadata file, or None to use ``self.gene_metadata_file``
        :raises ValueError: If ``self._expression_loader`` is not a supported loader type
        """
        expression_file = expression_matrix_file if expression_matrix_file is not None else self.expression_matrix_file
        meta_data_file = meta_data_file if meta_data_file is not None else self.meta_data_file
        gene_data_file = gene_data_file if gene_data_file is not None else self.gene_metadata_file

        loader = InferelatorDataLoader(input_dir=self.input_dir, file_format_settings=self._file_format_settings)

        # Metadata arguments that every loader receives
        metadata_kwargs = dict(meta_data_file=meta_data_file,
                               meta_data_handler=self.metadata_handler,
                               gene_data_file=gene_data_file,
                               gene_name_column=self.gene_list_index)

        if self._expression_loader == _H5AD:
            self.data = loader.load_data_h5ad(expression_file,
                                              use_layer=self._h5_layer,
                                              **metadata_kwargs)

        elif self._expression_loader == _TSV:
            self.data = loader.load_data_tsv(expression_file,
                                             transpose_expression_data=not self.expression_matrix_columns_are_genes,
                                             expression_matrix_metadata=self.expression_matrix_metadata,
                                             **metadata_kwargs)

        elif self._expression_loader == _MTX:
            self.data = loader.load_data_mtx(expression_file[0],
                                             mtx_feature=expression_file[2],
                                             mtx_obs=expression_file[1],
                                             **metadata_kwargs)

        elif self._expression_loader == _TENX:
            self.data = loader.load_data_tenx(expression_file,
                                              **metadata_kwargs)

        elif self._expression_loader == _HDF5:
            self.data = loader.load_data_hdf5(expression_file,
                                              transpose_expression_data=not self.expression_matrix_columns_are_genes,
                                              use_layer=self._h5_layer,
                                              **metadata_kwargs)

        else:
            # Fail loudly instead of falling through to rename missing or stale data
            raise ValueError(
                "Invalid expression loader type: {a}".format(a=self._expression_loader))

        self.data.name = "Expression"
예제 #9
0
    def read_priors(self, priors_file=None, gold_standard_file=None):
        """
        Load the priors and gold standard network files.

        Each file is optional. A file that is passed as None falls back to the
        matching instance attribute (``self.priors_file`` /
        ``self.gold_standard_file``); if that is also None, nothing is loaded
        for it. Loaded data is stored on ``self.priors_data`` and
        ``self.gold_standard``.

        :param priors_file: Priors file to load, or None for the default
        :param gold_standard_file: Gold standard file to load, or None for the default
        """
        if priors_file is None:
            priors_file = self.priors_file
        if gold_standard_file is None:
            gold_standard_file = self.gold_standard_file

        data_loader = InferelatorDataLoader(
            input_dir=self.input_dir,
            file_format_settings=self._file_format_settings)

        # (target attribute, file, log message template, label for the load report)
        sources = (
            ("priors_data", priors_file,
             "Loading prior data from file {file}", "Priors data"),
            ("gold_standard", gold_standard_file,
             "Loading gold_standard data from file {file}", "Gold standard"),
        )

        for attribute, source_file, message, label in sources:
            if source_file is None:
                continue

            Debug.vprint(message.format(file=source_file), level=1)
            setattr(self, attribute, data_loader.input_dataframe(source_file))
            self.loaded_file_info(label, getattr(self, attribute))