def test_unaligned_regression_genes(self):
    """
    Regress two tasks whose response matrices cover different gene sets.

    Task 0 carries gene1/gene2 and task 1 carries gene1/gene3, while the
    regression is asked for all three genes. Each per-gene result is
    concatenated and compared with a hand-written frame; `pileup_data` is
    then called on the results (its return value is only unpacked).
    """
    tfs = ['tf1', 'tf2', 'tf3']
    targets = ['gene1', 'gene2', 'gene3']
    genes_per_task = [['gene1', 'gene2'], ['gene1', 'gene3']]
    result_columns = ['regulator', 'target', 'weights', 'resc_weights']

    def as_data(values, labels):
        # Wrap a float matrix with the given column labels
        return InferelatorData(pd.DataFrame(np.array(values).astype(float), columns=labels))

    def expected_frame(rows, task_codes):
        # One row per task code; the second index level is always code 0
        idx = pd.MultiIndex(levels=[[0, 1], [0]], codes=[task_codes, [0] * len(task_codes)])
        return pd.DataFrame(rows, index=idx, columns=result_columns)

    # Both tasks share the same design values
    des = [as_data([[1, 1, 3], [0, 0, 2], [0, 0, 1]], tfs) for _ in genes_per_task]
    res = [as_data([[1, 1], [2, 2], [3, 3]], task_genes) for task_genes in genes_per_task]
    priors = pd.DataFrame([[0, 1, 1], [1, 0, 1], [1, 0, 1]], index=targets, columns=tfs)

    r = amusr_regression.AMuSR_regression(des, res, tfs=tfs, genes=targets, priors=priors, use_numba=self.use_numba)

    out = [expected_frame([['tf3', 'gene1', -1, 1], ['tf3', 'gene1', -1, 1]], [0, 1]),
           expected_frame([['tf3', 'gene2', -1, 1]], [0]),
           expected_frame([['tf3', 'gene3', -1, 1]], [1])]

    regress_data = r.regress()
    for gene_idx, expected in enumerate(out):
        pdt.assert_frame_equal(pd.concat(regress_data[gene_idx]), expected, check_dtype=False)

    # Only checks that pileup runs and yields two values
    _weights, _resc_weights = r.pileup_data(regress_data)
Beispiel #2
0
 def test_two_genes_nonzero_clr_nonzero(self):
     """
     Run BBSR on two genes with one sample ('ss') and a non-zero CLR matrix.

     After `set_all_zero_priors`, both returned matrices are expected to be
     2x2 and entirely zero.
     """
     genes = ['gene1', 'gene2']

     self.set_all_zero_priors()
     self.X = InferelatorData(
         pd.DataFrame([1, 2], index=genes, columns=['ss']),
         transpose_expression=True)
     self.Y = InferelatorData(
         pd.DataFrame([1, 2], index=genes, columns=['ss']),
         transpose_expression=True)
     self.clr = pd.DataFrame([[.1, .1], [.1, .2]], index=genes, columns=genes)

     betas, resc = self.run_bbsr()

     # Shape checks first, then content checks, for betas and then resc
     for result in (betas, resc):
         self.assert_matrix_is_square(2, result)

     all_zero = pd.DataFrame([[0, 0], [0, 0]], index=genes, columns=genes).astype(float)
     for result in (betas, resc):
         pdt.assert_frame_equal(result, all_zero)
    def compute_common_data(self):
        """
        Populate ``self.design``, ``self.response`` and ``self.half_tau_response``.

        When a design-response driver is configured and it validates against the
        loaded metadata, the three matrices come from running that driver.
        Otherwise all three point at ``self.data``. In both cases ``self.data``
        is set to None at the end.
        """

        drd = None
        if self.drd_driver is not None:
            drd = self.drd_driver(metadata_handler=self.metadata_handler,
                                  return_half_tau=True)

        # The driver is only used when it exists and the metadata is usable
        run_driver = drd is not None and drd.validate_run(self.data.meta_data)

        if run_driver:
            # Calculate the design-response ODE
            # TODO: Rewrite DRD for InferelatorData
            # TODO: This is *horrifying* as is from a memory perspective
            # TODO: Really fix this soon
            Debug.vprint('Creating design and response matrix ... ')
            drd.delTmin, drd.delTmax, drd.tau = self.delTmin, self.delTmax, self.tau

            # The driver receives and returns transposed frames
            expression_df = self.data.to_df().T
            des, resp, half_tau_resp = drd.run(expression_df, self.data.meta_data)
            self.design = InferelatorData(des.T)
            self.response = InferelatorData(resp.T)
            self.half_tau_response = InferelatorData(half_tau_resp.T)
        else:
            # No driver or no usable metadata: expression data is both design and response
            self.design, self.response, self.half_tau_response = self.data, self.data, self.data

        shape_message = "Constructed design {d} and response {r} matrices".format(
            d=self.design.shape, r=self.response.shape)
        Debug.vprint(shape_message, level=1)

        self.data = None
 def test_create_sparse(self):
     """Build InferelatorData from a CSR matrix plus string labels and compare its frame to ``self.expr``."""
     csr_values = sparse.csr_matrix(self.expr.values)
     gene_labels = self.expr.columns.astype(str)
     sample_labels = self.expr.index.astype(str)

     adata = InferelatorData(csr_values,
                             gene_names=gene_labels,
                             sample_names=sample_labels)

     # NOTE(review): presumably converts the labels of self.expr to str in place - confirm
     InferelatorData._make_idx_str(self.expr)
     pdt.assert_frame_equal(self.expr, adata._adata.to_df())
 def test_12_34_identical(self):
     """Check the CLR matrix when x and y are copies of [[1, 2, 1], [3, 4, 6]] (transposed on load)."""
     source = InferelatorData(
         expression_data=np.array([[1, 2, 1], [3, 4, 6]]),
         transpose_expression=True)
     self.x_dataframe, self.y_dataframe = source.copy(), source.copy()

     clr, mutual_info = mi.context_likelihood_mi(self.x_dataframe,
                                                 self.y_dataframe)
     self.clr_matrix, self.mi_matrix = clr, mutual_info

     np.testing.assert_almost_equal(self.clr_matrix.values,
                                    np.array([[0, 1], [1, 0]]))
Beispiel #6
0
    def test_dask_function_mi(self):
        """Run `build_mi_array_dask` on two copies of the same 3x2 data and compare to fixed values."""

        raw_values = [[0, 0], [9, 3], [0, 9]]
        x_data = InferelatorData(pd.DataFrame(raw_values))
        y_data = InferelatorData(pd.DataFrame(raw_values))

        # NOTE(review): 10 is presumably the bin count and np.log the log function - confirm
        mi_array = dask_functions.build_mi_array_dask(x_data.values,
                                                      y_data.values,
                                                      10,
                                                      np.log)

        np.testing.assert_almost_equal(
            mi_array,
            np.array([[0.63651417, 0.63651417],
                      [0.63651417, 1.09861229]]))
 def setUp(self):
     """Create dense and sparse InferelatorData fixtures from the transposed test expression matrix."""
     source = TestDataSingleCellLike.expression_matrix

     self.expr = source.copy().T
     self.expr_sparse = sparse.csr_matrix(source.values.T).astype(np.int32)
     self.meta = TestDataSingleCellLike.meta_data.copy()

     self.adata = InferelatorData(self.expr, transpose_expression=False)
     # The sparse matrix has no labels of its own, so gene names are passed in
     self.adata_sparse = InferelatorData(
         self.expr_sparse,
         gene_names=source.index,
         transpose_expression=False,
         meta_data=TestDataSingleCellLike.meta_data.copy())
 def test_12_34_and_zeros(self):
     """Check that the CLR matrix is entirely NaN when y is a 2x2 block of zeros."""
     self.y_dataframe = InferelatorData(expression_data=np.zeros((2, 2)))

     clr, mutual_info = mi.context_likelihood_mi(self.x_dataframe,
                                                 self.y_dataframe)
     self.clr_matrix, self.mi_matrix = clr, mutual_info

     # Every entry of the CLR matrix must be NaN
     all_nan = np.isnan(self.clr_matrix.values).all()
     self.assertTrue(all_nan)
    def test_add_genedata(self):
        """
        Attach gene metadata and check the resulting gene names and trim list.

        The gene names must still match the columns of ``self.expr``, and the
        ``trim_gene_list`` entry stored in ``uns`` must equal
        ``CORRECT_GENES_INTERSECT``.
        """
        # Work on a copy: assigning a new index to the class-level fixture would
        # leak into every other test that reads TestDataSingleCellLike.gene_metadata
        gene_data = TestDataSingleCellLike.gene_metadata.copy()
        gene_data.index = gene_data.iloc[:, 0]

        adata = InferelatorData(self.expr, gene_data=gene_data)
        pdt.assert_index_equal(adata.gene_names, self.expr.columns)
        pdt.assert_index_equal(adata._adata.uns["trim_gene_list"],
                               CORRECT_GENES_INTERSECT)
    def test_non_finite_sparse(self):
        """Check the count reported by `non_finite` as NaNs are written into a sparse matrix."""
        csr_values = sparse.csr_matrix(self.expr.values.astype(float))
        adata = InferelatorData(csr_values,
                                gene_names=self.expr.columns,
                                sample_names=self.expr.index)

        # Clean data: zero non-finite entries and no names
        count, names = adata.non_finite
        self.assertEqual(count, 0)
        self.assertIsNone(names)

        # Each NaN written into a new column of row 0 raises the count by one
        for column, expected_count in ((0, 1), (1, 2)):
            adata.expression_data[0, column] = np.nan

            count, _ = adata.non_finite
            self.assertEqual(count, expected_count)
Beispiel #11
0
    def setup_one_column(self):
        """Set ``self.exp`` to a 2-sample, 3-gene dataset and ``self.priors`` to a single-TF ('tf1') prior."""
        expression_values = np.array([[1, 1, 0], [3, 2, 3]])
        self.exp = InferelatorData(
            pd.DataFrame(expression_values,
                         index=['s1', 's2'],
                         columns=['g1', 'tf1', 'g3']))

        # Prior rows follow the gene order of the freshly built dataset
        prior_values = np.array([[1], [1], [0]])
        self.priors = pd.DataFrame(prior_values,
                                   columns=['tf1'],
                                   index=self.exp.gene_names)
    def test_non_finite(self):
        """Check the count and gene names reported by `non_finite` as NaNs are written into a dense matrix."""
        adata = InferelatorData(self.expr.values.astype(float),
                                gene_names=self.expr.columns,
                                sample_names=self.expr.index)

        # Clean data: zero non-finite entries and no names
        count, names = adata.non_finite
        self.assertEqual(count, 0)
        self.assertIsNone(names)

        # After a NaN is written into column i of row 0, the first i + 1 genes are reported
        expectations = [(1, ["gene1"]), (2, ["gene1", "gene2"])]
        for column, (expected_count, expected_names) in enumerate(expectations):
            adata.expression_data[0, column] = np.nan

            count, names = adata.non_finite
            self.assertEqual(count, expected_count)
            self.assertListEqual(names.tolist(), expected_names)
    def setUp(self):
        """Create an AMuSR workflow with two tasks of hand-written design/response data and matching priors."""
        tfs = ['tf1', 'tf2', 'tf3']
        targets = ['gene1', 'gene2']

        def as_data(values, labels):
            # Wrap a float matrix with the given column labels
            return InferelatorData(pd.DataFrame(np.array(values).astype(float), columns=labels))

        self.workflow = workflow.inferelator_workflow(workflow="amusr", regression="amusr")
        # Replace output directory creation with a no-op
        self.workflow.create_output_dir = lambda *_: None

        # Both tasks share the same design values but differ in response
        self.workflow._task_design = [as_data([[1, 1, 3], [0, 0, 2], [0, 0, 1]], tfs) for _ in range(2)]
        self.workflow._task_response = [
            as_data([[1, 1], [2, 0], [3, 0]], targets),
            as_data([[1, 3], [2, 3], [3, 3]], targets),
        ]

        self.priors_data = pd.DataFrame([[0, 1, 1], [1, 0, 1]], index=targets, columns=tfs)
        self.gold_standard = self.priors_data.copy()
Beispiel #14
0
    def setup_mouse_th17(self):
        """Set ``self.exp`` to a 5-sample, 5-gene dataset and ``self.priors`` to a 5-gene by 4-TF signed prior."""
        samples = ['s1', 's2', 's3', 's4', 's5']
        genes = ['g1', 't2', 'g3', 'g4', 'g5']

        expression_values = np.array([
            [12.2844, 8.16, 10.4782, 5.46, 7.96367],
            [12.55, 8.5536, 11.0834, 5.4891, 7.86005],
            [11.8626, 7.765, 10.5227, 4.9039, 7.82641],
            [11.8623, 7.8903, 10.3418, 4.698, 7.94938],
            [11.881, 8.0871, 10.3878, 5.0788, 7.67066],
        ])
        self.exp = InferelatorData(
            pd.DataFrame(expression_values, index=samples, columns=genes))

        prior_values = np.array([
            [1, 0, 0, 1],
            [0, 0, 0, 0],
            [0, 0, -1, 0],
            [-1, 0, 0, -1],
            [0, 0, 1, 0],
        ])
        self.priors = pd.DataFrame(prior_values,
                                   columns=['t1', 't2', 't3', 't4'],
                                   index=['g1', 't2', 'g3', 'g4', 'g5'])
    def test_trim_dense(self):
        """
        Trim a dense dataset against gene metadata, without and then with constant-gene removal.

        After ``trim_genes(remove_constant_genes=False)`` the data must match
        ``self.expr`` reindexed to ``CORRECT_GENES_INTERSECT``; after
        ``trim_genes(remove_constant_genes=True)`` it must match
        ``CORRECT_GENES_NZ_VAR``. Expected frames are cast to int32 before comparison.
        """
        # Work on a copy: assigning a new index to the class-level fixture would
        # leak into every other test that reads TestDataSingleCellLike.gene_metadata
        gene_data = TestDataSingleCellLike.gene_metadata.copy()
        gene_data.index = gene_data.iloc[:, 0]

        adata = InferelatorData(self.expr, gene_data=gene_data)
        adata.trim_genes(remove_constant_genes=False)

        pdt.assert_frame_equal(
            self.expr.reindex(CORRECT_GENES_INTERSECT,
                              axis=1).astype(np.int32), adata._adata.to_df())

        adata.trim_genes(remove_constant_genes=True)
        pdt.assert_frame_equal(
            self.expr.reindex(CORRECT_GENES_NZ_VAR, axis=1).astype(np.int32),
            adata._adata.to_df())
    def compute_transcription_factor_activity(self,
                                              prior,
                                              expression_data,
                                              expression_data_halftau=None,
                                              keep_self=False,
                                              tau=None):
        """
        Calculate transcription factor activity from a prior and expression data.

        :param prior: Prior matrix; checked against the expression data by `_check_prior`
        :param expression_data: Expression data; its sample names and metadata are carried to the result
        :param expression_data_halftau: Accepted but not used by this implementation
        :param keep_self: Passed through to `_check_prior`
        :param tau: Accepted but not used by this implementation
        :return: InferelatorData holding the activity, with the activity TFs as gene names
        :raises ValueError: If `_check_prior` yields no TFs to calculate activity for
        """

        prior, activity_tfs, _expr_tfs = self._check_prior(
            prior, expression_data, keep_self=keep_self)

        # Nothing to compute when the prior has no usable TF columns
        if len(activity_tfs) == 0:
            raise ValueError(
                "TFA cannot be calculated; prior matrix has no edges")

        tf_prior_values = prior.loc[:, activity_tfs].values
        activity = self._calculate_activity(tf_prior_values, expression_data)

        return InferelatorData(activity,
                               gene_names=activity_tfs,
                               sample_names=expression_data.sample_names,
                               meta_data=expression_data.meta_data)
    def _combine_expression_velocity(self, expression, velocity):
        """
        Calculate dX/dt + lambda * X

        The decay constants (lambda) come from the first source that applies, in this order:
        ``self._decay_constants``, ``1 / self.tau`` repeated once per gene, the
        ``decay_constants`` column of ``velocity.gene_data``, then the same column of
        ``expression.gene_data``. The two gene_data sources are only used when
        ``self._use_precalculated_decay_constants`` is truthy. When no source applies,
        ``velocity`` is returned unchanged.

        :param expression: Expression data (X); gene and sample names must align with velocity
        :param velocity: Velocity data (dX/dt)
        :return: InferelatorData of velocity plus decay-scaled expression, or ``velocity`` itself
        """

        assert check.indexes_align(
            (expression.gene_names, velocity.gene_names))
        assert check.indexes_align(
            (expression.sample_names, velocity.sample_names))

        decay_column = "decay_constants"

        if self._decay_constants is not None:
            Debug.vprint("Using preloaded decay constants in _decay_constants")
            lambdas = self._decay_constants
        elif self.tau is not None:
            Debug.vprint(f"Calculating decay constants for tau {self.tau}")
            lambdas = np.repeat(1 / self.tau, expression.num_genes)
        elif decay_column in velocity.gene_data.columns and self._use_precalculated_decay_constants:
            Debug.vprint(f"Extracting decay constants from {velocity.name}")
            lambdas = velocity.gene_data[decay_column].values
        elif decay_column in expression.gene_data.columns and self._use_precalculated_decay_constants:
            Debug.vprint(f"Extracting decay constants from {expression.name}")
            lambdas = expression.gene_data[decay_column].values
        else:
            Debug.vprint(
                "No decay information found. Solving dX/dt = AB for Betas")
            return velocity

        # Scale each gene column by its decay constant, then add the velocity
        decay_term = np.multiply(expression.values, lambdas[None, :])
        combined = np.add(velocity.values, decay_term)

        return InferelatorData(combined,
                               gene_names=expression.gene_names,
                               sample_names=expression.sample_names,
                               meta_data=expression.meta_data)
    def test_trim_sparse(self):
        """
        Trim a sparse dataset against gene metadata, without and then with constant-gene removal.

        After ``trim_genes(remove_constant_genes=False)`` the data must match
        ``self.expr`` reindexed to ``CORRECT_GENES_INTERSECT``; after
        ``trim_genes(remove_constant_genes=True)`` it must match
        ``CORRECT_GENES_NZ_VAR``.
        """
        # Work on a copy: assigning a new index to the class-level fixture would
        # leak into every other test that reads TestDataSingleCellLike.gene_metadata
        gene_data = TestDataSingleCellLike.gene_metadata.copy()
        gene_data.index = gene_data.iloc[:, 0]

        adata_sparse = InferelatorData(
            sparse.csr_matrix(
                TestDataSingleCellLike.expression_matrix.values.T),
            gene_names=TestDataSingleCellLike.expression_matrix.index,
            meta_data=TestDataSingleCellLike.meta_data.copy(),
            gene_data=gene_data)

        adata_sparse.trim_genes(remove_constant_genes=False)
        pdt.assert_frame_equal(
            self.expr.reindex(CORRECT_GENES_INTERSECT, axis=1),
            adata_sparse._adata.to_df())

        adata_sparse.trim_genes(remove_constant_genes=True)
        pdt.assert_frame_equal(self.expr.reindex(CORRECT_GENES_NZ_VAR, axis=1),
                               adata_sparse._adata.to_df())
 def test_create_df(self):
     """Build InferelatorData from a DataFrame and check that the stored values are unchanged."""
     adata = InferelatorData(self.expr)
     observed, expected = adata.expression_data, self.expr.values
     npt.assert_array_equal(observed, expected)
import unittest
import pandas as pd
import numpy as np
import scipy.sparse as sps
from inferelator.regression import mi
from inferelator.utils import InferelatorData

# Module-level 2x2 fixtures; every one is built with transpose_expression=True.
# Dense [[1, 2], [3, 4]]
L = InferelatorData(
    expression_data=np.array([[1, 2], [3, 4]]),
    transpose_expression=True,
)
# The same values as L, held in a scipy CSR matrix
L_sparse = InferelatorData(
    expression_data=sps.csr_matrix([[1, 2], [3, 4]]),
    transpose_expression=True,
)
# Dense [[3, 4], [2, 1]]
L2 = InferelatorData(
    expression_data=np.array([[3, 4], [2, 1]]),
    transpose_expression=True,
)


class Test2By2(unittest.TestCase):
    def setUp(self):
        """Give each test its own x and y copies of the module-level fixture ``L``."""
        self.x_dataframe, self.y_dataframe = L.copy(), L.copy()

    def test_12_34_identical(self):
        """Check the CLR matrix when x and y are both copies of the fixture ``L``."""
        clr, mutual_info = mi.context_likelihood_mi(self.x_dataframe,
                                                    self.y_dataframe)
        self.clr_matrix, self.mi_matrix = clr, mutual_info

        np.testing.assert_almost_equal(self.clr_matrix.values,
                                       np.array([[0, 1], [1, 0]]))

    def test_12_34_minus(self):
        """Compute mi for identical arrays [[1, 2], [2, 4]]."""
        self.y_dataframe.multiply(-1)
        self.clr_matrix, self.mi_matrix = mi.context_likelihood_mi(
Beispiel #21
0
    })
    priors_data = pd.DataFrame([[0, 1], [0, 1], [1, 0], [0, 0]],
                               index=["gene1", "gene2", "gene4", "gene5"],
                               columns=["gene3", "gene6"])
    gene_metadata = pd.DataFrame({
        "SystematicName":
        ["gene1", "gene2", "gene3", "gene4", "gene7", "gene6"]
    })
    gene_list_index = "SystematicName"
    tf_names = ["gene3", "gene6"]


# Dense fixture built from the test expression matrix (transposed on load),
# with samples named "0" through "9"
TEST_DATA = InferelatorData(
    TestDataSingleCellLike.expression_matrix,
    transpose_expression=True,
    meta_data=TestDataSingleCellLike.meta_data,
    gene_data=TestDataSingleCellLike.gene_metadata,
    gene_data_idx_column=TestDataSingleCellLike.gene_list_index,
    sample_names=[str(i) for i in range(10)],
)

# Sparse counterpart: the matrix is transposed before conversion to CSR,
# so gene names are supplied from the original index
TEST_DATA_SPARSE = InferelatorData(
    sps.csr_matrix(TestDataSingleCellLike.expression_matrix.T.values),
    gene_names=TestDataSingleCellLike.expression_matrix.index,
    sample_names=[str(i) for i in range(10)],
    meta_data=TestDataSingleCellLike.meta_data,
    gene_data=TestDataSingleCellLike.gene_metadata,
    gene_data_idx_column=TestDataSingleCellLike.gene_list_index,
)

# Gene sets the trimming tests expect to remain
# NOTE(review): presumably the genes shared by expression data and gene metadata,
# and the subset of those with non-zero variance - confirm
CORRECT_GENES_INTERSECT = pd.Index(
    ["gene1", "gene2", "gene3", "gene4", "gene6"],
)
CORRECT_GENES_NZ_VAR = pd.Index(
    ["gene1", "gene2", "gene4", "gene6"],
)
 def test_create_df_transpose(self):
     """Build InferelatorData with ``transpose_expression=True`` and check that the stored values are transposed."""
     adata = InferelatorData(self.expr, transpose_expression=True)
     observed, expected = adata.expression_data, self.expr.values.T
     npt.assert_array_equal(observed, expected)
 def test_create_array(self):
     """Build InferelatorData from a bare array plus string labels and compare its frame to ``self.expr``."""
     raw_values = self.expr.values
     gene_labels = self.expr.columns.astype(str)
     sample_labels = self.expr.index.astype(str)

     adata = InferelatorData(raw_values,
                             gene_names=gene_labels,
                             sample_names=sample_labels)

     # NOTE(review): presumably converts the labels of self.expr to str in place - confirm
     InferelatorData._make_idx_str(self.expr)
     pdt.assert_frame_equal(self.expr, adata._adata.to_df())
 def test_add_metadata(self):
     """Attach metadata and check it is returned unchanged apart from a string-typed index."""
     adata = InferelatorData(self.expr, meta_data=self.meta)

     # The expected frame is self.meta itself, with its index cast to str in place
     expected_meta = self.meta
     expected_meta.index = expected_meta.index.astype(str)

     pdt.assert_frame_equal(expected_meta, adata.meta_data)
 def test_create_metadata(self):
     """Attach metadata at construction and check that ``meta_data`` equals the frame passed in."""
     observed_meta = InferelatorData(self.expr, meta_data=self.meta).meta_data
     pdt.assert_frame_equal(observed_meta, self.meta)