Example #1
0
    def _test_interaction(self, ngenes: int, test: str, constrained: bool,
                          spline_basis: str):
        """
        Simulate data, attach random covariates and fit a continuous interaction test.

        The data are simulated with num_batches=0 and num_conditions=0; the
        "continuous", "condition", "batch" and "size_factors" columns are drawn
        at random afterwards and handed to self._fit_continuous_interaction().

        :param ngenes: Number of genes (features) to simulate.
        :param test: Passed through as `test` to self._fit_continuous_interaction().
        :param constrained: Passed through as `constrained`.
        :param spline_basis: Passed through as `spline_basis`.
        :return: Whatever self._fit_continuous_interaction() returns.
        """
        num_timepoints = 5
        simulator = Simulator(num_observations=num_timepoints * 200,
                              num_features=ngenes)
        simulator.generate_sample_description(num_batches=0, num_conditions=0)
        simulator.generate_params()
        simulator.generate_data()

        # The columns are drawn in the order continuous -> condition -> batch ->
        # size factors so that the sequence of np.random calls stays fixed.
        continuous = np.random.randint(
            0, num_timepoints, size=simulator.nobs).astype(float)
        condition = [str(np.random.randint(0, 2)) for _ in continuous]
        # Batch labels are the condition label with a random digit appended.
        batch = [label + str(np.random.randint(0, 3)) for label in condition]
        size_factors = np.random.uniform(
            0.9, 1.1, simulator.nobs)  # TODO put into simulation.

        sample_description = pd.DataFrame({
            "continuous": continuous,
            "condition": condition,
            "batch": batch,
            "size_factors": size_factors,
        })
        return self._fit_continuous_interaction(
            sim=simulator,
            sample_description=sample_description,
            test=test,
            constrained=constrained,
            spline_basis=spline_basis,
        )
Example #2
0
    def _prepare_data(self, n_cells: int, n_genes: int, noise_model: str):
        """

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        :param noise_model: Noise model to use for data fitting.
        """
        if noise_model == "nb":
            from batchglm.api.models.numpy.glm_nb import Simulator
            rand_fn_loc = lambda shape: np.random.uniform(5, 10, shape)
            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
        elif noise_model == "norm":
            from batchglm.api.models.numpy.glm_norm import Simulator
            rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape)
            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
        else:
            raise ValueError("noise model %s not recognized" % noise_model)

        num_non_de = n_genes // 2
        sim = Simulator(num_observations=n_cells, num_features=n_genes)
        sim.generate_sample_description(num_batches=0, num_conditions=2)
        sim.generate_params(rand_fn_loc=rand_fn_loc,
                            rand_fn_scale=rand_fn_scale)
        sim.a_var[1, :num_non_de] = 0
        sim.b_var[1, :num_non_de] = 0
        self.isDE = np.arange(n_genes) >= num_non_de
        sim.generate_data()
        return sim
Example #3
0
    def test(self):
        """
        Check that factors that are numeric receive the correct number of coefficients.

        Two random numeric covariates are added to the simulated sample
        description and declared via `as_numeric`; the fitted location model
        is then expected to have 4 rows in a_var.

        :return: True if the assertion passed.
        """
        log_levels = (
            ("tensorflow", logging.ERROR),
            ("batchglm", logging.WARNING),
            ("diffxpy", logging.WARNING),
        )
        for logger_name, level in log_levels:
            logging.getLogger(logger_name).setLevel(level)

        simulator = Simulator(num_observations=2000, num_features=2)
        simulator.generate_sample_description(num_batches=0, num_conditions=2)
        simulator.generate_params()
        simulator.generate_data()

        # The simulator's own sample description is extended in place.
        numeric_covariates = ["numeric1", "numeric2"]
        sample_description = simulator.sample_description
        for covariate in numeric_covariates:
            sample_description[covariate] = np.random.random(
                size=simulator.nobs)

        wald_test = de.test.wald(
            data=simulator.input_data,
            sample_description=sample_description,
            formula_loc="~ 1 + condition + numeric1 + numeric2",
            formula_scale="~ 1",
            factor_loc_totest="condition",
            as_numeric=numeric_covariates,
            training_strategy="DEFAULT")

        # Check that number of coefficients is correct.
        n_loc_coefficients = wald_test.model_estim.a_var.shape[0]
        assert n_loc_coefficients == 4

        return True
Example #4
0
    def _test_null_distribution_wald_repeated(
            self,
            n_cells: int,
            n_genes: int,
            noise_model: str
    ):
        """
        Test if de.wald() generates a uniform p-value distribution
        if it is given data simulated based on the null model. Returns the p-value
        of the two-side Kolmgorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        :param noise_model: Noise model to use for data fitting.
        """
        if noise_model == "nb":
            from batchglm.api.models.numpy.glm_nb import Simulator
            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
        elif noise_model == "norm":
            from batchglm.api.models.numpy.glm_norm import Simulator
            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
        else:
            raise ValueError("noise model %s not recognized" % noise_model)

        sim = Simulator(num_observations=n_cells, num_features=n_genes)
        sim.generate_sample_description(num_batches=0, num_conditions=0)
        sim.generate_params(rand_fn_scale=rand_fn_scale)
        sim.generate_data()

        random_sample_description = pd.DataFrame({
            "condition": np.random.randint(2, size=sim.nobs),
            "batch": np.random.randint(2, size=sim.nobs)
        })

        test1 = de.test.wald(
            data=sim.input_data,
            sample_description=random_sample_description,
            factor_loc_totest="condition",
            formula_loc="~ 1 + condition + batch",
            noise_model=noise_model
        )
        test = de.test.wald_repeated(
            det=test1,
            factor_loc_totest="condition"
        )

        _ = test.summary()

        # Compare p-value distribution under null model against uniform distribution.
        pval_h0 = stats.kstest(test.pval, 'uniform').pvalue

        logging.getLogger("diffxpy").info('KS-test pvalue for null model match of wald_repeated(): %f' % pval_h0)
        assert pval_h0 > 0.05, ("KS-Test failed: pval_h0=%f is <= 0.05!" % np.round(pval_h0, 5))

        return True
Example #5
0
    def _test_wald_de(self, constrained: bool, spline_basis: str, ngenes: int):
        if self.noise_model == "nb":
            from batchglm.api.models.numpy.glm_nb import Simulator
            rand_fn_loc = lambda shape: np.random.uniform(2, 5, shape)
            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
        elif self.noise_model == "norm":
            from batchglm.api.models.numpy.glm_norm import Simulator
            rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape)
            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
        else:
            raise ValueError("noise model %s not recognized" %
                             self.noise_model)

        n_timepoints = 7
        sim = Simulator(num_observations=n_timepoints * 200,
                        num_features=ngenes)
        sim.generate_sample_description(num_batches=0,
                                        num_conditions=n_timepoints)
        sim.generate_params(rand_fn_loc=rand_fn_loc,
                            rand_fn_scale=rand_fn_scale)
        num_non_de = round(ngenes / 2)
        sim.a_var[
            1:, :
            num_non_de] = 0  # Set all condition effects of non DE genes to zero.
        sim.b_var[1:, :] = 0  # Use constant dispersion across all conditions.
        self.isDE = np.arange(ngenes) >= num_non_de
        sim.generate_data()

        random_sample_description = sim.sample_description
        random_sample_description["continuous"] = [
            int(x) for x in random_sample_description["condition"]
        ]
        random_sample_description["batch"] = [
            str(int(x)) + str(np.random.randint(0, 3))
            for x in random_sample_description["continuous"]
        ]

        test = de.test.continuous_1d(
            data=sim.input_data,
            sample_description=random_sample_description,
            gene_names=[
                "gene" + str(i) for i in range(sim.input_data.num_features)
            ],
            formula_loc="~ 1 + continuous + batch"
            if constrained else "~ 1 + continuous",
            formula_scale="~ 1",
            factor_loc_totest="continuous",
            continuous="continuous",
            constraints_loc={"batch": "continuous"} if constrained else None,
            df=5,
            spline_basis=spline_basis,
            test="wald",
            quick_scale=True,
            noise_model=self.noise_model)
        self._eval(sim=sim, test=test)
Example #6
0
    def _prepare_data(self, n_cells: int = 2000, n_genes: int = 100):
        """
        Simulate a data set with two conditions and default parameters.

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        :return: The simulator after data generation.
        """
        simulator = Simulator(num_features=n_genes, num_observations=n_cells)
        simulator.generate_sample_description(num_conditions=2, num_batches=0)
        simulator.generate_params()
        simulator.generate_data()
        return simulator
Example #7
0
    def _test_model_fit_partition(self, n_cells: int, n_genes: int,
                                  noise_model: str):
        """
        Test if de.wald() generates a uniform p-value distribution
        if it is given data simulated based on the null model. Returns the p-value
        of the two-side Kolmgorov-Smirnov test for equality of the observed
        p-value distribution and a uniform distribution.

        :param n_cells: Number of cells to simulate (number of observations per test).
        :param n_genes: Number of genes to simulate (number of tests).
        :param noise_model: Noise model to use for data fitting.
        """
        if noise_model == "nb":
            from batchglm.api.models.numpy.glm_nb import Simulator
            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
        elif noise_model == "norm":
            from batchglm.api.models.numpy.glm_norm import Simulator
            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
        else:
            raise ValueError("noise model %s not recognized" % noise_model)

        sim = Simulator(num_observations=n_cells, num_features=n_genes)
        sim.generate_sample_description(num_batches=0, num_conditions=0)
        sim.generate_params(rand_fn_scale=rand_fn_scale)
        sim.generate_data()

        random_sample_description = pd.DataFrame({
            "condition":
            np.random.randint(2, size=sim.nobs),
            "batch":
            np.random.randint(2, size=sim.nobs)
        })

        partition = de.fit.partition(
            data=sim.input_data,
            sample_description=random_sample_description,
            parts="condition")
        estim = partition.model(formula_loc="~ 1 + batch",
                                noise_model=noise_model)
        return True
Example #8
0
    def _prepate_data(self, n_cells: int, n_genes: int, n_groups: int):
        if self.noise_model == "nb":
            from batchglm.api.models.numpy.glm_nb import Simulator
            rand_fn_loc = lambda shape: np.random.uniform(0.1, 1, shape)
            rand_fn_scale = lambda shape: np.random.uniform(0.5, 1, shape)
        elif self.noise_model == "norm" or self.noise_model is None:
            from batchglm.api.models.numpy.glm_norm import Simulator
            rand_fn_loc = lambda shape: np.random.uniform(500, 1000, shape)
            rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
        else:
            raise ValueError("noise model %s not recognized" %
                             self.noise_model)

        sim = Simulator(num_observations=n_cells, num_features=n_genes)
        sim.generate_sample_description(num_batches=0, num_conditions=0)
        sim.generate_params(rand_fn_loc=rand_fn_loc,
                            rand_fn_scale=rand_fn_scale)
        sim.generate_data()

        random_sample_description = pd.DataFrame({
            "condition":
            [str(x) for x in np.random.randint(n_groups, size=sim.nobs)]
        })
        return sim, random_sample_description