Example #1
0
    def test_creation_and_from_to_x_y(self):
        """Round-trip a generated problem through (x, y) and reuse its parameters.

        First, a problem converted with ``to_x_y`` and rebuilt with
        ``from_x_y`` must produce the very same ``x`` and ``y`` again.
        Second, a problem generated from explicitly supplied ``beta``,
        ``gamma`` and random effects must report those same values back.
        """
        common_settings = {
            "features_labels": [3, 3, 1, 2],
            "random_intercept": True,
            "obs_std": 0.1,
        }
        source_problem, source_params = LinearLMEProblem.generate(
            groups_sizes=[4, 5, 10], seed=42, **common_settings)

        features, targets = source_problem.to_x_y()
        rebuilt_problem, _ = LinearLMEProblem.from_x_y(features, targets)
        features_again, targets_again = rebuilt_problem.to_x_y()
        self.assertTrue(
            np.all(features == features_again)
            and np.all(targets == targets_again))

        # Generate a second problem that is forced to use the first one's
        # parameters; only the group sizes and the seed differ.
        _, derived_params = LinearLMEProblem.generate(
            groups_sizes=[3, 4, 5],
            beta=source_params["beta"],
            gamma=source_params["gamma"],
            true_random_effects=source_params["random_effects"],
            seed=43,
            **common_settings)

        effect_pairs = zip(source_params["random_effects"],
                           derived_params["random_effects"])
        self.assertTrue(
            np.all(source_params["beta"] == derived_params["beta"])
            and np.all(source_params["gamma"] == derived_params["gamma"])
            and all(np.all(ours == theirs) for ours, theirs in effect_pairs))
Example #2
0
 def test_compare_to_old_oracle(self):
     """Check the current oracle against the legacy implementation.

     At 100 random (beta, gamma) points the loss, the gradient and hessian
     w.r.t. gamma, the optimal beta and the optimal random effects of
     ``LinearLMEOracle`` are compared with those of ``OldOracle``.
     """
     fixed_dim, random_dim = 4, 2
     problem, _ = LinearLMEProblem.generate(groups_sizes=[4, 5, 10],
                                            features_labels=[3, 3, 1],
                                            random_intercept=False,
                                            obs_std=0.1,
                                            seed=42)
     current = LinearLMEOracle(problem)
     legacy = OldOracle(problem)
     np.random.seed(42)
     n_points = 100
     # Any discrepancy should come only from Cholesky vs. regular inversion
     # instabilities, so the tolerances sit close to machine precision.
     rtol, atol = 1e-8, 1e-10

     def expect_close(new_value, old_value, message, scale=1):
         # ``scale`` loosens both tolerances by the same factor.
         self.assertTrue(allclose(new_value,
                                  old_value,
                                  rtol=scale * rtol,
                                  atol=scale * atol),
                         msg=message)

     beta_samples = np.random.rand(n_points, fixed_dim)
     gamma_samples = np.random.rand(n_points, random_dim)
     for beta, gamma in zip(beta_samples, gamma_samples):
         new_loss = current.loss(beta, gamma)
         old_loss = legacy.loss(beta, gamma)
         self.assertAlmostEqual(new_loss,
                                old_loss,
                                delta=atol,
                                msg="Loss does not match with old oracle")

         new_gradient = current.gradient_gamma(beta, gamma)
         old_gradient = legacy.gradient_gamma(beta, gamma)
         expect_close(new_gradient, old_gradient,
                      "Gradients don't match with old oracle")

         new_hessian = current.hessian_gamma(beta, gamma)
         old_hessian = legacy.hessian_gamma(beta, gamma)
         expect_close(new_hessian, old_hessian,
                      "Hessian does not match with old oracle",
                      scale=100)

         new_beta = current.optimal_beta(gamma)
         old_beta = legacy.optimal_beta(gamma)
         expect_close(new_beta, old_beta,
                      "Optimal betas don't match with old oracle")

         new_effects = current.optimal_random_effects(beta, gamma)
         old_effects = legacy.optimal_random_effects(beta, gamma)
         expect_close(new_effects, old_effects,
                      "Optimal random effects don't match with old oracle")
Example #3
0
    def test_creation_from_no_data(self):
        """Generate a problem with an empty feature list and check its shape.

        With ``features_labels=[]`` and ``random_intercept=True`` the test
        expects one-element ``beta`` and ``gamma`` vectors and all-ones
        fixed and random feature matrices in every item yielded by the
        problem.
        """
        problem, true_parameters = LinearLMEProblem.generate(
            groups_sizes=[4, 5, 10],
            features_labels=[],
            random_intercept=True,
            obs_std=0.1,
            seed=42)

        # The failure messages are passed *into* the assertions; previously
        # two of them sat outside the call and were silently discarded.
        self.assertEqual(len(true_parameters["beta"]), 1,
                         "Beta should be of len = 1 for no-data problem")
        self.assertEqual(len(true_parameters["gamma"]), 1,
                         "Gamma should be of len = 1 for no-data problem")
        self.assertTrue(
            all(
                np.all(x == 1) and np.all(z == 1)
                for x, _y, z, _l in problem),
            "All fixed and random features should be 1 for no-data problem")
Example #4
0
 def test_beta_to_gamma_map(self):
     """Check ``LinearLMEOracle.beta_to_gamma_map`` for a mixed-label problem.

     The expected map is hard-coded for ``features_labels=[3, 3, 1, 2, 3,
     1, 2]`` without a random intercept. Both the length and every entry
     of the oracle's map are compared with it.
     """
     problem, _ = LinearLMEProblem.generate(
         groups_sizes=[4, 5, 10],
         features_labels=[3, 3, 1, 2, 3, 1, 2],
         random_intercept=False,
         obs_std=0.1,
         seed=42)
     oracle = LinearLMEOracle(problem)
     true_beta_to_gamma_map = np.array([-1, 0, 1, -1, 3, -1])
     # Actual value first, expected value second, as the wording implies.
     mismatch_message = (
         "Beta-to-gamma mask is not right: \n %s is not \n %s as should be"
         % (oracle.beta_to_gamma_map, true_beta_to_gamma_map))
     # zip() stops at the shorter sequence, so a map of the wrong length
     # could otherwise pass the element-wise loop below unnoticed.
     self.assertEqual(len(true_beta_to_gamma_map),
                      len(oracle.beta_to_gamma_map),
                      msg=mismatch_message)
     for expected, actual in zip(true_beta_to_gamma_map,
                                 oracle.beta_to_gamma_map):
         self.assertEqual(expected, actual, msg=mismatch_message)
    def test_score_function(self):
        """Basic check of ``model.score`` at two points.

        The score (R^2, per the original author's note) is evaluated with
        the true coefficients plugged into the fitted model, where it must
        exceed 0.99, and with all-zero coefficients, where its absolute
        value must stay below 0.1.
        """
        problem_parameters = {
            "groups_sizes": [20, 5, 10, 50],
            "features_labels": [3, 3, 3],
            "random_intercept": True,
            "features_covariance_matrix":
            np.array([[1, 0, 0], [0, 1, 0.7], [0, 0.7, 1]]),
            "obs_std": 0.1,
        }

        model_parameters = {
            "nnz_tbeta": 4,
            "nnz_tgamma": 4,
            # We expect the coefficient vectors to be dense so we turn
            # regularization off.
            "lb": 0,
            "lg": 0,
            "initializer": 'EM',
            "logger_keys": (
                'converged',
                'loss',
            ),
            "tol": 1e-6,
            "n_iter": 1000,
            "tol_inner": 1e-4,
            "n_iter_inner": 1000,
        }

        problem, true_model_parameters = LinearLMEProblem.generate(
            **problem_parameters, seed=42)
        x, y = problem.to_x_y()
        model = LinearLMESparseModel(**model_parameters)
        model.fit(x, y)

        # Overwrite the fitted coefficients with the ground truth.
        model.coef_["beta"] = true_model_parameters["beta"]
        model.coef_["random_effects"] = true_model_parameters["random_effects"]
        good_score = model.score(x, y)
        # unittest assertions (rather than bare ``assert``) still run under
        # ``python -O`` and report the offending value on failure.
        self.assertGreater(good_score, 0.99)

        # Overwrite the coefficients with zeros.
        model.coef_["beta"] = np.zeros(4)
        model.coef_["random_effects"] = np.zeros((4, 4))
        bad_score = model.score(x, y)
        self.assertLess(abs(bad_score), 0.1)
Example #6
0
    def test_gamma_derivatives(self):
        """Compare ``gradient_gamma`` with numerical partial derivatives.

        For five randomly drawn seeds a two-random-effect problem is
        generated, and at 30 random gamma points both components of the
        oracle's gradient are checked against ``derivative`` applied to the
        loss along the corresponding coordinate.
        """
        trials = 5
        rtol = 1e-3
        atol = 1e-2
        dx = rtol / 1000
        for random_seed in np.random.randint(0, 1000, size=trials):
            np.random.seed(random_seed)
            problem, true_parameters = LinearLMEProblem.generate(
                features_labels=[3, 3],
                random_intercept=False,
                seed=random_seed)
            oracle = LinearLMEOracle(problem)
            gamma_points = np.random.rand(30, 2)
            # Only the length of the true beta is used; the values are random.
            random_beta = np.random.rand(len(true_parameters['beta']))

            def numeric_partial(point, axis):
                """Differentiate the loss numerically along one gamma axis."""

                def loss_along_axis(value):
                    shifted = point.copy()
                    shifted[axis] = value
                    return oracle.loss(random_beta, shifted)

                return derivative(loss_along_axis, point[axis], dx=dx)

            analytic_gradients = np.array(
                [oracle.gradient_gamma(random_beta, p) for p in gamma_points])
            numeric_first = np.array(
                [numeric_partial(p, 0) for p in gamma_points])
            numeric_second = np.array(
                [numeric_partial(p, 1) for p in gamma_points])

            for i, (gradient, first, second) in enumerate(
                    zip(analytic_gradients, numeric_first, numeric_second)):
                for analytic, numeric in ((gradient[0], first),
                                          (gradient[1], second)):
                    self.assertTrue(
                        allclose(analytic, numeric, rtol=rtol, atol=atol),
                        msg=
                        "Gamma gradient does not match with numerical partial derivative: %d"
                        % i)
Example #7
0
    def test_hessian_gamma(self):
        """Check ``hessian_gamma`` against a finite difference of the gradient.

        At 100 random points the product of the hessian with a random
        direction is compared with the central difference of
        ``gradient_gamma`` taken along that direction.
        """
        n_checks = 100
        seed = 34
        step = 1e-6
        rtol = 1e-5
        atol = 1e-7
        problem, _ = LinearLMEProblem.generate(seed=seed)
        oracle = LinearLMEOracle(problem)
        np.random.seed(seed)
        for _ in range(n_checks):
            beta = np.random.rand(problem.num_fixed_effects)
            gamma = np.random.rand(problem.num_random_effects)
            direction = np.random.rand(problem.num_random_effects)

            hessian_product = oracle.hessian_gamma(beta, gamma).dot(direction)
            offset = step * direction
            gradient_difference = (
                oracle.gradient_gamma(beta, gamma + offset) -
                oracle.gradient_gamma(beta, gamma - offset))
            finite_difference = gradient_difference / (2 * step)

            self.assertTrue(allclose(hessian_product,
                                     finite_difference,
                                     rtol=rtol,
                                     atol=atol),
                            msg="Hessian does not look right")
Example #8
0
    def test_no_data_problem(self):
        """Recover ``beta + random effects`` on a problem without features.

        Gamma is estimated empirically from the true random effects; the
        oracle's optimal beta and random effects for that gamma must then
        sum to roughly the true ``beta + us`` (tolerances of 0.1).
        """
        seed = 43
        problem, true_parameters = LinearLMEProblem.generate(
            groups_sizes=[10, 10, 10],
            features_labels=[],
            random_intercept=True,
            seed=seed)
        true_beta = true_parameters['beta']
        true_effects = true_parameters['random_effects']
        # Mean of squared true random effects over the groups.
        gamma_estimate = np.sum(true_effects**2, axis=0) / problem.num_groups
        tolerance = 1e-1
        oracle = LinearLMEOracle(problem)

        beta_estimate = oracle.optimal_beta(gamma_estimate)
        effects_estimate = oracle.optimal_random_effects(
            beta_estimate, gamma_estimate)
        self.assertTrue(allclose(beta_estimate + effects_estimate,
                                 true_beta + true_effects,
                                 rtol=tolerance,
                                 atol=tolerance),
                        msg="No-data-problem is not right")
Example #9
0
 def test_non_regularized_oracle_is_zero_regularized_oracle(self):
     """Compare the regularized oracle at ``lb = lg = 0`` with the plain one.

     At 100 random points the loss, the gradient and hessian w.r.t. gamma,
     the optimal beta and the optimal random effects of
     ``LinearLMEOracleRegularized`` must match those of ``LinearLMEOracle``.
     """
     fixed_dim, random_dim = 4, 3
     problem, _ = LinearLMEProblem.generate(groups_sizes=[4, 5, 10],
                                            features_labels=[3, 3, 1, 2],
                                            random_intercept=False,
                                            obs_std=0.1,
                                            seed=42)
     # With both regularization coefficients at zero the two oracles are
     # expected to be exactly equivalent.
     plain = LinearLMEOracle(problem)
     regularized = LinearLMEOracleRegularized(problem,
                                              lg=0,
                                              lb=0,
                                              nnz_tbeta=1,
                                              nnz_tgamma=1)
     np.random.seed(42)
     n_points = 100
     rtol = 1e-14
     atol = 1e-14

     def expect_close(regularized_value, plain_value, message, scale=1):
         # ``scale`` loosens both tolerances by the same factor.
         self.assertTrue(allclose(regularized_value,
                                  plain_value,
                                  rtol=scale * rtol,
                                  atol=scale * atol),
                         msg=message)

     # Draw order (beta, gamma, tbeta, tgamma) fixes the random stream.
     beta_samples = np.random.rand(n_points, fixed_dim)
     gamma_samples = np.random.rand(n_points, random_dim)
     tbeta_samples = np.random.rand(n_points, fixed_dim)
     tgamma_samples = np.random.rand(n_points, random_dim)

     for beta, gamma, tbeta, tgamma in zip(beta_samples, gamma_samples,
                                           tbeta_samples, tgamma_samples):
         regularized_loss = regularized.loss(beta, gamma, tbeta, tgamma)
         plain_loss = plain.loss(beta, gamma)
         self.assertAlmostEqual(
             regularized_loss,
             plain_loss,
             delta=atol,
             msg=
             "Loss of zero-regularized and non-regularized oracles is different"
         )

         regularized_gradient = regularized.gradient_gamma(
             beta, gamma, tgamma)
         plain_gradient = plain.gradient_gamma(beta, gamma)
         expect_close(
             regularized_gradient, plain_gradient,
             "Gradients w.r.t. gamma of zero-regularized and non-regularized oracles are different"
         )

         regularized_hessian = regularized.hessian_gamma(beta, gamma)
         plain_hessian = plain.hessian_gamma(beta, gamma)
         expect_close(
             regularized_hessian,
             plain_hessian,
             "Hessian w.r.t. gamma of zero-regularized and non-regularized oracles are different",
             scale=100)

         regularized_beta = regularized.optimal_beta(gamma, tbeta)
         plain_beta = plain.optimal_beta(gamma)
         expect_close(
             regularized_beta, plain_beta,
             "Optimal betas of zero-regularized and non-regularized oracles are different"
         )

         regularized_effects = regularized.optimal_random_effects(
             beta, gamma)
         plain_effects = plain.optimal_random_effects(beta, gamma)
         expect_close(
             regularized_effects, plain_effects,
             "Optimal random effects of zero-regularized and non-regularized oracles is different"
         )
    def test_solving_sparse_problem(self):
        """Fit sparse models on ten random problems and check their quality.

        For each seed a random 0/1 ``beta`` and ``gamma`` are drawn and a
        problem is generated from them. Two ``LinearLMESparseModel``
        instances ("loss-weighted" and "l2" regularization) are fitted.
        The "loss-weighted" model must have a non-increasing loss, an
        explained variance above 0.9, an MSE below 0.05 and feature
        selection accuracies above 0.7. The "l2" model's metrics are only
        printed for comparison.
        """
        trials = 10
        problem_parameters = {
            "groups_sizes": [20, 12, 14, 50, 11],
            "features_labels": [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
            "random_intercept": True,
            "obs_std": 0.1,
        }

        model_parameters = {
            "lb": 0.01,
            "lg": 0.01,
            "initializer": None,
            "logger_keys": (
                'converged',
                'loss',
            ),
            "tol": 1e-6,
            "n_iter": 1000,
            "tol_inner": 1e-4,
            "n_iter_inner": 1000,
        }

        max_mse = 0.05
        min_explained_variance = 0.9
        fixed_effects_min_accuracy = 0.7
        random_effects_min_accuracy = 0.7

        for i in range(trials):
            np.random.seed(i)
            true_beta = np.random.choice(2, size=11, p=np.array([0.5, 0.5]))
            if sum(true_beta) == 0:
                # Make sure at least one fixed effect is active.
                true_beta[0] = 1
            # A random effect can only be active where the fixed one is.
            true_gamma = np.random.choice(2, size=11, p=np.array(
                [0.3, 0.7])) * true_beta

            problem, true_model_parameters = LinearLMEProblem.generate(
                **problem_parameters, beta=true_beta, gamma=true_gamma, seed=i)
            model = LinearLMESparseModel(**model_parameters,
                                         nnz_tbeta=sum(true_beta),
                                         nnz_tgamma=sum(true_gamma),
                                         regularization_type="loss-weighted")
            model2 = LinearLMESparseModel(**model_parameters,
                                          nnz_tbeta=sum(true_beta),
                                          nnz_tgamma=sum(true_gamma),
                                          regularization_type="l2")

            x, y = problem.to_x_y()
            model.fit(x, y)
            model2.fit(x, y)

            loss = np.array(model.logger_.get("loss"))
            self.assertTrue(
                np.all(loss[1:] - loss[:-1] <= 0),
                msg=
                "%d) Loss does not decrease monotonically with iterations. (seed=%d)"
                % (i, i))

            y_pred = model.predict(x)
            explained_variance = explained_variance_score(y, y_pred)
            mse = mean_squared_error(y, y_pred)

            y_pred2 = model2.predict(x)
            explained_variance2 = explained_variance_score(y, y_pred2)
            mse2 = mean_squared_error(y, y_pred2)

            coefficients = model.coef_
            fixed_effects_accuracy = accuracy_score(
                true_beta, coefficients["tbeta"] != 0)
            random_effects_accuracy = accuracy_score(
                true_gamma, coefficients["tgamma"] != 0)

            coefficients2 = model2.coef_
            fixed_effects_accuracy2 = accuracy_score(
                true_beta, coefficients2["tbeta"] != 0)
            random_effects_accuracy2 = accuracy_score(
                true_gamma, coefficients2["tgamma"] != 0)

            # The header now interpolates the trial index; before, the
            # literal "%d" was printed because no argument was supplied.
            print("\n %d) MSE    EV FEA REA" % i)
            print("%.4f  %.4f %.4f %.4f" %
                  (mse, explained_variance, fixed_effects_accuracy,
                   random_effects_accuracy))
            print("%.4f  %.4f %.4f %.4f" %
                  (mse2, explained_variance2, fixed_effects_accuracy2,
                   random_effects_accuracy2))

            self.assertGreater(
                explained_variance,
                min_explained_variance,
                msg=
                "%d) Explained variance is too small: %.3f < %.3f. (seed=%d)" %
                (i, explained_variance, min_explained_variance, i))
            self.assertGreater(
                max_mse,
                mse,
                msg="%d) MSE is too big: %.3f > %.2f  (seed=%d)" %
                (i, mse, max_mse, i))
            self.assertGreater(
                fixed_effects_accuracy,
                fixed_effects_min_accuracy,
                msg=
                "%d) Fixed Effects Selection Accuracy is too small: %.3f < %.2f  (seed=%d)"
                % (i, fixed_effects_accuracy, fixed_effects_min_accuracy, i))
            self.assertGreater(
                random_effects_accuracy,
                random_effects_min_accuracy,
                msg=
                "%d) Random Effects Selection Accuracy is too small: %.3f < %.2f  (seed=%d)"
                % (i, random_effects_accuracy, random_effects_min_accuracy, i))
Example #11
0
    def test_drop_matrices(self):
        """Check the drop penalties computed by ``LinearLMEOracleW``.

        At 100 random points, zeroing one beta coordinate (together with
        its mapped gamma coordinate, if any) must change the plain oracle's
        loss by ``drop_penalties_beta[j]``, and zeroing only the mapped
        gamma coordinate must change it by ``drop_penalties_gamma[idx]``.
        Finally, coordinates that are already zero must have zero penalty.
        """
        problem, _ = LinearLMEProblem.generate(groups_sizes=[20, 5, 10, 50],
                                               features_labels=[1, 2, 3, 3],
                                               random_intercept=True,
                                               obs_std=0.1,
                                               seed=42)
        simple_oracle = LinearLMEOracle(problem)
        oracle = LinearLMEOracleW(problem,
                                  lb=0,
                                  lg=0,
                                  nnz_tbeta=problem.num_fixed_effects,
                                  nnz_tgamma=problem.num_random_effects)
        n_points = 100
        tolerance = 1e-10
        np.random.seed(42)

        beta_samples = np.random.rand(n_points, problem.num_fixed_effects)
        gamma_samples = np.random.rand(n_points, problem.num_random_effects)
        for random_beta, random_gamma in zip(beta_samples, gamma_samples):
            base_loss = simple_oracle.loss(random_beta, random_gamma)
            oracle._recalculate_drop_matrices(random_beta, random_gamma)
            w_beta = oracle.drop_penalties_beta
            w_gamma = oracle.drop_penalties_gamma
            for j in range(problem.num_fixed_effects):
                beta_dropped = random_beta.copy()
                beta_dropped[j] = 0
                gamma_dropped = random_gamma.copy()
                idx = oracle.beta_to_gamma_map[j].astype(int)
                if idx >= 0:
                    # This beta has a gamma counterpart: drop it as well and
                    # verify the gamma penalty on its own first.
                    gamma_dropped[idx] = 0
                    gamma_only_loss = simple_oracle.loss(
                        random_beta, gamma_dropped)
                    self.assertTrue(np.isclose(gamma_only_loss - base_loss,
                                               w_gamma[idx],
                                               rtol=tolerance,
                                               atol=tolerance),
                                    msg="%d: W_gamma is not right" % j)
                # Without a counterpart, gamma_dropped still equals
                # random_gamma here.
                beta_loss = simple_oracle.loss(beta_dropped, gamma_dropped)
                self.assertTrue(np.isclose(beta_loss - base_loss,
                                           w_beta[j],
                                           rtol=tolerance,
                                           atol=tolerance),
                                msg="%d) W_beta is not right" % j)

        # Coordinates that are already zero must cost nothing to drop.
        mostly_zero_beta = np.zeros(problem.num_fixed_effects)
        mostly_zero_gamma = np.zeros(problem.num_random_effects)
        mostly_zero_beta[0:2] = 1
        mostly_zero_gamma[0] = 1
        oracle._recalculate_drop_matrices(mostly_zero_beta, mostly_zero_gamma)
        w_beta = oracle.drop_penalties_beta
        w_gamma = oracle.drop_penalties_gamma
        self.assertTrue((w_gamma[1:] == 0).all(),
                        msg="Drop of zero gamma is not zero")
        self.assertTrue((w_beta[2:] == 0).all(),
                        msg="Drop of zero beta is not zero")
    def test_get_set_params(self):
        """Check that ``get_params``/``set_params`` transfer a configuration.

        Two models with different settings are fitted on the same data.
        After the parameters are swapped via ``set_params`` and both models
        are refitted, each must reproduce exactly the predictions the other
        one made before the swap.
        """
        problem_parameters = {
            "groups_sizes": [20, 5, 10, 50],
            "features_labels": [3, 3, 3],
            "random_intercept": True,
            "features_covariance_matrix":
            np.array([[1, 0, 0], [0, 1, 0.7], [0, 0.7, 1]]),
            "obs_std": 0.1,
        }
        model_parameters = {
            "nnz_tbeta": 4,
            "nnz_tgamma": 4,
            # We expect the coefficient vectors to be dense so we turn
            # regularization off.
            "lb": 0,
            "lg": 0,
            "initializer": 'EM',
            "logger_keys": (
                'converged',
                'loss',
            ),
            "tol": 1e-6,
            "n_iter": 1000,
            "tol_inner": 1e-4,
            "n_iter_inner": 1000,
        }
        # Now we want to solve a regularized problem to get two different models
        model2_parameters = {
            "nnz_tbeta": 3,
            "nnz_tgamma": 2,
            "lb": 20,
            "lg": 20,
            "initializer": None,
            "logger_keys": ('converged', ),
            "tol": 1e-6,
            "n_iter": 1000,
            "tol_inner": 1e-4,
            "n_iter_inner": 1000,
        }
        problem, _ = LinearLMEProblem.generate(**problem_parameters, seed=42)
        x, y = problem.to_x_y()

        model = LinearLMESparseModel(**model_parameters)
        model.fit(x, y)
        params = model.get_params()
        y_pred = model.predict(x)

        model2 = LinearLMESparseModel(**model2_parameters)
        model2.fit(x, y)
        params2 = model2.get_params()
        y_pred2 = model2.predict(x)

        # unittest assertions (rather than bare ``assert``) still run under
        # ``python -O`` and match the style of the other tests.
        model.set_params(**params2)
        model.fit(x, y)
        y_pred_with_other_params = model.predict(x)
        self.assertTrue(
            np.equal(y_pred_with_other_params, y_pred2).all(),
            msg="set_params or get_params is not working properly")

        model2.set_params(**params)
        model2.fit(x, y)
        y_pred2_with_other_params = model2.predict(x)
        self.assertTrue(
            np.equal(y_pred2_with_other_params, y_pred).all(),
            msg="set_params or get_params is not working properly")
    def test_solving_dense_problem(self):
        """Fit the model without regularization on twenty random problems.

        For every seed the logged loss must be non-increasing, the
        explained variance must exceed 0.9 and the MSE must stay below 0.05.
        """
        n_seeds = 20
        covariance = np.array([[1, 0, 0], [0, 1, 0.7], [0, 0.7, 1]])
        problem_parameters = dict(
            groups_sizes=[20, 5, 10, 50],
            features_labels=[3, 3, 3],
            random_intercept=True,
            features_covariance_matrix=covariance,
            obs_std=0.1,
        )
        model_parameters = dict(
            nnz_tbeta=2,
            nnz_tgamma=2,
            # We expect the coefficient vectors to be dense so we turn
            # regularization off.
            lb=0,
            lg=0,
            initializer='EM',
            logger_keys=('converged', 'loss'),
            tol=1e-6,
            n_iter=1000,
            tol_inner=1e-4,
            n_iter_inner=1000,
        )

        max_mse = 0.05
        min_explained_variance = 0.9

        for seed in range(n_seeds):
            problem, _ = LinearLMEProblem.generate(**problem_parameters,
                                                   seed=seed)
            model = LinearLMESparseModel(**model_parameters)

            x, y = problem.to_x_y()
            model.fit(x, y)

            loss_history = np.array(model.logger_.get("loss"))
            loss_steps = loss_history[1:] - loss_history[:-1]
            self.assertTrue(
                np.all(loss_steps <= 0),
                msg=
                "%d) Loss does not decrease monotonically with iterations. (seed=%d)"
                % (seed, seed))

            predictions = model.predict(x)
            explained_variance = explained_variance_score(y, predictions)
            mse = mean_squared_error(y, predictions)

            self.assertGreater(
                explained_variance,
                min_explained_variance,
                msg=
                "%d) Explained variance is too small: %.3f < %.3f. (seed=%d)" %
                (seed, explained_variance, min_explained_variance, seed))
            self.assertGreater(
                max_mse,
                mse,
                msg="%d) MSE is too big: %.3f > %.2f  (seed=%d)" %
                (seed, mse, max_mse, seed))