Example #1
    def test_creation_and_from_to_x_y(self):
        problem, true_parameters = LinearLMEProblem.generate(
            groups_sizes=[4, 5, 10],
            features_labels=[3, 3, 1, 2],
            random_intercept=True,
            obs_std=0.1,
            seed=42)
        x1, y1 = problem.to_x_y()
        problem2, _ = LinearLMEProblem.from_x_y(x1, y1)
        x2, y2 = problem2.to_x_y()
        self.assertTrue(np.all(x1 == x2) and np.all(y1 == y2))
        test_problem, true_test_parameters = LinearLMEProblem.generate(
            groups_sizes=[3, 4, 5],
            features_labels=[3, 3, 1, 2],
            random_intercept=True,
            beta=true_parameters["beta"],
            gamma=true_parameters["gamma"],
            true_random_effects=true_parameters["random_effects"],
            obs_std=0.1,
            seed=43)

        self.assertTrue(
            np.all(true_parameters["beta"] == true_test_parameters["beta"]) and
            np.all(true_parameters["gamma"] == true_test_parameters["gamma"])
            and np.all([
                np.all(u1 == u2)
                for u1, u2 in zip(true_parameters["random_effects"],
                                  true_test_parameters["random_effects"])
            ]))
Example #2
    def predict(self, x, use_sparse_coefficients=False):
        """
        Makes a prediction if .fit(X, y) was called before and throws an error otherwise.

        Parameters
        ----------
        x : np.ndarray
            Data matrix. Should have the same format as the data which was used for fitting the model:
            the number of columns and the columns' labels should be the same. It may contain new groups, in which case
            the prediction will be formed using the fixed effects only.

        use_sparse_coefficients : bool, default is False
            If True, uses the sparse coefficients tbeta and tgamma for making a prediction;
            otherwise uses beta and gamma.

        Returns
        -------
        y : np.ndarray
            Model's predictions.
        """
        check_is_fitted(self, 'coef_')
        problem, _ = LinearLMEProblem.from_x_y(x, y=None)

        if use_sparse_coefficients:
            beta = self.coef_['tbeta']
            us = self.coef_['sparse_random_effects']
        else:
            beta = self.coef_['beta']
            us = self.coef_['random_effects']

        assert problem.num_fixed_effects == beta.shape[0], \
            "The number of fixed effects is not the same as in the training data."

        assert problem.num_random_effects == us[0].shape[0], \
            "The number of random effects is not the same as in the training data."

        group_labels = self.coef_['group_labels']
        answers = []
        for i, (x, _, z, stds) in enumerate(problem):
            label = problem.group_labels[i]
            idx_of_this_label_in_train = np.where(group_labels == label)[0]
            assert len(
                idx_of_this_label_in_train
            ) <= 1, "Group labels of the classifier contain duplicates."
            if len(idx_of_this_label_in_train) == 1:
                idx_of_this_label_in_train = idx_of_this_label_in_train[0]
                y = x.dot(beta) + z.dot(us[idx_of_this_label_in_train])
            else:
                # If we have not seen this group (so we have no inferred random effects for it)
                # then we make a prediction with "expected" (i.e. zero) random effects,
                # i.e. using the fixed effects only.
                y = x.dot(beta)
            answers.append(y)
        return np.concatenate(answers)
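For context, here is a minimal usage sketch of predict(); it is not part of the original example. The import paths and the constructor defaults of LinearLMESparseModel are assumptions and may differ in the actual package.

import numpy as np
from skmixed.lme.problems import LinearLMEProblem  # assumed module path
from skmixed.lme.models import LinearLMESparseModel  # assumed module path

# Generate a small synthetic problem; to_x_y() puts the column labels into the first row of x,
# which is the format fit() and predict() expect when columns_labels is not given.
problem, _ = LinearLMEProblem.generate(groups_sizes=[10, 10],
                                       features_labels=[3, 3],
                                       random_intercept=True,
                                       obs_std=0.1,
                                       seed=0)
x, y = problem.to_x_y()

model = LinearLMESparseModel(nnz_tbeta=3, nnz_tgamma=3, lb=0, lg=0)
model.fit(x, y)

y_dense = model.predict(x)                                 # uses beta and gamma
y_sparse = model.predict(x, use_sparse_coefficients=True)  # uses tbeta and tgamma
print(np.mean((y - y_dense) ** 2), np.mean((y - y_sparse) ** 2))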
 def test_compare_to_old_oracle(self):
     num_fixed_effects = 4
     num_random_effects = 2
     problem, true_parameters = LinearLMEProblem.generate(
         groups_sizes=[4, 5, 10],
         features_labels=[3, 3, 1],
         random_intercept=False,
         obs_std=0.1,
         seed=42)
     new_oracle = LinearLMEOracle(problem)
     old_oracle = OldOracle(problem)
     np.random.seed(42)
     trials = 100
     # the error should stem only from Cholesky/regular inversions instabilities, so
     # tolerances should pretty much represent machine precision
     rtol = 1e-8
     atol = 1e-10
     for random_beta, random_gamma in zip(
             np.random.rand(trials, num_fixed_effects),
             np.random.rand(trials, num_random_effects)):
         loss1 = new_oracle.loss(random_beta, random_gamma)
         loss2 = old_oracle.loss(random_beta, random_gamma)
         self.assertAlmostEqual(loss1,
                                loss2,
                                delta=atol,
                                msg="Loss does not match with old oracle")
         gradient1 = new_oracle.gradient_gamma(random_beta, random_gamma)
         gradient2 = old_oracle.gradient_gamma(random_beta, random_gamma)
         self.assertTrue(allclose(gradient1,
                                  gradient2,
                                  rtol=rtol,
                                  atol=atol),
                         msg="Gradients don't match with old oracle")
         hessian1 = new_oracle.hessian_gamma(random_beta, random_gamma)
         hessian2 = old_oracle.hessian_gamma(random_beta, random_gamma)
         self.assertTrue(allclose(hessian1,
                                  hessian2,
                                  rtol=100 * rtol,
                                  atol=100 * atol),
                         msg="Hessian does not match with old oracle")
         beta1 = new_oracle.optimal_beta(random_gamma)
         beta2 = old_oracle.optimal_beta(random_gamma)
         self.assertTrue(allclose(beta1, beta2, rtol=rtol, atol=atol),
                         msg="Optimal betas don't match with old oracle")
         us1 = new_oracle.optimal_random_effects(random_beta, random_gamma)
         us2 = old_oracle.optimal_random_effects(random_beta, random_gamma)
         self.assertTrue(
             allclose(us1, us2, rtol=rtol, atol=atol),
             msg="Optimal random effects don't match with old oracle")
     return None
Example #4
    def test_creation_from_no_data(self):
        problem, true_parameters = LinearLMEProblem.generate(
            groups_sizes=[4, 5, 10],
            features_labels=[],
            random_intercept=True,
            obs_std=0.1,
            seed=42)

        self.assertEqual(len(true_parameters["beta"]), 1,
                         "Beta should be of len = 1 for no-data problem")
        self.assertEqual(len(true_parameters["gamma"]), 1,
                         "Gamma should be of len = 1 for no-data problem")
        self.assertTrue(
            np.all(
                [np.all(x == 1) and np.all(z == 1) for x, y, z, l in problem]),
            "All fixed and random features should be 1 for no-data problem")
 def test_beta_to_gamma_map(self):
     problem, true_parameters = LinearLMEProblem.generate(
         groups_sizes=[4, 5, 10],
         features_labels=[3, 3, 1, 2, 3, 1, 2],
         random_intercept=False,
         obs_std=0.1,
         seed=42)
     oracle = LinearLMEOracle(problem)
     true_beta_to_gamma_map = np.array([-1, 0, 1, -1, 3, -1])
     for e1, e2 in zip(true_beta_to_gamma_map, oracle.beta_to_gamma_map):
         self.assertEqual(
             e1,
             e2,
              msg=
              "Beta-to-gamma map is not right: \n %s is not \n %s as it should be"
              % (true_beta_to_gamma_map, oracle.beta_to_gamma_map))
    def test_score_function(self):
        # this is only a basic test which checks R^2 in two points: nearly perfect prediction and constant prediction.

        problem_parameters = {
            "groups_sizes": [20, 5, 10, 50],
            "features_labels": [3, 3, 3],
            "random_intercept": True,
            "features_covariance_matrix": np.array([[1, 0, 0],
                                                    [0, 1, 0.7],
                                                    [0, 0.7, 1]]),
            "obs_std": 0.1,
        }

        model_parameters = {
            "nnz_tbeta": 4,
            "nnz_tgamma": 4,
            "lb":
            0,  # We expect the coefficient vectors to be dense so we turn regularization off.
            "lg": 0,  # Same.
            "initializer": 'EM',
            "logger_keys": (
                'converged',
                'loss',
            ),
            "tol": 1e-6,
            "n_iter": 1000,
            "tol_inner": 1e-4,
            "n_iter_inner": 1000,
        }

        problem, true_model_parameters = LinearLMEProblem.generate(
            **problem_parameters, seed=42)
        x, y = problem.to_x_y()
        model = LinearLMESparseModel(**model_parameters)
        model.fit(x, y)
        model.coef_["beta"] = true_model_parameters["beta"]
        model.coef_["random_effects"] = true_model_parameters["random_effects"]
        good_score = model.score(x, y)
        assert good_score > 0.99
        model.coef_["beta"] = np.zeros(4)
        model.coef_["random_effects"] = np.zeros((4, 4))
        bad_score = model.score(x, y)
        assert abs(bad_score) < 0.1
Example #7
 def test_from_to_xy_preserves_dataset_structure(self):
     study_sizes = [20, 15, 10]
     num_features = 6
     num_random_effects = 4
     np.random.seed(42)
     x = np.random.rand(
         sum(study_sizes) + 1,
         1 + (num_features - 1) + 1 + (num_random_effects - 1) + 1)
     y = np.random.rand(sum(study_sizes))
     x[1:, 0] = np.repeat([0, 1, 2], study_sizes)
     x[0, :] = ([0] + [1] * (num_features - 1) + [3] +
                [2] * (num_random_effects - 1) + [4])
     problem, true_parameters = LinearLMEProblem.from_x_y(x, y)
     x2, y2 = problem.to_x_y()
     self.assertTrue(np.all(x2 == x),
                     msg="x is not the same after from/to transformation")
     self.assertTrue(np.all(y2 == y),
                     msg="y is not the same after from/to transformation")
    def test_gamma_derivatives(self):
        trials = 5
        rtol = 1e-3
        atol = 1e-2
        dx = rtol / 1000
        for random_seed in np.random.randint(0, 1000, size=trials):
            np.random.seed(random_seed)
            problem, true_parameters = LinearLMEProblem.generate(
                features_labels=[3, 3],
                random_intercept=False,
                seed=random_seed)
            beta = true_parameters['beta']
            oracle = LinearLMEOracle(problem)
            points = np.random.rand(30, 2)
            beta = np.random.rand(len(beta))

            oracle_gradient = np.array(
                [oracle.gradient_gamma(beta, g) for g in points])
            partial_derivative_1 = np.array([
                derivative(lambda x: oracle.loss(beta, np.array([x, g[1]])),
                           g[0],
                           dx=dx) for g in points
            ])
            partial_derivative_2 = np.array([
                derivative(lambda x: oracle.loss(beta, np.array([g[0], x])),
                           g[1],
                           dx=dx) for g in points
            ])
            for i, (a, c, d, e) in enumerate(
                    zip(points, oracle_gradient, partial_derivative_1,
                        partial_derivative_2)):
                self.assertTrue(
                    allclose(c[0], d, rtol=rtol, atol=atol),
                    msg=
                    "Gamma gradient does not match with numerical partial derivative: %d"
                    % i)
                self.assertTrue(
                    allclose(c[1], e, rtol=rtol, atol=atol),
                    msg=
                    "Gamma gradient does not match with numerical partial derivative: %d"
                    % i)
        return None
    def test_hessian_gamma(self):
        trials = 100
        random_seed = 34
        r = 1e-6
        rtol = 1e-5
        atol = 1e-7
        problem, true_parameters = LinearLMEProblem.generate(seed=random_seed)
        oracle = LinearLMEOracle(problem)
        np.random.seed(random_seed)
        for j in range(trials):
            beta = np.random.rand(problem.num_fixed_effects)
            gamma = np.random.rand(problem.num_random_effects)
            dg = np.random.rand(problem.num_random_effects)
            hess = oracle.hessian_gamma(beta, gamma)
            maybe_dir = hess.dot(dg)
            true_dir = (oracle.gradient_gamma(beta, gamma + r * dg) -
                        oracle.gradient_gamma(beta, gamma - r * dg)) / (2 * r)

            self.assertTrue(allclose(maybe_dir, true_dir, rtol=rtol,
                                     atol=atol),
                            msg="Hessian does not look right")
    def test_no_data_problem(self):
        random_seed = 43
        problem, true_parameters = LinearLMEProblem.generate(
            groups_sizes=[10, 10, 10],
            features_labels=[],
            random_intercept=True,
            seed=random_seed)
        beta = true_parameters['beta']
        us = true_parameters['random_effects']
        empirical_gamma = np.sum(us**2, axis=0) / problem.num_groups
        rtol = 1e-1
        atol = 1e-1
        oracle = LinearLMEOracle(problem)

        maybe_beta = oracle.optimal_beta(empirical_gamma)
        maybe_us = oracle.optimal_random_effects(maybe_beta, empirical_gamma)
        self.assertTrue(allclose(maybe_beta + maybe_us,
                                 beta + us,
                                 rtol=rtol,
                                 atol=atol),
                        msg="No-data-problem is not right")
        return None
 def test_non_regularized_oracle_is_zero_regularized_oracle(self):
     num_fixed_effects = 4
     num_random_effects = 3
     problem, true_parameters = LinearLMEProblem.generate(
         groups_sizes=[4, 5, 10],
         features_labels=[3, 3, 1, 2],
         random_intercept=False,
         obs_std=0.1,
         seed=42)
     # when both regularization coefficients are zero, these two oracles should be exactly equivalent
     oracle_non_regularized = LinearLMEOracle(problem)
     oracle_regularized = LinearLMEOracleRegularized(problem,
                                                     lg=0,
                                                     lb=0,
                                                     nnz_tbeta=1,
                                                     nnz_tgamma=1)
     np.random.seed(42)
     trials = 100
     rtol = 1e-14
     atol = 1e-14
     for random_beta, random_gamma, random_tbeta, random_tgamma in zip(
             np.random.rand(trials, num_fixed_effects),
             np.random.rand(trials, num_random_effects),
             np.random.rand(trials, num_fixed_effects),
             np.random.rand(trials, num_random_effects),
     ):
         loss1 = oracle_regularized.loss(random_beta, random_gamma,
                                         random_tbeta, random_tgamma)
         loss2 = oracle_non_regularized.loss(random_beta, random_gamma)
         self.assertAlmostEqual(
             loss1,
             loss2,
             delta=atol,
             msg=
             "Loss of zero-regularized and non-regularized oracles is different"
         )
         gradient1 = oracle_regularized.gradient_gamma(
             random_beta, random_gamma, random_tgamma)
         gradient2 = oracle_non_regularized.gradient_gamma(
             random_beta, random_gamma)
         self.assertTrue(
             allclose(gradient1, gradient2, rtol=rtol, atol=atol),
             msg=
             "Gradients w.r.t. gamma of zero-regularized and non-regularized oracles are different"
         )
         hessian1 = oracle_regularized.hessian_gamma(
             random_beta, random_gamma)
         hessian2 = oracle_non_regularized.hessian_gamma(
             random_beta, random_gamma)
         self.assertTrue(
             allclose(hessian1, hessian2, rtol=100 * rtol, atol=100 * atol),
             msg=
             "Hessian w.r.t. gamma of zero-regularized and non-regularized oracles are different"
         )
         beta1 = oracle_regularized.optimal_beta(random_gamma, random_tbeta)
         beta2 = oracle_non_regularized.optimal_beta(random_gamma)
         self.assertTrue(
             allclose(beta1, beta2, rtol=rtol, atol=atol),
             msg=
             "Optimal betas of zero-regularized and non-regularized oracles are different"
         )
         us1 = oracle_regularized.optimal_random_effects(
             random_beta, random_gamma)
         us2 = oracle_non_regularized.optimal_random_effects(
             random_beta, random_gamma)
         self.assertTrue(
             allclose(us1, us2, rtol=rtol, atol=atol),
             msg=
             "Optimal random effects of zero-regularized and non-regularized oracles is different"
         )
     return None
    def test_solving_sparse_problem(self):
        trials = 10
        problem_parameters = {
            "groups_sizes": [20, 12, 14, 50, 11],
            "features_labels": [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
            "random_intercept": True,
            "obs_std": 0.1,
        }

        model_parameters = {
            "lb": 0.01,
            "lg": 0.01,
            "initializer": None,
            "logger_keys": (
                'converged',
                'loss',
            ),
            "tol": 1e-6,
            "n_iter": 1000,
            "tol_inner": 1e-4,
            "n_iter_inner": 1000,
        }

        max_mse = 0.05
        min_explained_variance = 0.9
        fixed_effects_min_accuracy = 0.7
        random_effects_min_accuracy = 0.7

        fea = []
        rea = []

        for i in range(trials):
            np.random.seed(i)
            true_beta = np.random.choice(2, size=11, p=np.array([0.5, 0.5]))
            if sum(true_beta) == 0:
                true_beta[0] = 1
            true_gamma = np.random.choice(2, size=11, p=np.array(
                [0.3, 0.7])) * true_beta

            problem, true_model_parameters = LinearLMEProblem.generate(
                **problem_parameters, beta=true_beta, gamma=true_gamma, seed=i)
            model = LinearLMESparseModel(**model_parameters,
                                         nnz_tbeta=sum(true_beta),
                                         nnz_tgamma=sum(true_gamma),
                                         regularization_type="loss-weighted")
            model2 = LinearLMESparseModel(**model_parameters,
                                          nnz_tbeta=sum(true_beta),
                                          nnz_tgamma=sum(true_gamma),
                                          regularization_type="l2")

            x, y = problem.to_x_y()
            model.fit(x, y)
            model2.fit(x, y)

            logger = model.logger_
            loss = np.array(logger.get("loss"))
            self.assertTrue(
                np.all(loss[1:] - loss[:-1] <= 0),
                msg=
                "%d) Loss does not decrease monotonically with iterations. (seed=%d)"
                % (i, i))

            y_pred = model.predict(x)
            explained_variance = explained_variance_score(y, y_pred)
            mse = mean_squared_error(y, y_pred)

            y_pred2 = model2.predict(x)
            explained_variance2 = explained_variance_score(y, y_pred2)
            mse2 = mean_squared_error(y, y_pred2)

            coefficients = model.coef_
            maybe_tbeta = coefficients["tbeta"]
            maybe_tgamma = coefficients["tgamma"]
            fixed_effects_accuracy = accuracy_score(true_beta,
                                                    maybe_tbeta != 0)
            random_effects_accuracy = accuracy_score(true_gamma,
                                                     maybe_tgamma != 0)

            coefficients2 = model2.coef_
            maybe_tbeta2 = coefficients2["tbeta"]
            maybe_tgamma2 = coefficients2["tgamma"]
            fixed_effects_accuracy2 = accuracy_score(true_beta,
                                                     maybe_tbeta2 != 0)
            random_effects_accuracy2 = accuracy_score(true_gamma,
                                                      maybe_tgamma2 != 0)
            print("\n %d) MSE    EV FEA REA")
            print("%.4f  %.4f %.4f %.4f" %
                  (mse, explained_variance, fixed_effects_accuracy,
                   random_effects_accuracy))
            print("%.4f  %.4f %.4f %.4f" %
                  (mse2, explained_variance2, fixed_effects_accuracy2,
                   random_effects_accuracy2))

            # maybe_per_group_coefficients = coefficients["per_group_coefficients"]

            self.assertGreater(
                explained_variance,
                min_explained_variance,
                msg=
                "%d) Explained variance is too small: %.3f < %.3f. (seed=%d)" %
                (i, explained_variance, min_explained_variance, i))
            self.assertGreater(
                max_mse,
                mse,
                msg="%d) MSE is too big: %.3f > %.2f  (seed=%d)" %
                (i, mse, max_mse, i))
            self.assertGreater(
                fixed_effects_accuracy,
                fixed_effects_min_accuracy,
                msg=
                "%d) Fixed Effects Selection Accuracy is too small: %.3f < %.2f  (seed=%d)"
                % (i, fixed_effects_accuracy, fixed_effects_min_accuracy, i))
            self.assertGreater(
                random_effects_accuracy,
                random_effects_min_accuracy,
                msg=
                "%d) Random Effects Selection Accuracy is too small: %.3f < %.2f  (seed=%d)"
                % (i, random_effects_accuracy, random_effects_min_accuracy, i))
            fea.append(fixed_effects_accuracy)
            rea.append(random_effects_accuracy)

        return None
    def test_drop_matrices(self):

        problem_parameters = {
            "groups_sizes": [20, 5, 10, 50],
            "features_labels": [1, 2, 3, 3],
            "random_intercept": True,
            "obs_std": 0.1,
            "seed": 42
        }

        problem, _ = LinearLMEProblem.generate(**problem_parameters)
        simple_oracle = LinearLMEOracle(problem)
        oracle = LinearLMEOracleW(problem,
                                  lb=0,
                                  lg=0,
                                  nnz_tbeta=problem.num_fixed_effects,
                                  nnz_tgamma=problem.num_random_effects)
        trials = 100

        rtol = 1e-10
        atol = 1e-10
        np.random.seed(42)

        for t, (random_beta, random_gamma) in enumerate(
                zip(np.random.rand(trials, problem.num_fixed_effects),
                    np.random.rand(trials, problem.num_random_effects))):
            loss = simple_oracle.loss(random_beta, random_gamma)
            oracle._recalculate_drop_matrices(random_beta, random_gamma)
            w_beta = oracle.drop_penalties_beta
            w_gamma = oracle.drop_penalties_gamma
            for j in range(problem.num_fixed_effects):
                sparse_beta = random_beta.copy()
                sparse_beta[j] = 0
                sparse_gamma = random_gamma.copy()
                idx = oracle.beta_to_gamma_map[j].astype(int)
                if idx >= 0:
                    sparse_gamma[idx] = 0
                    loss3 = simple_oracle.loss(random_beta, sparse_gamma)
                    self.assertTrue(np.isclose(loss3 - loss,
                                               w_gamma[idx],
                                               rtol=rtol,
                                               atol=atol),
                                    msg="%d: W_gamma is not right" % j)
                    loss2 = simple_oracle.loss(sparse_beta, sparse_gamma)
                else:
                    loss2 = simple_oracle.loss(sparse_beta, random_gamma)
                self.assertTrue(np.isclose(loss2 - loss,
                                           w_beta[j],
                                           rtol=rtol,
                                           atol=atol),
                                msg="%d) W_beta is not right" % j)

        sparse_beta = np.zeros(problem.num_fixed_effects)
        sparse_gamma = np.zeros(problem.num_random_effects)
        sparse_beta[0:2] = 1
        sparse_gamma[0] = 1
        oracle._recalculate_drop_matrices(sparse_beta, sparse_gamma)
        w_beta = oracle.drop_penalties_beta
        w_gamma = oracle.drop_penalties_gamma
        self.assertTrue((w_gamma[1:] == 0).all(),
                        msg="Drop of zero gamma is not zero")
        self.assertTrue((w_beta[2:] == 0).all(),
                        msg="Drop of zero beta is not zero")
Example #14
    def fit(self,
            x: np.ndarray,
            y: np.ndarray,
            columns_labels: np.ndarray = None,
            initial_parameters: dict = None,
            warm_start=False,
            random_intercept=True,
            **kwargs):
        """
        Fits a Linear Mixed-Effects Model to the given data.

        Parameters
        ----------
        x : np.ndarray
            Data. If columns_labels = None then it's assumed that columns_labels are in the first row of x.

        y : np.ndarray
            Answers, real-valued array.

        columns_labels : np.ndarray
            List of column labels. There should be exactly one column of group labels and one column of
            answers' standard deviations, plus n columns of fixed effects (labels 1 or 3) and k columns
            of random effects (labels 2 or 3):

                - 0 : group labels
                - 1 : fixed effect
                - 2 : random effect
                - 3 : both fixed and random effect
                - 4 : answers' standard deviations

        initial_parameters : dict
            Dict with possible fields:

                -   | 'beta' : np.ndarray, shape = [n],
                    | Initial estimate of fixed effects. If None then it defaults to an all-ones vector.
                -   | 'gamma' : np.ndarray, shape = [k],
                    | Initial estimate of random effects covariances. If None then it defaults to an all-ones vector.
                -   | 'tbeta' : np.ndarray, shape = [n],
                    | Initial estimate of sparse fixed effects. If None then it defaults to an all-zeros vector.
                -   | 'tgamma' : np.ndarray, shape = [k],
                    | Initial estimate of sparse random effects covariances. If None then it defaults to an all-zeros vector.

        warm_start : bool, default is False
            Whether to use previous parameters as initial ones. Overrides initial_parameters if given.
            Throws NotFittedError if set to True when not fitted.

        random_intercept : bool, default = True
            Whether to treat the intercept as a random effect.
        kwargs :
            Not used currently, left here for passing debugging parameters.

        Returns
        -------
        self : LinearLMESparseModel
            Fitted regression model.
        """

        problem, _ = LinearLMEProblem.from_x_y(
            x, y, columns_labels, random_intercept=random_intercept, **kwargs)
        if initial_parameters is None:
            initial_parameters = {}
        beta0 = initial_parameters.get("beta", None)
        gamma0 = initial_parameters.get("gamma", None)
        tbeta0 = initial_parameters.get("tbeta", None)
        tgamma0 = initial_parameters.get("tgamma", None)
        _check_input_consistency(problem, beta0, gamma0, tbeta0, tgamma0)

        if self.regularization_type == "l2":
            oracle = LinearLMEOracleRegularized(problem,
                                                lb=self.lb,
                                                lg=self.lg,
                                                nnz_tbeta=self.nnz_tbeta,
                                                nnz_tgamma=self.nnz_tgamma)
        elif self.regularization_type == "loss-weighted":
            oracle = LinearLMEOracleW(problem,
                                      lb=self.lb,
                                      lg=self.lg,
                                      nnz_tbeta=self.nnz_tbeta,
                                      nnz_tgamma=self.nnz_tgamma)
        else:
            raise ValueError("regularization_type is not understood.")

        num_fixed_effects = problem.num_fixed_effects
        num_random_effects = problem.num_random_effects
        assert num_fixed_effects >= self.nnz_tbeta
        assert num_random_effects >= self.nnz_tgamma
        # old_oracle = OldOracle(problem, lb=self.lb, lg=self.lg, k=self.nnz_tbeta, j=self.nnz_tgamma)

        if warm_start:
            check_is_fitted(self, 'coef_')
            beta = self.coef_["beta"]
            gamma = self.coef_["gamma"]
            tbeta = self.coef_["tbeta"]
            tgamma = self.coef_["tgamma"]

        else:
            if beta0 is not None:
                beta = beta0
            else:
                beta = np.ones(num_fixed_effects)

            if gamma0 is not None:
                gamma = gamma0
            else:
                gamma = np.ones(num_random_effects)

            if tbeta0 is not None:
                tbeta = tbeta0
            else:
                tbeta = np.zeros(num_fixed_effects)

            if tgamma0 is not None:
                tgamma = tgamma0
            else:
                tgamma = np.zeros(num_random_effects)

        if self.initializer == "EM":
            beta = oracle.optimal_beta(gamma, tbeta)
            us = oracle.optimal_random_effects(beta, gamma)
            gamma = np.sum(us**2, axis=0) / oracle.problem.num_groups
            # tbeta = oracle.optimal_tbeta(beta)
            # tgamma = oracle.optimal_tgamma(tbeta, gamma)

        def projected_direction(current_gamma, current_direction):
            proj_direction = current_direction.copy()
            for j, _ in enumerate(current_gamma):
                if current_gamma[j] == 0 and current_direction[j] <= 0:
                    proj_direction[j] = 0
            return proj_direction

        loss = oracle.loss(beta, gamma, tbeta, tgamma)
        self.logger_ = Logger(self.logger_keys)

        prev_tbeta = np.inf
        prev_tgamma = np.inf

        iteration = 0
        while (np.linalg.norm(tbeta - prev_tbeta) > self.tol
               and np.linalg.norm(tgamma - prev_tgamma) > self.tol):

            if iteration >= self.n_iter:
                us = oracle.optimal_random_effects(beta, gamma)
                if len(self.logger_keys) > 0:
                    self.logger_.log(**locals())
                self.coef_ = {
                    "beta": beta,
                    "gamma": gamma,
                    "tbeta": tbeta,
                    "tgamma": tgamma,
                    "random_effects": us
                }
                self.logger_.add("converged", 0)
                return self

            if self.solver == 'pgd':
                inner_iteration = 0
                beta = oracle.optimal_beta(gamma, tbeta, beta=beta)
                gradient_gamma = oracle.gradient_gamma(beta, gamma, tgamma)
                direction = projected_direction(gamma, -gradient_gamma)
                while (np.linalg.norm(direction) > self.tol_inner
                       and inner_iteration < self.n_iter_inner):
                    # gradient_gamma = oracle.gradient_gamma(beta, gamma, tgamma)
                    # projecting the gradient to the set of constraints
                    # direction = projected_direction(gamma, -gradient_gamma)
                    if self.use_line_search:
                        # line search method
                        step_len = 0.1
                        for i, _ in enumerate(gamma):
                            if direction[i] < 0:
                                step_len = min(-gamma[i] / direction[i],
                                               step_len)

                        current_loss = oracle.loss(beta, gamma, tbeta, tgamma)

                        while (
                                oracle.loss(beta, gamma + step_len * direction,
                                            tbeta, tgamma) >=
                            (1 - np.sign(current_loss) * 1e-5) * current_loss):
                            step_len *= 0.5
                            if step_len <= 1e-15:
                                break
                    else:
                        # fixed step size (shifted by one since iteration starts at 0)
                        step_len = 1 / (iteration + 1)
                    if step_len <= 1e-15:
                        break
                    gamma = gamma + step_len * direction
                    gradient_gamma = oracle.gradient_gamma(beta, gamma, tgamma)
                    direction = projected_direction(gamma, -gradient_gamma)
                    inner_iteration += 1

                prev_tbeta = tbeta
                prev_tgamma = tgamma
                tbeta = oracle.optimal_tbeta(beta=beta, gamma=gamma)
                tgamma = oracle.optimal_tgamma(tbeta, gamma, beta=beta)
                iteration += 1

            loss = oracle.loss(beta, gamma, tbeta, tgamma)
            if len(self.logger_keys) > 0:
                self.logger_.log(locals())

        us = oracle.optimal_random_effects(beta, gamma)
        sparse_us = oracle.optimal_random_effects(tbeta, tgamma)

        per_group_coefficients = get_per_group_coefficients(
            beta, us, labels=problem.column_labels)
        sparse_per_group_coefficients = get_per_group_coefficients(
            tbeta, sparse_us, labels=problem.column_labels)

        self.logger_.add('converged', 1)
        self.logger_.add('iterations', iteration)

        self.coef_ = {
            "beta": beta,
            "gamma": gamma,
            "tbeta": tbeta,
            "tgamma": tgamma,
            "random_effects": us,
            "sparse_random_effects": sparse_us,
            "group_labels": np.copy(problem.group_labels),
            "per_group_coefficients": per_group_coefficients,
            "sparse_per_group_coefficients": sparse_per_group_coefficients,
        }

        return self
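For context, below is a minimal sketch of calling fit() with an explicit columns_labels array, illustrating the 0/1/2/3/4 column encoding described in the docstring above. It is not part of the original example: the import path, the synthetic data, and the constructor defaults are assumptions made for illustration only.

import numpy as np
from skmixed.lme.models import LinearLMESparseModel  # assumed module path

np.random.seed(0)
n_obs = 30
# column 0: group labels; columns 1-2: features used as both fixed and random effects (label 3);
# column 3: answers' standard deviations (label 4)
x = np.empty((n_obs, 4))
x[:, 0] = np.repeat([0, 1, 2], 10)    # three groups with 10 observations each
x[:, 1:3] = np.random.rand(n_obs, 2)  # feature columns
x[:, 3] = 0.1                         # known observation noise std
y = x[:, 1] + 2 * x[:, 2] + 0.1 * np.random.randn(n_obs)

columns_labels = np.array([0, 3, 3, 4])
model = LinearLMESparseModel(nnz_tbeta=3, nnz_tgamma=3, lb=0, lg=0)
model.fit(x, y, columns_labels=columns_labels, random_intercept=True)
print(model.coef_["beta"], model.coef_["gamma"])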
    def test_get_set_params(self):
        problem_parameters = {
            "groups_sizes": [20, 5, 10, 50],
            "features_labels": [3, 3, 3],
            "random_intercept": True,
            "features_covariance_matrix": np.array([[1, 0, 0],
                                                    [0, 1, 0.7],
                                                    [0, 0.7, 1]]),
            "obs_std": 0.1,
        }
        model_parameters = {
            "nnz_tbeta": 4,
            "nnz_tgamma": 4,
            "lb":
            0,  # We expect the coefficient vectors to be dense so we turn regularization off.
            "lg": 0,  # Same.
            "initializer": 'EM',
            "logger_keys": (
                'converged',
                'loss',
            ),
            "tol": 1e-6,
            "n_iter": 1000,
            "tol_inner": 1e-4,
            "n_iter_inner": 1000,
        }
        # Now we want to solve a regularized problem to get two different models
        model2_parameters = {
            "nnz_tbeta": 3,
            "nnz_tgamma": 2,
            "lb": 20,
            "lg": 20,
            "initializer": None,
            "logger_keys": ('converged', ),
            "tol": 1e-6,
            "n_iter": 1000,
            "tol_inner": 1e-4,
            "n_iter_inner": 1000,
        }
        problem, true_model_parameters = LinearLMEProblem.generate(
            **problem_parameters, seed=42)
        x, y = problem.to_x_y()

        model = LinearLMESparseModel(**model_parameters)
        model.fit(x, y)
        params = model.get_params()
        y_pred = model.predict(x)

        model2 = LinearLMESparseModel(**model2_parameters)
        model2.fit(x, y)
        params2 = model2.get_params()
        y_pred2 = model2.predict(x)

        model.set_params(**params2)
        model.fit(x, y)
        y_pred_with_other_params = model.predict(x)
        assert np.equal(y_pred_with_other_params, y_pred2).all(),\
            "set_params or get_params is not working properly"
        model2.set_params(**params)
        model2.fit(x, y)
        y_pred2_with_other_params = model2.predict(x)
        assert np.equal(y_pred2_with_other_params, y_pred).all(), \
            "set_params or get_params is not working properly"
    def test_solving_dense_problem(self):
        trials = 20
        problem_parameters = {
            "groups_sizes": [20, 5, 10, 50],
            "features_labels": [3, 3, 3],
            "random_intercept": True,
            "features_covariance_matrix": np.array([[1, 0, 0],
                                                    [0, 1, 0.7],
                                                    [0, 0.7, 1]]),
            "obs_std": 0.1,
        }
        model_parameters = {
            "nnz_tbeta": 2,
            "nnz_tgamma": 2,
            "lb":
            0,  # We expect the coefficient vectors to be dense so we turn regularization off.
            "lg": 0,  # Same.
            "initializer": 'EM',
            "logger_keys": (
                'converged',
                'loss',
            ),
            "tol": 1e-6,
            "n_iter": 1000,
            "tol_inner": 1e-4,
            "n_iter_inner": 1000,
        }

        max_mse = 0.05
        min_explained_variance = 0.9

        for i in range(trials):
            problem, true_model_parameters = LinearLMEProblem.generate(
                **problem_parameters, seed=i)
            model = LinearLMESparseModel(**model_parameters)

            x, y = problem.to_x_y()
            model.fit(x, y)

            logger = model.logger_
            loss = np.array(logger.get("loss"))
            self.assertTrue(
                np.all(loss[1:] - loss[:-1] <= 0),
                msg=
                "%d) Loss does not decrease monotonically with iterations. (seed=%d)"
                % (i, i))

            y_pred = model.predict(x)
            explained_variance = explained_variance_score(y, y_pred)
            mse = mean_squared_error(y, y_pred)

            # coefficients = model.coef_
            # maybe_per_group_coefficients = coefficients["per_group_coefficients"]

            self.assertGreater(
                explained_variance,
                min_explained_variance,
                msg=
                "%d) Explained variance is too small: %.3f < %.3f. (seed=%d)" %
                (i, explained_variance, min_explained_variance, i))
            self.assertGreater(
                max_mse,
                mse,
                msg="%d) MSE is too big: %.3f > %.2f  (seed=%d)" %
                (i, mse, max_mse, i))

            # coefficients = model.coef_
            # maybe_per_group_coefficients = coefficients["per_group_coefficients"]
            # maybe_beta = coefficients["beta"]
            # maybe_us = coefficients["random_effects"]
            # maybe_gamma = coefficients["gamma"]
            # maybe_tbeta = coefficients["tbeta"]
            # maybe_tgamma = coefficients["tgamma"]
            # maybe_cluster_coefficients = coefficients["per_cluster_coefficients"]
            # maybe_sparse_cluster_coefficients = coefficients["sparse_per_cluster_coefficients"]
        # cluster_coefficients = beta + us
        # maybe_cluster_coefficients = maybe_beta + maybe_us
        return None