def test_creation_and_from_to_x_y(self):
    """
    Round-trips a generated problem through to_x_y/from_x_y and checks that
    parameters handed to generate() are returned unchanged.
    """
    train_problem, train_parameters = LinearLMEProblem.generate(
        groups_sizes=[4, 5, 10],
        features_labels=[3, 3, 1, 2],
        random_intercept=True,
        obs_std=0.1,
        seed=42)

    # problem -> (x, y) -> problem -> (x, y) must reproduce the same arrays
    x_first, y_first = train_problem.to_x_y()
    rebuilt_problem, _ = LinearLMEProblem.from_x_y(x_first, y_first)
    x_second, y_second = rebuilt_problem.to_x_y()
    self.assertTrue(
        np.all(x_first == x_second) and np.all(y_first == y_second))

    # a second problem generated from explicitly supplied parameters
    _, echoed_parameters = LinearLMEProblem.generate(
        groups_sizes=[3, 4, 5],
        features_labels=[3, 3, 1, 2],
        random_intercept=True,
        beta=train_parameters["beta"],
        gamma=train_parameters["gamma"],
        true_random_effects=train_parameters["random_effects"],
        obs_std=0.1,
        seed=43)

    effects_pairs = zip(train_parameters["random_effects"],
                        echoed_parameters["random_effects"])
    self.assertTrue(
        np.all(train_parameters["beta"] == echoed_parameters["beta"])
        and np.all(train_parameters["gamma"] == echoed_parameters["gamma"])
        and np.all([np.all(given == echoed)
                    for given, echoed in effects_pairs]))
def predict(self, x, use_sparse_coefficients=False):
    """
    Makes a prediction if .fit(X, y) was called before and throws an error otherwise.

    Parameters
    ----------
    x : np.ndarray
        Data matrix. Should have the same format as the data which was used for fitting the model:
        the number of columns and the columns' labels should be the same. It may contain new groups,
        in which case the prediction will be formed using the fixed effects only.

    use_sparse_coefficients : bool, default is False
        If true then uses sparse coefficients, tbeta and tgamma, for making a prediction, otherwise uses
        beta and gamma.

    Returns
    -------
    y : np.ndarray
        Models predictions.

    Raises
    ------
    AssertionError
        If the number of fixed or random effects in x differs from the fitted coefficients,
        or if the stored training group labels contain duplicates.
    """
    check_is_fitted(self, 'coef_')
    problem, _ = LinearLMEProblem.from_x_y(x, y=None)

    if use_sparse_coefficients:
        beta = self.coef_['tbeta']
        us = self.coef_['sparse_random_effects']
    else:
        beta = self.coef_['beta']
        us = self.coef_['random_effects']

    assert problem.num_fixed_effects == beta.shape[0], \
        "Number of fixed effects is not the same to what it was in the train data."
    assert problem.num_random_effects == us[0].shape[0], \
        "Number of random effects is not the same to what it was in the train data."

    group_labels = self.coef_['group_labels']
    answers = []
    # The loop variables get their own names so the `x` argument is not shadowed.
    for i, (group_x, _, group_z, _) in enumerate(problem):
        label = problem.group_labels[i]
        # np.where returns a tuple holding one index array per dimension, so the
        # index array itself has to be unpacked before its length is meaningful.
        idx_of_this_label_in_train = np.where(group_labels == label)[0]
        assert len(idx_of_this_label_in_train) <= 1, \
            "Group labels of the classifier contain duplicates."
        if len(idx_of_this_label_in_train) == 1:
            group_us = us[idx_of_this_label_in_train[0]]
            y = group_x.dot(beta) + group_z.dot(group_us)
        else:
            # If we have not seen this group (so we don't have inferred random effects for this)
            # then we make a prediction with "expected" (e.g. zero) random effects
            y = group_x.dot(beta)
        answers.append(y)
    return np.concatenate(answers)
def test_compare_to_old_oracle(self):
    """
    Evaluates LinearLMEOracle and OldOracle at the same random points and
    requires loss, gradient, hessian, optimal beta and optimal random effects
    to agree within tight tolerances.
    """
    num_fixed_effects = 4
    num_random_effects = 2
    problem, _ = LinearLMEProblem.generate(
        groups_sizes=[4, 5, 10],
        features_labels=[3, 3, 1],
        random_intercept=False,
        obs_std=0.1,
        seed=42)
    new_oracle = LinearLMEOracle(problem)
    old_oracle = OldOracle(problem)

    np.random.seed(42)
    trials = 100
    # the error should stem only from Cholesky/regular inversions instabilities, so
    # tolerances should pretty much represent machine precision
    rtol = 1e-8
    atol = 1e-10

    # betas are drawn before gammas, which fixes the random stream consumption order
    sampled_betas = np.random.rand(trials, num_fixed_effects)
    sampled_gammas = np.random.rand(trials, num_random_effects)

    for random_beta, random_gamma in zip(sampled_betas, sampled_gammas):
        new_loss = new_oracle.loss(random_beta, random_gamma)
        old_loss = old_oracle.loss(random_beta, random_gamma)
        self.assertAlmostEqual(new_loss,
                               old_loss,
                               delta=atol,
                               msg="Loss does not match with old oracle")

        new_gradient = new_oracle.gradient_gamma(random_beta, random_gamma)
        old_gradient = old_oracle.gradient_gamma(random_beta, random_gamma)
        gradients_agree = allclose(new_gradient, old_gradient,
                                   rtol=rtol, atol=atol)
        self.assertTrue(gradients_agree,
                        msg="Gradients don't match with old oracle")

        new_hessian = new_oracle.hessian_gamma(random_beta, random_gamma)
        old_hessian = old_oracle.hessian_gamma(random_beta, random_gamma)
        hessians_agree = allclose(new_hessian, old_hessian,
                                  rtol=100 * rtol, atol=100 * atol)
        self.assertTrue(hessians_agree,
                        msg="Hessian does not match with old oracle")

        new_beta = new_oracle.optimal_beta(random_gamma)
        old_beta = old_oracle.optimal_beta(random_gamma)
        betas_agree = allclose(new_beta, old_beta, rtol=rtol, atol=atol)
        self.assertTrue(betas_agree,
                        msg="Optimal betas don't match with old oracle")

        new_us = new_oracle.optimal_random_effects(random_beta, random_gamma)
        old_us = old_oracle.optimal_random_effects(random_beta, random_gamma)
        effects_agree = allclose(new_us, old_us, rtol=rtol, atol=atol)
        self.assertTrue(
            effects_agree,
            msg="Optimal random effects don't match with old oracle")
def test_creation_from_no_data(self):
    """
    Generates a problem with no feature columns (random intercept only) and
    checks that beta and gamma have length one and that every fixed/random
    feature value equals 1.
    """
    problem, true_parameters = LinearLMEProblem.generate(
        groups_sizes=[4, 5, 10],
        features_labels=[],
        random_intercept=True,
        obs_std=0.1,
        seed=42)

    self.assertEqual(len(true_parameters["beta"]), 1,
                     "Beta should be of len = 1 for no-data problem")
    # The failure messages below are passed INTO the assert calls; placed after
    # the closing parenthesis they only form a throw-away tuple and are never shown.
    self.assertEqual(len(true_parameters["gamma"]), 1,
                     "Gamma should be of len = 1 for no-data problem")
    self.assertTrue(
        np.all([np.all(x == 1) and np.all(z == 1)
                for x, _, z, _ in problem]),
        "All fixed and random features should be 1 for no-data problem")
def test_beta_to_gamma_map(self):
    """
    Compares oracle.beta_to_gamma_map element by element with the expected
    map for a problem with mixed fixed/random column labels.
    """
    problem, _ = LinearLMEProblem.generate(
        groups_sizes=[4, 5, 10],
        features_labels=[3, 3, 1, 2, 3, 1, 2],
        random_intercept=False,
        obs_std=0.1,
        seed=42)
    oracle = LinearLMEOracle(problem)
    true_beta_to_gamma_map = np.array([-1, 0, 1, -1, 3, -1])

    # the same diagnostic text is attached to every element-wise comparison
    mismatch_note = (
        "Beta-to-gamma mask is not right: \n %s is not \n %s as should be" %
        (true_beta_to_gamma_map, oracle.beta_to_gamma_map))
    pairs = zip(true_beta_to_gamma_map, oracle.beta_to_gamma_map)
    for expected_entry, actual_entry in pairs:
        self.assertEqual(expected_entry, actual_entry, msg=mismatch_note)
def test_score_function(self):
    """
    Basic check of model.score at two points: with the true coefficients
    plugged in (score above 0.99) and with all-zero coefficients (score
    close to 0).
    """
    # this is only a basic test which checks R^2 in two points: nearly perfect prediction and constant prediction.
    problem_parameters = dict(
        groups_sizes=[20, 5, 10, 50],
        features_labels=[3, 3, 3],
        random_intercept=True,
        features_covariance_matrix=np.array([[1, 0, 0],
                                             [0, 1, 0.7],
                                             [0, 0.7, 1]]),
        obs_std=0.1,
    )
    model_parameters = dict(
        nnz_tbeta=4,
        nnz_tgamma=4,
        # We expect the coefficient vectors to be dense so we turn regularization off.
        lb=0,
        lg=0,
        initializer='EM',
        logger_keys=('converged', 'loss',),
        tol=1e-6,
        n_iter=1000,
        tol_inner=1e-4,
        n_iter_inner=1000,
    )

    problem, true_model_parameters = LinearLMEProblem.generate(
        **problem_parameters, seed=42)
    x, y = problem.to_x_y()

    model = LinearLMESparseModel(**model_parameters)
    model.fit(x, y)

    # overwrite the fitted coefficients with the ground truth
    model.coef_["beta"] = true_model_parameters["beta"]
    model.coef_["random_effects"] = true_model_parameters["random_effects"]
    good_score = model.score(x, y)
    assert good_score > 0.99

    # overwrite the coefficients with zeros
    model.coef_["beta"] = np.zeros(4)
    model.coef_["random_effects"] = np.zeros((4, 4))
    bad_score = model.score(x, y)
    assert abs(bad_score) < 0.1
def test_from_to_xy_preserves_dataset_structure(self):
    """
    Builds a random data matrix whose first row holds the column labels,
    converts it with from_x_y and back with to_x_y, and requires x and y to
    come back unchanged.
    """
    study_sizes = [20, 15, 10]
    num_features = 6
    num_random_effects = 4
    total_observations = sum(study_sizes)

    # one label per column: group, fixed effects, a fixed+random column,
    # random effects, and the answers' standard deviations
    header_row = ([0]
                  + [1] * (num_features - 1)
                  + [3]
                  + [2] * (num_random_effects - 1)
                  + [4])
    num_columns = 1 + (num_features - 1) + 1 + (num_random_effects - 1) + 1

    np.random.seed(42)
    x = np.random.rand(total_observations + 1, num_columns)
    y = np.random.rand(total_observations)
    x[1:, 0] = np.repeat([0, 1, 2], study_sizes)
    x[0, :] = header_row

    problem, _ = LinearLMEProblem.from_x_y(x, y)
    x_back, y_back = problem.to_x_y()
    self.assertTrue(np.all(x_back == x),
                    msg="x is not the same after from/to transformation")
    self.assertTrue(np.all(y_back == y),
                    msg="y is not the same after from/to transformation")
def test_gamma_derivatives(self):
    """
    Checks oracle.gradient_gamma against numerical partial derivatives of
    oracle.loss at random two-dimensional gamma points, for several randomly
    seeded problems.
    """
    trials = 5
    rtol = 1e-3
    atol = 1e-2
    dx = rtol / 1000
    failure_template = (
        "Gamma gradient does not match with numerical partial derivative: %d")

    for random_seed in np.random.randint(0, 1000, size=trials):
        np.random.seed(random_seed)
        problem, true_parameters = LinearLMEProblem.generate(
            features_labels=[3, 3],
            random_intercept=False,
            seed=random_seed)
        num_betas = len(true_parameters['beta'])
        oracle = LinearLMEOracle(problem)
        points = np.random.rand(30, 2)
        beta = np.random.rand(num_betas)

        # analytic gradients first, then both numerical partials
        analytic = np.array(
            [oracle.gradient_gamma(beta, point) for point in points])
        numeric_first = np.array([
            derivative(lambda t: oracle.loss(beta, np.array([t, point[1]])),
                       point[0],
                       dx=dx) for point in points
        ])
        numeric_second = np.array([
            derivative(lambda t: oracle.loss(beta, np.array([point[0], t])),
                       point[1],
                       dx=dx) for point in points
        ])

        rows = zip(analytic, numeric_first, numeric_second)
        for i, (gradient, d_first, d_second) in enumerate(rows):
            self.assertTrue(
                allclose(gradient[0], d_first, rtol=rtol, atol=atol),
                msg=failure_template % i)
            self.assertTrue(
                allclose(gradient[1], d_second, rtol=rtol, atol=atol),
                msg=failure_template % i)
def test_hessian_gamma(self):
    """
    Checks oracle.hessian_gamma by comparing Hessian-vector products with a
    central finite difference of oracle.gradient_gamma along random
    directions.
    """
    trials = 100
    random_seed = 34
    half_step = 1e-6
    rtol = 1e-5
    atol = 1e-7

    problem, _ = LinearLMEProblem.generate(seed=random_seed)
    oracle = LinearLMEOracle(problem)
    np.random.seed(random_seed)

    for _ in range(trials):
        beta = np.random.rand(problem.num_fixed_effects)
        gamma = np.random.rand(problem.num_random_effects)
        dg = np.random.rand(problem.num_random_effects)

        analytic_product = oracle.hessian_gamma(beta, gamma).dot(dg)

        gradient_ahead = oracle.gradient_gamma(beta, gamma + half_step * dg)
        gradient_behind = oracle.gradient_gamma(beta, gamma - half_step * dg)
        finite_difference = (gradient_ahead - gradient_behind) / (2 * half_step)

        self.assertTrue(allclose(analytic_product,
                                 finite_difference,
                                 rtol=rtol,
                                 atol=atol),
                        msg="Hessian does not look right")
def test_no_data_problem(self):
    """
    For a problem without feature columns, the oracle's optimal beta plus
    optimal random effects should approximately reproduce the true
    beta + random effects.
    """
    random_seed = 43
    rtol = 1e-1
    atol = 1e-1

    problem, true_parameters = LinearLMEProblem.generate(
        groups_sizes=[10, 10, 10],
        features_labels=[],
        random_intercept=True,
        seed=random_seed)
    true_beta = true_parameters['beta']
    true_us = true_parameters['random_effects']

    # gamma estimated from the true random effects themselves
    squared_effects = true_us**2
    empirical_gamma = np.sum(squared_effects, axis=0) / problem.num_groups

    oracle = LinearLMEOracle(problem)
    maybe_beta = oracle.optimal_beta(empirical_gamma)
    maybe_us = oracle.optimal_random_effects(maybe_beta, empirical_gamma)

    recovered = maybe_beta + maybe_us
    expected = true_beta + true_us
    self.assertTrue(allclose(recovered, expected, rtol=rtol, atol=atol),
                    msg="No-data-problem is not right")
def test_non_regularized_oracle_is_zero_regularized_oracle(self):
    """
    With lb = lg = 0, LinearLMEOracleRegularized must give the same loss,
    gradient, hessian, optimal beta and optimal random effects as the plain
    LinearLMEOracle at random points.
    """
    num_fixed_effects = 4
    num_random_effects = 3
    problem, _ = LinearLMEProblem.generate(
        groups_sizes=[4, 5, 10],
        features_labels=[3, 3, 1, 2],
        random_intercept=False,
        obs_std=0.1,
        seed=42)

    # when both regularization coefficients are zero, these two oracles should be exactly equivalent
    oracle_non_regularized = LinearLMEOracle(problem)
    oracle_regularized = LinearLMEOracleRegularized(problem,
                                                    lg=0,
                                                    lb=0,
                                                    nnz_tbeta=1,
                                                    nnz_tgamma=1)
    np.random.seed(42)
    trials = 100
    rtol = 1e-14
    atol = 1e-14

    # drawn in this exact order: beta, gamma, tbeta, tgamma
    betas = np.random.rand(trials, num_fixed_effects)
    gammas = np.random.rand(trials, num_random_effects)
    tbetas = np.random.rand(trials, num_fixed_effects)
    tgammas = np.random.rand(trials, num_random_effects)

    for random_beta, random_gamma, random_tbeta, random_tgamma in zip(
            betas, gammas, tbetas, tgammas):
        regularized_loss = oracle_regularized.loss(
            random_beta, random_gamma, random_tbeta, random_tgamma)
        plain_loss = oracle_non_regularized.loss(random_beta, random_gamma)
        self.assertAlmostEqual(
            regularized_loss,
            plain_loss,
            delta=atol,
            msg="Loss of zero-regularized and non-regularized oracles is different")

        regularized_gradient = oracle_regularized.gradient_gamma(
            random_beta, random_gamma, random_tgamma)
        plain_gradient = oracle_non_regularized.gradient_gamma(
            random_beta, random_gamma)
        self.assertTrue(
            allclose(regularized_gradient, plain_gradient,
                     rtol=rtol, atol=atol),
            msg="Gradients w.r.t. gamma of zero-regularized and non-regularized oracles are different")

        regularized_hessian = oracle_regularized.hessian_gamma(
            random_beta, random_gamma)
        plain_hessian = oracle_non_regularized.hessian_gamma(
            random_beta, random_gamma)
        self.assertTrue(
            allclose(regularized_hessian, plain_hessian,
                     rtol=100 * rtol, atol=100 * atol),
            msg="Hessian w.r.t. gamma of zero-regularized and non-regularized oracles are different")

        regularized_beta = oracle_regularized.optimal_beta(
            random_gamma, random_tbeta)
        plain_beta = oracle_non_regularized.optimal_beta(random_gamma)
        self.assertTrue(
            allclose(regularized_beta, plain_beta, rtol=rtol, atol=atol),
            msg="Optimal betas of zero-regularized and non-regularized oracles are different")

        regularized_us = oracle_regularized.optimal_random_effects(
            random_beta, random_gamma)
        plain_us = oracle_non_regularized.optimal_random_effects(
            random_beta, random_gamma)
        self.assertTrue(
            allclose(regularized_us, plain_us, rtol=rtol, atol=atol),
            msg="Optimal random effects of zero-regularized and non-regularized oracles is different")
def test_solving_sparse_problem(self):
    """
    Fits LinearLMESparseModel on randomly generated sparse problems and
    checks, for the "loss-weighted" model, that the logged loss never
    increases and that explained variance, MSE and feature-selection
    accuracies pass fixed thresholds. An "l2"-regularized model is fitted on
    the same data and its metrics are printed for comparison only.
    """
    trials = 10
    problem_parameters = {
        "groups_sizes": [20, 12, 14, 50, 11],
        "features_labels": [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        "random_intercept": True,
        "obs_std": 0.1,
    }
    model_parameters = {
        "lb": 0.01,
        "lg": 0.01,
        "initializer": None,
        "logger_keys": (
            'converged',
            'loss',
        ),
        "tol": 1e-6,
        "n_iter": 1000,
        "tol_inner": 1e-4,
        "n_iter_inner": 1000,
    }

    max_mse = 0.05
    min_explained_variance = 0.9
    fixed_effects_min_accuracy = 0.7
    random_effects_min_accuracy = 0.7

    for i in range(trials):
        np.random.seed(i)
        true_beta = np.random.choice(2, size=11, p=np.array([0.5, 0.5]))
        if sum(true_beta) == 0:
            # make sure at least one fixed effect is active
            true_beta[0] = 1
        # a random effect can only be active where the fixed effect is active
        true_gamma = np.random.choice(2, size=11, p=np.array(
            [0.3, 0.7])) * true_beta

        problem, true_model_parameters = LinearLMEProblem.generate(
            **problem_parameters, beta=true_beta, gamma=true_gamma, seed=i)
        model = LinearLMESparseModel(**model_parameters,
                                     nnz_tbeta=sum(true_beta),
                                     nnz_tgamma=sum(true_gamma),
                                     regularization_type="loss-weighted")
        model2 = LinearLMESparseModel(**model_parameters,
                                      nnz_tbeta=sum(true_beta),
                                      nnz_tgamma=sum(true_gamma),
                                      regularization_type="l2")

        x, y = problem.to_x_y()
        model.fit(x, y)
        model2.fit(x, y)

        logger = model.logger_
        loss = np.array(logger.get("loss"))
        self.assertTrue(
            np.all(loss[1:] - loss[:-1] <= 0),
            msg="%d) Loss does not decrease monotonically with iterations. (seed=%d)"
            % (i, i))

        y_pred = model.predict(x)
        explained_variance = explained_variance_score(y, y_pred)
        mse = mean_squared_error(y, y_pred)

        y_pred2 = model2.predict(x)
        explained_variance2 = explained_variance_score(y, y_pred2)
        mse2 = mean_squared_error(y, y_pred2)

        coefficients = model.coef_
        maybe_tbeta = coefficients["tbeta"]
        maybe_tgamma = coefficients["tgamma"]
        fixed_effects_accuracy = accuracy_score(true_beta, maybe_tbeta != 0)
        random_effects_accuracy = accuracy_score(true_gamma,
                                                 maybe_tgamma != 0)

        coefficients2 = model2.coef_
        maybe_tbeta2 = coefficients2["tbeta"]
        maybe_tgamma2 = coefficients2["tgamma"]
        fixed_effects_accuracy2 = accuracy_score(true_beta,
                                                 maybe_tbeta2 != 0)
        random_effects_accuracy2 = accuracy_score(true_gamma,
                                                  maybe_tgamma2 != 0)

        # The trial number is now actually substituted into the header; without
        # the argument a literal "%d" was printed.
        print("\n %d) MSE EV FEA REA" % i)
        print("%.4f %.4f %.4f %.4f" %
              (mse, explained_variance, fixed_effects_accuracy,
               random_effects_accuracy))
        print("%.4f %.4f %.4f %.4f" %
              (mse2, explained_variance2, fixed_effects_accuracy2,
               random_effects_accuracy2))

        self.assertGreater(
            explained_variance,
            min_explained_variance,
            msg="%d) Explained variance is too small: %.3f < %.3f. (seed=%d)"
            % (i, explained_variance, min_explained_variance, i))
        self.assertGreater(
            max_mse,
            mse,
            msg="%d) MSE is too big: %.3f > %.2f (seed=%d)" %
            (i, mse, max_mse, i))
        self.assertGreater(
            fixed_effects_accuracy,
            fixed_effects_min_accuracy,
            msg="%d) Fixed Effects Selection Accuracy is too small: %.3f < %.2f (seed=%d)"
            % (i, fixed_effects_accuracy, fixed_effects_min_accuracy, i))
        self.assertGreater(
            random_effects_accuracy,
            random_effects_min_accuracy,
            msg="%d) Random Effects Selection Accuracy is too small: %.3f < %.2f (seed=%d)"
            % (i, random_effects_accuracy, random_effects_min_accuracy, i))

    return None
def test_drop_matrices(self):
    """
    Checks the drop penalties computed by LinearLMEOracleW: each penalty
    must equal the change of the plain oracle's loss when the corresponding
    coefficient is zeroed, and coefficients that are already zero must have
    a zero penalty.
    """
    problem, _ = LinearLMEProblem.generate(
        groups_sizes=[20, 5, 10, 50],
        features_labels=[1, 2, 3, 3],
        random_intercept=True,
        obs_std=0.1,
        seed=42)
    simple_oracle = LinearLMEOracle(problem)
    oracle = LinearLMEOracleW(problem,
                              lb=0,
                              lg=0,
                              nnz_tbeta=problem.num_fixed_effects,
                              nnz_tgamma=problem.num_random_effects)
    trials = 100
    rtol = 1e-10
    atol = 1e-10
    np.random.seed(42)
    betas = np.random.rand(trials, problem.num_fixed_effects)
    gammas = np.random.rand(trials, problem.num_random_effects)

    for random_beta, random_gamma in zip(betas, gammas):
        base_loss = simple_oracle.loss(random_beta, random_gamma)
        oracle._recalculate_drop_matrices(random_beta, random_gamma)
        w_beta = oracle.drop_penalties_beta
        w_gamma = oracle.drop_penalties_gamma

        for j in range(problem.num_fixed_effects):
            beta_without_j = random_beta.copy()
            beta_without_j[j] = 0
            gamma_without_j = random_gamma.copy()
            idx = oracle.beta_to_gamma_map[j].astype(int)

            if idx < 0:
                # this fixed effect has no random counterpart
                loss_after_beta_drop = simple_oracle.loss(
                    beta_without_j, random_gamma)
            else:
                gamma_without_j[idx] = 0
                loss_after_gamma_drop = simple_oracle.loss(
                    random_beta, gamma_without_j)
                self.assertTrue(np.isclose(loss_after_gamma_drop - base_loss,
                                           w_gamma[idx],
                                           rtol=rtol,
                                           atol=atol),
                                msg="%d: W_gamma is not right" % j)
                loss_after_beta_drop = simple_oracle.loss(
                    beta_without_j, gamma_without_j)

            self.assertTrue(np.isclose(loss_after_beta_drop - base_loss,
                                       w_beta[j],
                                       rtol=rtol,
                                       atol=atol),
                            msg="%d) W_beta is not right" % j)

    # coefficients that are already zero should cost nothing to drop
    mostly_zero_beta = np.zeros(problem.num_fixed_effects)
    mostly_zero_gamma = np.zeros(problem.num_random_effects)
    mostly_zero_beta[0:2] = 1
    mostly_zero_gamma[0] = 1
    oracle._recalculate_drop_matrices(mostly_zero_beta, mostly_zero_gamma)
    w_beta = oracle.drop_penalties_beta
    w_gamma = oracle.drop_penalties_gamma
    self.assertTrue((w_gamma[1:] == 0).all(),
                    msg="Drop of zero gamma is not zero")
    self.assertTrue((w_beta[2:] == 0).all(),
                    msg="Drop of zero beta is not zero")
def fit(self,
        x: np.ndarray,
        y: np.ndarray,
        columns_labels: np.ndarray = None,
        initial_parameters: dict = None,
        warm_start=False,
        random_intercept=True,
        **kwargs):
    """
    Fits a Linear Model with Linear Mixed-Effects to the given data.

    Parameters
    ----------
    x : np.ndarray
        Data. If columns_labels = None then it's assumed that columns_labels are in the first row of x.

    y : np.ndarray
        Answers, real-valued array.

    columns_labels : np.ndarray
        List of column labels. There shall be only one column of group labels and answers STDs,
        and overall n columns with fixed effects (1 or 3) and k columns of random effects (2 or 3).

            - 1 : fixed effect
            - 2 : random effect
            - 3 : both fixed and random,
            - 0 : groups labels
            - 4 : answers standard deviations

    initial_parameters : dict
        Dict with possible fields (each one may also be given with a trailing "0",
        e.g. 'beta0'; the un-suffixed key wins when both are present):

            -   | 'beta' : np.ndarray, shape = [n],
                | Initial estimate of fixed effects. If None then it defaults to an all-ones vector.
            -   | 'gamma' : np.ndarray, shape = [k],
                | Initial estimate of random effects covariances. If None then it defaults to an all-ones vector.
            -   | 'tbeta' : np.ndarray, shape = [n],
                | Initial estimate of sparse fixed effects. If None then it defaults to an all-zeros vector.
            -   | 'tgamma' : np.ndarray, shape = [k],
                | Initial estimate of sparse random covariances. If None then it defaults to an all-zeros vector.

    warm_start : bool, default is False
        Whether to use previous parameters as initial ones. Overrides initial_parameters if given.
        Throws NotFittedError if set to True when not fitted.

    random_intercept : bool, default = True
        Whether treat the intercept as a random effect.

    kwargs :
        Forwarded to LinearLMEProblem.from_x_y.

    Returns
    -------
    self : LinearLMESparseModel
        Fitted regression model.

    Raises
    ------
    ValueError
        If self.regularization_type is neither "l2" nor "loss-weighted".
    AssertionError
        If nnz_tbeta / nnz_tgamma exceed the number of fixed / random effects.
    """
    problem, _ = LinearLMEProblem.from_x_y(
        x, y, columns_labels, random_intercept=random_intercept, **kwargs)

    if initial_parameters is None:
        initial_parameters = {}
    # The un-suffixed keys are what this method has always read; the "0"-suffixed
    # names are accepted as a fallback because the documentation advertised them.
    beta0 = initial_parameters.get("beta",
                                   initial_parameters.get("beta0", None))
    gamma0 = initial_parameters.get("gamma",
                                    initial_parameters.get("gamma0", None))
    tbeta0 = initial_parameters.get("tbeta",
                                    initial_parameters.get("tbeta0", None))
    tgamma0 = initial_parameters.get("tgamma",
                                     initial_parameters.get("tgamma0", None))
    _check_input_consistency(problem, beta0, gamma0, tbeta0, tgamma0)

    if self.regularization_type == "l2":
        oracle = LinearLMEOracleRegularized(problem,
                                            lb=self.lb,
                                            lg=self.lg,
                                            nnz_tbeta=self.nnz_tbeta,
                                            nnz_tgamma=self.nnz_tgamma)
    elif self.regularization_type == "loss-weighted":
        oracle = LinearLMEOracleW(problem,
                                  lb=self.lb,
                                  lg=self.lg,
                                  nnz_tbeta=self.nnz_tbeta,
                                  nnz_tgamma=self.nnz_tgamma)
    else:
        raise ValueError("regularization_type is not understood.")

    num_fixed_effects = problem.num_fixed_effects
    num_random_effects = problem.num_random_effects
    assert num_fixed_effects >= self.nnz_tbeta
    assert num_random_effects >= self.nnz_tgamma

    if warm_start:
        check_is_fitted(self, 'coef_')
        beta = self.coef_["beta"]
        gamma = self.coef_["gamma"]
        tbeta = self.coef_["tbeta"]
        tgamma = self.coef_["tgamma"]
    else:
        beta = beta0 if beta0 is not None else np.ones(num_fixed_effects)
        gamma = gamma0 if gamma0 is not None else np.ones(num_random_effects)
        tbeta = tbeta0 if tbeta0 is not None else np.zeros(num_fixed_effects)
        tgamma = (tgamma0 if tgamma0 is not None
                  else np.zeros(num_random_effects))

    if self.initializer == "EM":
        # one EM-like step: re-estimate beta, then gamma from the implied random effects
        beta = oracle.optimal_beta(gamma, tbeta)
        us = oracle.optimal_random_effects(beta, gamma)
        gamma = np.sum(us**2, axis=0) / oracle.problem.num_groups

    def projected_direction(current_gamma, current_direction):
        # Zero the components that would push a gamma sitting at 0 further down
        # (or leave it there), so a step along the result keeps gamma >= 0.
        proj_direction = current_direction.copy()
        for j, _ in enumerate(current_gamma):
            if current_gamma[j] == 0 and current_direction[j] <= 0:
                proj_direction[j] = 0
        return proj_direction

    # NOTE: local variable names in this method are deliberately kept stable:
    # self.logger_.log(locals()) below receives them by name.
    loss = oracle.loss(beta, gamma, tbeta, tgamma)
    self.logger_ = Logger(self.logger_keys)

    # np.inf instead of np.infty: the latter alias was removed in NumPy 2.0.
    prev_tbeta = np.inf
    prev_tgamma = np.inf

    iteration = 0
    while (np.linalg.norm(tbeta - prev_tbeta) > self.tol
           and np.linalg.norm(tgamma - prev_tgamma) > self.tol
           and iteration < self.n_iter):

        if self.solver == 'pgd':
            inner_iteration = 0
            beta = oracle.optimal_beta(gamma, tbeta, beta=beta)
            gradient_gamma = oracle.gradient_gamma(beta, gamma, tgamma)
            # projecting the gradient to the set of constraints
            direction = projected_direction(gamma, -gradient_gamma)

            while (np.linalg.norm(direction) > self.tol_inner
                   and inner_iteration < self.n_iter_inner):
                if self.use_line_search:
                    # line search method
                    step_len = 0.1
                    # never step past the point where some gamma component hits zero
                    for i, _ in enumerate(gamma):
                        if direction[i] < 0:
                            step_len = min(-gamma[i] / direction[i], step_len)

                    current_loss = oracle.loss(beta, gamma, tbeta, tgamma)

                    # halve the step until the loss decreases by a small relative margin
                    while (oracle.loss(beta, gamma + step_len * direction,
                                       tbeta, tgamma) >=
                           (1 - np.sign(current_loss) * 1e-5) * current_loss):
                        step_len *= 0.5
                        if step_len <= 1e-15:
                            break
                else:
                    # fixed step size; `iteration` starts at 0, so it is shifted
                    # by one to avoid dividing by zero on the first outer iteration
                    step_len = 1 / (iteration + 1)
                if step_len <= 1e-15:
                    break
                gamma = gamma + step_len * direction
                gradient_gamma = oracle.gradient_gamma(beta, gamma, tgamma)
                direction = projected_direction(gamma, -gradient_gamma)
                inner_iteration += 1

        # Done on every pass, whatever the solver, so the outer loop always advances.
        prev_tbeta = tbeta
        prev_tgamma = tgamma
        tbeta = oracle.optimal_tbeta(beta=beta, gamma=gamma)
        tgamma = oracle.optimal_tgamma(tbeta, gamma, beta=beta)
        iteration += 1

        loss = oracle.loss(beta, gamma, tbeta, tgamma)
        if len(self.logger_keys) > 0:
            self.logger_.log(locals())

    # The loop stops either because the sparse iterates stopped moving or because
    # the iteration budget ran out; only the former counts as convergence. (An
    # in-loop "iteration >= n_iter" check can never fire under the loop guard.)
    still_moving = (np.linalg.norm(tbeta - prev_tbeta) > self.tol
                    and np.linalg.norm(tgamma - prev_tgamma) > self.tol)
    converged = 0 if still_moving else 1

    us = oracle.optimal_random_effects(beta, gamma)
    sparse_us = oracle.optimal_random_effects(tbeta, tgamma)

    per_group_coefficients = get_per_group_coefficients(
        beta, us, labels=problem.column_labels)
    sparse_per_group_coefficients = get_per_group_coefficients(
        tbeta, sparse_us, labels=problem.column_labels)

    self.logger_.add('converged', converged)
    self.logger_.add('iterations', iteration)

    self.coef_ = {
        "beta": beta,
        "gamma": gamma,
        "tbeta": tbeta,
        "tgamma": tgamma,
        "random_effects": us,
        "sparse_random_effects": sparse_us,
        "group_labels": np.copy(problem.group_labels),
        "per_group_coefficients": per_group_coefficients,
        "sparse_per_group_coefficients": sparse_per_group_coefficients,
    }

    return self
def test_get_set_params(self):
    """
    Fits two differently configured models, swaps their hyper-parameters
    through get_params/set_params, refits, and requires each model to
    reproduce the other's original predictions exactly.
    """
    problem_parameters = dict(
        groups_sizes=[20, 5, 10, 50],
        features_labels=[3, 3, 3],
        random_intercept=True,
        features_covariance_matrix=np.array([[1, 0, 0],
                                             [0, 1, 0.7],
                                             [0, 0.7, 1]]),
        obs_std=0.1,
    )
    model_parameters = dict(
        nnz_tbeta=4,
        nnz_tgamma=4,
        # We expect the coefficient vectors to be dense so we turn regularization off.
        lb=0,
        lg=0,
        initializer='EM',
        logger_keys=('converged', 'loss',),
        tol=1e-6,
        n_iter=1000,
        tol_inner=1e-4,
        n_iter_inner=1000,
    )
    # Now we want to solve a regularized problem to get two different models
    model2_parameters = dict(
        nnz_tbeta=3,
        nnz_tgamma=2,
        lb=20,
        lg=20,
        initializer=None,
        logger_keys=('converged', ),
        tol=1e-6,
        n_iter=1000,
        tol_inner=1e-4,
        n_iter_inner=1000,
    )

    problem, _ = LinearLMEProblem.generate(**problem_parameters, seed=42)
    x, y = problem.to_x_y()

    model = LinearLMESparseModel(**model_parameters)
    model.fit(x, y)
    params = model.get_params()
    y_pred = model.predict(x)

    model2 = LinearLMESparseModel(**model2_parameters)
    model2.fit(x, y)
    params2 = model2.get_params()
    y_pred2 = model2.predict(x)

    # first model takes over the second model's settings
    model.set_params(**params2)
    model.fit(x, y)
    y_pred_with_other_params = model.predict(x)
    first_matches_second = np.equal(y_pred_with_other_params, y_pred2).all()
    assert first_matches_second, \
        "set_params or get_params is not working properly"

    # second model takes over the first model's original settings
    model2.set_params(**params)
    model2.fit(x, y)
    y_pred2_with_other_params = model2.predict(x)
    second_matches_first = np.equal(y_pred2_with_other_params, y_pred).all()
    assert second_matches_first, \
        "set_params or get_params is not working properly"
def test_solving_dense_problem(self):
    """
    Fits LinearLMESparseModel with regularization switched off on several
    generated problems and checks that the logged loss never increases and
    that explained variance and MSE pass fixed thresholds.
    """
    trials = 20
    max_mse = 0.05
    min_explained_variance = 0.9

    problem_parameters = dict(
        groups_sizes=[20, 5, 10, 50],
        features_labels=[3, 3, 3],
        random_intercept=True,
        features_covariance_matrix=np.array([[1, 0, 0],
                                             [0, 1, 0.7],
                                             [0, 0.7, 1]]),
        obs_std=0.1,
    )
    model_parameters = dict(
        nnz_tbeta=2,
        nnz_tgamma=2,
        # We expect the coefficient vectors to be dense so we turn regularization off.
        lb=0,
        lg=0,
        initializer='EM',
        logger_keys=('converged', 'loss',),
        tol=1e-6,
        n_iter=1000,
        tol_inner=1e-4,
        n_iter_inner=1000,
    )

    for seed in range(trials):
        problem, _ = LinearLMEProblem.generate(**problem_parameters,
                                               seed=seed)
        model = LinearLMESparseModel(**model_parameters)
        x, y = problem.to_x_y()
        model.fit(x, y)

        # consecutive differences of the logged loss must all be non-positive
        loss_history = np.array(model.logger_.get("loss"))
        loss_steps = loss_history[1:] - loss_history[:-1]
        self.assertTrue(
            np.all(loss_steps <= 0),
            msg="%d) Loss does not decrease monotonically with iterations. (seed=%d)"
            % (seed, seed))

        y_pred = model.predict(x)
        explained_variance = explained_variance_score(y, y_pred)
        mse = mean_squared_error(y, y_pred)

        self.assertGreater(
            explained_variance,
            min_explained_variance,
            msg="%d) Explained variance is too small: %.3f < %.3f. (seed=%d)"
            % (seed, explained_variance, min_explained_variance, seed))
        self.assertGreater(max_mse,
                           mse,
                           msg="%d) MSE is too big: %.3f > %.2f (seed=%d)" %
                           (seed, mse, max_mse, seed))