def elastic_net(X, Y, params):
    """
    Fit a cross-validated elastic net for one response, then refit on the predictors it keeps.

    :param X: Predictor data
    :type X: np.ndarray [K x N]
    :param Y: Response data
    :type Y: np.ndarray [1 x N]
    :param params: Keyword arguments passed to the ElasticNetCV constructor
    :type params: dict
    :return: A dict with keys ``pp``, ``betas`` and ``betas_resc``.
        When at least one coefficient survives thresholding, ``pp`` is a boolean
        array [K, ] marking the retained predictors and ``betas`` / ``betas_resc``
        come from the base_regression helpers run on the retained predictors only.
        Otherwise ``pp`` is a list of K ``True`` values and both arrays are zeros [K, ].
    :rtype: dict
    """

    assert check.argument_type(X, np.ndarray)
    assert check.argument_type(Y, np.ndarray)

    # Unpacking also enforces that X is two-dimensional
    n_predictors, _ = X.shape

    design = X.T            # [N, K]
    response = Y.flatten()  # [N, ] (flatten returns a copy, so the caller's Y is untouched)

    # Fit the linear model using the elastic net
    fitted = ElasticNetCV(**params).fit(design, response)

    # Zero out any coefficient whose magnitude is under MIN_COEF
    weights = fitted.coef_  # [K, ]
    weights[np.abs(weights) < MIN_COEF] = 0.
    keep = weights != 0     # Boolean mask of surviving predictors [K, ]

    # Nothing survived: report an empty model
    if keep.sum() == 0:
        return dict(pp=np.repeat(True, n_predictors).tolist(),
                    betas=np.zeros(n_predictors),
                    betas_resc=np.zeros(n_predictors))

    # Redo the linear regression with only the surviving predictors and calculate beta_resc
    kept_design = design[:, keep]

    # Return value is unused; presumably this reshapes response to 2d in place - confirm in utils
    utils.make_array_2d(response)

    betas = base_regression.recalculate_betas_from_selected(kept_design, response)
    betas_resc = base_regression.predict_error_reduction(kept_design, response, betas)

    return dict(pp=keep, betas=betas, betas_resc=betas_resc)
def best_subset_regression(x, y, gprior):
    """
    Pick a predictor combination from the expected BIC of every combination and return its betas.

    :param x: Independent (predictor) variables
    :type x: np.ndarray [n x k]
    :param y: Dependent (response) variable
    :type y: np.ndarray [n x 1]
    :param gprior: Weighted priors
    :type gprior: np.ndarray [k x 1]
    :return: An array of k zeros if choosing the combination raises
        np.linalg.LinAlgError or if the chosen combination selects no predictors;
        otherwise the output of base_regression.recalculate_betas_from_selected
        for the chosen combination.
    :rtype: np.ndarray
    """

    # Unpacking also enforces that x is two-dimensional
    _, k = x.shape

    combos = combo_index(k)
    bic_combos = calc_all_expected_BIC(x, y, gprior, combos, check_rank=False)

    # Fallback result when no model can be selected
    empty_betas = np.zeros(k, dtype=np.dtype(float))

    try:
        chosen = combos[:, _best_combo_idx(x, bic_combos, combos)]
    except np.linalg.LinAlgError:
        return empty_betas

    # The chosen combination may select nothing at all
    if not chosen.sum() > 0:
        return empty_betas

    return base_regression.recalculate_betas_from_selected(x, y, chosen)
def test_recalculate_betas_from_selected(self):
    """Check the betas returned for a small two-predictor design with no index selection."""

    # testing rank(xtx) = shape(xtx)
    predictors = np.array([[0, 1],
                           [2, 3],
                           [4, 5],
                           [6, 7],
                           [8, 9]])
    response = np.array([0, 1, 0, 1, 0])
    expected = np.array([-0.4, 0.4])

    observed = base_regression.recalculate_betas_from_selected(predictors, response, idx=None)

    np.testing.assert_array_almost_equal(observed, expected)
def test_recalculate_betas_from_selected_matrix_rank(self):
    """Check that a rank-deficient design (second row is twice the first) gives all-zero betas."""

    # test that the matrix rank(A) = min(n,m)
    # dim(v) - rank(A) = null(A) = 0
    predictors = np.array([[2, 4, 6],
                           [4, 8, 12]])
    response = np.array([1, 1])
    expected = np.zeros(3)

    observed = base_regression.recalculate_betas_from_selected(predictors, response, idx=None)

    # Compare to two decimal places
    np.testing.assert_array_almost_equal(observed, expected, decimal=2)
def sklearn_gene(x, y, model, min_coef=None, **kwargs):
    """
    Fit a scikit-learn model for one gene, then refit on the features it keeps.

    :param x: Feature array
    :type x: np.ndarray [N x K]
    :param y: Response array
    :type y: np.ndarray [N x 1]
    :param model: Instance of a scikit BaseEstimator-derived model
    :type model: BaseEstimator
    :param min_coef: A minimum coefficient value to include in the model.
        Any coefficient with a smaller absolute value is set to 0.
        No thresholding is applied when this is None.
    :type min_coef: numeric
    :param kwargs: Extra keyword arguments forwarded to ``model.fit``
    :return: A dict of results for this gene with keys ``pp``, ``betas`` and ``betas_resc``.
        When at least one coefficient is nonzero, ``pp`` is a boolean array marking
        the retained features and ``betas`` / ``betas_resc`` come from the
        base_regression helpers run on the retained features only. Otherwise ``pp``
        is a list of K ``True`` values and both arrays are zeros [K, ].
    :rtype: dict
    """

    assert check.argument_type(x, np.ndarray)
    assert check.argument_type(y, np.ndarray)
    assert check.argument_is_subclass(model, BaseEstimator)

    # Unpacking also enforces that x is two-dimensional
    _, n_features = x.shape

    # Fit the model
    model.fit(x, y, **kwargs)

    # Coefficients live on the model itself, or on a wrapped estimator_ when the model has no coef_
    try:
        weights = model.coef_
    except AttributeError:
        weights = model.estimator_.coef_

    # Optional thresholding; this writes into the coefficient array obtained from the model
    if min_coef is not None:
        weights[np.abs(weights) < min_coef] = 0.

    keep = weights != 0  # Boolean mask of nonzero coefficients [K, ]

    # Nothing was retained: report an empty model
    if keep.sum() == 0:
        return dict(pp=np.repeat(True, n_features).tolist(),
                    betas=np.zeros(n_features),
                    betas_resc=np.zeros(n_features))

    # Redo the linear regression with only the retained features and calculate beta_resc
    x = x[:, keep]

    # Return value is unused; presumably this reshapes y to 2d in place - confirm in utils
    utils.make_array_2d(y)

    betas = base_regression.recalculate_betas_from_selected(x, y)
    betas_resc = base_regression.predict_error_reduction(x, y, betas)

    return dict(pp=keep, betas=betas, betas_resc=betas_resc)
def stars_model_select(x, y, alphas, threshold=_DEFAULT_THRESHOLD, num_subsamples=_DEFAULT_NUM_SUBSAMPLES,
                       random_seed=_DEFAULT_SEED, method='lasso', **kwargs):
    """
    Model using StARS (Stability Approach to Regularization Selection) for model selection

    The data is split into subsamples, every alpha is fit on every subsample, and the
    per-alpha results are summarized by ``_calculate_stability``. The smallest alpha whose
    running-maximum mean value stays below ``threshold`` is selected (the largest alpha is
    used if none qualifies), the model is refit on all the data with that alpha, and the
    predictors with nonzero betas are passed to the base_regression helpers.

    :param x: Predictor array [n x k]
    :param y: Response array, indexed along its first axis by observation
    :param alphas: Regularization values to evaluate (iterated more than once)
    :param threshold: Cutoff that the summarized value must stay below for an alpha to qualify
    :param num_subsamples: Number of subsamples to draw; must not exceed the number of observations
    :param random_seed: Seed passed to the subsample index generator
    :param method: Regression method, 'lasso' or 'ridge' (case-insensitive)
    :param kwargs: Extra keyword arguments forwarded to the regression function
    :return: A dict with keys ``pp``, ``betas`` and ``betas_resc``. If the refit has no
        nonzero betas, ``pp`` is a list of k ``True`` values and both arrays are zeros [k, ].
    :rtype: dict
    :raises ValueError: If ``method`` is not 'lasso' or 'ridge', or if there are fewer
        observations than ``num_subsamples``
    """

    method_name = method.lower()
    if method_name == 'lasso':
        _regress_func = lasso
    elif method_name == 'ridge':
        _regress_func = ridge
    else:
        raise ValueError("Method must be 'lasso' or 'ridge'")

    # Number of obs
    n, k = x.shape

    if n < num_subsamples:
        msg = "Subsamples ({ns}) for StARS is larger than the number of samples ({n})".format(ns=num_subsamples, n=n)
        raise ValueError(msg)

    # Calculate the number of obs per subsample
    b = math.floor(n / num_subsamples)

    # Make an index for subsampling
    idx = _make_subsample_idx(n, b, num_subsamples, random_seed=random_seed)

    # Calculate betas for stability selection
    betas = {a: [] for a in alphas}

    for sample in range(num_subsamples):
        in_sample = idx == sample

        # Sample and put into column-major (the coordinate descent implementation in sklearn wants that order)
        x_samp = np.asarray(x[in_sample, :], order='F')
        y_samp = y[in_sample]

        for a in alphas:
            betas[a].append(_regress_func(x_samp, y_samp, a, **kwargs))

    # Calculate edge stability
    stabilities = {a: _calculate_stability(betas[a]) for a in alphas}

    # Order alphas from largest to smallest and take the mean value for each
    alphas = np.sort(alphas)[::-1]
    total_instability = np.array([np.mean(stabilities[a]) for a in alphas])

    # Force the sequence to be non-decreasing as alpha decreases
    for pos in range(1, len(total_instability)):
        previous = total_instability[pos - 1]
        if total_instability[pos] < previous:
            total_instability[pos] = previous

    # Smallest qualifying alpha, or the largest alpha if none qualifies
    threshold_alphas = alphas[total_instability < threshold]
    if len(threshold_alphas) > 0:
        selected_alpha = np.min(threshold_alphas)
    else:
        selected_alpha = alphas[0]

    # Refit on all the data with the selected alpha
    refit_betas = _regress_func(x, y, selected_alpha, **kwargs)
    beta_nonzero = _make_bool_matrix(refit_betas)

    if beta_nonzero.sum() != 0:
        x = x[:, beta_nonzero]

        # Return value is unused; presumably this reshapes y to 2d in place - confirm in utils
        utils.make_array_2d(y)

        betas = base_regression.recalculate_betas_from_selected(x, y)
        betas_resc = base_regression.predict_error_reduction(x, y, betas)

        return dict(pp=beta_nonzero, betas=betas, betas_resc=betas_resc)

    return dict(pp=np.repeat(True, k).tolist(), betas=np.zeros(k), betas_resc=np.zeros(k))