def test_uoi_lasso_toy():
    """Check that UoI Lasso recovers the exact coefficients of noiseless data.

    The coordinate-descent solver is always exercised (one plain fit followed
    by one fit with ``verbose=True``); the pycasso solver is exercised only
    when ``pycasso`` is available.
    """
    design = np.array([[-1, 2],
                       [4, 1],
                       [1, 3],
                       [4, 3],
                       [8, 11]], dtype=float)
    true_beta = np.array([1, 4], dtype=float)
    response = np.dot(design, true_beta)

    # The selection/estimation fractions are chosen slightly smaller so that
    # the held-out test sets are good ones on this tiny problem.
    shared_kwargs = dict(fit_intercept=False,
                         selection_frac=0.75,
                         estimation_frac=0.75)

    # Coordinate-descent solver.
    cd_model = UoI_Lasso(solver='cd', **shared_kwargs)
    cd_model.fit(design, response)
    cd_model.fit(design, response, verbose=True)
    assert_allclose(cd_model.coef_, true_beta)

    # Pycasso solver, only when the optional dependency is present.
    if pycasso is None:
        return
    pyc_model = UoI_Lasso(solver='pyc', **shared_kwargs)
    pyc_model.fit(design, response)
    assert_allclose(pyc_model.coef_, true_beta)
def test_choice_of_solver():
    """Verify that the ``solver`` argument determines the selection model.

    ``'cd'`` must yield a ``Lasso`` selection model and ``'pyc'`` must yield a
    ``PycLasso`` selection model.
    """
    expected_models = (('cd', Lasso), ('pyc', PycLasso))
    for solver_name, model_type in expected_models:
        uoi = UoI_Lasso(solver=solver_name)
        assert isinstance(uoi._selection_lm, model_type)
def test_variable_selection():
    """Check that UoI_Lasso, given an MPI communicator, finds the true support.

    The indices of the non-zero fitted coefficients must match the indices of
    the non-zero generating coefficients.
    """
    X, y, w = make_regression(coef=True, random_state=1)

    model = UoI_Lasso(comm=MPI.COMM_WORLD)
    model.fit(X, y)

    expected_support = np.nonzero(w)[0]
    recovered_support = np.nonzero(model.coef_)[0]

    assert_array_equal(expected_support, recovered_support)
    assert_array_almost_equal_nulp(expected_support, recovered_support)
def test_variable_selection():
    """Check that a default UoI_Lasso finds the true support.

    The indices of the non-zero fitted coefficients must match the indices of
    the non-zero generating coefficients.
    """
    from sklearn.datasets import make_regression

    X, y, w = make_regression(coef=True, random_state=1)

    model = UoI_Lasso()
    model.fit(X, y)

    expected_support = np.nonzero(w)[0]
    recovered_support = np.nonzero(model.coef_)[0]

    assert_array_equal(expected_support, recovered_support)
    assert_array_almost_equal_nulp(expected_support, recovered_support)
def test_fit_intercept():
    """Check that ``fit_intercept`` is passed through to both linear models.

    Both the selection and the estimation linear model must carry the value
    that was given to the ``UoI_Lasso`` constructor.
    """
    with_intercept = UoI_Lasso(fit_intercept=True)
    for linear_model in (with_intercept._selection_lm,
                         with_intercept._estimation_lm):
        assert linear_model.fit_intercept

    without_intercept = UoI_Lasso(fit_intercept=False)
    for linear_model in (without_intercept._selection_lm,
                         without_intercept._estimation_lm):
        assert not linear_model.fit_intercept
def test_intercept():
    """Test that UoI Lasso properly calculates the intercept when centering
    the response variable.

    The fitted intercept must equal ``mean(y) - mean(X, axis=0) . coef_``.
    """
    X = np.array([[-1, 2],
                  [0, 1],
                  [1, 3],
                  [4, 3]])
    y = np.array([8, 5, 14, 17])

    lasso = UoI_Lasso(normalize=False, fit_intercept=True)
    lasso.fit(X, y)

    expected_intercept = np.mean(y) - np.dot(X.mean(axis=0), lasso.coef_)
    # Compare with a floating-point tolerance: exact ``==`` on floats fails
    # on harmless last-bit differences in how the two sides are accumulated.
    assert np.allclose(lasso.intercept_, expected_intercept)
def test_estimation_score_usage():
    """Check that the estimation score of UoI Lasso can be changed.

    Each scoring method must be stored on the estimator, and the best score
    obtained under each method must differ from the others.
    """
    methods = ('r2', 'AIC', 'AICc', 'BIC')
    X, y = make_regression(n_features=10, n_informative=3, random_state=10)

    def best_score_for(method):
        # Build, verify and fit one estimator, then report its top score.
        model = UoI_Lasso(estimation_score=method)
        assert_equal(model.estimation_score, method)
        model.fit(X, y)
        return np.max(model.scores_)

    best_scores = [best_score_for(method) for method in methods]
    assert_equal(len(np.unique(best_scores)), len(methods))
def test_uoi_lasso_toy():
    """Check that UoI Lasso recovers the exact coefficients of noiseless data."""
    design = np.array([[-1, 2],
                       [4, 1],
                       [1, 3],
                       [4, 3],
                       [8, 11]], dtype=float)
    true_beta = np.array([1, 4], dtype=float)
    response = np.dot(design, true_beta)

    # The selection/estimation fractions are chosen slightly smaller so that
    # the held-out test sets are good ones on this tiny problem.
    model = UoI_Lasso(fit_intercept=False,
                      selection_frac=0.75,
                      estimation_frac=0.75)
    model.fit(design, response)

    assert_allclose(model.coef_, true_beta)
def test_pycasso_error():
    """Tests whether an error is raised if pycasso is not installed.

    Requesting the ``'pyc'`` solver is expected to raise ``ImportError``.
    """
    # Only the call that is expected to raise belongs inside the block: any
    # statement placed after it would never execute when the test passes.
    with pytest.raises(ImportError):
        UoI_Lasso(solver='pyc')
def test_get_reg_params():
    """Check ``get_reg_params`` of UoI Lasso against hand-computed values.

    With ``n_lambdas=2`` and ``eps=0.1`` the expected regularization
    parameters are ``alpha_max`` and ``alpha_max / 10``.
    """
    X = np.array([[-1, 2],
                  [0, 1],
                  [1, 3],
                  [4, 3]])
    y = np.array([7, 4, 13, 16])

    # Manual calculation of the regularization path.
    alpha_max = np.max(np.dot(X.T, y) / 4)
    expected_params = [{'alpha': alpha_max / divisor}
                       for divisor in (1., 10.)]

    # Same path as produced by the estimator.
    model = UoI_Lasso(n_lambdas=2, fit_intercept=False, eps=0.1)
    computed_params = model.get_reg_params(X, y)

    # Each entry must expose the same keys and (numerically) the same values.
    for computed, expected in zip(computed_params, expected_params):
        assert computed.keys() == expected.keys()
        assert_allclose(list(computed.values()), list(expected.values()))
def test_lass_bad_est_score():
    """Check that an unknown ``estimation_score`` makes UoI Lasso raise.

    Constructing and fitting with ``estimation_score='z'`` must produce a
    ``ValueError``.
    """
    X = np.random.randn(20, 5)
    y = np.random.randn(20)

    with pytest.raises(ValueError):
        model = UoI_Lasso(estimation_score='z',
                          n_boots_sel=10,
                          n_boots_est=10)
        model.fit(X, y)
def test_lasso_selection_sweep():
    """Check ``uoi_selection_sweep`` of UoI_Lasso against plain Lasso fits.

    For each regularization value, the swept coefficients must match those of
    a ``Lasso`` fitted on the same data with the same settings.
    """
    # Toy data.
    X = np.array([[-1, 2, 3],
                  [4, 1, -7],
                  [1, 3, 1],
                  [4, 3, 12],
                  [8, 11, 2]])
    beta = np.array([1, 4, 2])
    y = np.dot(X, beta)

    # Toy regularization path and one reference Lasso per entry.
    reg_param_values = [{'alpha': 1.0}, {'alpha': 2.0}]
    references = [Lasso(fit_intercept=True, normalize=True, **params)
                  for params in reg_param_values]

    uoi = UoI_Lasso(fit_intercept=True, normalize=True)
    coefs = uoi.uoi_selection_sweep(X, y, reg_param_values)

    for reference in references:
        reference.fit(X, y)

    for idx, reference in enumerate(references):
        assert np.allclose(coefs[idx], reference.coef_)
def test_lasso_selection_sweep():
    """Check ``uoi_selection_sweep`` of UoI_Lasso against plain Lasso fits.

    For each regularization value, the swept coefficients must match those of
    a ``Lasso`` fitted on the same data with the same ``max_iter``.
    """
    # Toy data.
    X = np.array([[-1, 2, 3],
                  [4, 1, -7],
                  [1, 3, 1],
                  [4, 3, 12],
                  [8, 11, 2]], dtype=float)
    beta = np.array([1, 4, 2], dtype=float)
    y = np.dot(X, beta)

    # Toy regularization path.
    reg_param_values = [{'alpha': 1.0}, {'alpha': 2.0}]

    uoi = UoI_Lasso(fit_intercept=True, warm_start=False)

    # Reference models share the iteration budget of the UoI estimator.
    references = [Lasso(fit_intercept=True, max_iter=uoi.max_iter, **params)
                  for params in reg_param_values]

    # The sweep is called directly (without ``fit``), so the output dimension
    # is set by hand first.
    uoi.output_dim = 1
    coefs = uoi.uoi_selection_sweep(X, y, reg_param_values)

    for reference in references:
        reference.fit(X, y)

    for idx, reference in enumerate(references):
        assert np.allclose(coefs[idx], reference.coef_)
def test_uoi_lasso_estimation_shape_match():
    """Check that ``_score_predictions`` rejects mismatched shapes.

    Two failures are expected: one when ``coef_`` is replaced by a
    two-row array, and one when ``y`` is given two extra trailing axes.
    """
    n_samples = 40
    n_features = 10
    X, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=5,
                           random_state=1)
    lasso = UoI_Lasso()
    lasso.fit(X, y)

    def scoring_inputs():
        # Full support mask plus two identical "bootstrap" index sets.
        mask = np.ones(n_features, dtype=bool)
        idxs = [np.arange(n_samples)] * 2
        return mask, idxs

    mismatch_message = 'Targets and predictions are not the same shape.'
    with pytest.raises(ValueError, match=mismatch_message):
        support, boot_idxs = scoring_inputs()
        lasso.coef_ = np.random.randn(2, n_features)
        lasso._score_predictions('r2', lasso, X, y, support, boot_idxs)

    with pytest.raises(ValueError, match='y should either have'):
        support, boot_idxs = scoring_inputs()
        y_3d = y[:, np.newaxis, np.newaxis]
        lasso._score_predictions('r2', lasso, X, y_3d, support, boot_idxs)
def test_uoi_lasso_fit_shape_match():
    """Check which shapes of ``y`` are accepted by ``UoI_Lasso.fit``.

    A column vector must be accepted, while a two-column target and a
    three-dimensional target must each raise ``ValueError``.
    """
    n_samples = 40
    n_features = 10
    X, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=5,
                           random_state=1)
    lasso = UoI_Lasso()

    # A single trailing axis is tolerated.
    y_column = y[:, np.newaxis]
    lasso.fit(X, y_column)

    # Anything else is rejected with an explanatory message.
    message = 'y should either have shape'
    y_two_columns = np.tile(y_column, (1, 2))
    with pytest.raises(ValueError, match=message):
        lasso.fit(X, y_two_columns)

    y_3d = y[:, np.newaxis, np.newaxis]
    with pytest.raises(ValueError, match=message):
        lasso.fit(X, y_3d)
def test_intercept_and_coefs_no_selection():
    """Check intercept and coefficient recovery with and without
    standardization.

    Both settings of ``standardize`` must recover the generating intercept
    and coefficients to within a relative tolerance of 0.25.
    """
    # Linear model with an intercept.
    X, y, beta, intercept = make_linear_regression(n_samples=500,
                                                   n_features=2,
                                                   n_informative=2,
                                                   snr=10.,
                                                   include_intercept=True,
                                                   random_state=2332)

    # First without standardization, then with it.
    for standardize in (False, True):
        model = UoI_Lasso(standardize=standardize, fit_intercept=True)
        model.fit(X, y)
        assert_allclose(model.intercept_, intercept, rtol=0.25)
        assert_allclose(model.coef_, beta, rtol=0.25)
def test_set_random_state():
    """Check how ``random_state`` affects the fitted coefficients.

    Equal seeds must give equal coefficients; different seeds, and unseeded
    estimators, must give different coefficients.
    """
    X, y = make_regression(n_features=5,
                           n_informative=3,
                           random_state=16,
                           noise=.5)

    # Identical seeds -> identical fits.
    first = UoI_Lasso(random_state=13)
    second = UoI_Lasso(random_state=13)
    first.fit(X, y)
    second.fit(X, y)
    assert_array_equal(first.coef_, second.coef_)

    # A different seed -> a different fit.
    second = UoI_Lasso(random_state=14)
    second.fit(X, y)
    coefs_match = np.array_equal(first.coef_, second.coef_)
    assert not coefs_match

    # No seed at all -> fits differ from one another.
    first = UoI_Lasso()
    second = UoI_Lasso()
    first.fit(X, y)
    second.fit(X, y)
    coefs_match = np.array_equal(first.coef_, second.coef_)
    assert not coefs_match
def calculate_strf_for_neurons(self, method, recording_idx, window_length=0.5,
                               cells=None, test_frac=None,
                               return_scores=False, verbose=False, **kwargs):
    """Calculates the STRFs for specified neurons and a specified method.

    For every cell and every frame of the window, the stimulus matrix is
    regressed onto that cell's responses; after each frame the responses are
    rolled by one sample along the samples axis.

    Parameters
    ----------
    method : string
        The regression method to use when calculating STRFs. One of
        'OLS', 'Ridge', 'Lasso' or 'UoI_Lasso'.
    recording_idx : int
        The recording index to obtain design and response matrices.
    window_length : float
        The number of seconds to fit in STRF window.
    cells : int, list, np.ndarray or None
        The set of cell indices under consideration. If None, creates a
        numpy array of all cells.
    test_frac : float or None
        The fraction of data to use as a test set. If None, the entire set
        will be used only for training. When given, the first
        ``int(test_frac * n_samples)`` samples form the test set.
    return_scores : bool
        A flag indicating whether to return explained variance over window.
    verbose : bool
        If True, function will output which frame it is currently fitting.
    **kwargs
        Optional settings forwarded to the chosen fitter ('cv' for Ridge;
        'normalize', 'cv', 'max_iter' for Lasso; 'standardize',
        'n_boots_sel', 'n_boots_est', 'selection_frac', 'estimation_frac',
        'n_lambdas', 'stability_selection', 'estimation_score' for
        UoI_Lasso). Unset entries use the defaults in the code below.

    Returns
    -------
    strf : np.ndarray, shape (n_cells, n_frames_per_window, n_features)
        A numpy array containing the spatio-temporal receptive field.
        Singleton dimensions are squeezed out.
    intercepts : np.ndarray, shape (n_cells, n_frames_per_window)
        A numpy array containing the intercepts for the STRFs (returned
        without squeezing).
    training_scores : tuple of np.ndarrays, each with shape
        (n_cells, n_frames_per_window)
        ``(r2, BIC, AIC)`` arrays measuring the predictive power of the
        STRF for each frame in the window, squeezed. Returned only if
        requested.
    test_scores : tuple of np.ndarrays, each with shape
        (n_cells, n_frames_per_window)
        Same layout as ``training_scores`` but evaluated on the test set.
        Returned only if requested. If the test fraction is None,
        test_scores will be returned as None.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported regression methods.
    """
    # set up array of cells to iterate over
    cells = self.check_cells(cells=cells)

    # extract design and response matrices
    stimuli = self.get_stims_for_recording(recording_idx=recording_idx,
                                           window_length=window_length)
    responses = self.get_responses_for_recording(
        recording_idx=recording_idx,
        window_length=window_length,
        cells=cells)

    # number of frames that will appear in window length
    n_frames_per_window = self.get_n_frames_per_window(
        recording_idx=recording_idx,
        window_length=window_length)

    # create object to perform fitting
    if method == 'OLS':
        fitter = LinearRegression()
    elif method == 'Ridge':
        fitter = RidgeCV(cv=kwargs.get('cv', 5))
    elif method == 'Lasso':
        fitter = LassoCV(normalize=kwargs.get('normalize', True),
                         cv=kwargs.get('cv', 5),
                         max_iter=kwargs.get('max_iter', 10000))
    elif method == 'UoI_Lasso':
        fitter = UoI_Lasso(
            standardize=kwargs.get('standardize', True),
            n_boots_sel=kwargs.get('n_boots_sel', 30),
            n_boots_est=kwargs.get('n_boots_est', 30),
            selection_frac=kwargs.get('selection_frac', 0.8),
            estimation_frac=kwargs.get('estimation_frac', 0.8),
            n_lambdas=kwargs.get('n_lambdas', 30),
            stability_selection=kwargs.get('stability_selection', 1.),
            estimation_score=kwargs.get('estimation_score', 'BIC'))
    else:
        # %s, not %g: ``method`` is a string and %g would itself raise a
        # TypeError, masking the intended ValueError.
        raise ValueError('Method %s is not available.' % method)

    # extract dimensions and create storage
    # (stimuli are laid out features x samples)
    n_features, n_samples = stimuli.shape
    n_cells = cells.size
    strf = np.zeros((n_cells, n_frames_per_window, n_features))
    intercepts = np.zeros((n_cells, n_frames_per_window))

    # training and test score storage
    r2s_training = np.zeros((n_cells, n_frames_per_window))
    aics_training = np.zeros((n_cells, n_frames_per_window))
    bics_training = np.zeros((n_cells, n_frames_per_window))

    # if we evaluate on a test set, split up the data
    if test_frac is not None:
        n_test_samples = int(test_frac * n_samples)
        # the samples axis is different for the stimuli and responses
        # matrices; the leading chunk becomes the test set
        stimuli_test, stimuli = np.split(stimuli, [n_test_samples], axis=1)
        responses_test, responses = np.split(responses, [n_test_samples],
                                             axis=0)
        r2s_test = np.zeros((n_cells, n_frames_per_window))
        aics_test = np.zeros((n_cells, n_frames_per_window))
        bics_test = np.zeros((n_cells, n_frames_per_window))

    # iterate over cells
    for cell_idx, cell in enumerate(cells):
        if verbose:
            print('Cell ', cell)

        # copy response matrix so rolling does not disturb other cells
        responses_copy = np.copy(responses)
        if test_frac is not None:
            responses_test_copy = np.copy(responses_test)

        # iterate over frames in window
        for frame in range(n_frames_per_window):
            if verbose:
                print('  Frame ', frame)

            # perform fit
            fitter.fit(stimuli.T, responses_copy[:, cell_idx])

            # extract coefficients
            strf[cell_idx, frame, :] = fitter.coef_.T
            intercepts[cell_idx, frame] = fitter.intercept_

            # scores
            y_true = responses_copy[:, cell_idx]
            y_pred = fitter.intercept_ + np.dot(stimuli.T, fitter.coef_)
            # model size for the information criteria: non-zero
            # coefficients plus the intercept (kept separate from the
            # design-matrix dimension ``n_features``)
            n_params = np.count_nonzero(fitter.coef_) + 1

            # explained variance
            r2s_training[cell_idx, frame] = r2_score(y_true=y_true,
                                                     y_pred=y_pred)
            # bics
            bics_training[cell_idx, frame] = self.BIC(y_true=y_true,
                                                      y_pred=y_pred,
                                                      n_features=n_params)
            # aics
            aics_training[cell_idx, frame] = self.AIC(y_true=y_true,
                                                      y_pred=y_pred,
                                                      n_features=n_params)

            # roll the window up
            responses_copy = np.roll(responses_copy, -1, axis=0)

            # act on test set if necessary
            if test_frac is not None:
                y_true_test = responses_test_copy[:, cell_idx]
                y_pred_test = fitter.intercept_ \
                    + np.dot(stimuli_test.T, fitter.coef_)

                # explained variance
                r2s_test[cell_idx, frame] = r2_score(y_true=y_true_test,
                                                     y_pred=y_pred_test)
                # bics
                bics_test[cell_idx, frame] = self.BIC(y_true=y_true_test,
                                                      y_pred=y_pred_test,
                                                      n_features=n_params)
                # aics
                aics_test[cell_idx, frame] = self.AIC(y_true=y_true_test,
                                                      y_pred=y_pred_test,
                                                      n_features=n_params)

                # roll the window up
                responses_test_copy = np.roll(responses_test_copy, -1,
                                              axis=0)

    # get rid of potential unnecessary dimensions
    strf = np.squeeze(strf)
    r2s_training = np.squeeze(r2s_training)
    bics_training = np.squeeze(bics_training)
    aics_training = np.squeeze(aics_training)
    training_scores = (r2s_training, bics_training, aics_training)

    if test_frac is not None:
        r2s_test = np.squeeze(r2s_test)
        bics_test = np.squeeze(bics_test)
        aics_test = np.squeeze(aics_test)
        test_scores = (r2s_test, bics_test, aics_test)
    else:
        test_scores = None

    if return_scores:
        return strf, intercepts, training_scores, test_scores
    else:
        return strf, intercepts
def __call__(self, task_tuple):
    """Run one simulated regression task and record its result.

    Parameters
    ----------
    task_tuple : sequence
        ``(cov_param_idx, rep, algorithm)``. ``cov_param_idx`` indexes
        ``self.cov_params``; ``rep`` is only recorded in the task
        signature; ``algorithm`` selects the estimator:
        0 -> LassoCV, 1 -> UoI_Lasso (r2 estimation score),
        2 -> PycassoCV with SCAD penalty, 3 -> PycassoCV with MCP penalty.

    Raises
    ------
    ValueError
        If ``algorithm`` is not one of 0, 1, 2, 3.

    Notes
    -----
    Appends the sparsified true coefficients to ``self.beta``, the estimate
    to ``self.beta_hat`` and the task identifiers to
    ``self.task_signature``, then prints the elapsed time (which covers
    data generation as well as fitting).
    """
    cov_param_idx = task_tuple[0]
    rep = task_tuple[1]
    algorithm = task_tuple[2]

    # Fail fast on an unknown algorithm code: otherwise no branch below
    # would assign ``beta_hat`` and the task would die with an unrelated
    # UnboundLocalError only after the data had been generated.
    if algorithm not in (0, 1, 2, 3):
        raise ValueError('Unknown algorithm code %r; expected 0, 1, 2 or 3.'
                         % (algorithm,))

    n_features = self.n_features
    n_samples = self.n_samples

    beta = gen_beta2(n_features, n_features, sparsity=1, betawidth=-1,
                     seed=1234)
    cov_param = self.cov_params[cov_param_idx]
    sigma = gen_covariance(n_features,
                           cov_param['correlation'],
                           cov_param['block_size'],
                           cov_param['L'],
                           cov_param['t'])
    # NOTE: beta is generated with a fixed seed at the outset and then
    # sparsified per covariance setting (seeded by the block size). Open
    # question carried over from the author: is this reproducible
    # subsequently?
    beta_ = sparsify_beta(beta, cov_param['block_size'], sparsity=0.25,
                          seed=cov_param['block_size'])

    t0 = time.time()
    X, X_test, y, y_test, ss = gen_data(n_samples, n_features, kappa=5,
                                        covariance=sigma, beta=beta_)

    # Standardize the design and centre the response, so every estimator
    # is fitted without an intercept.
    X = StandardScaler().fit_transform(X)
    y -= np.mean(y)

    if algorithm == 0:
        lasso = LassoCV(fit_intercept=False, cv=5)
        lasso.fit(X, y.ravel())
        beta_hat = lasso.coef_
    elif algorithm == 1:
        uoi = UoI_Lasso(fit_intercept=False, estimation_score='r2')
        uoi.fit(X, y)
        beta_hat = uoi.coef_
    elif algorithm == 2:
        scad = PycassoCV(penalty='scad', fit_intercept=False, nfolds=5,
                         n_alphas=100)
        scad.fit(X, y)
        beta_hat = scad.coef_
    else:  # algorithm == 3, guaranteed by the guard above
        mcp = PycassoCV(penalty='mcp', fit_intercept=False, nfolds=5,
                        n_alphas=100)
        mcp.fit(X, y)
        beta_hat = mcp.coef_

    self.beta.append(beta_)
    self.beta_hat.append(beta_hat)
    self.task_signature.append((cov_param_idx, rep, algorithm))
    print('call successful, algorithm %d took %f seconds'
          % (algorithm, time.time() - t0))