예제 #1
0
def test_uoi_lasso_toy():
    """Check that UoI Lasso recovers the coefficients of a noiseless toy model.

    The coordinate-descent solver is always exercised; the pycasso solver is
    exercised only when the `pycasso` name is not None.
    """
    design = np.array([[-1, 2], [4, 1], [1, 3], [4, 3], [8, 11]], dtype=float)
    true_beta = np.array([1, 4], dtype=float)
    target = np.dot(design, true_beta)

    # selection_frac is chosen slightly smaller to ensure that we get good
    # test sets; the same settings are shared by both solvers
    shared_kwargs = dict(fit_intercept=False,
                         selection_frac=0.75,
                         estimation_frac=0.75)

    cd_model = UoI_Lasso(solver='cd', **shared_kwargs)
    cd_model.fit(design, target)
    # fit a second time with verbose output switched on
    cd_model.fit(design, target, verbose=True)
    assert_allclose(cd_model.coef_, true_beta)

    if pycasso is None:
        return

    pyc_model = UoI_Lasso(solver='pyc', **shared_kwargs)
    pyc_model.fit(design, target)
    assert_allclose(pyc_model.coef_, true_beta)
예제 #2
0
def test_choice_of_solver():
    """Check that the `solver` argument picks the selection model class."""
    # coordinate descent should be backed by Lasso
    cd_model = UoI_Lasso(solver='cd')
    cd_selection = cd_model._selection_lm
    assert isinstance(cd_selection, Lasso)

    # pycasso should be backed by PycLasso
    pyc_model = UoI_Lasso(solver='pyc')
    pyc_selection = pyc_model._selection_lm
    assert isinstance(pyc_selection, PycLasso)
예제 #3
0
def test_variable_selection():
    """Check that UoI_Lasso (with an MPI communicator) finds the true support.

    The indices of the non-zero fitted coefficients must equal the indices of
    the non-zero coefficients used to generate the data.
    """
    X, y, w = make_regression(coef=True, random_state=1)

    model = UoI_Lasso(comm=MPI.COMM_WORLD)
    model.fit(X, y)

    expected_support, = np.nonzero(w)
    found_support, = np.nonzero(model.coef_)
    assert_array_equal(expected_support, found_support)
    assert_array_almost_equal_nulp(expected_support, found_support)
예제 #4
0
def test_variable_selection():
    """Check that a default UoI_Lasso finds the true support.

    The indices of the non-zero fitted coefficients must equal the indices of
    the non-zero coefficients used to generate the data.
    """
    from sklearn.datasets import make_regression

    X, y, w = make_regression(coef=True, random_state=1)

    model = UoI_Lasso()
    model.fit(X, y)

    expected_support, = np.nonzero(w)
    found_support, = np.nonzero(model.coef_)
    assert_array_equal(expected_support, found_support)
    assert_array_almost_equal_nulp(expected_support, found_support)
예제 #5
0
def test_fit_intercept():
    """Check that `fit_intercept` is passed through to both linear models."""
    for flag in (True, False):
        model = UoI_Lasso(fit_intercept=flag)
        # both the selection and the estimation model must mirror the flag
        assert bool(model._selection_lm.fit_intercept) == flag
        assert bool(model._estimation_lm.fit_intercept) == flag
예제 #6
0
def test_intercept():
    """Test that UoI Lasso properly calculates the intercept when centering
    the response variable.

    The fitted intercept must satisfy
    ``intercept = mean(y) - mean(X, axis=0) . coef``.
    """

    X = np.array([[-1, 2], [0, 1], [1, 3], [4, 3]])
    y = np.array([8, 5, 14, 17])

    lasso = UoI_Lasso(normalize=False, fit_intercept=True)
    lasso.fit(X, y)

    expected_intercept = np.mean(y) - np.dot(X.mean(axis=0), lasso.coef_)
    # Compare with a tolerance: exact floating-point equality depends on the
    # order of operations used inside the estimator and is needlessly brittle.
    assert np.allclose(lasso.intercept_, expected_intercept)
예제 #7
0
def test_estimation_score_usage():
    """Check that each estimation score is accepted and yields its own value.

    A model is fit once per scoring method; the best score of every fit is
    collected and all collected values are required to be distinct.
    """
    score_names = ('r2', 'AIC', 'AICc', 'BIC')
    X, y = make_regression(n_features=10, n_informative=3, random_state=10)

    best_scores = []
    for name in score_names:
        model = UoI_Lasso(estimation_score=name)
        assert_equal(model.estimation_score, name)
        model.fit(X, y)
        best_scores.append(np.max(model.scores_))

    n_distinct = len(np.unique(best_scores))
    assert_equal(n_distinct, len(score_names))
예제 #8
0
def test_uoi_lasso_toy():
    """Check that UoI Lasso recovers the coefficients of a noiseless toy model."""
    design = np.array([[-1, 2], [4, 1], [1, 3], [4, 3], [8, 11]], dtype=float)
    true_beta = np.array([1, 4], dtype=float)
    target = np.dot(design, true_beta)

    # selection_frac is chosen slightly smaller to ensure that we get good
    # test sets
    settings = dict(fit_intercept=False,
                    selection_frac=0.75,
                    estimation_frac=0.75)
    model = UoI_Lasso(**settings)
    model.fit(design, target)

    assert_allclose(model.coef_, true_beta)
예제 #9
0
def test_pycasso_error():
    """Tests whether an error is raised if pycasso is not installed.

    Constructing a UoI_Lasso with the pycasso solver is expected to raise
    ImportError. Nothing else belongs inside the ``raises`` block: any
    statement after the raising call would never run.
    """

    with pytest.raises(ImportError):
        UoI_Lasso(solver='pyc')
예제 #10
0
def test_get_reg_params():
    """Check get_reg_params of UoI Lasso against hand-computed alphas."""
    X = np.array([[-1, 2], [0, 1], [1, 3], [4, 3]])
    y = np.array([7, 4, 13, 16])

    # reference values: largest alpha, then one tenth of it
    largest_alpha = np.max(np.dot(X.T, y) / 4)
    expected_params = [{'alpha': value}
                       for value in (largest_alpha, largest_alpha / 10.)]

    # values produced by the UoI_Lasso object
    model = UoI_Lasso(n_lambdas=2, fit_intercept=False, eps=0.1)
    computed_params = model.get_reg_params(X, y)

    # each entry must carry the same keys and (approximately) the same values
    for got, want in zip(computed_params, expected_params):
        assert got.keys() == want.keys()
        got_values = list(got.values())
        want_values = list(want.values())
        assert_allclose(got_values, want_values)
예제 #11
0
def test_lass_bad_est_score():
    """Check that an unknown estimation_score makes UoI Lasso raise ValueError.

    The error is expected somewhere between construction and the end of
    ``fit``; both happen inside the ``raises`` block.
    """
    features = np.random.randn(20, 5)
    response = np.random.randn(20)

    with pytest.raises(ValueError):
        model = UoI_Lasso(estimation_score='z',
                          n_boots_sel=10,
                          n_boots_est=10)
        model.fit(features, response)
예제 #12
0
def test_lasso_selection_sweep():
    """Check uoi_selection_sweep of UoI_Lasso against plain Lasso fits.

    For every regularization strength, the coefficients returned by the sweep
    must be close to those of a Lasso fit with the same settings.
    """
    # toy data
    design = np.array([[-1, 2, 3], [4, 1, -7], [1, 3, 1], [4, 3, 12],
                       [8, 11, 2]])
    true_beta = np.array([1, 4, 2])
    target = np.dot(design, true_beta)

    # toy regularization and the matching reference models
    strengths = (1.0, 2.0)
    reg_param_values = [{'alpha': strength} for strength in strengths]
    references = [Lasso(alpha=strength, fit_intercept=True, normalize=True)
                  for strength in strengths]
    uoi = UoI_Lasso(fit_intercept=True, normalize=True)

    sweep_coefs = uoi.uoi_selection_sweep(design, target, reg_param_values)
    for reference in references:
        reference.fit(design, target)

    for idx, reference in enumerate(references):
        assert np.allclose(sweep_coefs[idx], reference.coef_)
예제 #13
0
def test_lasso_selection_sweep():
    """Check uoi_selection_sweep of UoI_Lasso against plain Lasso fits.

    Warm starts are disabled and the reference Lasso models share the UoI
    model's max_iter. For every regularization strength, the sweep
    coefficients must be close to the reference coefficients.
    """
    # toy data
    design = np.array(
        [[-1, 2, 3], [4, 1, -7], [1, 3, 1], [4, 3, 12], [8, 11, 2]],
        dtype=float)
    true_beta = np.array([1, 4, 2], dtype=float)
    target = np.dot(design, true_beta)

    # toy regularization and the matching reference models
    strengths = (1.0, 2.0)
    reg_param_values = [{'alpha': strength} for strength in strengths]
    uoi = UoI_Lasso(fit_intercept=True, warm_start=False)
    references = [Lasso(alpha=strength,
                        fit_intercept=True,
                        max_iter=uoi.max_iter)
                  for strength in strengths]
    # set by hand because the sweep is called without a preceding fit
    uoi.output_dim = 1

    sweep_coefs = uoi.uoi_selection_sweep(design, target, reg_param_values)
    for reference in references:
        reference.fit(design, target)

    for idx, reference in enumerate(references):
        assert np.allclose(sweep_coefs[idx], reference.coef_)
예제 #14
0
def test_uoi_lasso_estimation_shape_match():
    """Check that _score_predictions rejects mismatched shapes.

    Two cases are covered: a 2-D coefficient array scored against 1-D
    targets, and a 3-D target array.
    """
    n_samples, n_features = 40, 10
    X, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=5,
                           random_state=1)

    model = UoI_Lasso()
    model.fit(X, y)

    # use every feature and two identical "bootstraps" over all samples
    full_support = np.ones(n_features).astype(bool)
    all_rows_twice = [np.arange(n_samples)] * 2

    # overwrite the coefficients with a 2-D array before scoring
    model.coef_ = np.random.randn(2, n_features)
    shape_message = 'Targets and predictions are not the same shape.'
    with pytest.raises(ValueError, match=shape_message):
        model._score_predictions('r2', model, X, y, full_support,
                                 all_rows_twice)

    # targets with two extra axes
    y_3d = y[:, np.newaxis, np.newaxis]
    with pytest.raises(ValueError, match='y should either have'):
        model._score_predictions('r2', model, X, y_3d, full_support,
                                 all_rows_twice)
예제 #15
0
def test_uoi_lasso_fit_shape_match():
    """Check which target shapes UoI Lasso's fit accepts.

    A column-vector target must be accepted; a two-column target and a 3-D
    target must raise ValueError.
    """
    n_samples, n_features = 40, 10
    X, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           n_informative=5,
                           random_state=1)
    model = UoI_Lasso()

    # a trailing singleton axis is accepted
    y_column = y[:, np.newaxis]
    model.fit(X, y_column)

    # anything wider or higher-dimensional is rejected
    expected_message = 'y should either have shape'
    y_two_columns = np.tile(y_column, (1, 2))
    with pytest.raises(ValueError, match=expected_message):
        model.fit(X, y_two_columns)

    y_3d = y[:, np.newaxis, np.newaxis]
    with pytest.raises(ValueError, match=expected_message):
        model.fit(X, y_3d)
예제 #16
0
def test_intercept_and_coefs_no_selection():
    """Check intercept and coefficients with and without standardization.

    Both settings must recover the generating intercept and coefficients to
    within a relative tolerance of 0.25.
    """
    # create linear model
    X, y, beta, intercept = make_linear_regression(n_samples=500,
                                                   n_features=2,
                                                   n_informative=2,
                                                   snr=10.,
                                                   include_intercept=True,
                                                   random_state=2332)

    # first without standardization, then with it
    for use_standardize in (False, True):
        model = UoI_Lasso(standardize=use_standardize, fit_intercept=True)
        model.fit(X, y)
        assert_allclose(model.intercept_, intercept, rtol=0.25)
        assert_allclose(model.coef_, beta, rtol=0.25)
예제 #17
0
def test_set_random_state():
    """Check how random_state affects the fitted coefficients.

    Equal seeds must give equal coefficients; different seeds, or no seed at
    all, must give different coefficients.
    """
    X, y = make_regression(n_features=5,
                           n_informative=3,
                           random_state=16,
                           noise=.5)

    # same state
    seeded_a, seeded_b = UoI_Lasso(random_state=13), UoI_Lasso(random_state=13)
    for model in (seeded_a, seeded_b):
        model.fit(X, y)
    assert_array_equal(seeded_a.coef_, seeded_b.coef_)

    # different state
    other_seed = UoI_Lasso(random_state=14)
    other_seed.fit(X, y)
    assert not np.array_equal(seeded_a.coef_, other_seed.coef_)

    # different state, not set
    unseeded_a, unseeded_b = UoI_Lasso(), UoI_Lasso()
    for model in (unseeded_a, unseeded_b):
        model.fit(X, y)
    assert not np.array_equal(unseeded_a.coef_, unseeded_b.coef_)
예제 #18
0
    def calculate_strf_for_neurons(self,
                                   method,
                                   recording_idx,
                                   window_length=0.5,
                                   cells=None,
                                   test_frac=None,
                                   return_scores=False,
                                   verbose=False,
                                   **kwargs):
        """Calculates the STRFs for specified neurons and a specified method.

        For every cell and every frame of the window, a regression of the
        cell's response on the stimulus is fit. After each frame the response
        matrix is rolled by one sample along its first axis, so consecutive
        frames are fit against responses shifted by one sample.

        Parameters
        ----------
        method : string
            The regression method to use when calculating STRFs. One of
            'OLS', 'Ridge', 'Lasso' or 'UoI_Lasso'.

        recording_idx : int
            The recording index to obtain design and response matrices.

        window_length : float
            The number of seconds to fit in STRF window.

        cells : int, list, np.ndarray or None
            The set of cell indices under consideration. If None, creates a
            numpy array of all cells.

        test_frac : float or None
            The fraction of data to use as a test set. If None, the entire set
            will be used only for training. Otherwise the first
            ``int(test_frac * n_samples)`` samples form the test set and the
            remaining samples the training set.

        return_scores : bool
            A flag indicating whether to return explained variance over window.

        verbose : bool
            If True, function will output which frame it is currently fitting.

        **kwargs
            Optional hyperparameters forwarded to the chosen estimator:
            'cv' (Ridge, Lasso), 'normalize' and 'max_iter' (Lasso), and
            'standardize', 'n_boots_sel', 'n_boots_est', 'selection_frac',
            'estimation_frac', 'n_lambdas', 'stability_selection' and
            'estimation_score' (UoI_Lasso). Missing keys fall back to the
            defaults written in the code below.

        Returns
        -------
        strf : np.ndarray, shape (n_cells, n_frames_per_window, n_features)
            A numpy array containing the spatio-temporal receptive field.
            Singleton dimensions are squeezed out before returning.

        intercepts : np.ndarray, shape (n_cells, n_frames_per_window)
            A numpy array containing the intercepts for the STRFs. This array
            is not squeezed.

        training_scores : tuple of np.ndarrays, each with shape
                            (n_cells, n_frames_per_window)
            A tuple ``(r2, BIC, AIC)`` of (squeezed) numpy arrays containing
            scores measuring the predictive power of the STRF for each frame
            in the window. Returned only if requested.

        test_scores : tuple of np.ndarrays, each with shape
                            (n_cells, n_frames_per_window)
            A tuple ``(r2, BIC, AIC)`` of (squeezed) numpy arrays containing
            scores measuring the predictive power of the STRF, but on a test
            set for each frame in the window. Returned only if requested. If
            the test fraction is None, test_scores will be returned as None.

        Raises
        ------
        ValueError
            If `method` is not one of the supported method names.
        """

        # set up array of cells to iterate over
        cells = self.check_cells(cells=cells)

        # extract design and response matrices
        stimuli = self.get_stims_for_recording(recording_idx=recording_idx,
                                               window_length=window_length)
        responses = self.get_responses_for_recording(
            recording_idx=recording_idx,
            window_length=window_length,
            cells=cells)
        # number of frames that will appear in window length
        n_frames_per_window = self.get_n_frames_per_window(
            recording_idx=recording_idx, window_length=window_length)

        # create object to perform fitting
        if method == 'OLS':
            fitter = LinearRegression()

        elif method == 'Ridge':
            fitter = RidgeCV(cv=kwargs.get('cv', 5))

        elif method == 'Lasso':
            fitter = LassoCV(normalize=kwargs.get('normalize', True),
                             cv=kwargs.get('cv', 5),
                             max_iter=kwargs.get('max_iter', 10000))

        elif method == 'UoI_Lasso':
            fitter = UoI_Lasso(
                standardize=kwargs.get('standardize', True),
                n_boots_sel=kwargs.get('n_boots_sel', 30),
                n_boots_est=kwargs.get('n_boots_est', 30),
                selection_frac=kwargs.get('selection_frac', 0.8),
                estimation_frac=kwargs.get('estimation_frac', 0.8),
                n_lambdas=kwargs.get('n_lambdas', 30),
                stability_selection=kwargs.get('stability_selection', 1.),
                estimation_score=kwargs.get('estimation_score', 'BIC'))

        else:
            # %s, not %g: `method` is a string, and %g would raise a
            # TypeError that masks the intended ValueError
            raise ValueError('Method %s is not available.' % method)

        # extract dimensions and create storage; the stimulus matrix is laid
        # out as (features, samples)
        n_features, n_samples = stimuli.shape
        n_cells = cells.size
        strf = np.zeros((n_cells, n_frames_per_window, n_features))
        intercepts = np.zeros((n_cells, n_frames_per_window))

        # training and test score storage
        r2s_training = np.zeros((n_cells, n_frames_per_window))
        aics_training = np.zeros((n_cells, n_frames_per_window))
        bics_training = np.zeros((n_cells, n_frames_per_window))

        # if we evaluate on a test set, split up the data
        if test_frac is not None:
            n_test_samples = int(test_frac * n_samples)

            # split up stimulus
            # the samples axis is different for the stimuli and responses
            # matrices
            stimuli_test, stimuli = np.split(stimuli, [n_test_samples], axis=1)
            responses_test, responses = np.split(responses, [n_test_samples],
                                                 axis=0)
            r2s_test = np.zeros((n_cells, n_frames_per_window))
            aics_test = np.zeros((n_cells, n_frames_per_window))
            bics_test = np.zeros((n_cells, n_frames_per_window))

        # iterate over cells
        for cell_idx, cell in enumerate(cells):
            if verbose:
                print('Cell ', cell)
            # copy response matrix so rolling never touches the original
            responses_copy = np.copy(responses)
            if test_frac is not None:
                responses_test_copy = np.copy(responses_test)

            # iterate over frames in window
            for frame in range(n_frames_per_window):
                if verbose:
                    print('  Frame ', frame)
                # perform fit
                fitter.fit(stimuli.T, responses_copy[:, cell_idx])
                # extract coefficients
                strf[cell_idx, frame, :] = fitter.coef_.T
                intercepts[cell_idx, frame] = fitter.intercept_

                # scores
                y_true = responses_copy[:, cell_idx]
                y_pred = fitter.intercept_ + np.dot(stimuli.T, fitter.coef_)
                # model size used by the information criteria: the non-zero
                # coefficients plus one for the intercept
                n_model_params = np.count_nonzero(fitter.coef_) + 1

                # explained variance
                r2s_training[cell_idx, frame] = r2_score(y_true=y_true,
                                                         y_pred=y_pred)

                # bics
                bics_training[cell_idx,
                              frame] = self.BIC(y_true=y_true,
                                                y_pred=y_pred,
                                                n_features=n_model_params)

                # aics
                aics_training[cell_idx,
                              frame] = self.AIC(y_true=y_true,
                                                y_pred=y_pred,
                                                n_features=n_model_params)

                # roll the window up
                responses_copy = np.roll(responses_copy, -1, axis=0)

                # act on test set if necessary
                if test_frac is not None:
                    y_true_test = responses_test_copy[:, cell_idx]
                    y_pred_test = fitter.intercept_ \
                        + np.dot(stimuli_test.T, fitter.coef_)

                    # explained variance
                    r2s_test[cell_idx, frame] = r2_score(y_true=y_true_test,
                                                         y_pred=y_pred_test)

                    # bics
                    bics_test[cell_idx,
                              frame] = self.BIC(y_true=y_true_test,
                                                y_pred=y_pred_test,
                                                n_features=n_model_params)

                    # aics
                    aics_test[cell_idx,
                              frame] = self.AIC(y_true=y_true_test,
                                                y_pred=y_pred_test,
                                                n_features=n_model_params)

                    # roll the window up
                    responses_test_copy = np.roll(responses_test_copy,
                                                  -1,
                                                  axis=0)

        # get rid of potential unnecessary dimensions
        strf = np.squeeze(strf)
        r2s_training = np.squeeze(r2s_training)
        bics_training = np.squeeze(bics_training)
        aics_training = np.squeeze(aics_training)
        training_scores = (r2s_training, bics_training, aics_training)

        if test_frac is not None:
            r2s_test = np.squeeze(r2s_test)
            bics_test = np.squeeze(bics_test)
            aics_test = np.squeeze(aics_test)
            test_scores = (r2s_test, bics_test, aics_test)
        else:
            test_scores = None

        if return_scores:
            return strf, intercepts, training_scores, test_scores
        else:
            return strf, intercepts
예제 #19
0
    def __call__(self, task_tuple):
        """Run one simulation task and record its outcome on the instance.

        Parameters
        ----------
        task_tuple : sequence
            The first three entries are read as
            ``(cov_param_idx, rep, algorithm)``. `cov_param_idx` indexes
            ``self.cov_params``, `rep` is only stored in the task signature,
            and `algorithm` selects the estimator: 0 for LassoCV, 1 for
            UoI_Lasso, 2 for PycassoCV with the SCAD penalty, 3 for PycassoCV
            with the MCP penalty.

        Raises
        ------
        ValueError
            If `algorithm` is not 0, 1, 2 or 3.

        Notes
        -----
        Appends the sparsified true coefficients to ``self.beta``, the
        estimated coefficients to ``self.beta_hat`` and the task signature to
        ``self.task_signature``, then prints the elapsed time, which includes
        data generation.
        """

        cov_param_idx = task_tuple[0]
        rep = task_tuple[1]
        algorithm = task_tuple[2]

        # Reject unknown algorithms before doing any work. Otherwise no branch
        # below would bind `beta_hat`, and the method would crash only after
        # `self.beta` had already been appended to.
        if algorithm not in (0, 1, 2, 3):
            raise ValueError('Unknown algorithm index %r; expected 0, 1, 2 '
                             'or 3.' % (algorithm,))

        n_features = self.n_features
        n_samples = self.n_samples

        beta = gen_beta2(n_features,
                         n_features,
                         sparsity=1,
                         betawidth=-1,
                         seed=1234)

        cov_param = self.cov_params[cov_param_idx]

        sigma = gen_covariance(n_features, cov_param['correlation'],
                               cov_param['block_size'], cov_param['L'],
                               cov_param['t'])

        beta_ = sparsify_beta(beta,
                              cov_param['block_size'],
                              sparsity=0.25,
                              seed=cov_param['block_size'])

        # Follow the procedure of generating beta with a fixed betaseed at the
        # getgo and then sparsifying as one goes on. Is this reproducible
        # subsequently?

        t0 = time.time()
        # only the training design and response are used; the remaining
        # outputs of gen_data are discarded
        X, _, y, _, _ = gen_data(n_samples,
                                 n_features,
                                 kappa=5,
                                 covariance=sigma,
                                 beta=beta_)

        # Standardize
        X = StandardScaler().fit_transform(X)
        y -= np.mean(y)

        if algorithm == 0:
            lasso = LassoCV(fit_intercept=False, cv=5)
            lasso.fit(X, y.ravel())
            beta_hat = lasso.coef_

        elif algorithm == 1:

            uoi = UoI_Lasso(fit_intercept=False, estimation_score='r2')
            uoi.fit(X, y)
            beta_hat = uoi.coef_

        elif algorithm == 2:
            scad = PycassoCV(penalty='scad',
                             fit_intercept=False,
                             nfolds=5,
                             n_alphas=100)
            scad.fit(X, y)
            beta_hat = scad.coef_

        elif algorithm == 3:

            mcp = PycassoCV(penalty='mcp',
                            fit_intercept=False,
                            nfolds=5,
                            n_alphas=100)

            mcp.fit(X, y)
            beta_hat = mcp.coef_

        self.beta.append(beta_)
        self.beta_hat.append(beta_hat)
        self.task_signature.append((cov_param_idx, rep, algorithm))
        print('call successful, algorithm %d took %f seconds' %
              (algorithm, time.time() - t0))