def worker(
        boot_inds,
        X,
        y,
        X_noise=0.01,
        alpha=0.9,
        lambda_path=np.geomspace(1e-3, 1e-6, num=100),
):

    X_boot = X[boot_inds, :]
    y_boot = y[boot_inds]

    X_boot = scale(
        scale(X_boot +
              np.random.normal(scale=X_noise * 1e-6, size=X_boot.shape)) +
        np.random.normal(scale=X_noise, size=X_boot.shape))

    m = LogitNet(
        alpha=alpha,
        lambda_path=lambda_path,
        fit_intercept=False,
    )
    m.fit(X_boot, y_boot)

    lambdas_enet = m.lambda_path_
    coefs_enet = m.coef_path_.squeeze()

    return {
        "beta": coefs_enet != 0,
        "lambda_path": lambdas_enet,
    }
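
A minimal driver sketch for the bootstrap worker above (hypothetical, not part of the original project; the imports mirror what the snippet itself relies on, and make_classification only supplies illustrative data):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import scale  # resolved inside worker
from glmnet import LogitNet              # resolved inside worker

X, y = make_classification(n_samples=200, n_features=50, random_state=0)
rng = np.random.default_rng(0)

# Draw bootstrap index sets and collect the nonzero-coefficient masks.
results = [worker(rng.integers(0, len(y), size=len(y)), X, y)
           for _ in range(10)]

# Per-feature selection frequency along the lambda path.
selection_freq = np.mean([r["beta"] for r in results], axis=0)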
Example #2
 def test_max_features(self):
     max_features = 5
     m = LogitNet(random_state=1, max_features=max_features)
     x, y = self.multinomial[3]
     m = m.fit(x, y)
     num_features = np.count_nonzero(m.coef_, axis=1)
     self.assertTrue(np.all(num_features <= max_features))
Example #3
    def likelihood(self, y_obs, y_sim):
        if not isinstance(y_obs, list):
            raise TypeError('Observed data is not of allowed types')

        if not isinstance(y_sim, list):
            raise TypeError('Simulated data is not of allowed types')

        # Extract summary statistics from the observed data
        if (self.stat_obs is None or self.data_set != y_obs):
            self.stat_obs = self.statistics_calc.statistics(y_obs)
            self.data_set = y_obs

        # Extract summary statistics from the simulated data
        stat_sim = self.statistics_calc.statistics(y_sim)

        # Compute the approximate likelihood for the y_obs given theta
        y = np.append(np.zeros(self.n_simulate), np.ones(self.n_simulate))
        X = np.array(np.concatenate((stat_sim, self.ref_data_stat), axis=0))
        m = LogitNet(alpha=1,
                     n_splits=self.n_folds,
                     max_iter=self.max_iter,
                     random_state=self.seed)
        m = m.fit(X, y)
        result = np.exp(-np.sum(
            (m.intercept_ +
             np.sum(np.multiply(m.coef_, self.stat_obs), axis=1)),
            axis=0))

        return result
Example #4
File: classifier.py Project: elfi-dev/zoo
 def __init__(self, config=None, parallel_cv=True, class_min=0.005):
     """Initializes logistic regression classifier."""
     self.config = self._resolve_config(config, parallel_cv)
     self.model = LogitNet(**self.config)
     self.class_min = class_min
     self.parameter_names = ['lambda', 'intercept', 'coef']
     self.store = {}
Example #5
    def distance(self, d1, d2):
        """Calculates the distance between two datasets.

        Parameters
        ----------
        d1, d2: list
            A list, containing a list describing the data set
        """
        if not isinstance(d1, list):
            raise TypeError('Data is not of allowed types')
        if not isinstance(d2, list):
            raise TypeError('Data is not of allowed types')

        # Extract summary statistics from the dataset
        if (self.s1 is None or self.data_set != d1):
            self.s1 = self.statistics_calc.statistics(d1)
            self.data_set = d1
        s2 = self.statistics_calc.statistics(d2)

        # compute distance between the statistics
        training_set_features = np.concatenate((self.s1, s2), axis=0)
        label_s1 = np.zeros(shape=(len(self.s1), 1))
        label_s2 = np.ones(shape=(len(s2), 1))
        training_set_labels = np.concatenate((label_s1, label_s2),
                                             axis=0).ravel()

        m = LogitNet(alpha=1, n_splits=10)
        m = m.fit(training_set_features, training_set_labels)
        distance = 2.0 * (m.cv_mean_score_[np.where(
            m.lambda_path_ == m.lambda_max_)[0][0]] - 0.5)

        return distance
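
The returned value rescales the cross-validated score (accuracy, by default) so that chance level (0.5) maps to a distance of 0 and perfect separation maps to 1. A self-contained sketch of the same computation on synthetic summary statistics (all data here is illustrative):

import numpy as np
from glmnet import LogitNet

rng = np.random.default_rng(42)
s1 = rng.normal(0.0, 1.0, size=(60, 5))  # statistics from dataset 1
s2 = rng.normal(0.5, 1.0, size=(60, 5))  # statistics from dataset 2

features = np.concatenate((s1, s2), axis=0)
labels = np.concatenate((np.zeros(len(s1)), np.ones(len(s2))))

m = LogitNet(alpha=1, n_splits=10).fit(features, labels)

# Score at the lambda with the best CV score, rescaled from [0.5, 1] to [0, 1].
score = m.cv_mean_score_[np.where(m.lambda_path_ == m.lambda_max_)[0][0]]
print(2.0 * (score - 0.5))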
Example #6
 def test_one_row_predict_proba(self):
     # Verify that predict_proba on one row gives 2D output
     m = LogitNet(random_state=42)
     for X, y in itertools.chain(self.binomial, self.multinomial):
         m.fit(X, y)
         p = m.predict_proba(X[0].reshape((1, -1)))
         assert p.shape == (1, len(np.unique(y)))
Example #7
 def test_one_row_predict(self):
     # Verify that predicting on one row gives only one row of output
     m = LogitNet(random_state=42)
     for X, y in itertools.chain(self.binomial, self.multinomial):
         m.fit(X, y)
         p = m.predict(X[0].reshape((1, -1)))
         assert p.shape == (1,)
Example #8
 def test_one_row_predict(self):
     # Verify that predicting on one row gives only one row of output
     m = LogitNet(random_state=42)
     for X, y in itertools.chain(self.binomial, self.multinomial):
         m.fit(X, y)
         p = m.predict(X[0].reshape((1, -1)))
         assert p.shape == (1,)
Example #9
 def test_one_row_predict_proba(self):
     # Verify that predict_proba on one row gives 2D output
     m = LogitNet(random_state=42)
     for X, y in itertools.chain(self.binomial, self.multinomial):
         m.fit(X, y)
         p = m.predict_proba(X[0].reshape((1, -1)))
         assert p.shape == (1, len(np.unique(y)))
Example #10
def train_glmnet(train,
                 test,
                 save_path_pred,
                 save_path_model,
                 save_path_json,
                 n_cores=5):
    ln = LogitNet(alpha=0.5, n_splits=10, n_jobs=n_cores)
    # to sparse
    train_sparse = (csc_matrix(train[0]),
                    csc_matrix(train[1].astype(np.float64).reshape((-1, 1))))
    test_sparse = (csc_matrix(test[0]),
                   csc_matrix(test[1].astype(np.float64).reshape((-1, 1))))

    print("train the model")
    ln.fit(train_sparse[0], train[1])

    print("get predictions")
    y_pred = ln.predict_proba(test_sparse[0])[:, 1]
    auprc = cem.auprc(test[1], y_pred)
    auc = cem.auc(test[1], y_pred)

    # csv
    print("save csv")
    dt = pd.DataFrame({"y_true": test[1], "y_pred": y_pred})
    dt.to_csv(save_path_pred)

    # json
    print("save json")
    write_json({"auprc": auprc, "auc": auc}, save_path_json)
    # model
    print("save model")
    pickle.dump(ln, open(save_path_model, "wb"))
Example #11
 def test_max_features(self):
     max_features = 5
     m = LogitNet(random_state=1, max_features=max_features)
     x, y = self.multinomial[3]
     m = m.fit(x, y)
     num_features = np.count_nonzero(m.coef_, axis=1)
     self.assertTrue(np.all(num_features <= max_features))
Example #12
 def test_random_state_cv(self):
     random_state = 133
     m = LogitNet(random_state=random_state)
     x, y = self.binomial[0]
     m.fit(x, y)
     print(dir(m._cv))
     assert m._cv.random_state == random_state
Example #13
 def test_random_state_cv(self):
     random_state = 133
     m = LogitNet(random_state=random_state)
     x, y = self.binomial[0]
     m.fit(x, y)
     print(dir(m._cv))
     assert m._cv.random_state == random_state
Example #14
    def update(self, batch, batch_index):
        """Updates the inference state with a new batch and performs LFIRE.

        Parameters
        ----------
        batch: dict
        batch_index: int

        """
        # TODO: beautify this
        super(LFIRE, self).update(batch, batch_index)

        # Parse likelihood values
        likelihood = [
            batch[summary_name] for summary_name in self.summary_names
        ]
        likelihood = np.column_stack(likelihood)

        # Create training data
        X = np.vstack((likelihood, self.marginal))
        y = np.concatenate((np.ones(likelihood.shape[0]),
                            -1 * np.ones(self.marginal.shape[0])))

        # Logistic regression
        m = LogitNet(**self.logreg_config)
        m.fit(X, y)

        # Likelihood value
        log_likelihood_value = m.intercept_ + np.sum(
            np.multiply(m.coef_, self.observed))
        likelihood_value = np.exp(log_likelihood_value)

        # Joint prior value
        parameter_values = [
            batch[parameter_name] for parameter_name in self.parameter_names
        ]
        joint_prior_value = self.joint_prior.pdf(parameter_values)

        # Posterior value
        posterior_value = joint_prior_value * likelihood_value

        # Check if posterior value is non-finite
        if np.isinf(posterior_value):
            params = self.params_grid[batch_index]
            warnings.warn(
                f'Posterior value is not finite for parameters '
                f'{self.parameter_names} = {params} and thus will be replaced with zero!',
                RuntimeWarning)
            posterior_value = 0
            for i, parameter_name in enumerate(self.parameter_names):
                self.state['infinity'][parameter_name] += [params[i]]

        # Update state dictionary
        self.state['posterior'][batch_index] = posterior_value
        self.state['lambda'][batch_index] = m.lambda_best_
        self.state['coef'][batch_index, :] = m.coef_
        self.state['intercept'][batch_index] = m.intercept_
        for parameter_name in self.parameter_names:
            self.state[parameter_name][batch_index] = batch[parameter_name]
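
The logistic regression above estimates a log density ratio between summaries simulated under theta and under the marginal, so exponentiating the fitted linear predictor at the observed summaries gives the likelihood estimate used by LFIRE. A standalone sketch of that core step (shapes and data are illustrative):

import numpy as np
from glmnet import LogitNet

rng = np.random.default_rng(7)
sims_theta = rng.normal(1.0, 1.0, size=(100, 3))     # summaries under theta
sims_marginal = rng.normal(0.0, 1.0, size=(100, 3))  # summaries under the marginal

X = np.vstack((sims_theta, sims_marginal))
y = np.concatenate((np.ones(100), -np.ones(100)))

m = LogitNet(alpha=1, n_splits=5).fit(X, y)

observed = rng.normal(1.0, 1.0, size=(1, 3))
likelihood_value = np.exp(m.intercept_ + np.sum(m.coef_ * observed))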
Example #15
 def test_coef_limits(self):
     x, y = self.binomial[0]
     lower_limits = np.repeat(-1, x.shape[1])
     upper_limits = 0
     m = LogitNet(lower_limits=lower_limits, upper_limits=upper_limits, random_state=69265, alpha=0)
     m = m.fit(x, y)
     assert(np.all(m.coef_ >= -1))
     assert(np.all(m.coef_ <= 0))
Example #16
    def test_with_pandas_df(self):
        x, y = make_classification(random_state=1105)
        df = pd.DataFrame(x)
        df['y'] = y

        m = LogitNet(n_folds=3, random_state=123)
        m = m.fit(df.drop(['y'], axis=1), df.y)
        sanity_check_logistic(m, x)
Example #17
    def test_with_pandas_df(self):
        x, y = make_classification(random_state=1105)
        df = pd.DataFrame(x)
        df['y'] = y

        m = LogitNet(n_splits=3, random_state=123)
        m = m.fit(df.drop(['y'], axis=1), df.y)
        sanity_check_logistic(m, x)
Example #18
    def test_predict_without_cv(self):
        x, y = self.binomial[0]
        m = LogitNet(n_folds=0, random_state=399001)
        m = m.fit(x, y)

        # should not make prediction unless value is passed for lambda
        with self.assertRaises(ValueError):
            m.predict(x)
Example #19
 def test_one_row_predict_proba_with_lambda(self):
     # One row to predict_proba along with lambdas should give 3D output
     m = LogitNet(random_state=42)
     lamb = [0.01, 0.02, 0.04, 0.1]
     for X, y in itertools.chain(self.binomial, self.multinomial):
         m.fit(X, y)
         p = m.predict_proba(X[0].reshape((1, -1)), lamb=lamb)
         assert p.shape == (1, len(np.unique(y)), len(lamb))
Example #20
 def test_one_row_predict_proba_with_lambda(self):
     # One row to predict_proba along with lambdas should give 3D output
     m = LogitNet(random_state=42)
     lamb = [0.01, 0.02, 0.04, 0.1]
     for X, y in itertools.chain(self.binomial, self.multinomial):
         m.fit(X, y)
         p = m.predict_proba(X[0].reshape((1, -1)), lamb=lamb)
         assert p.shape == (1, len(np.unique(y)), len(lamb))
Example #21
    def test_predict_without_cv(self):
        x, y = self.binomial[0]
        m = LogitNet(n_splits=0, random_state=399001)
        m = m.fit(x, y)

        # should not make prediction unless value is passed for lambda
        with self.assertRaises(ValueError):
            m.predict(x)
Example #22
 def test_coef_limits(self):
     x, y = self.binomial[0]
     lower_limits = 0
     upper_limits = np.repeat(1, x.shape[1])
     m = LogitNet(lower_limits=lower_limits,
                  upper_limits=upper_limits,
                  random_state=69265)
     m = m.fit(x, y)
     assert np.all(m.coef_ >= 0)
     assert np.all(m.coef_ <= 1)
Example #23
    def test_single_class_exception(self):
        x, y = self.binomial[0]
        y = np.ones_like(y)
        m = LogitNet()

        with self.assertRaises(ValueError) as e:
            m.fit(x, y)

        self.assertEqual("Training data need to contain at least 2 classes.",
                         str(e.exception))
Example #24
    def test_single_class_exception(self):
        x, y = self.binomial[0]
        y = np.ones_like(y)
        m = LogitNet()

        with self.assertRaises(ValueError) as e:
            m.fit(x, y)

        self.assertEqual("Training data need to contain at least 2 classes.",
                         str(e.exception))
Example #25
 def test_n_splits(self):
     x, y = self.binomial[0]
     for n in self.n_splits:
         m = LogitNet(n_splits=n, random_state=46657)
         if n > 0 and n < 3:
             with self.assertRaisesRegex(ValueError,
                                         "n_splits must be at least 3"):
                 m = m.fit(x, y)
         else:
             m = m.fit(x, y)
             sanity_check_logistic(m, x)
Example #26
 def test_n_folds(self):
     x, y = self.binomial[0]
     for n in self.n_folds:
         m = LogitNet(n_folds=n, random_state=46657)
         if n > 0 and n < 3:
             with self.assertRaisesRegex(ValueError,
                                         "n_folds must be at least 3"):
                 m = m.fit(x, y)
         else:
             m = m.fit(x, y)
             sanity_check_logistic(m, x)
Example #27
    def test_cv_scoring_multinomial(self):
        x, y = self.multinomial[0]
        for method in self.scoring:
            m = LogitNet(scoring=method, random_state=488881)

            if method in self.multinomial_scoring:
                m = m.fit(x, y)
                check_accuracy(y, m.predict(x), 0.65, scoring=method)
            else:
                with self.assertRaises(ValueError):
                    m.fit(x, y)
Example #28
 def test_coef_limits(self):
     x, y = self.binomial[0]
     lower_limits = np.repeat(-1, x.shape[1])
     upper_limits = 0
     m = LogitNet(lower_limits=lower_limits,
                  upper_limits=upper_limits,
                  random_state=69265,
                  alpha=0)
     m = m.fit(x, y)
     assert (np.all(m.coef_ >= -1))
     assert (np.all(m.coef_ <= 0))
Example #29
    def test_with_defaults(self):
        m = LogitNet(random_state=29341)
        for x, y in itertools.chain(self.binomial, self.multinomial):
            m = m.fit(x, y)
            sanity_check_logistic(m, x)

            # check selection of lambda_best
            assert m.lambda_best_inx_ <= m.lambda_max_inx_

            # check full path predict
            p = m.predict(x, lamb=m.lambda_path_)
            assert p.shape[-1] == m.lambda_path_.size
Example #30
    def test_with_defaults(self):
        m = LogitNet(random_state=29341)
        for x, y in itertools.chain(self.binomial, self.multinomial):
            m = m.fit(x, y)
            sanity_check_logistic(m, x)

            # check selection of lambda_best
            ok_(m.lambda_best_inx_ <= m.lambda_max_inx_)

            # check full path predict
            p = m.predict(x, lamb=m.lambda_path_)
            eq_(p.shape[-1], m.lambda_path_.size)
Example #31
    def test_use_sample_weights(self):
        x, y = self.multinomial[1]
        class_0_idx = np.where(y == 0)
        to_drop = class_0_idx[0][:-3]
        to_keep = np.ones(len(y), dtype=bool)
        to_keep[to_drop] = False
        y = y[to_keep]
        x = x[to_keep, :]
        sample_weight = class_weight.compute_sample_weight('balanced', y)
        sample_weight[0] = 0.

        unweighted = LogitNet(random_state=2, scoring='f1_micro')
        unweighted = unweighted.fit(x, y)
        unweighted_acc = f1_score(y,
                                  unweighted.predict(x),
                                  sample_weight=sample_weight,
                                  average='micro')

        weighted = LogitNet(random_state=2, scoring='f1_micro')
        weighted = weighted.fit(x, y, sample_weight=sample_weight)
        weighted_acc = f1_score(y,
                                weighted.predict(x),
                                sample_weight=sample_weight,
                                average='micro')

        self.assertTrue(weighted_acc >= unweighted_acc)
Example #32
    def test_coef_interpolation(self):
        x, y = self.binomial[0]
        m = LogitNet(n_folds=0, random_state=561)
        m = m.fit(x, y)

        # predict for a value of lambda between two values on the computed path
        lamb_lo = m.lambda_path_[1]
        lamb_hi = m.lambda_path_[2]

        # a value not equal to one on the computed path
        lamb_mid = (lamb_lo + lamb_hi) / 2.0

        pred_lo = m.predict_proba(x, lamb=lamb_lo)
        pred_hi = m.predict_proba(x, lamb=lamb_hi)
        pred_mid = m.predict_proba(x, lamb=lamb_mid)

        self.assertFalse(np.allclose(pred_lo, pred_mid))
        self.assertFalse(np.allclose(pred_hi, pred_mid))
Example #33
    def distance(self, d1, d2):
        # Extract summary statistics from the dataset
        s1 = self.statistics_calc.statistics(d1)
        s2 = self.statistics_calc.statistics(d2)

        # compute distance between the statistics
        training_set_features = np.concatenate((s1, s2), axis=0)
        label_s1 = np.zeros(shape=(len(s1), 1))
        label_s2 = np.ones(shape=(len(s2), 1))
        training_set_labels = np.concatenate((label_s1, label_s2),
                                             axis=0).ravel()

        m = LogitNet(alpha=1, n_splits=10)
        m = m.fit(training_set_features, training_set_labels)
        distance = 2.0 * (m.cv_mean_score_[np.where(
            m.lambda_path_ == m.lambda_max_)[0][0]] - 0.5)

        return distance
Example #34
    def test_coef_interpolation(self):
        x, y = self.binomial[0]
        m = LogitNet(n_splits=0, random_state=561)
        m = m.fit(x, y)

        # predict for a value of lambda between two values on the computed path
        lamb_lo = m.lambda_path_[1]
        lamb_hi = m.lambda_path_[2]

        # a value not equal to one on the computed path
        lamb_mid = (lamb_lo + lamb_hi) / 2.0

        pred_lo = m.predict_proba(x, lamb=lamb_lo)
        pred_hi = m.predict_proba(x, lamb=lamb_hi)
        pred_mid = m.predict_proba(x, lamb=lamb_mid)

        self.assertFalse(np.allclose(pred_lo, pred_mid))
        self.assertFalse(np.allclose(pred_hi, pred_mid))
Example #35
    def distance(self, d1, d2):
        """Calculates the distance between two datasets.

        Parameters
        ----------
        d1: Python list
            Contains n1 data points.
        d2: Python list
            Contains n2 data points.

        Returns
        -------
        numpy.float
            The distance between the two input data sets.
        """
        s1, s2 = self._calculate_summary_stat(d1, d2)
        self.n_simulate = s1.shape[0]

        if not s2.shape[0] == self.n_simulate:
            raise RuntimeError(
                "The number of simulations in the two data sets should be the same in order for "
                "the classification accuracy implemented in PenLogReg to be a proper distance. Please "
                "check that `n_samples` in the `sample()` method for the sampler is equal to "
                "the number of datasets in the observations.")

        # compute distance between the statistics
        training_set_features = np.concatenate((s1, s2), axis=0)
        label_s1 = np.zeros(shape=(len(s1), 1))
        label_s2 = np.ones(shape=(len(s2), 1))
        training_set_labels = np.concatenate((label_s1, label_s2),
                                             axis=0).ravel()

        groups = np.repeat(np.arange(self.n_folds),
                           int(np.ceil(self.n_simulate / self.n_folds)))
        groups = groups[:self.n_simulate].tolist()
        groups += groups  # duplicate it as groups need to be defined for both datasets
        m = LogitNet(
            alpha=1,
            n_splits=self.n_folds)  # note we are not using random seed here!
        m = m.fit(training_set_features, training_set_labels, groups=groups)
        distance = 2.0 * (m.cv_mean_score_[np.where(
            m.lambda_path_ == m.lambda_max_)[0][0]] - 0.5)

        return distance
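
The groups passed to fit() give the i-th simulation of each dataset the same fold label, so each pair always lands on the same side of the train/validation split. A small worked check of the construction (fold counts are illustrative):

import numpy as np

n_folds, n_simulate = 3, 7
groups = np.repeat(np.arange(n_folds), int(np.ceil(n_simulate / n_folds)))
groups = groups[:n_simulate].tolist()
groups += groups  # duplicate the labels for the second dataset
print(groups)  # [0, 0, 0, 1, 1, 1, 2, 0, 0, 0, 1, 1, 1, 2]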
Example #36
File: approx_lhd.py Project: JBris/abcpy
    def loglikelihood(self, y_obs, y_sim):
        if not isinstance(y_obs, list):
            raise TypeError('Observed data is not of allowed types')

        if not isinstance(y_sim, list):
            raise TypeError('Simulated data is not of allowed types')

        # Check whether y_obs is same as the stored dataset.
        if self.data_set is not None:
            # check that the observations have the same length; if not, they can't be the same:
            if len(y_obs) != len(self.data_set):
                self.dataSame = False
            elif len(np.array(y_obs[0]).reshape(-1)) == 1:
                self.dataSame = self.data_set == y_obs
            else:  # otherwise it fails when y_obs[0] is array
                self.dataSame = all(
                    [(np.array(self.data_set[i]) == np.array(y_obs[i])).all() for i in range(len(y_obs))])

        if self.stat_obs is None or self.dataSame is False:
            self.stat_obs = self.statistics_calc.statistics(y_obs)
            self.data_set = y_obs

        # Extract summary statistics from the simulated data
        stat_sim = self.statistics_calc.statistics(y_sim)
        if not stat_sim.shape[0] == self.n_simulate:
            raise RuntimeError("The number of samples in the reference data set is not the same as the number of "
                               "samples in the generated data. Please check that `n_samples` in the `sample()` method"
                               "for the sampler is equal to `n_simulate` in PenLogReg.")

        # Compute the approximate likelihood for the y_obs given theta
        y = np.append(np.zeros(self.n_simulate), np.ones(self.n_simulate))
        X = np.array(np.concatenate((stat_sim, self.ref_data_stat), axis=0))
        # define here groups for cross-validation:
        groups = np.repeat(np.arange(self.n_folds), int(np.ceil(self.n_simulate / self.n_folds)))
        groups = groups[:self.n_simulate].tolist()
        groups += groups  # duplicate it as groups need to be defined for both datasets
        m = LogitNet(alpha=1, n_splits=self.n_folds, max_iter=self.max_iter, random_state=self.seed, scoring="log_loss")
        m = m.fit(X, y, groups=groups)
        result = -np.sum((m.intercept_ + np.sum(np.multiply(m.coef_, self.stat_obs), axis=1)), axis=0)

        return result
Example #37
    def test_lambda_clip_warning(self):
        x, y = self.binomial[0]
        m = LogitNet(n_folds=0, random_state=1729)
        m = m.fit(x, y)

        with self.assertWarns(RuntimeWarning):
            m.predict(x, lamb=m.lambda_path_[0] + 1)

        with self.assertWarns(RuntimeWarning):
            m.predict(x, lamb=m.lambda_path_[-1] - 1)
Example #38
File: glmnet.py Project: aychies/easyml-1
    def create_estimator(self):
        """
        Create an estimator.

        Creates an estimator depending on the family of regression.

        :return: A scikit-learn estimator.
        """
        if self.family == 'gaussian':
            estimator = ElasticNet(standardize=False, cut_point=0)
        elif self.family == 'binomial':
            estimator = LogitNet(standardize=False, cut_point=0)
        return estimator
Example #39
File: stats.py Project: arose13/rosey
def _parallel_permute_count_nonzero_penalised_coefs(xp, yp, lam_path,
                                                    penalties, norm_num,
                                                    is_regression):
    from glmnet import ElasticNet, LogitNet
    np.random.shuffle(yp)

    params = dict(alpha=norm_num, lambda_path=lam_path)
    pm = ElasticNet(**params) if is_regression else LogitNet(**params)
    pm.fit(xp, yp, relative_penalties=penalties)

    return np.sign(
        np.abs(np.squeeze(pm.coef_path_)) *
        vec_to_array(penalties)).sum(axis=0)
Example #40
File: stats.py Project: arose13/rosey
    def __init__(self,
                 penalty_free_indices=list(),
                 min_lambda_ratio=1e-3,
                 n_lambdas=250,
                 cv=10,
                 is_regression=True,
                 norm_num=1):
        from glmnet import ElasticNet, LogitNet

        if not (isinstance(penalty_free_indices, list)
                or isinstance(penalty_free_indices, np.ndarray)):
            raise ValueError('penalty_free_indices must be a list or np.ndarray')

        if is_regression:
            self.model = ElasticNet(norm_num,
                                    n_lambdas,
                                    min_lambda_ratio,
                                    n_splits=cv,
                                    n_jobs=cpu_count())
        else:
            self.model = LogitNet(norm_num,
                                  n_lambdas,
                                  min_lambda_ratio,
                                  n_splits=cv,
                                  n_jobs=cpu_count())

        self.norm_num = norm_num
        self.ols_idx = penalty_free_indices
        self.is_regression = is_regression

        self.n = None
        self.p = None
        self.coef_path = None
        self.lambdas = None
        self.fdr_grid = None
        self.fdr_analytic_grid = None
        self.n_nonzero_true_coefs = None
        self.mean_n_false_positive_coefs = None
Example #41
    def test_lambda_clip_warning(self):
        x, y = self.binomial[0]
        m = LogitNet(n_splits=0, random_state=1729)
        m = m.fit(x, y)

        with self.assertWarns(RuntimeWarning):
            m.predict(x, lamb=m.lambda_path_[0] + 1)

        with self.assertWarns(RuntimeWarning):
            m.predict(x, lamb=m.lambda_path_[-1] - 1)
Example #42
    def test_cv_scoring_multinomial(self):
        x, y = self.multinomial[0]
        for method in self.scoring:
            m = LogitNet(scoring=method, random_state=488881)

            if method in self.multinomial_scoring:
                m = m.fit(x, y)
                check_accuracy(y, m.predict(x), 0.65, scoring=method)
            else:
                with self.assertRaises(ValueError):
                    m.fit(x, y)
Example #43
def avg_graphs(X, f_true, method):
    assert method in methods
    n_simulations = 10
    probs = [[1 - ff, ff] for ff in f_true]
    avg_square_bias, avg_variance, avg_mse = defaultdict(list), defaultdict(
        list), defaultdict(list)
    loop_list = np.arange(1, 101, 1) if method == 'knn' else np.exp(
        np.arange(-3, 7, 0.1))
    for simulation in range(n_simulations):
        y = [np.random.choice(2, p=prob) for prob in probs]
        for l in loop_list:
            if method == 'knn':
                model = KNeighborsClassifier(n_neighbors=l).fit(X.T, y)
            elif method == 'lasso':
                model = LogisticRegression(penalty='l1',
                                           solver='liblinear',
                                           C=l).fit(X.T, y)
            else:
                model = LogitNet(alpha=0, lambda_path=[l]).fit(X.T, y)
            f_hat = model.predict_proba(X.T)[:, 1]
            x_val = l if method == 'knn' else (np.count_nonzero(
                model.coef_) if method == 'lasso' else np.log(l))
            avg_square_bias[x_val].append(mean_squared_error(f_true, f_hat.T))
            avg_variance[x_val].append(np.mean(np.var(f_hat)))
            avg_mse[x_val].append(mean_squared_error(f_hat.T, y))

    asb_x, asb_y = reorder_dict(avg_square_bias)
    av_x, av_y = reorder_dict(avg_variance)
    am_x, am_y = reorder_dict(avg_mse)

    plt.plot(asb_x, asb_y, label='avg_square_bias')
    plt.plot(av_x, av_y, label='avg_variance')
    plt.plot(am_x, am_y, label='avg_MSE')
    plt.title(f"graphs for {method} predictor")
    plt.xlabel('k' if method == 'knn' else (
        'num of non-zero coefficients' if method == 'lasso' else 'log lambda'))
    plt.legend()
    plt.show()

    print(
        f"for {method}, the optimal MSE is {np.min(am_y)} and we get it when {x_label_dict[method]} is {am_x[np.argmin(am_y)]}"
    )
Example #44
    def test_relative_penalties(self):
        x, y = self.binomial[0]
        p = x.shape[1]

        # m1 no relative penalties applied
        m1 = LogitNet(alpha=1)
        m1.fit(x, y)

        # find the nonzero indices from LASSO
        nonzero = np.nonzero(m1.coef_[0])

        # unpenalize those nonzero coefs
        penalty = np.repeat(1, p)
        penalty[nonzero] = 0

        # refit the model with the unpenalized coefs
        m2 = LogitNet(alpha=1)
        m2.fit(x, y, relative_penalties=penalty)

        # verify that the unpenalized coef ests exceed the penalized ones
        # in absolute value
        assert (np.all(np.abs(m1.coef_[0]) <= np.abs(m2.coef_[0])))
Example #45
    def test_use_sample_weights(self):
        x, y = self.multinomial[1]
        class_0_idx = np.where(y == 0)
        to_drop = class_0_idx[0][:-3]
        to_keep = np.ones(len(y), dtype=bool)
        to_keep[to_drop] = False
        y = y[to_keep]
        x = x[to_keep, :]
        sample_weight = class_weight.compute_sample_weight('balanced', y)
        sample_weight[0] = 0.

        unweighted = LogitNet(random_state=2, scoring='f1_micro')
        unweighted = unweighted.fit(x, y)
        unweighted_acc = f1_score(y, unweighted.predict(x), sample_weight=sample_weight,
                                  average='micro')

        weighted = LogitNet(random_state=2, scoring='f1_micro')
        weighted = weighted.fit(x, y, sample_weight=sample_weight)
        weighted_acc = f1_score(y, weighted.predict(x), sample_weight=sample_weight,
                                average='micro')

        self.assertTrue(weighted_acc >= unweighted_acc)
Example #46
    def test_relative_penalties(self):
        x, y = self.binomial[0]
        p = x.shape[1]

        # m1 no relative penalties applied
        m1 = LogitNet(alpha=1)
        m1.fit(x, y)

        # find the nonzero indices from LASSO
        nonzero = np.nonzero(m1.coef_[0])

        # unpenalize those nonzero coefs
        penalty = np.repeat(1, p)
        penalty[nonzero] = 0

        # refit the model with the unpenalized coefs
        m2 = LogitNet(alpha=1)
        m2.fit(x, y, relative_penalties=penalty)

        # verify that the unpenalized coef ests exceed the penalized ones
        # in absolute value
        assert(np.all(np.abs(m1.coef_[0]) <= np.abs(m2.coef_[0])))
Example #47
 def test_cv_scoring(self):
     x, y = self.binomial[0]
     for method in self.scoring:
         m = LogitNet(scoring=method, random_state=52633)
         m = m.fit(x, y)
         check_accuracy(y, m.predict(x), 0.85, scoring=method)
Example #48
 def test_alphas(self):
     x, y = self.binomial[0]
     for alpha in self.alphas:
         m = LogitNet(alpha=alpha, random_state=41041)
         m = m.fit(x, y)
         check_accuracy(y, m.predict(x), 0.85, alpha=alpha)