def main():
    """Grid-search a ``PUMIL_SL`` classifier on generated data and print its error.

    Data comes from ``gen_twonorm_pumil`` (presumably a synthetic
    positive/unlabeled multiple-instance generator -- confirm against its
    definition). A 5-fold ``GridSearchCV`` over the regularization values in
    ``lambda_list`` is fitted on the training split, predictions are made on
    the test split, and ``bin_clf_err`` scaled by 100 is printed as "MR".
    """
    prior = .5
    x, y, x_t, y_t = gen_twonorm_pumil(n_p=30, n_u=200, prior_u=prior, n_t=100)

    # Quick look at the generated training data.
    print(x)
    print(len(x))
    print(type(x))

    # Single definition of the 'lam' candidates, reused by the grid below.
    lambda_list = np.logspace(-3, 1, 5)
    param_grid = {
        'prior': [prior],
        'lam': lambda_list,
        'basis': ['minimax']
    }
    clf = GridSearchCV(estimator=pumil_mr.PUMIL_SL(),
                       param_grid=param_grid,
                       cv=5,
                       n_jobs=-1)
    clf.fit(x, y)
    y_h = clf.predict(x_t)
    print(y_h)
    print(y_t)
    err = 100 * bin_clf_err(y_h, y_t, prior)
    print("MR: {}%".format(err))
Exemple #2
0
 def test_fit(self):
     """Fit ``PUMIL_SL`` directly and require a test error below 0.2.

     The estimator is built with the same prior that is passed to the data
     generator and with the 'minimax' basis; no hyper-parameter search is
     performed here.
     """
     class_prior = .5
     train_x, train_y, test_x, test_y = gen_twonorm_pumil(
         n_p=30, n_u=200, prior_u=class_prior, n_t=100)

     model = pumil_mr.PUMIL_SL(class_prior, basis='minimax')
     model.fit(train_x, train_y)
     predicted = model.predict(test_x)

     self.assertLess(bin_clf_err(predicted, test_y, class_prior), .2)
Exemple #3
0
 def test_cv(self):
     """Grid-search ``PUMIL_SL`` with 5-fold CV and require test error below 0.2.

     The search varies only 'lam' (the values in ``lambda_list``); 'prior'
     and 'basis' are each pinned to a single candidate.
     """
     prior = .5
     x, y, x_t, y_t = gen_twonorm_pumil(n_p=30, n_u=200,
                                        prior_u=prior, n_t=100)
     # Single definition of the 'lam' candidates, reused by the grid below.
     lambda_list = np.logspace(-3, 1, 5)
     param_grid = {'prior': [prior],
                   'lam': lambda_list,
                   'basis': ['minimax']}
     clf = GridSearchCV(estimator=pumil_mr.PUMIL_SL(),
                        param_grid=param_grid,
                        cv=5, n_jobs=-1)
     clf.fit(x, y)
     y_h = clf.predict(x_t)
     err = bin_clf_err(y_h, y_t, prior)
     self.assertLess(err, .2)
def main():
    """Grid-search a ``PU_SL`` classifier on generated data and print its error.

    Training data from ``gen_twonorm_pu`` is printed, a 5-fold
    ``GridSearchCV`` over five log-spaced 'lam' values (1e-3 .. 1e1) with the
    'lm' basis is fitted, and the test labels, the predictions and
    ``bin_clf_err`` scaled by 100 (labelled "MR") are printed.
    """
    class_prior = .5
    train_x, train_y, test_x, test_y = gen_twonorm_pu(
        n_p=30, n_u=200, prior_u=class_prior, n_t=100)
    print(train_x)
    print(train_y)

    search_space = {
        'prior': [class_prior],
        'lam': np.logspace(-3, 1, 5),
        'basis': ['lm'],
    }
    search = GridSearchCV(
        estimator=pu_mr.PU_SL(),
        param_grid=search_space,
        cv=5,
        n_jobs=-1,
    )
    search.fit(train_x, train_y)
    predicted = search.predict(test_x)

    print(test_y)
    print(predicted)
    error_pct = 100 * bin_clf_err(predicted, test_y, class_prior)
    print("MR: {:.2f}%".format(error_pct))
Exemple #5
0
    priorh = cpe(x_l, y_l, x_u)
    clf = PNU_SL(prior=priorh, basis='lm')
    params = {'eta': eta_list, 'lam': [.1]}
    etah = calc_etab(np.sum(y == +1), np.sum(y == -1), priorh)
    clf = GridSearchCV(estimator=clf,
                       param_grid=params,
                       scoring=make_scorer(pnu_risk, prior=priorh, eta=etah),
                       cv=3,
                       n_jobs=-1)
    timer.tic("Start PNU_SL")
    clf.fit(x, y)
    timer.toc()
    y_h = clf.predict(x_t)
    print(y_h)
    print(y_t)
    err1 = 100 * bin_clf_err(y_h, y_t, prior=prior_u)
    print("Error: {:.2f}\n".format(err1))

    timer.tic("Start PNU_SL_FastCV")
    clf2 = PNU_SL_FastCV(x,
                         y,
                         priorh,
                         eta_list,
                         lambda_list=[.1],
                         n_fold=3,
                         basis='lm',
                         nargout=1)
    timer.toc()
    y_h = clf2(x_t)
    err2 = 100 * bin_clf_err(y_h, y_t, prior=prior_u)
    print("Error: {:.2f}".format(err2))
    n_trial = 20

    best_err = np.inf
    errs1 = np.empty(n_trial)
    errs2 = np.empty((n_trial, len(eta_list)))
    priors = np.empty(n_trial)
    for ite in range(n_trial):
        x, y, x_t, y_t = gen_twonorm_ssl(n_l, prior_l, n_u, prior_u, n_t)
        x_l, y_l, x_u = x[y != 0, :], y[y != 0], x[y == 0, :]
        priorh = cpe(x_l, y_l, x_u)
        clf = PNU_SL(prior=priorh, lam=.1, basis='lm')
        params = {'eta': eta_list}
        clf = GridSearchCV(estimator=clf, param_grid=params, cv=2) #, n_jobs=-1)
        clf.fit(x, y)
        y_h = clf.predict(x_t)
        errs1[ite] = 100*bin_clf_err(y_h, y_t, prior=prior_u)
        if errs1[ite] < best_err:
            best_err = errs1[ite]
            best_w = clf.best_estimator_.coef_
            best_x, best_y = x, y

        for ite_eta, eta in enumerate(eta_list):
            clf = PNU_SL(prior=priorh, eta=eta, lam=.1, basis='lm')
            clf.fit(x, y)
            y_h = clf.predict(x_t)
            errs2[ite, ite_eta] = 100*bin_clf_err(y_h, y_t, prior=prior_u)
                                      
        priors[ite] = priorh

    print("Average of misclassification rates: {:.1f} ({:.2f})".format(
        np.mean(errs1), np.std(errs1)/np.sqrt(n_trial)))