def main():
    """Run the PUMIL_SL grid-search demo on generated two-norm data.

    Data comes from ``gen_twonorm_pumil`` with a fixed prior of 0.5. The
    regularisation parameter ``lam`` is selected by 5-fold ``GridSearchCV``
    over ``np.logspace(-3, 1, 5)`` with the ``'minimax'`` basis. The
    predictions, the test labels and the value of ``bin_clf_err`` scaled
    by 100 are printed.
    """
    prior = .5
    x, y, x_t, y_t = gen_twonorm_pumil(n_p=30, n_u=200, prior_u=prior, n_t=100)
    print(x)
    print(len(x))
    print(type(x))

    # Single definition of the lambda grid, reused by the parameter grid
    # (previously computed twice, with this copy left unused).
    lambda_list = np.logspace(-3, 1, 5)
    param_grid = {
        'prior': [prior],
        'lam': lambda_list,
        'basis': ['minimax'],
    }

    # No breakpoint() here: a leftover debugger stop would block any
    # non-interactive run of the demo.
    clf = GridSearchCV(estimator=pumil_mr.PUMIL_SL(),
                       param_grid=param_grid, cv=5, n_jobs=-1)
    clf.fit(x, y)
    y_h = clf.predict(x_t)
    print(y_h)
    print(y_t)

    err = 100 * bin_clf_err(y_h, y_t, prior)
    print("MR: {}%".format(err))
def test_fit(self):
    """Fit PUMIL_SL directly and require the test error to be below 0.2."""
    class_prior = .5
    train_x, train_y, test_x, test_y = gen_twonorm_pumil(
        n_p=30, n_u=200, prior_u=class_prior, n_t=100)

    # Positional prior plus the 'minimax' basis, no hyper-parameter search.
    model = pumil_mr.PUMIL_SL(class_prior, basis='minimax')
    model.fit(train_x, train_y)

    predicted = model.predict(test_x)
    error = bin_clf_err(predicted, test_y, class_prior)
    self.assertLess(error, .2)
def test_cv(self):
    """Grid-search PUMIL_SL over ``lam`` and require test error below 0.2.

    Uses 5-fold ``GridSearchCV`` over ``np.logspace(-3, 1, 5)`` with the
    ``'minimax'`` basis on data from ``gen_twonorm_pumil``.
    """
    prior = .5
    x, y, x_t, y_t = gen_twonorm_pumil(n_p=30, n_u=200, prior_u=prior, n_t=100)

    # One definition of the lambda grid, actually used by the parameter grid
    # (it was previously computed a second time and never read).
    lambda_list = np.logspace(-3, 1, 5)
    param_grid = {'prior': [prior], 'lam': lambda_list, 'basis': ['minimax']}

    clf = GridSearchCV(estimator=pumil_mr.PUMIL_SL(),
                       param_grid=param_grid, cv=5, n_jobs=-1)
    clf.fit(x, y)
    y_h = clf.predict(x_t)

    err = bin_clf_err(y_h, y_t, prior)
    self.assertLess(err, .2)
def main():
    """Run the PU_SL grid-search demo on generated two-norm data.

    Prints the training inputs and labels, selects ``lam`` by 5-fold
    ``GridSearchCV`` with the ``'lm'`` basis, then prints the test labels,
    the predictions and the value of ``bin_clf_err`` scaled by 100.
    """
    class_prior = .5
    train_x, train_y, test_x, test_y = gen_twonorm_pu(
        n_p=30, n_u=200, prior_u=class_prior, n_t=100)
    print(train_x)
    print(train_y)

    grid = {
        'prior': [class_prior],
        'lam': np.logspace(-3, 1, 5),
        'basis': ['lm'],
    }
    search = GridSearchCV(estimator=pu_mr.PU_SL(), param_grid=grid,
                          cv=5, n_jobs=-1)
    search.fit(train_x, train_y)
    predicted = search.predict(test_x)

    # Ground truth is printed first, predictions second.
    print(test_y)
    print(predicted)

    error_pct = 100 * bin_clf_err(predicted, test_y, class_prior)
    print("MR: {:.2f}%".format(error_pct))
# Compare two ways of selecting eta for PNU_SL: a GridSearchCV wrapper and
# PNU_SL_FastCV. Each run is timed with `timer` and its test error printed.

# priorh is used as the class prior by everything below.
# (cpe presumably estimates it from labeled/unlabeled samples - confirm.)
priorh = cpe(x_l, y_l, x_u)

# Base estimator; 'eta' and 'lam' are supplied by the parameter grid.
clf = PNU_SL(prior=priorh, basis='lm')
params = {'eta': eta_list, 'lam': [.1]}

# etah is derived from the counts of +1 and -1 labels in y and from priorh;
# it is only passed to the scorer.
etah = calc_etab(np.sum(y == +1), np.sum(y == -1), priorh)

# NOTE(review): if make_scorer is scikit-learn's, it defaults to
# greater_is_better=True. Should pnu_risk be a loss (lower is better), this
# search would pick the highest-risk eta - confirm the sign convention.
clf = GridSearchCV(estimator=clf, param_grid=params, scoring=make_scorer(pnu_risk, prior=priorh, eta=etah), cv=3, n_jobs=-1)

# Time only the fit of the grid search.
timer.tic("Start PNU_SL")
clf.fit(x, y)
timer.toc()
y_h = clf.predict(x_t)
print(y_h)
print(y_t)
# The error is evaluated with prior_u, not the estimated priorh.
err1 = 100 * bin_clf_err(y_h, y_t, prior=prior_u)
print("Error: {:.2f}\n".format(err1))

# Same eta grid, the single lambda value .1 and 3 folds, via PNU_SL_FastCV.
timer.tic("Start PNU_SL_FastCV")
clf2 = PNU_SL_FastCV(x, y, priorh, eta_list, lambda_list=[.1], n_fold=3, basis='lm', nargout=1)
timer.toc()
# The value returned with nargout=1 is called directly on the test inputs.
y_h = clf2(x_t)
err2 = 100 * bin_clf_err(y_h, y_t, prior=prior_u)
print("Error: {:.2f}".format(err2))
# Repeat the PNU_SL experiment n_trial times on freshly generated data.
# errs1 holds the per-trial error of the grid-searched model, errs2 the
# per-trial error for every fixed eta, and priors the per-trial priorh.
# NOTE(review): the nesting of the eta loop and of the priors assignment
# was reconstructed from a flattened listing - confirm against the file.
n_trial = 20
best_err = np.inf
errs1 = np.empty(n_trial)
errs2 = np.empty((n_trial, len(eta_list)))
priors = np.empty(n_trial)
for ite in range(n_trial):
    x, y, x_t, y_t = gen_twonorm_ssl(n_l, prior_l, n_u, prior_u, n_t)
    # Rows with y != 0 form the labeled set; rows with y == 0 the unlabeled set.
    x_l, y_l, x_u = x[y != 0, :], y[y != 0], x[y == 0, :]
    priorh = cpe(x_l, y_l, x_u)

    # Select eta by 2-fold grid search with lam fixed at .1.
    clf = PNU_SL(prior=priorh, lam=.1, basis='lm')
    params = {'eta': eta_list}
    clf = GridSearchCV(estimator=clf, param_grid=params, cv=2)  # n_jobs=-1 left disabled here
    clf.fit(x, y)
    y_h = clf.predict(x_t)
    # The error is evaluated with prior_u, not the estimated priorh.
    errs1[ite] = 100*bin_clf_err(y_h, y_t, prior=prior_u)

    # Keep the coefficients and training data of the best trial so far.
    # best_w is read before clf is rebound in the eta loop below.
    if errs1[ite] < best_err:
        best_err = errs1[ite]
        best_w = clf.best_estimator_.coef_
        best_x, best_y = x, y

    # Refit once per fixed eta, on the same data, to fill this trial's row.
    for ite_eta, eta in enumerate(eta_list):
        clf = PNU_SL(prior=priorh, eta=eta, lam=.1, basis='lm')
        clf.fit(x, y)
        y_h = clf.predict(x_t)
        errs2[ite, ite_eta] = 100*bin_clf_err(y_h, y_t, prior=prior_u)
    priors[ite] = priorh

# Report the mean of errs1 and, in parentheses, its std divided by sqrt(n_trial).
print("Average of misclassification rates: {:.1f} ({:.2f})".format( np.mean(errs1), np.std(errs1)/np.sqrt(n_trial)))