def logistic_regression_large(self, alpha, mu):
    """Train logistic regression on a large synthetic problem.

    Builds a synthetic problem with 100 labeling functions and 5000
    features, trains a deterministic (non-sampled) LogReg model, and
    asserts that the sign of the learned scores matches the ground
    truth on more than 85% of the points.
    """
    features, init_w, truth = self._problem(
        n=1000, nlf=100, lf_prior=0.2, lf_mean=0.7, lf_sd=0.25,
        nf=5000, f_prior=0.05, f_mean=0.6, f_sd=0.25,
    )
    model = LogReg()
    model.train(
        X=features, n_iter=1000, tol=1e-4, w0=init_w, sample=False,
        alpha=alpha, mu=mu, rate=0.01, verbose=True,
    )
    # Fraction of points whose predicted sign agrees with ground truth.
    accuracy = np.mean(np.sign(features.dot(model.w)) == truth)
    self.assertGreater(accuracy, 0.85)
def test_logistic_regression_sample(self):
    """Sampled-SGD marginals should closely match the deterministic fit.

    Trains one LogReg model deterministically and one with sampling
    (200 samples), then asserts the relative L2 distance between their
    predicted marginals is at most 5%.
    """
    print("Running logistic regression test with sparse operations")
    features, init_w, truth = self._problem(
        n=500, nlf=75, lf_prior=0.3, lf_mean=0.7, lf_sd=0.25,
        nf=1000, f_prior=0.1, f_mean=0.6, f_sd=0.25,
    )
    mu = 1e-4
    dense_model = LogReg()
    dense_model.train(
        X=features, n_iter=2500, tol=1e-4, w0=init_w, sample=False,
        alpha=self.ridge, mu=mu, rate=0.01, verbose=True,
    )
    sampled_model = LogReg()
    sampled_model.train(
        X=features, n_iter=2500, tol=1e-4, w0=init_w, sample=True,
        n_samples=200, alpha=self.ridge, mu=mu, rate=0.01, verbose=True,
    )
    # Check sample marginals are close to the deterministic solution.
    dense_marg = odds_to_prob(features.dot(dense_model.w))
    sampled_marg = odds_to_prob(features.dot(sampled_model.w))
    rel_err = np.linalg.norm(dense_marg - sampled_marg) / np.linalg.norm(dense_marg)
    self.assertLessEqual(rel_err, 0.05)
# NOTE(review): this fragment appears to be concatenated IPython notebook
# cells — the first gen_model.train call happens before gen_model is
# (re)assigned below, so outside the original notebook session the first
# line would fail with a NameError. Confirm intended cell ordering.
gen_model.train(L_train, n_iter=1000, rate=1e-5)

# Fit a naive-Bayes generative model over the label matrix L_train and
# extract per-candidate marginal probabilities.
from snorkel.learning import NaiveBayes
gen_model = NaiveBayes()
gen_model.train(L_train, n_iter=1000, rate=1e-5)
train_marginals = gen_model.marginals(L_train)

# Hyperparameter search space for the discriminative LogReg model:
# discrete iteration counts, and log-spaced (base-10) ranges for the
# learning rate and regularization strength.
from snorkel.learning import LogReg
from snorkel.learning_utils import RandomSearch, ListParameter, RangeParameter
iter_param = ListParameter('n_iter', [250, 500, 1000, 2000])
rate_param = RangeParameter('rate', 1e-4, 1e-2, step=0.75, log_base=10)
reg_param = RangeParameter('mu', 1e-8, 1e-2, step=1, log_base=10)
disc_model = LogReg()

# --- IPython-only magics: valid inside a notebook/IPython session only ---
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Open a Snorkel session and compute features over the development
# candidate set.
from snorkel import SnorkelSession
session = SnorkelSession()
from snorkel.models import CandidateSet
from snorkel.models import candidate_subclass
from snorkel.annotations import FeatureManager
feature_manager = FeatureManager()
entity = candidate_subclass('entity', ['entity1', 'entity2'])
dev = session.query(CandidateSet).filter(CandidateSet.name == 'Protein1 Development Candidates').one()
# NOTE(review): the trailing False presumably controls whether new feature
# keys are added for this split — confirm against FeatureManager.update.
%time F_dev = feature_manager.update(session, dev, 'Train1 Features', False)
#%time F_train = feature_manager.load(session, train, 'Train Features') from snorkel.annotations import LabelManager label_manager = LabelManager() %time L_train = label_manager.create(session, c, 'LF Labels', f=LFs) L_train from snorkel.learning import NaiveBayes gen_model = NaiveBayes() gen_model.train(L_train, n_iter=1000, rate=1e-5) gen_model.save(session, 'Generative Params') train_marginals = gen_model.marginals(L_train) gen_model.w from snorkel.learning import LogReg from snorkel.learning_utils import RandomSearch, ListParameter, RangeParameter iter_param = ListParameter('n_iter', [250, 500, 1000, 2000]) rate_param = RangeParameter('rate', 1e-4, 1e-2, step=0.75, log_base=10) reg_param = RangeParameter('mu', 1e-8, 1e-2, step=1, log_base=10) disc_model = LogReg() %time F_dev = feature_manager.update(session, dev, 'Train Features', False) searcher = RandomSearch(disc_model, F_train, train_marginals, 10, iter_param, rate_param, reg_param)