Beispiel #1
0
 def test_logistic_regression_sample(self):
     """Check that sampled training agrees with deterministic training.

     Two ``LogReg`` models are trained on the same synthetic problem with
     identical settings, except that the second one uses ``sample=True``
     with ``n_samples=200``. The test passes when the relative L2 distance
     between ``odds_to_prob(X.dot(w))`` of the two models is at most 0.05.
     """
     print("Running logistic regression test with sparse operations")

     # The third value returned by _problem is not needed by this test.
     X, w0, _ = self._problem(n=500,
                              nlf=75, lf_prior=0.3, lf_mean=0.7, lf_sd=0.25,
                              nf=1000, f_prior=0.1, f_mean=0.6, f_sd=0.25)

     # Settings common to both training runs.
     shared = dict(X=X, n_iter=2500, tol=1e-4, w0=w0,
                   mu=1e-4, rate=0.01, verbose=True)

     # Deterministic run first, then the sampled run (same order as before).
     deterministic = LogReg()
     deterministic.train(sample=False, alpha=self.ridge, **shared)

     sampled = LogReg()
     sampled.train(sample=True, n_samples=200, alpha=self.ridge, **shared)

     # Check sample marginals are close to the deterministic solution.
     p_det = odds_to_prob(X.dot(deterministic.w))
     p_smp = odds_to_prob(X.dot(sampled.w))
     rel_err = np.linalg.norm(p_det - p_smp) / np.linalg.norm(p_det)
     self.assertLessEqual(rel_err, 0.05)
Beispiel #2
0
 def logistic_regression_large(self, alpha, mu):
     """Train ``LogReg`` on a larger synthetic problem and check accuracy.

     ``alpha`` and ``mu`` are forwarded unchanged to ``LogReg.train``.
     The check passes when the fraction of entries for which
     ``np.sign(X.dot(w))`` equals ``gt`` is greater than 0.85.
     """
     problem_spec = dict(n=1000,
                         nlf=100, lf_prior=0.2, lf_mean=0.7, lf_sd=0.25,
                         nf=5000, f_prior=0.05, f_mean=0.6, f_sd=0.25)
     X, w0, gt = self._problem(**problem_spec)

     model = LogReg()
     model.train(X=X, w0=w0,
                 n_iter=1000, tol=1e-4, rate=0.01,
                 sample=False,
                 alpha=alpha, mu=mu,
                 verbose=True)

     # Compare the sign of each score against the labels from _problem.
     predicted = np.sign(X.dot(model.w))
     accuracy = np.mean(predicted == gt)
     self.assertGreater(accuracy, 0.85)
Beispiel #3
0
# Notebook-style excerpt (contains IPython magics, so it is not plain Python).
# NOTE(review): `gen_model` and `L_train` are used here before any visible
# definition; presumably they come from earlier notebook cells -- confirm.
gen_model.train(L_train, n_iter=1000, rate=1e-5)

from snorkel.learning import NaiveBayes

# Rebind gen_model to a fresh NaiveBayes instance, train it with the same
# arguments as the call above, and read marginals for L_train from it.
gen_model = NaiveBayes()
gen_model.train(L_train, n_iter=1000, rate=1e-5)
train_marginals = gen_model.marginals(L_train)

from snorkel.learning import LogReg
# NOTE(review): RandomSearch is imported but not used in the visible lines.
from snorkel.learning_utils import RandomSearch, ListParameter, RangeParameter

# Parameter descriptors named 'n_iter', 'rate' and 'mu'.
# NOTE(review): presumably the RangeParameter bounds are spanned on a log10
# grid because of log_base=10 -- confirm against snorkel.learning_utils.
iter_param = ListParameter('n_iter', [250, 500, 1000, 2000])
rate_param = RangeParameter('rate', 1e-4, 1e-2, step=0.75, log_base=10)
reg_param  = RangeParameter('mu', 1e-8, 1e-2, step=1, log_base=10)

# Model instance; it is only constructed, not trained, in the visible lines.
disc_model = LogReg()

# IPython magics: load the autoreload extension, set it to mode 2, and
# select inline matplotlib output.
%load_ext autoreload
%autoreload 2
%matplotlib inline

from snorkel import SnorkelSession
session = SnorkelSession()
from snorkel.models import CandidateSet
from snorkel.models import candidate_subclass
from snorkel.annotations import FeatureManager

feature_manager = FeatureManager()
# Candidate subclass named 'entity' with fields 'entity1' and 'entity2'.
entity = candidate_subclass('entity', ['entity1', 'entity2'])
# Look up the CandidateSet named 'Protein1 Development Candidates'.
# NOTE(review): `.one()` presumably requires exactly one matching row
# (SQLAlchemy-style query) -- confirm.
dev = session.query(CandidateSet).filter(CandidateSet.name == 'Protein1 Development Candidates').one()
# %time reports how long the statement takes to run.
# NOTE(review): the meaning of the trailing positional `False` is not
# visible here -- check FeatureManager.update's signature.
%time F_dev = feature_manager.update(session, dev, 'Train1 Features', False)