コード例 #1
0
ファイル: InferenceTests.py プロジェクト: rap9430/snorkel
 def logistic_regression_large(self, alpha, mu):
     """Train LogReg on a large generated problem and check its accuracy.

     The problem (X, w0, gt) is produced by ``self._problem`` with fixed
     settings. After training with the given ``alpha`` and ``mu``, the
     fraction of entries where ``sign(X.dot(w))`` equals ``gt`` must be
     greater than 0.85.
     """
     problem_settings = dict(
         n=1000,
         nlf=100, lf_prior=0.2, lf_mean=0.7, lf_sd=0.25,
         nf=5000, f_prior=0.05, f_mean=0.6, f_sd=0.25,
     )
     X, w0, gt = self._problem(**problem_settings)

     model = LogReg()
     model.train(X=X, n_iter=1000, tol=1e-4, w0=w0, sample=False,
                 alpha=alpha, mu=mu, rate=0.01, verbose=True)

     # Fraction of predictions whose sign agrees with gt.
     predicted_sign = np.sign(X.dot(model.w))
     accuracy = np.mean(predicted_sign == gt)
     self.assertGreater(accuracy, 0.85)
コード例 #2
0
ファイル: InferenceTests.py プロジェクト: rap9430/snorkel
 def test_logistic_regression_sample(self):
     """Check that sampled LogReg training tracks the deterministic run.

     Two LogReg models are trained on the same generated problem with
     identical settings, one with ``sample=False`` and one with
     ``sample=True`` (200 samples). The relative L2 distance between
     their ``odds_to_prob(X.dot(w))`` outputs must not exceed 0.05.
     """
     # NOTE(review): the message mentions sparse operations while the test
     # toggles `sample`; the text is left unchanged -- confirm the wording.
     print("Running logistic regression test with sparse operations")

     problem_settings = dict(
         n=500,
         nlf=75, lf_prior=0.3, lf_mean=0.7, lf_sd=0.25,
         nf=1000, f_prior=0.1, f_mean=0.6, f_sd=0.25,
     )
     X, w0, _ = self._problem(**problem_settings)

     mu = 1e-4
     # Settings shared by both training runs; only the sampling options differ.
     shared_settings = dict(X=X, n_iter=2500, tol=1e-4, w0=w0,
                            alpha=self.ridge, mu=mu, rate=0.01, verbose=True)

     deterministic = LogReg()
     deterministic.train(sample=False, **shared_settings)

     sampled = LogReg()
     sampled.train(sample=True, n_samples=200, **shared_settings)

     # Check sample marginals are close to the deterministic solution.
     p_deterministic = odds_to_prob(X.dot(deterministic.w))
     p_sampled = odds_to_prob(X.dot(sampled.w))
     relative_gap = (np.linalg.norm(p_deterministic - p_sampled)
                     / np.linalg.norm(p_deterministic))
     self.assertLessEqual(relative_gap, 0.05)
コード例 #3
0
# NOTE(review): gen_model is used on the next line before the NaiveBayes()
# assignment further down, and L_train is not assigned anywhere in this
# snippet -- presumably both come from earlier notebook cells; confirm.
gen_model.train(L_train, n_iter=1000, rate=1e-5)

from snorkel.learning import NaiveBayes

# Build a NaiveBayes model, train it on L_train, and store the marginals it
# reports for L_train.
gen_model = NaiveBayes()
gen_model.train(L_train, n_iter=1000, rate=1e-5)
train_marginals = gen_model.marginals(L_train)

from snorkel.learning import LogReg
from snorkel.learning_utils import RandomSearch, ListParameter, RangeParameter

# Parameter specifications named 'n_iter', 'rate' and 'mu'.
# NOTE(review): neither these nor RandomSearch are used within this snippet --
# presumably consumed by a later cell; confirm.
iter_param = ListParameter('n_iter', [250, 500, 1000, 2000])
rate_param = RangeParameter('rate', 1e-4, 1e-2, step=0.75, log_base=10)
reg_param  = RangeParameter('mu', 1e-8, 1e-2, step=1, log_base=10)

disc_model = LogReg()

# IPython magics (valid only under IPython/Jupyter, not plain Python).
%load_ext autoreload
%autoreload 2
%matplotlib inline

from snorkel import SnorkelSession
session = SnorkelSession()
from snorkel.models import CandidateSet
from snorkel.models import candidate_subclass
from snorkel.annotations import FeatureManager

feature_manager = FeatureManager()
# NOTE(review): `entity` is not referenced again in this snippet.
entity = candidate_subclass('entity', ['entity1', 'entity2'])
# Query the session for the CandidateSet named 'Protein1 Development Candidates'.
dev = session.query(CandidateSet).filter(CandidateSet.name == 'Protein1 Development Candidates').one()
# NOTE(review): the meaning of the trailing positional False is not visible
# here -- confirm against FeatureManager.update.
%time F_dev = feature_manager.update(session, dev, 'Train1 Features', False)
コード例 #4
0
#%time F_train = feature_manager.load(session, train, 'Train Features')						
						
from snorkel.annotations import LabelManager

label_manager = LabelManager()

# NOTE(review): `session`, `c` and `LFs` are not defined in this snippet --
# presumably set in earlier notebook cells; confirm.
%time L_train = label_manager.create(session, c, 'LF Labels', f=LFs)
# Bare expression, presumably kept so the notebook displays L_train.
L_train

from snorkel.learning import NaiveBayes

# Build a NaiveBayes model and train it on L_train.
gen_model = NaiveBayes()
gen_model.train(L_train, n_iter=1000, rate=1e-5)


# Save the model under the name 'Generative Params', then store the marginals
# it reports for L_train.
gen_model.save(session, 'Generative Params')
train_marginals = gen_model.marginals(L_train)
# Bare expression, presumably kept so the notebook displays the weights.
gen_model.w

from snorkel.learning import LogReg
from snorkel.learning_utils import RandomSearch, ListParameter, RangeParameter

# Parameter specifications named 'n_iter', 'rate' and 'mu', passed to
# RandomSearch below.
iter_param = ListParameter('n_iter', [250, 500, 1000, 2000])
rate_param = RangeParameter('rate', 1e-4, 1e-2, step=0.75, log_base=10)
reg_param  = RangeParameter('mu', 1e-8, 1e-2, step=1, log_base=10)

disc_model = LogReg()

# NOTE(review): `feature_manager` and `dev` are not defined in this snippet --
# presumably set in earlier notebook cells; confirm.
%time F_dev = feature_manager.update(session, dev, 'Train Features', False)

# NOTE(review): F_train is not assigned anywhere in this snippet (the line
# above assigns F_dev) -- confirm it is defined in an earlier cell.
# NOTE(review): the positional 10 is presumably the number of search
# configurations -- confirm against RandomSearch.
searcher = RandomSearch(disc_model, F_train, train_marginals, 10, iter_param, rate_param, reg_param)