def test2():  # aggrevate
    """Train a sequence labeler on mod data with the AggreVaTe learner.

    Builds a small synthetic "mod" dataset, trains an RNN-based policy with
    AggreVaTe against a Hamming-loss reference, and evaluates on a held-out
    half via macarico's trainloop. Prints progress; returns None.
    """
    print()
    print('# test sequence labeler on mod data with AggreVaTe')
    n_types = 10
    n_labels = 4
    data = macarico.util.make_sequence_mod_data(100, 5, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    tRNN = TransitionRNN(
        [RNNFeatures(n_types)],
        [AttendAt()],
        n_labels,
    )
    policy = LinearPolicy(tRNN, n_labels)
    # Probability of rolling in with the reference policy, annealed per epoch.
    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: AggreVaTe(HammingLossReference(), policy, p_rollin_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=4,
        train_eval_skip=1,
    )
def test0():
    """Train a sequence labeler on mod data with the DAgger learner.

    Uses a named feature field ('mytok_rnn') to wire RNNFeatures to AttendAt,
    trains with DAgger against a Hamming-loss reference, and evaluates on the
    held-out half of the data. Prints progress; returns None.
    """
    print()
    print('# test sequence labeler on mod data with DAgger')
    n_types = 10
    n_labels = 4
    data = [
        Example(x, y, n_labels)
        for x, y in macarico.util.make_sequence_mod_data(100, 5, n_types, n_labels)
    ]
    tRNN = Actor([RNNFeatures(n_types, output_field='mytok_rnn')],
                 [AttendAt(field='mytok_rnn')],
                 n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    # Probability of rolling in with the reference policy, annealed per epoch.
    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=4,
        train_eval_skip=1,
    )
def test1():
    """Train a sequence labeler on mod data with LOLS.

    LOLS is driven through trainloop's `learning_alg` hook (one call per
    example) rather than the `Learner` factory, with separate annealed
    roll-in and roll-out reference probabilities. Prints progress; returns None.
    """
    n_types = 10
    n_labels = 4
    print()
    print('# test sequence labeler on mod data with LOLS')
    data = macarico.util.make_sequence_mod_data(20, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    # Separate annealing schedules for roll-in and roll-out with the reference.
    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.9))
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        learning_alg=lambda ex: LOLS.lols(ex, HammingLoss, HammingLossReference(),
                                          policy, p_rollin_ref, p_rollout_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=1,
    )
def test_wsj():
    """Train a POS tagger with DAgger on a small WSJ subset.

    Reads 50 train / 50 dev sentences from 'data/wsj.pos' (path is relative to
    the working directory), builds an RNN-based policy, and trains for 10
    epochs. Prints dataset statistics and progress; returns None.
    """
    print()
    print('# test on wsj subset')
    from macarico.data import nlp_data
    tr, de, te, vocab, label_id = \
        nlp_data.read_wsj_pos('data/wsj.pos', n_tr=50, n_de=50, n_te=0)
    n_types = len(vocab)
    n_labels = len(label_id)
    print('n_train: %s, n_dev: %s, n_test: %s' % (len(tr), len(de), len(te)))
    print('n_types: %s, n_labels: %s' % (n_types, n_labels))
    tRNN = TransitionRNN([RNNFeatures(n_types, rnn_type='RNN')],
                         [AttendAt()],
                         n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    # Probability of rolling in with the reference policy, annealed per epoch.
    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    macarico.util.trainloop(
        training_data=tr,
        dev_data=de,
        policy=policy,
        Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref),
        # Learner=lambda: MaximumLikelihood(HammingLossReference(), policy),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=10,
        # train_eval_skip=None,
    )
def test1(learning_method, exploration):
    """Train a sequence labeler on mod data with BanditLOLS.

    Args:
        learning_method: BanditLOLS update rule (e.g. LEARN_IPS, LEARN_DR,
            LEARN_BIASED).
        exploration: BanditLOLS exploration strategy constant.

    Prints progress; returns None.
    """
    print()
    print('# testing learning_method=%d exploration=%d' % (learning_method, exploration))
    print()
    n_types = 10
    n_labels = 4
    data = macarico.util.make_sequence_mod_data(100, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.001)
    # Roll-out anneals much more slowly than roll-in.
    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.99999))
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: BanditLOLS(
            HammingLossReference(),
            policy,
            p_rollin_ref,
            p_rollout_ref,
            learning_method,  # LEARN_IPS, LEARN_DR, LEARN_BIASED
            exploration,
        ),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=10,
    )
def test_restore(n_types, n_labels, data, model):
    """Check that restoring saved parameters reproduces a trained model.

    Builds a fresh (untrained) policy, evaluates it, loads `model` (a
    state_dict) into it, and evaluates again so the before/after Hamming
    losses can be compared by eye. Prints both losses; returns None.

    Args:
        n_types: vocabulary size for RNNFeatures.
        n_labels: number of output labels.
        data: evaluation examples.
        model: a state_dict previously produced by a matching policy.
    """
    actor = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(actor, n_labels)
    # Freshly-initialized policy: expected to score poorly.
    print('evaluating new model: %g' %
          macarico.util.evaluate(data, policy, HammingLoss()))
    policy.load_state_dict(model)
    # Restored policy: should match the trained model's loss.
    print('evaluating restored model: %g' %
          macarico.util.evaluate(data, policy, HammingLoss()))
def test1(use_bootstrap):
    """Train a sequence labeler with Reslope under bandit feedback.

    Args:
        use_bootstrap: if True, use a 5-member bootstrap ensemble policy with
            bootstrap exploration; otherwise a single policy with Boltzmann
            exploration.

    Prints progress; returns None.
    """
    n_types = 10
    n_labels = 4
    print()
    print('# test sequence labeler on mod data with Reslope and',
          ('bootstrap' if use_bootstrap else 'boltzmann'), 'exploration')
    data = macarico.util.make_sequence_mod_data(3000, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    if not use_bootstrap:
        tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
        policy = LinearPolicy(tRNN, n_labels)
    else:
        # Distinct hidden-state names keep the ensemble members independent.
        rnns = [
            TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels,
                          h_name='h%d' % i)
            for i in range(5)
        ]
        policy = BootstrapPolicy(rnns, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    p_ref = stochastic(ExponentialAnnealing(0.9))
    macarico.util.trainloop(
        training_data=data[:2048],
        dev_data=data[2048:],
        policy=policy,
        Learner=lambda: Reslope(HammingLossReference(), policy, p_ref,
                                exploration=BanditLOLS.EXPLORE_BOOTSTRAP
                                if use_bootstrap else
                                BanditLOLS.EXPLORE_BOLTZMANN),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_ref.step],
        train_eval_skip=1,
        bandit_evaluation=True,
        n_epochs=1,
    )
def test1(learning_method, exploration):
    """Train a bootstrap-ensemble sequence labeler with BanditLOLS.

    Uses a bag of 5 TransitionRNN policies wrapped in a BootstrapPolicy on a
    trivially short (length-1, 2-label) mod dataset.

    Args:
        learning_method: BanditLOLS update rule constant.
        exploration: BanditLOLS exploration strategy constant.

    Prints progress; returns None.
    """
    print()
    print('# testing learning_method=%d exploration=%d' % (learning_method, exploration))
    print()
    n_types = 10
    n_labels = 2
    data = macarico.util.make_sequence_mod_data(100, 1, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]
    bag_size = 5
    tRNN = [
        TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
        for i in range(bag_size)
    ]
    policy = BootstrapPolicy(tRNN, n_labels)
    # policy = LinearPolicy(tRNN[0], n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    # Roll-out anneals much more slowly than roll-in.
    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.99999))
    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: BanditLOLS(
            HammingLossReference(),
            policy,
            p_rollin_ref,
            p_rollout_ref,
            learning_method,
            exploration,
        ),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_batch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=1,
        n_epochs=2,
    )
def run_train(n_types, n_labels, data):
    """Train a policy by maximum likelihood and return its best parameters.

    Trains on the first half of `data`, validates on the second half, keeps
    the best-on-dev parameters ('returned_parameters="best"'), then prints the
    loss of the final policy and of the policy after reloading those best
    parameters.

    Args:
        n_types: vocabulary size for RNNFeatures.
        n_labels: number of output labels.
        data: list of examples; split in half for train/dev.

    Returns:
        The best state_dict found during training.
    """
    actor = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(actor, n_labels)
    print('training')
    _, model = macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: MaximumLikelihood(HammingLossReference(), policy),
        losses=HammingLoss(),
        optimizer=torch.optim.Adam(policy.parameters(), lr=0.01),
        n_epochs=2,
        train_eval_skip=1,
        returned_parameters='best',
    )
    # Loss with the parameters left in the policy after the final epoch.
    print('evaluating learned model: %g' %
          macarico.util.evaluate(data, policy, HammingLoss()))
    policy.load_state_dict(model)
    # Loss after restoring the best-on-dev parameters.
    print('evaluating learned model: %g' %
          macarico.util.evaluate(data, policy, HammingLoss()))
    return model
def test1(task=0, LEARNER=LearnerOpts.DAGGER):
    """Train a sequence labeler on one of several synthetic tasks.

    Args:
        task: which dataset/attention setup to use:
            0 - sequence reversal, easy (explicit reverse-position attention)
            1 - sequence reversal, hard (default attention)
            2 - sequence reversal, multi-focus attention
            3 - memoryless add-one mod K
            4 - Matti-style data
        LEARNER: a LearnerOpts constant selecting the learning algorithm.

    Prints progress; returns None. Raises UnboundLocalError if `task` is not
    in 0..4 (no data/foci are set) or `LEARNER` is unrecognized (no learner).
    """
    print()
    print('Running test 1 (v%d) with learner=%s' % (task, LEARNER))
    print('=======================================================')
    if task == 0:
        print('Sequence reversal task, easy version')
        data = macarico.util.make_sequence_reversal_data(100, 5, 5)
        foci = [AttendAt(lambda s: s.N - s.n - 1)]
    elif task == 1:
        print('Sequence reversal task, hard version')
        data = macarico.util.make_sequence_reversal_data(1000, 5, 5)
        foci = [AttendAt()]
    elif task == 2:
        print('Sequence reversal task, multi-focus version')
        data = macarico.util.make_sequence_reversal_data(100, 5, 5)
        foci = [AttendAt(), AttendAt(lambda s: s.N - s.n - 1)]
    elif task == 3:
        print('Memoryless task, add-one mod K')
        data = macarico.util.make_sequence_mod_data(50, 5, 10, 3)
        foci = [AttendAt()]
    elif task == 4:
        print('Matti-style data')
        data = make_matti_data(1000, 20, 2, 0.05)
        foci = [AttendAt()]
    # Infer alphabet sizes from the data itself.
    n_types = 1 + max({x for X, _ in data for x in X})
    n_labels = 1 + max({y for _, Y in data for y in Y})
    data = [Example(x, y, n_labels) for x, y in data]
    random.shuffle(data)
    m = len(data) // 2
    train = data[:m]
    dev = data[m:]
    print('n_train: %s, n_dev: %s' % (len(train), len(dev)))
    print('n_types: %s, n_labels: %s' % (n_types, n_labels))
    print('learner:', LEARNER)
    print()
    tRNN = Actor([RNNFeatures(n_types)], foci, n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    baseline = EWMA(0.8)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.5))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.5))
    if LEARNER == LearnerOpts.AC:
        from macarico.lts.reinforce import AdvantageActorCritic, LinearValueFn
        baseline = LinearValueFn(policy.features)
        policy.vfa = baseline  # adds params to policy via nn.module
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)
    if LEARNER == LearnerOpts.DAGGER:
        learner = lambda: DAgger(HammingLossReference(), policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.TWISTED:
        learner = lambda: TwistedDAgger(HammingLossReference(), policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.MAXLIK:
        learner = lambda: MaximumLikelihood(HammingLossReference(), policy)
    elif LEARNER == LearnerOpts.AC:
        learner = lambda: AdvantageActorCritic(policy, baseline)
    elif LEARNER == LearnerOpts.REINFORCE:
        learner = lambda: Reinforce(policy, baseline)
    elif LEARNER == LearnerOpts.BANDITLOLS:
        learner = lambda: BanditLOLS(HammingLossReference(), policy,
                                     p_rollin_ref, p_rollout_ref,
                                     BanditLOLS.LEARN_DR,
                                     BanditLOLS.EXPLORE_UNIFORM,
                                     baseline)
    macarico.util.trainloop(
        training_data=train,
        dev_data=dev,
        policy=policy,
        Learner=learner,
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        n_epochs=10,
        train_eval_skip=1,
    )