def test1(learning_method, exploration): print print '# testing learning_method=%d exploration=%d' % (learning_method, exploration) print n_types = 10 n_labels = 4 data = macarico.util.make_sequence_mod_data(100, 6, n_types, n_labels) data = [Example(x, y, n_labels) for x, y in data] tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels) policy = LinearPolicy(tRNN, n_labels) optimizer = torch.optim.Adam(policy.parameters(), lr=0.001) p_rollin_ref = stochastic(ExponentialAnnealing(0.9)) p_rollout_ref = stochastic(ExponentialAnnealing(0.99999)) macarico.util.trainloop( training_data=data[:len(data) // 2], dev_data=data[len(data) // 2:], policy=policy, Learner=lambda: BanditLOLS( HammingLossReference(), policy, p_rollin_ref, p_rollout_ref, learning_method, # LEARN_IPS, LEARN_DR, LEARN_BIASED exploration, ), losses=HammingLoss(), optimizer=optimizer, run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step], train_eval_skip=10, )
def test1(): n_types = 10 n_labels = 4 print print '# test sequence labeler on mod data with LOLS' data = macarico.util.make_sequence_mod_data(20, 6, n_types, n_labels) data = [Example(x, y, n_labels) for x, y in data] tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels) policy = LinearPolicy(tRNN, n_labels) optimizer = torch.optim.Adam(policy.parameters(), lr=0.01) p_rollin_ref = stochastic(ExponentialAnnealing(0.9)) p_rollout_ref = stochastic(ExponentialAnnealing(0.9)) macarico.util.trainloop( training_data=data[:len(data) // 2], dev_data=data[len(data) // 2:], policy=policy, learning_alg=lambda ex: LOLS.lols(ex, HammingLoss, HammingLossReference(), policy, p_rollin_ref, p_rollout_ref), losses=HammingLoss(), optimizer=optimizer, run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step], train_eval_skip=1, )
def test1(LEARNER=LearnerOpts.DAGGER): print print 'Running test 1 with learner=%s' % LEARNER print '=======================================================' n_states = 3 n_actions = 2 tRNN = TransitionRNN([mdp.MDPFeatures(n_states, noise_rate=0.5)], [AttendAt(lambda _: 0, 's')], n_actions) policy = LinearPolicy(tRNN, n_actions) p_rollin_ref = stochastic(ExponentialAnnealing(0.99)) p_rollout_ref = stochastic(ExponentialAnnealing(1)) optimizer = torch.optim.Adam(policy.parameters(), lr=0.01) test_mdp, pi_ref = make_ross_mdp() if LEARNER == LearnerOpts.DAGGER: learner = lambda: DAgger(pi_ref, policy, p_rollin_ref) elif LEARNER == LearnerOpts.TWISTED: learner = lambda: TwistedDAgger(pi_ref, policy, p_rollin_ref) elif LEARNER == LearnerOpts.MAXLIK: learner = lambda: MaximumLikelihood(pi_ref, policy) elif LEARNER == LearnerOpts.AGGREVATE: learner = lambda: AggreVaTe(pi_ref, policy, p_rollin_ref) elif LEARNER == LearnerOpts.LOLS: learner = None losses = [] for epoch in xrange(101): optimizer.zero_grad() if learner is not None: l = learner() env = test_mdp.mk_env() res = env.run_episode(l) loss = mdp.MDPLoss()(test_mdp, env) l.update(loss) elif LEARNER == LearnerOpts.LOLS: lols(test_mdp, mdp.MDPLoss, pi_ref, policy, p_rollin_ref, p_rollout_ref) optimizer.step() p_rollin_ref.step() p_rollout_ref.step() env = test_mdp.mk_env() res = env.run_episode(policy) loss = mdp.MDPLoss()(test_mdp, env) losses.append(loss) if epoch % 20 == 0: print epoch, sum(losses[-100:]) / len(losses[-100:]), '\t', res
def test2(): # aggrevate print print '# test sequence labeler on mod data with AggreVaTe' n_types = 10 n_labels = 4 data = macarico.util.make_sequence_mod_data(100, 5, n_types, n_labels) data = [Example(x, y, n_labels) for x, y in data] tRNN = TransitionRNN( [RNNFeatures(n_types)], [AttendAt()], n_labels, ) policy = LinearPolicy(tRNN, n_labels) p_rollin_ref = stochastic(ExponentialAnnealing(0.99)) optimizer = torch.optim.Adam(policy.parameters(), lr=0.01) macarico.util.trainloop( training_data=data[:len(data) // 2], dev_data=data[len(data) // 2:], policy=policy, Learner=lambda: AggreVaTe(HammingLossReference(), policy, p_rollin_ref ), losses=HammingLoss(), optimizer=optimizer, run_per_epoch=[p_rollin_ref.step], n_epochs=4, train_eval_skip=1, )
def test0(): print print '# test sequence labeler on mod data with DAgger' n_types = 10 n_labels = 4 data = [ Example(x, y, n_labels) for x, y in macarico.util.make_sequence_mod_data( 100, 5, n_types, n_labels) ] tRNN = Actor([RNNFeatures(n_types, output_field='mytok_rnn')], [AttendAt(field='mytok_rnn')], n_labels) policy = LinearPolicy(tRNN, n_labels) p_rollin_ref = stochastic(ExponentialAnnealing(0.99)) optimizer = torch.optim.Adam(policy.parameters(), lr=0.01) macarico.util.trainloop( training_data=data[:len(data) // 2], dev_data=data[len(data) // 2:], policy=policy, Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref), losses=HammingLoss(), optimizer=optimizer, run_per_epoch=[p_rollin_ref.step], n_epochs=4, train_eval_skip=1, )
def test_wsj(): print print '# test on wsj subset' from macarico.data import nlp_data tr,de,te,vocab,label_id = \ nlp_data.read_wsj_pos('data/wsj.pos', n_tr=50, n_de=50, n_te=0) n_types = len(vocab) n_labels = len(label_id) print 'n_train: %s, n_dev: %s, n_test: %s' % (len(tr), len(de), len(te)) print 'n_types: %s, n_labels: %s' % (n_types, n_labels) tRNN = TransitionRNN([RNNFeatures(n_types, rnn_type='RNN')], [AttendAt()], n_labels) policy = LinearPolicy(tRNN, n_labels) p_rollin_ref = stochastic(ExponentialAnnealing(0.9)) optimizer = torch.optim.Adam(policy.parameters(), lr=0.01) macarico.util.trainloop( training_data=tr, dev_data=de, policy=policy, Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref), # Learner = lambda: MaximumLikelihood(HammingLossReference(), policy), losses=HammingLoss(), optimizer=optimizer, run_per_epoch=[p_rollin_ref.step], n_epochs=10, # train_eval_skip = None, )
def test1(learning_method, exploration): print print '# testing learning_method=%d exploration=%d' % (learning_method, exploration) print n_types = 10 n_labels = 2 data = macarico.util.make_sequence_mod_data(100, 1, n_types, n_labels) data = [Example(x, y, n_labels) for x, y in data] bag_size = 5 tRNN = [ TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels) for i in range(bag_size) ] policy = BootstrapPolicy(tRNN, n_labels) #policy = LinearPolicy(tRNN[0], n_labels) #print 'policy=', policy #print 'parameters=', list(policy.parameters()) optimizer = torch.optim.Adam(policy.parameters(), lr=0.01) p_rollin_ref = stochastic(ExponentialAnnealing(0.9)) p_rollout_ref = stochastic(ExponentialAnnealing(0.99999)) macarico.util.trainloop( training_data=data[:len(data) // 2], dev_data=data[len(data) // 2:], policy=policy, Learner=lambda: BanditLOLS( HammingLossReference(), policy, p_rollin_ref, p_rollout_ref, learning_method, exploration, ), losses=HammingLoss(), optimizer=optimizer, run_per_batch=[p_rollin_ref.step, p_rollout_ref.step], train_eval_skip=1, n_epochs=2, )
def test1(use_bootstrap): n_types = 10 n_labels = 4 print print '# test sequence labeler on mod data with Reslope and', ( 'bootstrap' if use_bootstrap else 'boltzmann'), 'exploration' data = macarico.util.make_sequence_mod_data(3000, 6, n_types, n_labels) data = [Example(x, y, n_labels) for x, y in data] if not use_bootstrap: tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels) policy = LinearPolicy(tRNN, n_labels) else: rnns = [ TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels, h_name='h%d' % i) for i in xrange(5) ] policy = BootstrapPolicy(rnns, n_labels) optimizer = torch.optim.Adam(policy.parameters(), lr=0.01) p_ref = stochastic(ExponentialAnnealing(0.9)) macarico.util.trainloop( training_data = data[:2048], dev_data = data[2048:], policy = policy, Learner = lambda: Reslope(HammingLossReference(), policy, p_ref, exploration=BanditLOLS.EXPLORE_BOOTSTRAP if use_bootstrap else \ BanditLOLS.EXPLORE_BOLTZMANN ), losses = HammingLoss(), optimizer = optimizer, run_per_epoch = [p_ref.step], train_eval_skip = 1, bandit_evaluation = True, n_epochs = 1, )
def test3(labeled=False, use_tag_stream=False, big_test=None,
          load_embeddings=None):
    """Train a DAgger dependency parser on WSJ data and print its losses.

    Args:
        labeled: forwarded to ``nlp_data.read_wsj_deppar``.
        use_tag_stream: when true, a second ``DependencyAttention`` over
            bag-of-words features read from the ``'tags'`` input field is
            added next to the word attention.
        big_test: ``None`` requests 50 train / 50 dev / 0 test examples.
            Any other value reads the data with ``min_freq=2`` and then
            keeps the first 200 training examples for ``'medium'``, all of
            them for ``'big'``, and the first 1000 otherwise.  ``'predict'``
            additionally returns right after printing the untrained
            policy's loss, without training.
        load_embeddings: path handed to ``nlp_data.read_embeddings``;
            ``None`` or the string ``'None'`` disables loading.  A leading
            ``'!'`` is stripped from the path and turns off embedding
            learning (``learn_embeddings=False``).
    """
    # TODO: limit to short sentences
    print()
    print('# Testing wsj parser, labeled=%s, use_tag_stream=%s, load_embeddings=%s' \
          % (labeled, use_tag_stream, load_embeddings))

    if big_test is None:
        train, dev, _, word_vocab, tag_vocab, rel_vocab = \
            nlp_data.read_wsj_deppar(labeled=labeled, n_tr=50, n_de=50, n_te=0)
    else:
        train, dev, _, word_vocab, tag_vocab, rel_vocab = \
            nlp_data.read_wsj_deppar(labeled=labeled, min_freq=2)
        if big_test == 'medium':
            train = train[:200]
        elif big_test != 'big':
            train = train[:1000]

    initial_embeddings = None
    learn_embeddings = True
    d_emb, d_rnn, d_actor = 256, 256, 256
    if load_embeddings is not None and load_embeddings != 'None':
        if load_embeddings[0] == '!':
            # '!path' means: load the embeddings but keep them fixed.
            learn_embeddings = False
            load_embeddings = load_embeddings[1:]
        initial_embeddings = nlp_data.read_embeddings(load_embeddings,
                                                      word_vocab)

    # rel_vocab may be falsy, in which case only the 3 base actions remain.
    n_actions = 3 + len(rel_vocab or [])

    print('|word vocab| = %d, |tag vocab| = %d, n_actions = %d' %
          (len(word_vocab), len(tag_vocab), n_actions))

    # construct policy to learn
    word_embed = EmbeddingFeatures(
        len(word_vocab),
        # d_emb is only passed when no pre-trained embeddings were loaded.
        d_emb=d_emb if initial_embeddings is None else None,
        initial_embeddings=initial_embeddings,
        learn_embeddings=learn_embeddings)
    word_features = RNN(word_embed, d_rnn, dropout=0.2)
    #word_features = DilatedCNN(word_embed)
    attention = [DependencyAttention(word_features)]

    if use_tag_stream:
        #tag_features = RNN(BOWFeatures(len(tag_vocab), input_field='tags'), d_rnn=10)
        tag_features = BOWFeatures(len(tag_vocab), input_field='tags',
                                   window_size=2)
        #tag_features = DilatedCNN(BOWFeatures(len(tag_vocab), input_field='tags'))
        attention.append(DependencyAttention(tag_features))

    #actor = BOWActor(attention, n_actions)
    actor = RNNActor(attention, n_actions, d_hid=d_actor)
    policy = CSOAAPolicy(actor, n_actions)
    learner = DAgger(policy, AttachmentLossReference(),
                     p_rollin_ref=ExponentialAnnealing(0.99999))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.001)

    def print_it():
        # Print the sum of the norms of all policy parameters.
        print(sum(p.norm().data[0] for p in policy.parameters()))

    print_it()

    # TODO: move this to a unit test.
    print('reference loss on train = %g' % \
          util.evaluate(DependencyParser, train, AttachmentLossReference(),
                        AttachmentLoss()))

    if big_test == 'predict':
        # Evaluate the (still untrained) policy itself.  This call used to
        # pass AttachmentLossReference() again, which merely repeated the
        # reference loss printed just above under a different label.
        print('stupid policy loss on train = %g' % \
              util.evaluate(DependencyParser, train, policy,
                            AttachmentLoss()))
        return

    # print(learner)
    # TODO fix bug in progress_bar when n_tr > print_freq
    util.TrainLoop(
        DependencyParser,
        policy,
        learner,
        optimizer,
        losses=[AttachmentLoss, GlobalAttachmentLoss],
        progress_bar=False,
        minibatch_size=1,
        print_freq=1,
        # reshuffle=False,
        # checkpoint_per_batch=(1, '.tmp.checkpoint'),
    ).train(train, dev, 10, '.tmp.checkpoint')
def test1(task=0, LEARNER=LearnerOpts.DAGGER): print print 'Running test 1 (v%d) with learner=%s' % (task, LEARNER) print '=======================================================' if task == 0: print 'Sequence reversal task, easy version' data = macarico.util.make_sequence_reversal_data(100, 5, 5) foci = [AttendAt(lambda s: s.N - s.n - 1)] elif task == 1: print 'Sequence reversal task, hard version' data = macarico.util.make_sequence_reversal_data(1000, 5, 5) foci = [AttendAt()] elif task == 2: print 'Sequence reversal task, multi-focus version' data = macarico.util.make_sequence_reversal_data(100, 5, 5) foci = [AttendAt(), AttendAt(lambda s: s.N - s.n - 1)] elif task == 3: print 'Memoryless task, add-one mod K' data = macarico.util.make_sequence_mod_data(50, 5, 10, 3) foci = [AttendAt()] elif task == 4: print 'Matti-style data' data = make_matti_data(1000, 20, 2, 0.05) foci = [AttendAt()] n_types = 1 + max({x for X, _ in data for x in X}) n_labels = 1 + max({y for _, Y in data for y in Y}) data = [Example(x, y, n_labels) for x, y in data] random.shuffle(data) m = len(data) // 2 train = data[:m] dev = data[m:] print 'n_train: %s, n_dev: %s' % (len(train), len(dev)) print 'n_types: %s, n_labels: %s' % (n_types, n_labels) print 'learner:', LEARNER print tRNN = Actor([RNNFeatures(n_types)], foci, n_labels) policy = LinearPolicy(tRNN, n_labels) baseline = EWMA(0.8) p_rollin_ref = stochastic(ExponentialAnnealing(0.5)) p_rollout_ref = stochastic(ExponentialAnnealing(0.5)) if LEARNER == LearnerOpts.AC: from macarico.lts.reinforce import AdvantageActorCritic, LinearValueFn baseline = LinearValueFn(policy.features) policy.vfa = baseline # adds params to policy via nn.module optimizer = torch.optim.Adam(policy.parameters(), lr=0.01) if LEARNER == LearnerOpts.DAGGER: learner = lambda: DAgger(HammingLossReference(), policy, p_rollin_ref) elif LEARNER == LearnerOpts.TWISTED: learner = lambda: TwistedDAgger(HammingLossReference(), policy, p_rollin_ref) elif LEARNER == 
LearnerOpts.MAXLIK: learner = lambda: MaximumLikelihood(HammingLossReference(), policy) elif LEARNER == LearnerOpts.AC: learner = lambda: AdvantageActorCritic(policy, baseline) elif LEARNER == LearnerOpts.REINFORCE: learner = lambda: Reinforce(policy, baseline) elif LEARNER == LearnerOpts.BANDITLOLS: learner = lambda: BanditLOLS(HammingLossReference( ), policy, p_rollin_ref, p_rollout_ref, BanditLOLS.LEARN_DR, BanditLOLS .EXPLORE_UNIFORM, baseline) macarico.util.trainloop( training_data=train, dev_data=dev, policy=policy, Learner=learner, losses=HammingLoss(), optimizer=optimizer, run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step], n_epochs=10, train_eval_skip=1, )