Code example #1
File: test_banditlols.py  Project: yyht/macarico
def test1(learning_method, exploration):
    print
    print '# testing learning_method=%d exploration=%d' % (learning_method,
                                                           exploration)
    print
    n_types = 10
    n_labels = 4
    data = macarico.util.make_sequence_mod_data(100, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.001)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.99999))

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: BanditLOLS(
            HammingLossReference(),
            policy,
            p_rollin_ref,
            p_rollout_ref,
            learning_method,  # LEARN_IPS, LEARN_DR, LEARN_BIASED
            exploration,
        ),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=10,
    )
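
The two integer arguments select how BanditLOLS estimates counterfactual costs (the LEARN_* constants noted in the comment above) and how it explores actions (the EXPLORE_* constants used in examples #8 and #10 below). A minimal, hypothetical driver sweeping that grid might look like:

# Hypothetical driver; the constant names are assumed from the other
# examples on this page, not shown in this test file.
for learning_method in [BanditLOLS.LEARN_IPS, BanditLOLS.LEARN_DR,
                        BanditLOLS.LEARN_BIASED]:
    for exploration in [BanditLOLS.EXPLORE_UNIFORM,
                        BanditLOLS.EXPLORE_BOLTZMANN]:
        test1(learning_method, exploration)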
Code example #2
File: test_lols.py  Project: yyht/macarico
def test1():
    n_types = 10
    n_labels = 4
    print
    print '# test sequence labeler on mod data with LOLS'
    data = macarico.util.make_sequence_mod_data(20, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
    policy = LinearPolicy(tRNN, n_labels)
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.9))

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        learning_alg=lambda ex: LOLS.lols(ex, HammingLoss,
                                          HammingLossReference(), policy,
                                          p_rollin_ref, p_rollout_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=1,
    )
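
Note the interface difference from example #1: here the train loop is driven through learning_alg, a function called once per training example, rather than through a Learner factory. LOLS.lols takes the example together with the loss class, reference, policy, and both annealed reference probabilities, presumably because it manages its own roll-in/roll-out passes internally.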
Code example #3
File: test_mdp.py  Project: yyht/macarico
def test1(LEARNER=LearnerOpts.DAGGER):
    print
    print 'Running test 1 with learner=%s' % LEARNER
    print '======================================================='

    n_states = 3
    n_actions = 2

    tRNN = TransitionRNN([mdp.MDPFeatures(n_states, noise_rate=0.5)],
                         [AttendAt(lambda _: 0, 's')], n_actions)
    policy = LinearPolicy(tRNN, n_actions)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    p_rollout_ref = stochastic(ExponentialAnnealing(1))

    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    test_mdp, pi_ref = make_ross_mdp()

    if LEARNER == LearnerOpts.DAGGER:
        learner = lambda: DAgger(pi_ref, policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.TWISTED:
        learner = lambda: TwistedDAgger(pi_ref, policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.MAXLIK:
        learner = lambda: MaximumLikelihood(pi_ref, policy)
    elif LEARNER == LearnerOpts.AGGREVATE:
        learner = lambda: AggreVaTe(pi_ref, policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.LOLS:
        learner = None

    losses = []
    for epoch in xrange(101):
        optimizer.zero_grad()
        if learner is not None:
            l = learner()
            env = test_mdp.mk_env()
            res = env.run_episode(l)
            loss = mdp.MDPLoss()(test_mdp, env)
            l.update(loss)
        elif LEARNER == LearnerOpts.LOLS:
            lols(test_mdp, mdp.MDPLoss, pi_ref, policy, p_rollin_ref,
                 p_rollout_ref)

        optimizer.step()
        p_rollin_ref.step()
        p_rollout_ref.step()

        env = test_mdp.mk_env()
        res = env.run_episode(policy)
        loss = mdp.MDPLoss()(test_mdp, env)
        losses.append(loss)
        if epoch % 20 == 0:
            print epoch, sum(losses[-100:]) / len(losses[-100:]), '\t', res
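
A hypothetical driver that exercises every branch of the learner dispatch above:

# Hypothetical driver; the LearnerOpts values are the ones dispatched in test1.
for opt in [LearnerOpts.DAGGER, LearnerOpts.TWISTED, LearnerOpts.MAXLIK,
            LearnerOpts.AGGREVATE, LearnerOpts.LOLS]:
    test1(LEARNER=opt)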
Code example #4
File: test_lols.py  Project: yyht/macarico
def test2():
    # aggrevate
    print
    print '# test sequence labeler on mod data with AggreVaTe'
    n_types = 10
    n_labels = 4

    data = macarico.util.make_sequence_mod_data(100, 5, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    tRNN = TransitionRNN(
        [RNNFeatures(n_types)],
        [AttendAt()],
        n_labels,
    )
    policy = LinearPolicy(tRNN, n_labels)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: AggreVaTe(HammingLossReference(), policy, p_rollin_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=4,
        train_eval_skip=1,
    )
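
Unlike the LOLS tests, AggreVaTe is constructed here with only a roll-in reference probability and no roll-out schedule, consistent with AggreVaTe always rolling out with the reference to estimate cost-to-go at the deviation step.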
Code example #5
def test0():
    print
    print '# test sequence labeler on mod data with DAgger'
    n_types = 10
    n_labels = 4

    data = [
        Example(x, y, n_labels)
        for x, y in macarico.util.make_sequence_mod_data(
            100, 5, n_types, n_labels)
    ]

    tRNN = Actor([RNNFeatures(n_types, output_field='mytok_rnn')],
                 [AttendAt(field='mytok_rnn')], n_labels)
    policy = LinearPolicy(tRNN, n_labels)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.99))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=4,
        train_eval_skip=1,
    )
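
The feature and attention modules here are wired together purely by field name: RNNFeatures writes its output into mytok_rnn, and AttendAt reads from that same field, so the two stages compose without referencing each other directly.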
Code example #6
def test_wsj():
    print
    print '# test on wsj subset'
    from macarico.data import nlp_data
    tr, de, te, vocab, label_id = \
        nlp_data.read_wsj_pos('data/wsj.pos', n_tr=50, n_de=50, n_te=0)

    n_types = len(vocab)
    n_labels = len(label_id)

    print 'n_train: %s, n_dev: %s, n_test: %s' % (len(tr), len(de), len(te))
    print 'n_types: %s, n_labels: %s' % (n_types, n_labels)

    tRNN = TransitionRNN([RNNFeatures(n_types, rnn_type='RNN')], [AttendAt()],
                         n_labels)
    policy = LinearPolicy(tRNN, n_labels)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    macarico.util.trainloop(
        training_data=tr,
        dev_data=de,
        policy=policy,
        Learner=lambda: DAgger(HammingLossReference(), policy, p_rollin_ref),
        # Learner=lambda: MaximumLikelihood(HammingLossReference(), policy),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step],
        n_epochs=10,
        # train_eval_skip=None,
    )
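
This test expects data/wsj.pos to exist relative to the working directory; read_wsj_pos is asked for 50 training sentences, 50 dev sentences, and no test sentences (n_te=0), so the printed n_test will be 0.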
Code example #7
File: test_bootstrap.py  Project: yyht/macarico
def test1(learning_method, exploration):
    print
    print '# testing learning_method=%d exploration=%d' % (learning_method,
                                                           exploration)
    print
    n_types = 10
    n_labels = 2
    data = macarico.util.make_sequence_mod_data(100, 1, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    bag_size = 5
    tRNN = [
        TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
        for i in range(bag_size)
    ]
    policy = BootstrapPolicy(tRNN, n_labels)
    #policy = LinearPolicy(tRNN[0], n_labels)
    #print 'policy=', policy
    #print 'parameters=', list(policy.parameters())
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    p_rollin_ref = stochastic(ExponentialAnnealing(0.9))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.99999))

    macarico.util.trainloop(
        training_data=data[:len(data) // 2],
        dev_data=data[len(data) // 2:],
        policy=policy,
        Learner=lambda: BanditLOLS(
            HammingLossReference(),
            policy,
            p_rollin_ref,
            p_rollout_ref,
            learning_method,
            exploration,
        ),
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_batch=[p_rollin_ref.step, p_rollout_ref.step],
        train_eval_skip=1,
        n_epochs=2,
    )
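
This is the bootstrap variant of example #1: the policy is a BootstrapPolicy over a bag of five independently initialized TransitionRNNs (the commented-out lines fall back to a single LinearPolicy for debugging), and the annealing schedules are stepped per batch (run_per_batch) rather than per epoch.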
Code example #8
def test1(use_bootstrap):
    n_types = 10
    n_labels = 4
    print
    print '# test sequence labeler on mod data with Reslope and', (
        'bootstrap' if use_bootstrap else 'boltzmann'), 'exploration'
    data = macarico.util.make_sequence_mod_data(3000, 6, n_types, n_labels)
    data = [Example(x, y, n_labels) for x, y in data]

    if not use_bootstrap:
        tRNN = TransitionRNN([RNNFeatures(n_types)], [AttendAt()], n_labels)
        policy = LinearPolicy(tRNN, n_labels)
    else:
        rnns = [
            TransitionRNN([RNNFeatures(n_types)], [AttendAt()],
                          n_labels,
                          h_name='h%d' % i) for i in xrange(5)
        ]
        policy = BootstrapPolicy(rnns, n_labels)

    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    p_ref = stochastic(ExponentialAnnealing(0.9))

    macarico.util.trainloop(
        training_data   = data[:2048],
        dev_data        = data[2048:],
        policy          = policy,
        Learner         = lambda: Reslope(HammingLossReference(), policy, p_ref,
                                          exploration=BanditLOLS.EXPLORE_BOOTSTRAP
                                                      if use_bootstrap else
                                                      BanditLOLS.EXPLORE_BOLTZMANN),
        losses          = HammingLoss(),
        optimizer       = optimizer,
        run_per_epoch   = [p_ref.step],
        train_eval_skip = 1,
        bandit_evaluation = True,
        n_epochs = 1,
    )
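
A hypothetical driver comparing the two exploration modes:

# Hypothetical driver (not part of the original test file).
test1(use_bootstrap=True)   # bootstrap exploration over a bag of five RNNs
test1(use_bootstrap=False)  # Boltzmann exploration with a single policy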
Code example #9
def test3(labeled=False,
          use_tag_stream=False,
          big_test=None,
          load_embeddings=None):
    # TODO: limit to short sentences
    print()
    print('# Testing wsj parser, labeled=%s, use_tag_stream=%s, load_embeddings=%s' \
        % (labeled, use_tag_stream, load_embeddings))
    if big_test is None:
        train, dev, _, word_vocab, tag_vocab, rel_vocab = \
          nlp_data.read_wsj_deppar(labeled=labeled, n_tr=50, n_de=50, n_te=0)
    else:
        train, dev, _, word_vocab, tag_vocab, rel_vocab = \
          nlp_data.read_wsj_deppar(labeled=labeled, min_freq=2)
        if big_test == 'medium':
            train = train[:200]
        elif big_test != 'big':
            train = train[:1000]

    initial_embeddings = None
    learn_embeddings = True
    d_emb, d_rnn, d_actor = 256, 256, 256
    if load_embeddings is not None and load_embeddings != 'None':
        learn_embeddings = True
        if load_embeddings[0] == '!':
            learn_embeddings = False
            load_embeddings = load_embeddings[1:]
        initial_embeddings = nlp_data.read_embeddings(load_embeddings,
                                                      word_vocab)

    n_actions = 3 + len(rel_vocab or [])
    print('|word vocab| = %d, |tag vocab| = %d, n_actions = %d' %
          (len(word_vocab), len(tag_vocab), n_actions))

    # construct policy to learn
    word_embed = EmbeddingFeatures(
        len(word_vocab),
        d_emb=d_emb if initial_embeddings is None else None,
        initial_embeddings=initial_embeddings,
        learn_embeddings=learn_embeddings)
    word_features = RNN(word_embed, d_rnn, dropout=0.2)
    #word_features = DilatedCNN(word_embed)
    attention = [DependencyAttention(word_features)]
    if use_tag_stream:
        #tag_features = RNN(BOWFeatures(len(tag_vocab), input_field='tags'), d_rnn=10)
        tag_features = BOWFeatures(len(tag_vocab),
                                   input_field='tags',
                                   window_size=2)
        #tag_features = DilatedCNN(BOWFeatures(len(tag_vocab), input_field='tags'))
        attention.append(DependencyAttention(tag_features))

    #actor = BOWActor(attention, n_actions)
    actor = RNNActor(attention, n_actions, d_hid=d_actor)
    policy = CSOAAPolicy(actor, n_actions)

    learner = DAgger(policy,
                     AttachmentLossReference(),
                     p_rollin_ref=ExponentialAnnealing(0.99999))
    optimizer = torch.optim.Adam(policy.parameters(), lr=0.001)

    def print_it():
        return  # debugging aid, disabled; drop this return to print parameter norms
        print(sum((p.norm().data[0] for p in policy.parameters())))

    print_it()
    # TODO: move this to a unit test.
    print('reference loss on train = %g' % \
        util.evaluate(DependencyParser, train, AttachmentLossReference(), AttachmentLoss()))

    if big_test == 'predict':
        print('stupid policy loss on train = %g' % \
            util.evaluate(DependencyParser, train, AttachmentLossReference(), AttachmentLoss()))
        return


    # print(learner)
    util.TrainLoop(
        DependencyParser,
        policy,
        learner,
        optimizer,
        losses=[AttachmentLoss, GlobalAttachmentLoss],
        progress_bar=False,
        minibatch_size=1,
        print_freq=1,
        # reshuffle=False,
        # checkpoint_per_batch=(1, '.tmp.checkpoint'),
    ).train(train, dev, 10, '.tmp.checkpoint')
    # TODO: fix bug in progress_bar when n_tr > print_freq
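
A hypothetical invocation matching the small default code path (50 train / 50 dev sentences, no pretrained embeddings); the argument values are illustrative only:

# Hypothetical driver (not part of the original test file).
test3(labeled=True, use_tag_stream=True, big_test=None, load_embeddings=None)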
Code example #10
def test1(task=0, LEARNER=LearnerOpts.DAGGER):
    print
    print 'Running test 1 (v%d) with learner=%s' % (task, LEARNER)
    print '======================================================='

    if task == 0:
        print 'Sequence reversal task, easy version'
        data = macarico.util.make_sequence_reversal_data(100, 5, 5)
        foci = [AttendAt(lambda s: s.N - s.n - 1)]
    elif task == 1:
        print 'Sequence reversal task, hard version'
        data = macarico.util.make_sequence_reversal_data(1000, 5, 5)
        foci = [AttendAt()]
    elif task == 2:
        print 'Sequence reversal task, multi-focus version'
        data = macarico.util.make_sequence_reversal_data(100, 5, 5)
        foci = [AttendAt(), AttendAt(lambda s: s.N - s.n - 1)]
    elif task == 3:
        print 'Memoryless task, add-one mod K'
        data = macarico.util.make_sequence_mod_data(50, 5, 10, 3)
        foci = [AttendAt()]
    elif task == 4:
        print 'Matti-style data'
        data = make_matti_data(1000, 20, 2, 0.05)
        foci = [AttendAt()]

    n_types = 1 + max({x for X, _ in data for x in X})
    n_labels = 1 + max({y for _, Y in data for y in Y})

    data = [Example(x, y, n_labels) for x, y in data]

    random.shuffle(data)
    m = len(data) // 2
    train = data[:m]
    dev = data[m:]

    print 'n_train: %s, n_dev: %s' % (len(train), len(dev))
    print 'n_types: %s, n_labels: %s' % (n_types, n_labels)
    print 'learner:', LEARNER
    print

    tRNN = Actor([RNNFeatures(n_types)], foci, n_labels)
    policy = LinearPolicy(tRNN, n_labels)

    baseline = EWMA(0.8)
    p_rollin_ref = stochastic(ExponentialAnnealing(0.5))
    p_rollout_ref = stochastic(ExponentialAnnealing(0.5))

    if LEARNER == LearnerOpts.AC:
        from macarico.lts.reinforce import AdvantageActorCritic, LinearValueFn
        baseline = LinearValueFn(policy.features)
        policy.vfa = baseline  # adds params to policy via nn.module

    optimizer = torch.optim.Adam(policy.parameters(), lr=0.01)

    if LEARNER == LearnerOpts.DAGGER:
        learner = lambda: DAgger(HammingLossReference(), policy, p_rollin_ref)
    elif LEARNER == LearnerOpts.TWISTED:
        learner = lambda: TwistedDAgger(HammingLossReference(), policy,
                                        p_rollin_ref)
    elif LEARNER == LearnerOpts.MAXLIK:
        learner = lambda: MaximumLikelihood(HammingLossReference(), policy)
    elif LEARNER == LearnerOpts.AC:
        learner = lambda: AdvantageActorCritic(policy, baseline)
    elif LEARNER == LearnerOpts.REINFORCE:
        learner = lambda: Reinforce(policy, baseline)
    elif LEARNER == LearnerOpts.BANDITLOLS:
        learner = lambda: BanditLOLS(HammingLossReference(), policy,
                                     p_rollin_ref, p_rollout_ref,
                                     BanditLOLS.LEARN_DR,
                                     BanditLOLS.EXPLORE_UNIFORM, baseline)

    macarico.util.trainloop(
        training_data=train,
        dev_data=dev,
        policy=policy,
        Learner=learner,
        losses=HammingLoss(),
        optimizer=optimizer,
        run_per_epoch=[p_rollin_ref.step, p_rollout_ref.step],
        n_epochs=10,
        train_eval_skip=1,
    )
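
A hypothetical sweep over the five task variants with the default learner:

# Hypothetical driver; tasks 0-4 are the ones dispatched at the top of test1.
for task in range(5):
    test1(task=task, LEARNER=LearnerOpts.DAGGER)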