Beispiel #1
0
def train_model(model, loss_fn, X_train, t_train, X_val, t_val, num_epochs=1000, learning_rate=0.1, batch_size=100,
                weight_decay=0.001, print_every=10):
    """Train `model` by mini-batch gradient descent with weight decay.

    Every `print_every` epochs, report loss and classification error on the
    full training and validation sets.  Returns the trained model.
    """
    for ep in range(num_epochs):
        for X_batch, t_batch in util.get_batches(X_train, t_train, batch_size):
            # forward pass: compute the activations for this mini-batch
            act = model.compute_activations(X_batch)

            # backward pass: loss gradient w.r.t. outputs, then the
            # parameter gradients, then one gradient-descent step
            dLdz = loss_fn.derivatives(act['z'], t_batch)
            param_derivs = model.cost_derivatives(X_batch, act, dLdz)
            model.gradient_descent_update(param_derivs, learning_rate)

            # L2 regularization applied as a separate shrinkage step
            model.apply_weight_decay(weight_decay, learning_rate)

        if ep % print_every == 0:
            # evaluate the training loss and error
            act = model.compute_activations(X_train)
            train_loss = loss_fn.value(act['z'], t_train).mean()
            y = model.get_predictions(act)
            train_err = np.mean(y != t_train)

            # evaluate the validation loss and error
            act = model.compute_activations(X_val)
            val_loss = loss_fn.value(act['z'], t_val).mean()
            y = model.get_predictions(act)
            val_err = np.mean(y != t_val)

            # call form (single argument) prints identically under
            # Python 2 and 3; the original print *statement* was a
            # syntax error on Python 3
            print('Epoch {}; train_loss={:1.5f}, train_err={:1.5f}, val_loss={:1.5f}, val_err={:1.5f}'.format(
                ep, train_loss, train_err, val_loss, val_err))

    return model
Beispiel #2
0
 def fit(self, sess, saver, data, epochs):
     """Run `epochs` training epochs over `data`, checkpointing whenever the
     mean training loss improves on the best seen so far.

     Summaries are written to 'script/log'; checkpoints go to
     'script/rnn_check_points/<country_code>/rnn'.
     """
     # float('inf') guarantees the first epoch always checkpoints; the
     # previous finite sentinel (999) silently skipped saving whenever the
     # initial loss exceeded it
     prev_loss = float('inf')
     train_writer = tf.summary.FileWriter('script/log',
                                   sess.graph)
     x, y = self.preprocess_sequence_data(data)
     for epoch in range(epochs):
         print('Start epoch: {}'.format(epoch))
         train_loss = []
         # zero initial recurrent state: (batch, state_size * n_layers)
         state = np.zeros((self.config.batch_size, self.config.state_size * self.config.n_layers))
         for inputs_batch, labels_batch in util.get_batches(x, y, self.config.seq_len):
             # state is threaded through consecutive batches (stateful RNN)
             loss, state, log_summary = self.train(sess, inputs_batch, labels_batch, state)
             train_writer.add_summary(log_summary)
             train_loss.append(loss)
         train_loss = sum(train_loss) / len(train_loss)
         print('Epoch: {0} Training loss {1:.4f}'.format(epoch, train_loss))
         if train_loss < prev_loss:
             prev_loss = train_loss
             ckpt_dir = 'script/rnn_check_points/{}'.format(self.config.country_code)
             if not os.path.exists(ckpt_dir):
                 os.makedirs(ckpt_dir)
             saver.save(sess, ckpt_dir + '/rnn', global_step=self.global_step)
             print('Best training loss. Parameters saved.')
Beispiel #3
0
            # (fragment) apply the clipped gradients; the graph-building block
            # this line belongs to opens outside the visible source
            train_op_ = optimizer_.apply_gradients(capped_gradients_, global_step=global_step_)

# bookkeeping lists for the training run
loss_list = []
reward_list = []
loss_list_critic = []
reward_valid_list = []
count = 0

# initialize the critic network's variables inside its own graph/session
with critic_sess.as_default():
        with critic_graph.as_default():
            critic_sess.run(tf.global_variables_initializer())

# main training loop over padded mini-batches (body truncated below)
for epoch_i in range(epochs):
    for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in enumerate(
                util.get_batches(train_source, train_target, batch_size,
                                 source_vocab_to_int['<PAD>'],
                                 target_vocab_to_int['<PAD>'])):
        if batch_i == 0:
            with train_sess.as_default():
                with train_graph.as_default():
                    rewards_all = 0
                    # run inference over every validation batch
                    # (keep_prob 1.0 disables dropout at evaluation time)
                    for batch_j, (valid_sources_batch, valid_targets_batch, valid_sources_lengths, valid_targets_lengths) in enumerate(
                            util.get_batches(valid_source, valid_target, batch_size,
                                             source_vocab_to_int['<PAD>'],
                                             target_vocab_to_int['<PAD>'])):
                        batch_valid_logits = train_sess.run(
                            inference_logits,
                            {input_data: valid_sources_batch,
                             source_sequence_length: valid_sources_lengths,
                             target_sequence_length: valid_targets_lengths,
                             keep_prob: 1.0})
Beispiel #4
0
    # (fragment, Python 2) finish building the chunk vocabulary; the block
    # these indented lines belong to opens outside the visible source
    chunk_vocab.add(u"<chunk_unk>")
    print "chunk count:", chunk_vocab.size

# select a language-model variant according to the CLI mode
if args.mode == "baseline":
    lm = models.BaselineLanguageModel(model, args, vocab)
elif args.mode == "lattice":
    lm = models.LatticeLanguageModel(model, args, vocab, chunk_vocab)
elif args.mode == "memb":
    lm = models.MultiEmbLanguageModel(model, args, vocab)
else:
    raise Exception("unrecognized mode")

# optionally restore previously saved parameters
if args.load: model.populate(args.save)

# pre-batch the data unless we're only evaluating/debugging
if not args.evaluate and not args.debug:
    train_batches = util.get_batches(train_data, args.minibatch_size,
                                     args.max_batched_sentence_len)
    valid_batches = util.get_batches(valid_data, args.minibatch_size,
                                     args.max_batched_sentence_len)

# training-state bookkeeping
best_score = None
args.update_num = 0
train_accumulator = Accumulator(accs, disps)
_start = time.time()
for epoch_i in range(args.epochs):
    args.completed_epochs = epoch_i
    print "Epoch %d. Shuffling..." % epoch_i,
    # epoch 0 keeps the first batch in place; later epochs shuffle fully
    if epoch_i == 0: train_batches = util.shuffle_preserve_first(train_batches)
    else: random.shuffle(train_batches)
    print "done."

    # (truncated) per-batch training loop continues beyond this view
    for i, batch in enumerate(train_batches):
Beispiel #5
0
def train(args):
    """Train the QAxl question-answering model, or evaluate only.

    `args` supplies: seed, use_cuda, batch_size, lrate, eval, model_dir,
    data_dir, answer_file, load_model, epochs, decay_period, decay.
    Checkpoints the model whenever the dev F1 improves.
    """
    torch.manual_seed(args.seed)

    # Get data loader
    train_data, dev_data, word2id, id2word, char2id, new_args = data_loader(
        args)
    model = QAxl(new_args)

    if args.use_cuda:
        model = model.cuda()

    dev_batches = get_batches(dev_data, args.batch_size, evaluation=True)

    # Get optimizer and scheduler.
    # BUGFIX: materialize a list -- the original used filter(), a one-shot
    # iterator in Python 3 that the Adamax constructor exhausts, leaving
    # clip_grad_norm_ below with nothing to clip.
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adamax(parameters, lr=args.lrate)
    lrate = args.lrate

    if args.eval:
        # Evaluation-only path: restore weights, score the dev set, and stop.
        model.load_state_dict(torch.load(args.model_dir))
        model.eval()
        model.SelfEvaluate(dev_batches,
                           args.data_dir + 'dev_eval.json',
                           answer_file=args.answer_file,
                           drop_file=args.data_dir + 'drop.json',
                           dev=args.data_dir + 'dev.json')
        # sys.exit() instead of the site-module-only exit() builtin
        sys.exit()

    if args.load_model:
        model.load_state_dict(torch.load(args.model_dir))

    best_score = 0.0

    ## Training
    for epoch in range(1, args.epochs + 1):
        train_batches = get_batches(train_data, args.batch_size)
        dev_batches = get_batches(dev_data, args.batch_size, evaluation=True)

        model.train()
        for i, train_batch in enumerate(train_batches):
            loss = model(train_batch)
            model.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(parameters,
                                           new_args['grad_clipping'])
            optimizer.step()
            # NOTE(review): resetting parameters after every optimizer step is
            # unusual -- presumably a QAxl-specific re-projection; confirm.
            model.reset_parameters()

            if i % 100 == 0:
                print(
                    'epoch = %d,  loss = %.5f, step = %d, lrate = %.5f best_score = %.3f'
                    % (epoch, model.train_loss.value, i, lrate, best_score))
                sys.stdout.flush()

        # Dev-set evaluation; checkpoint on a new best F1.
        model.eval()
        exact_match_score, F1 = model.SelfEvaluate(
            dev_batches,
            args.data_dir + 'dev_eval.json',
            answer_file=args.answer_file,
            drop_file=args.data_dir + 'drop.json',
            dev=args.data_dir + 'dev-v2.0.json')

        if best_score < F1:
            best_score = F1
            print('saving %s ...' % args.model_dir)
            torch.save(model.state_dict(), args.model_dir)
        # Step-decay the learning rate every decay_period epochs
        # (epoch starts at 1, so the old `epoch > 0` guard was redundant).
        if epoch % args.decay_period == 0:
            lrate *= args.decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lrate
Beispiel #6
0
def train(epochs=20, clip=5, val_frac=0.1, print_every=100):
    """Train the module-level char-RNN `net` on the module-level `data`.

    Splits off the last `val_frac` of `data` for validation, clips gradient
    norms at `clip`, and prints train/validation loss every `print_every`
    steps.
    """
    global data
    net.train()

    # create training and validation data
    val_idx = int(len(data) * (1 - val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    counter = 0
    n_chars = len(net.chars)

    for e in range(epochs):
        # fresh hidden state at the start of each epoch
        h = net.init_hidden(batch_size)

        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1

            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            # BUGFIX: move both tensors with .to(device); the original used
            # targets.cuda(device), which raises on CPU-only machines.
            inputs, targets = inputs.to(device), targets.to(device)

            # detach hidden state so we don't backprop through all history
            h = tuple([each.data for each in h])
            net.zero_grad()
            # get the output from the model
            output, h = net(inputs, h)

            # calculate the loss and perform backprop
            loss = criterion(output,
                             targets.view(batch_size * seq_length).long())
            loss.backward()
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            # loss stats
            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    # One-hot encode our data and make them Torch tensors
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)

                    # Creating new variables for the hidden state, otherwise
                    # we'd backprop through the entire training history
                    val_h = tuple([each.data for each in val_h])

                    inputs, targets = x, y
                    if (on_gpu()):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(
                        output,
                        targets.view(batch_size * seq_length).long())

                    val_losses.append(val_loss.item())

                # reset to train mode after iterating through validation data
                net.train()

                print("Epoch: {}/{}...".format(e + 1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))
Beispiel #7
0
            # (fragment) apply the clipped gradients; the graph-building block
            # these indented lines belong to opens outside the visible source
            train_op_ = optimizer_.apply_gradients(capped_gradients_,
                                                   global_step=global_step_)

# initialize critic variables and register its ops for retrieval by name
with critic_sess.as_default():
    with critic_sess.graph.as_default():
        critic_sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        tf.add_to_collection('train_op_critic', train_op_)
        tf.add_to_collection('loss_critic', l_)

# bookkeeping for the loop below
loss_list = []
count = 0
# iterate padded mini-batches of the parallel corpus
for batch_i, (source_batch, target_batch, sources_lengths,
              targets_lengths) in enumerate(
                  util.get_batches(source_train, target_train, batch_size,
                                   source_vocab_to_int['<PAD>'],
                                   target_vocab_to_int['<PAD>'])):
    count += 1
    with train_sess.as_default():
        with train_sess.graph.as_default():

            # run inference for this source batch
            # (keep_prob 1.0 disables dropout at evaluation time)
            translate_logits = train_sess.run(
                logits, {
                    input_data: source_batch,
                    target_sequence_length: targets_lengths,
                    source_sequence_length: sources_lengths,
                    keep_prob: 1.0
                })

    # one decoded-length entry per batch element (processing truncated below)
    lens = [[translate_logits.shape[1]] * batch_size]
    lens = np.squeeze(lens)