Example #1
def train_network(params,
                  ntags,
                  train_data,
                  dev_set,
                  telemetry_file,
                  randstring,
                  very_common_tag=-1):
    global MIN_ACC
    prev_acc = 0
    m = params[0]
    t0 = time.time()  # time.clock() was removed in Python 3.8
    # train the network
    trainer = dy.SimpleSGDTrainer(m)
    total_loss = 0
    seen_instances = 0
    train_good = 0
    very_common_tag_count = 0
    for x_data, train_y in train_data:
        dy.renew_cg()
        output = build_network(params, x_data)
        # l2 regularization did not look promising at all, so it's commented out
        loss = -dy.log(
            output[train_y]
        )  #+ REG_LAMBDA * sum([dy.l2_norm(p) for p in params[2:]])
        if train_y == np.argmax(output.npvalue()):
            train_good += 1
        seen_instances += 1
        total_loss += loss.value()
        loss.backward()
        trainer.update()
        if seen_instances % 20000 == 0:
            # measure elapsed seconds
            secs = time.time() - t0
            t0 = time.time()
            good = case = 0
            max_dev_instances = 70 * 1000
            dev_instances = 0
            for x_tuple, dev_y in dev_set:
                output = build_network(params, x_tuple)
                y_hat = np.argmax(output.npvalue())
                case += 1
                if y_hat == dev_y and y_hat == very_common_tag:
                    case -= 1  # don't count this case
                    very_common_tag_count += 1
                elif y_hat == dev_y:
                    good += 1

                dev_instances += 1
                if dev_instances >= max_dev_instances:
                    break
            acc = float(good) / case
            print(
                "iterations: {}. train_accuracy: {} accuracy: {} avg loss: {} secs per 1000:{}"
                .format(seen_instances,
                        float(train_good) / 20000, acc,
                        total_loss / (seen_instances + 1), secs / 20))
            train_good = 0
            if acc > MIN_ACC and acc > prev_acc:
                print("saving.")
                dy.save("params_" + randstring, list(params)[1:])
                prev_acc = acc

            telemetry_file.write("{}\t{}\t{}\t{}\n".format(
                seen_instances, acc, total_loss / (seen_instances + 1),
                secs / 20))
            print("very common tag count: {}".format(very_common_tag_count))
Example #2
    def train_batched(self, tasks, batch_size, scale_gradient_factor,
                      validation_data, seqs_trg, early_stopping, patience,
                      num_epochs, min_num_epochs, num_updates, prob_main_task,
                      prob_adv):
        trainer = dn.SimpleSGDTrainer(self.model)

        # stores best observed validation accuracy
        val_best = 0
        # stores the number of iterations without improvement
        no_improvement = 0
        val_prev = 0

        for epoch in range(num_epochs):
            sum_losses = 0
            adversarial_loss = 0
            losses_prediction_task = []
            losses_aux_task = []
            batch_dict = self.generate_batches_across_tasks(tasks, batch_size)

            # number of updates is twice the length of the main task batch list
            num_updates = len(batch_dict[self.prediction_layer]) * 2
            print(num_updates)
            #logging.INFO('Number of updates to do: {}'.format(num_updates))
            # sample batches according to some schema
            update_counter = 0
            while update_counter <= num_updates:
                update_counter += 1

                # with prob 1-prob_adv, do a task update
                outcome = np.random.binomial(1, prob_adv, size=None)
                if outcome == 0:
                    task_id, batch_ids = self.sample_task_batch(
                        batch_dict, prob_main_task=prob_main_task)
                    losses = []
                    dn.renew_cg()
                    # iterate through the batch
                    for i in batch_ids:
                        seq = tasks[task_id].train_seqs[i]
                        label = tasks[task_id].train_labels[i]
                        loss = self.compute_loss_multilabel(
                            task_id, seq, label)
                        losses.append(loss)

                    batch_loss = dn.esum(losses) / len(batch_ids)
                    batch_loss_value = batch_loss.value()
                    batch_loss.backward()
                    trainer.update()
                    sum_losses += batch_loss_value

                    if task_id == self.prediction_layer:
                        losses_prediction_task.append(batch_loss_value)
                    else:
                        losses_aux_task.append(batch_loss_value)
                else:
                    # do adversarial step
                    losses = []
                    dn.renew_cg()
                    seqs, labels = self.generate_adversarial_batch(
                        seqs_src=tasks[self.src_domain].train_seqs,
                        seqs_trg=seqs_trg,
                        batch_size=batch_size)
                    for i in range(len(seqs)):
                        seq = seqs[i]
                        label = labels[i]
                        loss = self.compute_loss_multilabel(task='adversarial',
                                                            seq=seq,
                                                            multi_y=label)
                        losses.append(loss)
                    batch_loss = dn.esum(losses) / len(seqs)
                    batch_loss_value = batch_loss.value()
                    batch_loss.backward()
                    trainer.update()
                    adversarial_loss += batch_loss_value

            # compute the validation accuracy to monitor early stopping
            # use the micro averaged f as criterion
            res = evaluate_model_predictions(
                self.predict(self.main_task, validation_data['seq']),
                validation_data['label'], validation_data['labelset'])
            f_avg = res['f_avg']
            logging.info(
                'Epoch {}. Sum loss: {}. Avg loss: {}. Avg loss predtask {}. Avg loss aux tasks: {}. No improv: {}. Best f_val: {}. Avg f_val: {}'
                .format(epoch, sum_losses, sum_losses / num_updates,
                        np.mean(losses_prediction_task),
                        np.mean(losses_aux_task), no_improvement, val_best,
                        f_avg))
            logging.info(
                'Epoch {}. Adv loss: {}. Avg loss: {}. Avg loss predtask {}. Avg loss aux tasks: {}. No improv: {}. Best f_val: {}. Avg f_val: {}'
                .format(epoch, adversarial_loss, sum_losses / num_updates,
                        np.mean(losses_prediction_task),
                        np.mean(losses_aux_task), no_improvement, val_best,
                        f_avg))

            # init early stopping after min number of epochs
            if epoch == min_num_epochs - 1:
                val_prev = f_avg
                no_improvement = 0
                self.save(self.exp_path)

            # if early_stopping:
            if f_avg <= val_prev:
                no_improvement += 1
                if early_stopping:
                    if no_improvement >= patience and epoch > min_num_epochs:
                        break
            else:
                if epoch >= min_num_epochs:
                    self.save(self.exp_path)
                no_improvement = 0
                if f_avg >= val_best:
                    val_best = f_avg
                val_prev = f_avg

        return epoch, f_avg, sum_losses, no_improvement, val_best
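
Both branches of the loop above (task update and adversarial update) follow the same manual mini-batching idiom: one computation graph per batch, per-example losses collected into a list, averaged with esum, then a single backward pass and parameter update. Stripped of the multi-task logic, and assuming a hypothetical compute_loss(x, y) that returns a scalar loss expression, the idiom looks like this:

import dynet as dn

def train_batch(trainer, compute_loss, batch):
    """One SGD step over a list of (x, y) pairs."""
    dn.renew_cg()                                  # one graph per batch
    losses = [compute_loss(x, y) for x, y in batch]
    batch_loss = dn.esum(losses) / len(losses)     # mean loss over the batch
    batch_loss_value = batch_loss.value()          # forward pass
    batch_loss.backward()                          # gradients for the whole batch
    trainer.update()                               # single parameter update
    return batch_loss_value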
Example #3
        #last = dy.cmult(layers[-1], me) + e

        #print("gradient", last.value())
        #log_loss = dy.log(last + epsilon)
        #print(log_loss.value())
        ys = dy.vecInput(self.dim_out)
        ys.set([1 if i in targets else 0 for i in range(self.dim_out)])
        loss = dy.binary_log_loss(layers[-1], ys)
        return dy.sum_elems(loss)


if __name__ == "__main__":
    import dynet as dy

    model = dy.Model()
    trainer = dy.SimpleSGDTrainer(model)

    classifier = MLP_sigmoid(2, 2, 2, 10, dy.rectify, model)

    dataset = [([-1, -1], {0}), ([-1, 1], {1}), ([1, -1], {1}), ([1, 1], {0})]

    for e in range(10040):
        for xs, y in dataset:
            dy.renew_cg()
            x = dy.vecInput(2)
            x.set(xs)
            l = classifier.get_loss(x, y)
            l.backward()
            trainer.update()

        loss = 0
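
get_loss above turns the set of gold labels into a multi-hot target vector and scores it against the sigmoid outputs with binary_log_loss. The loss construction in isolation, as a sketch with toy numbers standing in for the MLP's output layer:

import dynet as dy

dy.renew_cg()
dim_out = 2
targets = {0}                                # gold label set, as in the dataset above
scores = dy.inputVector([0.3, -1.2])         # stand-in for layers[-1] before the sigmoid
probs = dy.logistic(scores)                  # element-wise sigmoid
ys = dy.vecInput(dim_out)
ys.set([1 if i in targets else 0 for i in range(dim_out)])
loss = dy.sum_elems(dy.binary_log_loss(probs, ys))
print(loss.value())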
Example #4
params_decoder["R"] = pc.add_parameters((VOCAB_SIZE_out, HIDDEN_DIM))
params_decoder["bias"] = pc.add_parameters((VOCAB_SIZE_out))
params_decoder["attention_w"] = pc.add_parameters((ATTENTION_DIM, HIDDEN_DIM))
params_decoder["attention_b"] = pc.add_parameters((ATTENTION_DIM))
params_decoder["attention_wc"] = pc.add_parameters((ATTENTION_DIM, HIDDEN_DIM))
params_decoder["attention_bc"] = pc.add_parameters((ATTENTION_DIM, HIDDEN_DIM))
params_decoder["attention_v"] = pc.add_parameters((1, ATTENTION_DIM))

char_encoder = dy.CompactVanillaLSTMBuilder(LAYERS, 50, 75, pc)
params_encoder["char_lookup"] = pc.add_lookup_parameters((VOCAB_char, 50))
params_encoder["pos_lookup"] = pc.add_lookup_parameters((7, 25))
dropout = 0.05
encoder.set_dropouts(0, dropout)
decoder.set_dropouts(0, dropout)
char_encoder.set_dropouts(0, dropout)
trainer = dy.SimpleSGDTrainer(pc)

## TRAIN
### up to 200 epochs, unless dev accuracy on instructions rises above 0.58
dev_interaction = []
dev_instruction = []
for i in range(200):
    print('Epoch%d' % i)
    count = 0
    sum = 0
    batch_loss = []
    dy.renew_cg()
    previous = None
    first = True
    for sentence, output, env in zip(ins, act, env_int):
        if count % 5 != 0:
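
The attention_w / attention_b / attention_v parameters above are the usual ingredients of additive (MLP) attention: score each encoder state h_i with v * tanh(W h_i + b) and softmax the scores. A hedged sketch of that scoring step over a matrix H whose columns are encoder states; att_w, att_b and att_v are the corresponding parameter expressions, and the attention_wc / attention_bc terms for the decoder state are left out (the project's actual decoder is not shown in this excerpt):

import dynet as dy

def attention_weights(H, att_w, att_b, att_v):
    """H: (HIDDEN_DIM x seq_len) expression, one encoder state per column."""
    scores = att_v * dy.tanh(dy.colwise_add(att_w * H, att_b))  # (1 x seq_len)
    return dy.softmax(dy.transpose(scores))                     # seq_len attention weights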
Example #5
    def __init__(self, model, type, lrate, moment=None):
        # Build only the requested trainer; constructing all of them eagerly
        # in a dict would also call MomentumSGDTrainer with moment=None.
        if type == "sgd":
            self._tt = dy.SimpleSGDTrainer(model, lrate)
        elif type == "momentum":
            self._tt = dy.MomentumSGDTrainer(model, lrate, moment)
        elif type == "adam":
            self._tt = dy.AdamTrainer(model, lrate)
        else:
            raise ValueError("unknown trainer type: {}".format(type))
Example #6
    def train_network(self, train_data, epochs = 3, dev_data = None, test_data = None):
        trainer = dy.SimpleSGDTrainer(self.pc,0.05)
        i = 0
        mloss = 0.
        goods = 0.
        loss = []
        dy.renew_cg()
 
        max_dev_acc = MIN_SAVE_ACC
        run_id = randint(0,9999)
        save_path = "{}{:04d}".format(SAVE_TO,run_id)
        report_path = "{}{:04d}.txt".format(SAVE_REPORT_TO,run_id)
        test_path = "{}{:04d}.txt".format(SAVE_TAGGED_TEST_TO,run_id)
        rprt = open(report_path,'wt')
        print(report_path)
        for e in range(epochs):
            shuffle(train_data)
            for x, y in train_data:
                i = i + 1
                loss = loss + [self.eval_loss(x, y, dropout=True)]
                good = y == self.last_case_class
                goods += int(good)
                if i % UPDATE_EVERY == 0:
                    losses = dy.esum(loss)
                    mloss += losses.value()
                    losses.backward()
                    trainer.update()
                    loss = []
                    dy.renew_cg()
    
                if i % EVALUATE_LOSS_EVERY == 1000:
                    goods_dev = 0.
                    j = 0
                    for d in dev_data or []:
                        dy.renew_cg()
                        j+=1
                        x, y = d
                        self.eval_loss(x, y)
                        goods_dev += 1 if y==self.last_case_class else 0
                    dev_acc = goods_dev / (len(dev_data) if dev_data else 1)  # avoid dividing by zero

                    message = "{} average loss after {} iterations: {} acc: {}".format(
                        now_string(), i, mloss/EVALUATE_LOSS_EVERY, goods/EVALUATE_LOSS_EVERY)
                    dev_acc_str = " dev acc: {}".format(dev_acc) if dev_data else ""
                    print(message + dev_acc_str)
                    rprt.write(message + dev_acc_str+'\n')
                    mloss = 0.
                    goods = 0.

                    if dev_acc > max_dev_acc and i > START_SAVE_AFTER:
                        max_dev_acc = dev_acc
                        print("saving.")
                        rprt.write("saving.\n")
                        self.save(save_path)
                        if test_data:
                            outf = open(test_path,'wt')
                            k = 0
                            goods_test = 0.
                            print("tagging test data.")
                            for dd in test_data:
                                dy.renew_cg()
                                k += 1
                                x, y = dd
                                self.eval_loss(x,y)
                                y_hat = self.last_case_class
                                goods_test += 1 if y == y_hat else 0
                                outf.write("{}{}{}\n".format(x, y, y_hat))
                            outf.close()
                            test_acc = goods_test / len(test_data)
                            print("accurcy on test: {}".format(test_acc))



                rprt.flush()
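
Example #6 checkpoints with a project-specific self.save(save_path) whenever dev accuracy improves on the previous best. With plain DyNet the same pattern can be expressed on the ParameterCollection itself; a minimal sketch (file name and variable names are illustrative):

import dynet as dy

pc = dy.ParameterCollection()
# ... add parameters and train ...

best_dev_acc = 0.0

def maybe_checkpoint(dev_acc, path="best.model"):
    """Save every parameter in the collection when dev accuracy improves."""
    global best_dev_acc
    if dev_acc > best_dev_acc:
        best_dev_acc = dev_acc
        pc.save(path)

# restoring later, after rebuilding the same parameters in the same order:
# pc.populate("best.model")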
Example #7
File: dynmt.py  Project: ufwt/TraFix
def train_model(model, encoder, decoder, params, train_inputs, train_outputs,
                dev_inputs, dev_outputs, y2int, int2y, epochs, optimization,
                results_file_path, plot, batch_size, eval_after, min_epochs):
    print 'training...'
    sys.stdout.flush()

    np.random.seed(17)
    random.seed(17)

    # sort training sentences by length in descending order
    train_data = zip(train_inputs, train_outputs)
    train_data.sort(key=lambda t: -len(t[0]))
    train_order = [
        x * batch_size for x in range(len(train_data) / batch_size + 1)
    ]

    # sort dev sentences by length in descending order
    dev_batch_size = 1
    dev_data = zip(dev_inputs, dev_outputs)
    dev_data.sort(key=lambda t: -len(t[0]))
    dev_order = [
        x * dev_batch_size for x in range(len(dev_data) / dev_batch_size + 1)
    ]

    if optimization == 'ADAM':
        trainer = dn.AdamTrainer(
            model
        )  # lam=REGULARIZATION, alpha=LEARNING_RATE, beta_1=0.9, beta_2=0.999, eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = dn.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = dn.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = dn.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = dn.AdadeltaTrainer(model)
    else:
        trainer = dn.SimpleSGDTrainer(model)

    trainer.set_clip_threshold(float(arguments['--grad-clip']))
    seen_examples_count = 0
    total_loss = 0
    best_dev_epoch = 0
    best_train_epoch = 0
    patience = 0
    train_len = len(train_outputs)
    dev_len = len(dev_inputs)
    avg_train_loss = -1
    train_loss_patience = 0
    train_loss_patience_threshold = 99999999
    max_patience = int(arguments['--max-patience'])
    log_path = results_file_path + '_log.txt'
    start_epoch, checkpoints_x, train_loss_y, dev_loss_y, dev_accuracy_y = read_from_log(
        log_path)

    if len(train_loss_y) > 0:
        total_batches = checkpoints_x[-1]
        best_avg_train_loss = min(train_loss_y)  # best loss is the lowest seen so far
        best_dev_accuracy = max(dev_accuracy_y)
        best_dev_loss = min(dev_loss_y)
    else:
        total_batches = 0
        best_avg_train_loss = 999999
        best_dev_loss = 999999
        best_dev_accuracy = 0

    # progress bar init
    # noinspection PyArgumentList
    # widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    # train_progress_bar = progressbar.ProgressBar(widgets=widgets, maxval=epochs).start()

    e = -1
    for e in xrange(start_epoch, epochs):
        try:
            # shuffle the batch start indices in each epoch
            random.shuffle(train_order)
            batches_per_epoch = len(train_order)
            start = time.time()

            # go through batches
            for i, batch_start_index in enumerate(train_order, start=1):
                # get batch examples
                batch_inputs = [
                    x[0]
                    for x in train_data[batch_start_index:batch_start_index +
                                        batch_size]
                ]
                batch_outputs = [
                    x[1]
                    for x in train_data[batch_start_index:batch_start_index +
                                        batch_size]
                ]
                actual_batch_size = len(batch_inputs)

                # skip empty batches
                if actual_batch_size == 0 or len(batch_inputs[0]) == 0:
                    continue

                # compute batch loss

                # debug prints for batch seq lengths
                # print 'batch {} seq lens'.format(i)
                # print [len(s) for s in batch_inputs]
                loss = compute_batch_loss(encoder, decoder, batch_inputs,
                                          batch_outputs, y2int)

                # forward pass
                total_loss += loss.scalar_value()
                loss.backward()

                total_batches += 1

                # update parameters
                trainer.update()

                seen_examples_count += actual_batch_size

                # avg loss per sample
                avg_train_loss = total_loss / float(i * batch_size +
                                                    e * train_len)

                # start patience counts only after 20 batches
                if avg_train_loss < best_avg_train_loss and total_batches > 20:
                    best_avg_train_loss = avg_train_loss
                    train_loss_patience = 0
                else:
                    train_loss_patience += 1
                    if train_loss_patience > train_loss_patience_threshold:
                        print 'train loss patience exceeded: {}'.format(
                            train_loss_patience)
                        sys.stdout.flush()
                        return model, params, e, best_dev_epoch

                if total_batches % 100 == 0 and total_batches > 0:
                    print 'epoch {}: {} batches out of {} ({} examples out of {}) total: {} batches, {} examples. avg \
	loss per example: {}'.format(e, i, batches_per_epoch, i * batch_size,
                              train_len, total_batches,
                              total_batches * batch_size, avg_train_loss)
                    sys.stdout.flush()

                    # print sentences per second
                    end = time.time()
                    elapsed_seconds = end - start
                    print '{} sentences per second'.format(
                        seen_examples_count / elapsed_seconds)
                    sys.stdout.flush()
                    seen_examples_count = 0
                    start = time.time()

                # checkpoint
                if total_batches % eval_after == 0:

                    print 'starting checkpoint evaluation'
                    sys.stdout.flush()
                    dev_bleu, dev_loss = checkpoint_eval(
                        encoder,
                        decoder,
                        params,
                        dev_batch_size,
                        dev_data,
                        dev_inputs,
                        dev_len,
                        dev_order,
                        dev_outputs,
                        int2y,
                        y2int,
                        results_file_path=results_file_path)

                    log_to_file(log_path, e, total_batches, avg_train_loss,
                                dev_loss, dev_bleu)
                    save_model(model,
                               results_file_path,
                               total_batches,
                               models_to_save=int(
                                   arguments['--models-to-save']))
                    if dev_bleu > best_dev_accuracy:
                        best_dev_accuracy = dev_bleu
                        best_dev_epoch = e

                        # save best model to disk
                        save_best_model(model, results_file_path)
                        print 'saved new best model'
                        sys.stdout.flush()
                        patience = 0
                    else:
                        patience += 1

                    if dev_loss < best_dev_loss:
                        best_dev_loss = dev_loss

                    print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev bleu: {3:.4f} \
	best dev bleu {4:.4f} (epoch {5}) patience = {6}'.format(
                        e, avg_train_loss, dev_loss, dev_bleu,
                        best_dev_accuracy, best_dev_epoch, patience)
                    sys.stdout.flush()

                    if (patience == max_patience) and (e >= min_epochs):
                        print 'out of patience after {0} checkpoints'.format(
                            str(e))
                        sys.stdout.flush()
                        # train_progress_bar.finish()
                        if plot:
                            plt.cla()
                        print 'checkpoint patience exceeded'
                        sys.stdout.flush()
                        return model, params, e, best_dev_epoch

                    # plotting results from checkpoint evaluation
                    if plot:
                        train_loss_y.append(avg_train_loss)
                        checkpoints_x.append(total_batches)
                        dev_accuracy_y.append(dev_bleu)
                        dev_loss_y.append(dev_loss)

                        y_vals = [('train_loss', train_loss_y),
                                  ('dev loss', dev_loss_y),
                                  ('dev_bleu', dev_accuracy_y)]
                        common.plot_to_file(y_vals,
                                            x_name='total batches',
                                            x_vals=checkpoints_x,
                                            file_path=results_file_path +
                                            '_learning_curve.png')
        except RuntimeError as exception:
            # sometimes the above two instructions fail due to memory allocation failure.
            # I was unable to find a fix for these failures.
            # perhaps we can just "skip" the failures.
            print 'WARNING: Skipping epoch due to RuntimeError (' + str(
                exception) + ')'
            sys.stdout.flush()

    # update progress bar after completing epoch
    # train_progress_bar.update(e)

    # update progress bar after completing training
    # train_progress_bar.finish()
    if plot:
        # clear plot when done
        plt.cla()
    print 'finished training. average loss: {} best epoch on dev: {} best epoch on train: {}'.format(
        str(avg_train_loss), best_dev_epoch, best_train_epoch)
    sys.stdout.flush()

    return model, params, e, best_dev_epoch
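
The optimizer dispatch and gradient clipping at the top of train_model are independent of the rest of the seq2seq code. Pulled out on their own (the clip threshold here is an arbitrary placeholder; the real value comes from the --grad-clip argument):

import dynet as dn

def make_trainer(model, optimization, clip=1.0):
    """Pick a DyNet trainer by name, defaulting to plain SGD, and enable gradient clipping."""
    trainers = {
        'ADAM': dn.AdamTrainer,
        'MOMENTUM': dn.MomentumSGDTrainer,
        'SGD': dn.SimpleSGDTrainer,
        'ADAGRAD': dn.AdagradTrainer,
        'ADADELTA': dn.AdadeltaTrainer,
    }
    trainer = trainers.get(optimization, dn.SimpleSGDTrainer)(model)  # dict of classes, so only one instance is built
    trainer.set_clip_threshold(clip)  # clip the gradient norm before each update
    return trainer

trainer = make_trainer(dn.ParameterCollection(), 'SGD', clip=5.0)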