Example #1
0
def init_model():
    """Build the character vocabulary, initialise DyNet, and construct
    the LSTM/MLP parameters plus an RMSProp trainer.

    Side effects: sets the module-level globals `int2char`, `char2int`
    and `VOCAB_SIZE`.

    Returns:
        ((lstm, params, pc), trainer): the LSTM builder, a dict of named
        parameters, the ParameterCollection, and the trainer.
    """
    vocab = list("0123456789abcd ") + ["<EOS>"]
    global int2char, char2int, VOCAB_SIZE
    int2char = list(vocab)
    char2int = {ch: idx for idx, ch in enumerate(vocab)}
    VOCAB_SIZE = len(vocab)

    # Fix the DyNet RNG so runs are reproducible.
    dyparams = dy.DynetParams()
    dyparams.set_random_seed(666)
    dyparams.init()

    pc = dy.ParameterCollection()
    lstm = dy.LSTMBuilder(LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
    params = {
        "lookup": pc.add_lookup_parameters((VOCAB_SIZE, INPUT_DIM)),
        # Projection from the LSTM hidden state.
        "R": pc.add_parameters((LSTM_OUTPUT_SIZE, HIDDEN_DIM)),
        "bias": pc.add_parameters((LSTM_OUTPUT_SIZE)),
        # Two-layer MLP on top of the LSTM output.
        "w1": pc.add_parameters((N1, LSTM_OUTPUT_SIZE)),
        "w2": pc.add_parameters((MLP_OUTPUT_SIZE, N1)),
        "b1": pc.add_parameters((N1)),
        "b2": pc.add_parameters((MLP_OUTPUT_SIZE)),
    }

    trainer = dy.RMSPropTrainer(pc)
    return (lstm, params, pc), trainer
Example #2
0
def get_trainer(opt, s2s):
    """Construct a DyNet trainer for `s2s.pc` according to `opt.trainer`.

    Recognised names: 'sgd', 'clr', 'momentum', 'rmsprop', 'adam'.
    Anything else falls back to plain SGD with a warning on stderr.
    Gradient clipping (`opt.gradient_clip`) is always applied.
    """
    name = opt.trainer
    lr = opt.learning_rate
    decay = opt.learning_rate_decay

    if name == 'clr':
        # Cyclical learning rate oscillating between lr/10 and lr.
        trainer = dy.CyclicalSGDTrainer(s2s.pc,
                                        e0_min=lr / 10.0,
                                        e0_max=lr,
                                        edecay=decay)
    elif name == 'momentum':
        trainer = dy.MomentumSGDTrainer(s2s.pc, e0=lr, edecay=decay)
    elif name == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.pc, e0=lr, edecay=decay)
    elif name == 'adam':
        trainer = dy.AdamTrainer(s2s.pc, lr, edecay=decay)
    else:
        if name != 'sgd':
            print('Trainer name invalid or not provided, using SGD',
                  file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(s2s.pc, e0=lr, edecay=decay)

    trainer.set_clip_threshold(opt.gradient_clip)

    return trainer
Example #3
0
    def _init_optimizer(self, model, **kwargs):
        """Create `self.optimizer` for `model.pc` from keyword options.

        Recognised kwargs: `optim` ('sgd'|'adadelta'|'adam'|'rmsprop'),
        `eta`/`lr` (learning rate, `eta` wins), `mom` (SGD momentum),
        `clip` (gradient clip threshold), `beta1`/`beta2`/`epsilon`
        (Adam hyper-parameters).  Also sets `self.current_lr`.
        """
        momentum = float(kwargs.get('mom', 0.0))
        choice = kwargs.get('optim', 'sgd')
        clip_threshold = kwargs.get('clip')

        self.current_lr = kwargs.get('eta', kwargs.get('lr', 0.01))
        if choice == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(model.pc)
        elif choice == 'adam':
            self.optimizer = dy.AdamTrainer(model.pc,
                                            alpha=self.current_lr,
                                            beta_1=kwargs.get('beta1', 0.9),
                                            beta_2=kwargs.get('beta2', 0.999),
                                            eps=kwargs.get('epsilon', 1e-8))
        elif choice == 'rmsprop':
            self.optimizer = dy.RMSPropTrainer(model.pc,
                                               learning_rate=self.current_lr)
        elif momentum == 0 or momentum is None:
            # Plain SGD when no momentum was requested.
            self.optimizer = dy.SimpleSGDTrainer(
                model.pc, learning_rate=self.current_lr)
        else:
            logging.info('Using mom %f', momentum)
            self.optimizer = dy.MomentumSGDTrainer(
                model.pc, learning_rate=self.current_lr, mom=momentum)
        if clip_threshold is not None:
            self.optimizer.set_clip_threshold(clip_threshold)
        # Force dense updates of lookup parameters.
        self.optimizer.set_sparse_updates(False)
Example #4
0
    def add_parameters(self,
                       dropout,
                       lstm_size,
                       optimizer,
                       model_type,
                       gru=True):
        """Create the encoder/decoder RNNs, the MLP/attention parameters
        and the trainer.

        Args:
            dropout: Dropout rate applied to every RNN builder (also
                stored in the module-level global DROPOUT).
            lstm_size: Hidden size of the recurrent layers.
            optimizer: "sgd", "rms", "cyclic" or "adam"; anything else
                falls back to Adagrad.
            model_type: "gru" selects GRU builders, otherwise LSTM.
            gru: Unused; kept for backward compatibility with callers.
        """
        if model_type == "gru":
            self.encoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                             lstm_size, self.model)
            self.encoder_rnn.set_dropout(dropout)
            self.encoder_rnn2 = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                              lstm_size, self.model)
            self.encoder_rnn2.set_dropout(dropout)
            # Decoder input is the previous embedding plus the context.
            self.decoder_rnn = dy.GRUBuilder(NUM_LAYERS,
                                             EMBEDDING_SIZE + lstm_size,
                                             lstm_size, self.model)
            self.decoder_rnn.set_dropout(dropout)
        else:

            self.encoder_rnn = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                              lstm_size, self.model)
            self.encoder_rnn.set_dropout(dropout)
            self.encoder_rnn2 = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                               lstm_size, self.model)
            self.encoder_rnn2.set_dropout(dropout)
            self.decoder_rnn = dy.LSTMBuilder(NUM_LAYERS,
                                              EMBEDDING_SIZE + lstm_size,
                                              lstm_size, self.model)
            self.decoder_rnn.set_dropout(dropout)

        global DROPOUT
        DROPOUT = dropout

        # Three-layer output MLP: lstm_size -> 200 -> 100 -> |C2I|.
        self.W1 = self.model.add_parameters((200, lstm_size))
        self.b1 = self.model.add_parameters((200, 1))
        self.W2 = self.model.add_parameters((100, 200))
        self.b2 = self.model.add_parameters((100, 1))
        self.W3 = self.model.add_parameters((len(self.C2I), 100))
        self.b3 = self.model.add_parameters((len(self.C2I), 1))
        # Attention projections (query/key/value) and related weights.
        self.W_query = self.model.add_parameters((lstm_size, lstm_size))
        self.W_key = self.model.add_parameters((lstm_size, lstm_size))
        self.W_val = self.model.add_parameters((lstm_size, lstm_size))
        self.W_att = self.model.add_parameters((1, EMBEDDING_SIZE))
        self.W_c_s = self.model.add_parameters((lstm_size, EMBEDDING_SIZE))
        self.W_direct = self.model.add_parameters((len(self.C2I), lstm_size))
        self.b_att = self.model.add_parameters((lstm_size, 1))
        self.b_direct = self.model.add_parameters((len(self.C2I), 1))
        self.E_lang = self.model.add_lookup_parameters((7, EMBEDDING_SIZE))

        if optimizer == "sgd":
            self.trainer = dy.SimpleSGDTrainer(self.model)
        elif optimizer == "rms":
            self.trainer = dy.RMSPropTrainer(self.model)
        # BUG FIX: this branch previously started a new `if` chain, so the
        # trailing `else` overwrote the "sgd"/"rms" trainers with Adagrad.
        elif optimizer == "cyclic":
            self.trainer = dy.CyclicalSGDTrainer(self.model)
        elif optimizer == "adam":
            self.trainer = dy.AdamTrainer(self.model)
        else:
            self.trainer = dy.AdagradTrainer(self.model)
Example #5
0
def training(model,
             train_set,
             val_set,
             val_ints,
             args,
             fsa_builder=None):
    """Train `model`, checkpointing every epoch with early stopping.

    Patience grows slightly (x1.005) whenever validation token accuracy
    improves; when the countdown reaches zero training stops and the
    checkpoint with the best interaction accuracy is reloaded.

    Inputs:
        train_set (list of examples): List of training examples.
        val_set (list of examples): List of validation examples.
        val_ints: Validation interactions.
        args: Run configuration (patience, max_epochs, logdir).
        fsa_builder (ExecutableFSA): Builder for the FSA.
    """
    trainer = dy.RMSPropTrainer(model.get_params())
    trainer.set_clip_threshold(1)

    top_int_accuracy = 0.0
    top_token_accuracy = 0.0
    best_checkpoint = None
    patience = args.patience
    countdown = patience

    for epoch_index in range(args.max_epochs):
        token_acc, int_acc = do_one_epoch(model, train_set, val_set, val_ints, fsa_builder, args, epoch_index, trainer)

        # Checkpoint this epoch.
        checkpoint = '%s/model-epoch%d.dy' % (args.logdir, epoch_index)
        model.save_params(checkpoint)

        # Early-stopping bookkeeping on token accuracy.
        if token_acc > top_token_accuracy:
            top_token_accuracy = token_acc
            patience *= 1.005
            countdown = patience
            print('Validation token accuracy increased to ' + str(token_acc))
            print('Countdown reset and patience set to %f' % (patience))
        else:
            countdown -= 1

        # Track the best checkpoint by interaction accuracy.
        if best_checkpoint is None or int_acc > top_int_accuracy:
            best_checkpoint = checkpoint
            top_int_accuracy = int_acc
            print('Interaction accuracy increased to ' + str(int_acc))

        if countdown <= 0:
            print('Patience ran out -- stopping')
            break

    print('Loading parameters from best model: %s' % (best_checkpoint))
    model.load_params(best_checkpoint)
Example #6
0
 def set_trainer(self, optimization):
     """Set `self.trainer` according to the `optimization` name.

     Recognised names: 'MomentumSGD', 'CyclicalSGD', 'Adam', 'RMSProp';
     anything else gets SimpleSGD.

     BUG FIX: the original used independent `if` statements with an
     `else` only on the last one, so choosing 'MomentumSGD',
     'CyclicalSGD' or 'Adam' was immediately overwritten by SimpleSGD;
     the chain is now exclusive via `elif`.
     """
     if optimization == 'MomentumSGD':
         self.trainer = dy.MomentumSGDTrainer(
             self.model, learning_rate=self.hp.learning_rate)
     elif optimization == 'CyclicalSGD':
         self.trainer = dy.CyclicalSGDTrainer(
             self.model,
             learning_rate_max=self.hp.learning_rate_max,
             learning_rate_min=self.hp.learning_rate_min)
     elif optimization == 'Adam':
         self.trainer = dy.AdamTrainer(self.model)
     elif optimization == 'RMSProp':
         self.trainer = dy.RMSPropTrainer(self.model)
     else:  # 'SimpleSGD'
         self.trainer = dy.SimpleSGDTrainer(
             self.model, learning_rate=self.hp.learning_rate)
Example #7
0
def train(args, builder, params):
    """Train `builder` on randomly generated sequence batches.

    For each batch group: sample batched random sequences, feed the
    input half of each sequence in INPUT_MODE (ignoring outputs), score
    the output half with a batched negative-log-softmax in OUTPUT_MODE,
    and update with RMSProp (gradient-clipped).
    """
    trainer = dynet.RMSPropTrainer(params, args.learning_rate)
    trainer.set_clip_threshold(args.clip_threshold)
    for group_index in range(args.iterations):
        print('batch group #%d...' % (group_index + 1))
        group_loss_total = 0.0
        for _ in range(args.batch_group_size):
            # Sample a fresh batch of training sequences.
            seq_len = random.randint(*args.training_length_range)
            batch = [random_sequence(seq_len, args.source_alphabet_size)
                     for _ in range(args.batch_size)]
            # Re-batch time-major: one list of symbols per time step.
            input_steps = transpose(s.input_sequence() for s in batch)
            output_steps = transpose(s.output_sequence() for s in batch)
            # Fresh computation graph per batch.
            dynet.renew_cg()
            state = builder.initial_state(args.batch_size)
            # Consume everything up to the separator; outputs ignored.
            for step in input_steps:
                indices = [input_symbol_to_index(s) for s in step]
                state = state.next(indices, StackLSTMBuilder.INPUT_MODE)
            # Score the predicted half, accumulating per-symbol losses.
            step_losses = []
            for step in output_steps:
                indices = [output_symbol_to_index(s) for s in step]
                step_losses.append(
                    dynet.pickneglogsoftmax_batch(state.output(), indices))
                state = state.next(indices, StackLSTMBuilder.OUTPUT_MODE)
            loss = dynet.sum_batches(dynet.esum(step_losses))
            # Forward pass, backprop, parameter update.
            group_loss_total += loss.value()
            loss.backward()
            trainer.update()
        avg_loss = group_loss_total / (args.batch_size * args.batch_group_size)
        print('  average loss: %0.2f' % avg_loss)
Example #8
0
File: train.py  Project: aiedward/baseline
 def __init__(self, model, optim='sgd', clip=5, mom=0.9, **kwargs):
     """Wrap `model` with a DyNet trainer selected by `optim`.

     The learning rate comes from kwargs 'eta' (preferred) or 'lr',
     defaulting to 0.01.  Unrecognised `optim` names fall back to
     momentum SGD.  Gradient clipping is always enabled.
     """
     super(ClassifyTrainerDynet, self).__init__()
     self.model = model
     learning_rate = kwargs.get('eta', kwargs.get('lr', 0.01))
     print("Using eta [{:.4f}]".format(learning_rate))
     print("Using optim [{}]".format(optim))
     self.labels = model.labels
     if optim == 'adadelta':
         self.optimizer = dy.AdadeltaTrainer(model.pc)
     elif optim == 'adam':
         self.optimizer = dy.AdamTrainer(model.pc)
     elif optim == 'rmsprop':
         self.optimizer = dy.RMSPropTrainer(model.pc,
                                            learning_rate=learning_rate)
     else:
         print("using mom {:.3f}".format(mom))
         self.optimizer = dy.MomentumSGDTrainer(model.pc,
                                                mom=mom,
                                                learning_rate=learning_rate)
     self.optimizer.set_clip_threshold(clip)
Example #9
0
def optimizer(model, optim='sgd', eta=0.01, clip=None, mom=0.9, **kwargs):
    """Build and return a DyNet trainer for `model.pc`.

    A 'lr' kwarg overrides `eta`.  Unknown `optim` names fall back to
    SGD (momentum SGD when `mom` is non-zero).  Clipping is applied only
    when `clip` is given; sparse updates are disabled so lookup
    parameters get dense updates.
    """
    eta = kwargs.get('lr', eta)
    print('Using eta [{:.4f}]'.format(eta))
    print('Using optim [{}]'.format(optim))
    if optim == 'adadelta':
        trainer = dy.AdadeltaTrainer(model.pc)
    elif optim == 'adam':
        trainer = dy.AdamTrainer(model.pc)
    elif optim == 'rmsprop':
        trainer = dy.RMSPropTrainer(model.pc, learning_rate=eta)
    elif mom == 0 or mom is None:
        trainer = dy.SimpleSGDTrainer(model.pc, learning_rate=eta)
    else:
        print('Using mom {:.3f}'.format(mom))
        trainer = dy.MomentumSGDTrainer(model.pc, learning_rate=eta, mom=mom)
    if clip is not None:
        trainer.set_clip_threshold(clip)
    trainer.set_sparse_updates(False)
    return trainer
Example #10
0
def mse_loss(predictions, target):
    """Return the mean squared error between `predictions` and `target`
    as a DyNet expression."""
    residual = predictions - target
    return dy.mean_elems(dy.square(residual))


# One-parameter linear model: prediction = W*x + b.
m = dy.ParameterCollection()

W = m.add_parameters((1, 1))
b = m.add_parameters((1, ))

dy.renew_cg()

optimizer = dy.RMSPropTrainer(m)

BATCH_SIZE = 250
EPOCHS = 20000
TARGET_UPDATE = 1

# NOTE(review): generate_data is defined elsewhere; it presumably
# returns 5000 aligned samples (indexable by an integer array, so
# likely numpy arrays) -- confirm against the full script.
x, y = generate_data()

# Training loop
losses = list()
for epoch in range(EPOCHS):
    # Sample a minibatch
    # NOTE(review): the (population, size, replace) signature matches
    # numpy.random.choice, so `random` is presumably numpy's random
    # module rather than the stdlib one -- confirm the import.
    indices = random.choice(5000, BATCH_SIZE, False)
    mb_x, mb_y = x[indices], y[indices]

    # NOTE(review): this overwrites the sampled minibatch with the full
    # dataset; the loop body is cut off here, so whether that is
    # intentional cannot be determined from this view.
    mb_x = x
Example #11
0
def reinforcement_learning(model,
                           train_set,
                           val_set,
                           val_interactions,
                           log_dir,
                           fsa_builder,
                           reward_fn,
                           entropy_function,
                           args,
                           batch_size=1,
                           epochs=20,
                           single_head=True,
                           explore_with_fsa=False):
    """Performs training with exploration.

    Samples action sequences from the model, scores them with
    `reward_fn` (plus an optional entropy bonus weighted by
    `args.entropy_coefficient`), and backpropagates the batch-averaged
    negative reward.  Saves a checkpoint per epoch, early-stops on
    validation reward, and reloads the checkpoint with the best
    interaction accuracy.

    Inputs:
        model (Model): Model to train.
        train_set (list of Examples): The set of training examples.
        val_set (list of Examples): The set of validation examples.
        val_interactions (list of Interactions): Full interactions for validation.
        log_dir (str): Location to log.

    """
    trainer = dy.RMSPropTrainer(model.get_params())
    trainer.set_clip_threshold(1)

    mode = get_rl_mode(args.rl_mode)

    best_val_accuracy = 0.0
    best_val_reward = -float('inf')
    best_model = None

    # Crayon (TensorBoard bridge) is optional; fall back to file logging.
    try:
        from pycrayon import CrayonClient
        crayon = CrayonClient(hostname="localhost")
        experiment = crayon.create_experiment(log_dir)
    # BUG FIX: `except ValueError or ImportError` evaluated to
    # `except ValueError`, so a missing pycrayon package crashed instead
    # of being handled; a tuple catches both exception types.
    except (ValueError, ImportError):
        print(
            "If you want to use Crayon, please use `pip install pycrayon` to install it. "
        )
        experiment = None

    num_batches = 0
    train_file = open(os.path.join(log_dir, "train.log"), "w")

    patience = args.patience
    countdown = patience

    for epoch in range(epochs):
        random.shuffle(train_set)
        batches = chunks(train_set, batch_size)

        num_examples = 0
        num_tokens = 0
        num_tokens_zero = 0
        progbar = progressbar.ProgressBar(maxval=len(batches),
                                          widgets=[
                                              "Epoch " + str(epoch),
                                              progressbar.Bar('=', '[', ']'),
                                              ' ',
                                              progressbar.Percentage(), ' ',
                                              progressbar.ETA()
                                          ])
        progbar.start()
        for i, batch in enumerate(batches):
            dy.renew_cg()

            # Sample candidate action sequences for the whole batch.
            prob_seqs, predictions = model.sample_sequences(
                batch,
                length=args.sample_length_limit,
                training=True,
                fsa_builder=fsa_builder)

            batch_entropy_sum = dy.inputTensor([0.])
            batch_rewards = []
            processed_predictions = []

            train_file.write("--- NEW BATCH # " + str(num_batches) + " ---\n")
            action_probabilities = {}
            for action in model.output_action_vocabulary:
                if action != BEG:
                    action_probabilities[action] = []

            for example, prob_seq, prediction in zip(batch, prob_seqs,
                                                     predictions):
                # Get reward (and other evaluation information)
                prediction = process_example(example, prediction, prob_seq,
                                             reward_fn, entropy_function,
                                             model, args, fsa_builder)
                for distribution in prob_seq:
                    action_probability = model.action_probabilities(
                        distribution)
                    for action, prob_exp in action_probability.items():
                        action_probabilities[action].append(prob_exp)

                batch_rewards.extend(prediction.reward_expressions)
                batch_entropy_sum += dy.esum(prediction.entropies)
                processed_predictions.append(prediction)

                num_examples += 1

            # Now backpropagate given these rewards
            batch_action_probabilities = {}
            for action, prob_exps in action_probabilities.items():
                batch_action_probabilities[action] = dy.esum(prob_exps) / len(
                    batch_rewards)

            num_reward_exps = len(batch_rewards)
            loss = dy.esum(batch_rewards)
            if args.entropy_coefficient > 0:
                loss += args.entropy_coefficient * batch_entropy_sum
            # Negate: the trainer minimizes, but we maximize reward.
            loss = -loss / num_reward_exps
            loss.backward()
            try:
                trainer.update()
            except RuntimeError as r:
                # Dump gradients to help diagnose NaN/inf updates.
                print(loss.npvalue())
                for lookup_param in model._pc.lookup_parameters_list():
                    print(lookup_param.name())
                    print(lookup_param.grad_as_array())
                for param in model._pc.parameters_list():
                    print(param.name())
                    print(param.grad_as_array())
                print(r)
                exit()

            # Calculate metrics
            stop_tok = (EOS if single_head else (EOS, NO_ARG, NO_ARG))
            per_token_metrics = compute_metrics(processed_predictions,
                                                num_reward_exps,
                                                ["entropy", "reward"], args)
            gold_token_metrics = compute_metrics(
                processed_predictions,
                sum([len(ex.actions) for ex in batch]) + len(batch),
                ["gold_probability"],
                args,
                model=model)
            per_example_metrics = compute_metrics(
                processed_predictions,
                len(batch), [
                    "distance", "completion", "invalid", "num_tokens",
                    "prefix_length"
                ],
                args,
                model=model)

            for prediction in processed_predictions:
                train_file.write(str(prediction) + "\n")
            train_file.write("=====\n")
            log_metrics({"loss": loss.npvalue()[0]}, train_file, experiment,
                        num_batches)
            log_metrics(per_token_metrics, train_file, experiment, num_batches)
            log_metrics(gold_token_metrics, train_file, experiment,
                        num_batches)
            log_metrics(per_example_metrics, train_file, experiment,
                        num_batches)
            train_file.flush()

            num_batches += 1
            progbar.update(i)

        progbar.finish()
        # End-of-epoch evaluation on train and validation sets.
        train_acc, _, _ = utterance_accuracy(model,
                                             train_set,
                                             fsa_builder=fsa_builder,
                                             logfile=log_dir + "/rl-train" +
                                             str(epoch) + ".log")
        val_acc, val_reward, _ = utterance_accuracy(
            model,
            val_set,
            fsa_builder=fsa_builder,
            logfile=log_dir + "/rl-val-" + str(epoch) + ".log",
            args=args,
            reward_function=reward_fn)

        val_int_acc = interaction_accuracy(model,
                                           val_interactions,
                                           fsa_builder=fsa_builder,
                                           logfile=log_dir + "/rl-val-int-" +
                                           str(epoch) + ".log")

        log_metrics(
            {
                "train_accuracy": train_acc,
                "validation_accuracy": val_acc,
                "validation_int_acc": val_int_acc,
                "validation_reward": val_reward,
                "countdown": countdown
            }, train_file, experiment, num_batches)
        if experiment is not None:
            experiment.to_zip(
                os.path.join(log_dir, "crayon-" + str(epoch) + ".zip"))
        model_file_name = log_dir + "/model-rl-epoch" + str(epoch) + ".dy"
        model.save_params(model_file_name)
        # Best checkpoint is tracked by interaction accuracy ...
        if val_int_acc > best_val_accuracy or best_model is None:
            best_model = model_file_name
            best_val_accuracy = val_int_acc

        # ... while early-stopping patience is driven by reward.
        if val_reward > best_val_reward:
            patience *= 1.005
            countdown = patience
            best_val_reward = val_reward
        else:
            countdown -= 1
        if countdown <= 0:
            print("Patience ran out -- stopping")
            break
    train_file.close()
    print('Loading parameters from best model: %s' % (best_model))
    model.load_params(best_model)
    model.save_params(log_dir + "/best_rl_model.dy")
    print(train_set[0])
    print(
        model.generate(train_set[0].utterance, train_set[0].initial_state,
                       train_set[0].history)[0])
Example #12
0
File: run.py  Project: StevenLOL/dynet-att
def train(opt):
    """Train the seq2seq model described by the options object `opt`.

    Loads/builds the source and target dictionaries, reads the corpora,
    builds the model and trainer, then runs the training loop with
    periodic train/dev perplexity reports and BLEU-based early stopping
    (the latest best model is saved whenever dev BLEU improves).
    """
    # Load data =========================================================
    if opt.verbose:
        print('Reading corpora')
    # Read vocabs
    if opt.dic_src:
        widss, ids2ws = data.load_dic(opt.dic_src)
    else:
        widss, ids2ws = data.read_dic(opt.train_src, max_size=opt.src_vocab_size)
        data.save_dic(opt.exp_name + '_src_dic.txt', widss)

    if opt.dic_dst:
        widst, ids2wt = data.load_dic(opt.dic_dst)
    else:
        widst, ids2wt = data.read_dic(opt.train_dst, max_size=opt.trg_vocab_size)
        data.save_dic(opt.exp_name + '_trg_dic.txt', widst)

    # Read training
    trainings_data = data.read_corpus(opt.train_src, widss)
    trainingt_data = data.read_corpus(opt.train_dst, widst)
    # Read validation
    valids_data = data.read_corpus(opt.valid_src, widss)
    validt_data = data.read_corpus(opt.valid_dst, widst)

    # Create model ======================================================
    if opt.verbose:
        print('Creating model')
        sys.stdout.flush()
    s2s = seq2seq.Seq2SeqModel(opt.emb_dim,
                               opt.hidden_dim,
                               opt.att_dim,
                               widss,
                               widst,
                               model_file=opt.model,
                               bidir=opt.bidir,
                               word_emb=opt.word_emb,
                               dropout=opt.dropout_rate,
                               max_len=opt.max_len)

    if s2s.model_file is not None:
        s2s.load()
    s2s.model_file = opt.exp_name+'_model.txt'
    # Trainer ==========================================================
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    # BUG FIX: this branch was a separate `if`, so choosing 'sgd' also
    # fell through to the final `else`, printing a spurious "invalid
    # trainer" warning and rebuilding the trainer; `elif` keeps the
    # chain exclusive.
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.model, e0_min=opt.learning_rate / 10,
                                        e0_max=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.model, e0=opt.learning_rate,
                                    edecay=opt.learning_rate_decay)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.model, opt.learning_rate, edecay=opt.learning_rate_decay)
    else:
        print('Trainer name invalid or not provided, using SGD', file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    if opt.verbose:
        print('Using '+opt.trainer+' optimizer')
    trainer.set_clip_threshold(opt.gradient_clip)
    # Print configuration ===============================================
    if opt.verbose:
        options.print_config(opt, src_dict_size=len(widss), trg_dict_size=len(widst))
        sys.stdout.flush()
    # Creat batch loaders ===============================================
    if opt.verbose:
        print('Creating batch loaders')
        sys.stdout.flush()
    trainbatchloader = data.BatchLoader(trainings_data, trainingt_data, opt.batch_size)
    devbatchloader = data.BatchLoader(valids_data, validt_data, opt.dev_batch_size)
    # Start training ====================================================
    if opt.verbose:
        print('starting training')
        sys.stdout.flush()
    start = time.time()
    train_loss = 0
    processed = 0
    best_bleu = 0
    i = 0
    for epoch in range(opt.num_epochs):
        for x, y in trainbatchloader:
            processed += sum(map(len, y))
            bsize = len(y)
            # Compute loss
            loss = s2s.calculate_loss(x, y)
            # Backward pass and parameter update
            loss.backward()
            trainer.update()
            train_loss += loss.scalar_value() * bsize
            if (i+1) % opt.check_train_error_every == 0:
                # Check average training error from time to time
                logloss = train_loss / processed
                ppl = np.exp(logloss)
                elapsed = time.time()-start
                trainer.status()
                print(" Training_loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (logloss, ppl, elapsed, processed))
                start = time.time()
                train_loss = 0
                processed = 0
                sys.stdout.flush()
            if (i+1) % opt.check_valid_error_every == 0:
                # Check generalization error on the validation set from time to time
                dev_loss = 0
                dev_processed = 0
                dev_start = time.time()
                for x, y in devbatchloader:
                    dev_processed += sum(map(len, y))
                    bsize = len(y)
                    loss = s2s.calculate_loss(x, y, test=True)
                    dev_loss += loss.scalar_value() * bsize
                dev_logloss = dev_loss/dev_processed
                dev_ppl = np.exp(dev_logloss)
                dev_elapsed = time.time()-dev_start
                print("[epoch %d] Dev loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (epoch, dev_logloss, dev_ppl, dev_elapsed, dev_processed))
                sys.stdout.flush()
                start = time.time()

            if (i+1) % opt.valid_bleu_every == 0:
                # Check BLEU score on the validation set from time to time
                print('Start translating validation set, buckle up!')
                sys.stdout.flush()
                bleu_start = time.time()
                with open(opt.valid_out, 'w+') as f:
                    for x in valids_data:
                        y_hat = s2s.translate(x, beam_size=opt.beam_size)
                        translation = [ids2wt[w] for w in y_hat[1:-1]]
                        print(' '.join(translation), file=f)
                bleu, details = evaluation.bleu_score(opt.valid_dst, opt.valid_out)
                bleu_elapsed = time.time()-bleu_start
                print('Finished translating validation set', bleu_elapsed, 'elapsed.')
                print(details)
                # Early stopping : save the latest best model
                if bleu > best_bleu:
                    best_bleu = bleu
                    print('Best BLEU score up to date, saving model to', s2s.model_file)
                    s2s.save()
                sys.stdout.flush()
                start = time.time()
            i = i+1
        trainer.update_epoch()
Example #13
0
    def __init__(self,
                 word_count,
                 tag_count,
                 word_dims,
                 tag_dims,
                 lstm_units,
                 hidden_units,
                 struct_out,
                 label_out,
                 droprate=0,
                 struct_spans=4,
                 label_spans=3,
                 optimizer=1):
        """Build a two-layer bidirectional-LSTM span model with separate
        structure- and label-scoring MLPs.

        Args:
            word_count / tag_count: Vocabulary sizes for the two lookups.
            word_dims / tag_dims: Embedding dimensions.
            lstm_units: Hidden units per LSTM direction.
            hidden_units: Hidden-layer size of both MLPs.
            struct_out / label_out: Output sizes of the structure and
                label MLPs.
            droprate: Dropout rate (stored here; presumably applied
                during graph construction elsewhere -- not visible in
                this chunk).
            struct_spans / label_spans: Number of spans whose fwd+back
                LSTM features are concatenated as MLP input (factor 4 =
                2 directions x 2 span endpoints -- TODO confirm).
            optimizer: 1=SGD, 2=momentum SGD, 3=Adagrad, 4=RMSProp,
                5=Adam.  NOTE(review): any other value leaves
                self.trainer unset.
        """
        self.word_count = word_count
        self.tag_count = tag_count
        self.word_dims = word_dims
        self.tag_dims = tag_dims
        self.lstm_units = lstm_units
        self.hidden_units = hidden_units
        self.struct_out = struct_out
        self.label_out = label_out

        self.droprate = droprate

        # dynet.Model() is the legacy name for a ParameterCollection.
        self.model = dynet.Model()

        if optimizer == 1:
            self.trainer = dynet.SimpleSGDTrainer(self.model)
        elif optimizer == 2:
            self.trainer = dynet.MomentumSGDTrainer(self.model)
        elif optimizer == 3:
            self.trainer = dynet.AdagradTrainer(self.model,
                                                learning_rate=0.01,
                                                eps=0.001)
        elif optimizer == 4:
            self.trainer = dynet.RMSPropTrainer(self.model)
        elif optimizer == 5:
            self.trainer = dynet.AdamTrainer(self.model)
        # Fixed seed for reproducible shuffling/sampling elsewhere.
        random.seed(1)

        self.activation = dynet.rectify

        self.word_embed = self.model.add_lookup_parameters(
            (word_count, word_dims), )
        self.tag_embed = self.model.add_lookup_parameters(
            (tag_count, tag_dims), )

        # Layer 1 runs over concatenated word+tag embeddings.
        self.fwd_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
        self.back_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)

        # Layer 2 runs over the concatenated layer-1 fwd+back outputs.
        self.fwd_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)
        self.back_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)

        # Structure MLP: small uniform init on the hidden weights, zeros
        # elsewhere.
        self.struct_hidden_W = self.model.add_parameters(
            (hidden_units, 4 * struct_spans * lstm_units),
            dynet.UniformInitializer(0.01),
        )
        self.struct_hidden_b = self.model.add_parameters(
            (hidden_units, ),
            dynet.ConstInitializer(0),
        )
        self.struct_output_W = self.model.add_parameters(
            (struct_out, hidden_units),
            dynet.ConstInitializer(0),
        )
        self.struct_output_b = self.model.add_parameters(
            (struct_out, ),
            dynet.ConstInitializer(0),
        )

        # Label MLP, initialised the same way as the structure MLP.
        self.label_hidden_W = self.model.add_parameters(
            (hidden_units, 4 * label_spans * lstm_units),
            dynet.UniformInitializer(0.01),
        )
        self.label_hidden_b = self.model.add_parameters(
            (hidden_units, ),
            dynet.ConstInitializer(0),
        )
        self.label_output_W = self.model.add_parameters(
            (label_out, hidden_units),
            dynet.ConstInitializer(0),
        )
        self.label_output_b = self.model.add_parameters(
            (label_out, ),
            dynet.ConstInitializer(0),
        )