def generator_fn(x, y, shuffle=True, looping=True):
    # Thin wrapper that fixes the vocabulary and batch size, so the
    # optimizer only has to supply the data and iteration flags.
    return nmt_train_generator(x, y, de_vocab_size, batch_size,
                               shuffle=shuffle, looping=looping)
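# nmt_train_generator itself is not shown in this excerpt. The sketch below
# is an assumption about what such a generator typically does (batching,
# optional shuffling, teacher-forced inputs, one-hot targets), not the
# project's actual implementation.
import numpy as np

def nmt_train_generator_sketch(src, tar, tar_vocab_size, batch_size,
                               shuffle=True, looping=True):
    n = src.shape[0]
    while True:
        # Reshuffle the sample order at the start of every pass.
        idx = np.random.permutation(n) if shuffle else np.arange(n)
        for start in range(0, n - batch_size + 1, batch_size):
            batch = idx[start:start + batch_size]
            enc_in = src[batch]
            dec_in = tar[batch, :-1]   # decoder input: target shifted right
            labels = tar[batch, 1:]    # labels: next-token targets
            # One-hot encode the labels to match a softmax output layer.
            yield [enc_in, dec_in], np.eye(tar_vocab_size,
                                           dtype='float32')[labels]
        if not looping:
            break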
from tensorflow.keras.callbacks import Callback  # import path assumed


class BleuLogger(Callback):
    # Tracks the training loss and periodically scores BLEU on held-out
    # data using the separate inference encoder/decoder models.
    def __init__(self, data, eval_every, batch_size, tar_vocab_size, encoder,
                 decoder):
        super().__init__()
        self.src, self.tar = data
        self.generator = nmt_train_generator(self.src, self.tar,
                                             tar_vocab_size, batch_size)
        self.eval_every = eval_every
        self.batch_size = batch_size
        self.encoder = encoder
        self.decoder = decoder
        self.losses = []   # consumed below when building the results frame
        self.scores = []
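    # Sketch of the logging hook this callback presumably implements; the
    # method body is an assumption inferred from how losses/scores are
    # consumed later in this example.
    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        if batch % self.eval_every == 0:
            self.scores.append(bleu_score_enc_dec(
                self.encoder, self.decoder, self.src, self.tar,
                self.batch_size))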
    # Tail of build_member (passed to PbtPsoOptimizer below): wrap the
    # compiled model as a population member, exposing lr and dropout for
    # tuning and BLEU as the evaluation metric.
    return Member(model,
                  param_names=['lr', 'dropout'],
                  tune_lr=True,
                  use_eval_metric='bleu',
                  custom_metrics={
                      'bleu': lambda x, y, _: bleu_score_enc_dec(
                          encoder_model, decoder_model, x, y, batch_size)
                  })
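# bleu_score_enc_dec decodes the held-out pairs and scores them; its
# implementation is not shown. A hedged sketch of the scoring step only,
# using NLTK's corpus_bleu over token-id sequences (function name and
# input format are assumptions):
from nltk.translate.bleu_score import corpus_bleu

def bleu_from_token_ids(references, hypotheses):
    # corpus_bleu expects a list of reference *lists* per hypothesis;
    # token ids are stringified since BLEU only compares tokens for equality.
    refs = [[[str(t) for t in ref]] for ref in references]
    hyps = [[str(t) for t in hyp] for hyp in hypotheses]
    return corpus_bleu(refs, hyps)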

    steps = en_train.shape[0] // batch_size
    steps_ready = 1000
    eval_every = 100

    def generator_fn(x, y, shuffle=True, looping=True):
        return nmt_train_generator(x, y, de_vocab_size, batch_size,
                                   shuffle=shuffle, looping=looping)
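
    # population_size and parameters are defined outside this excerpt; a
    # hypothetical search space matching param_names above might look like:
    # population_size = 8
    # parameters = {'lr': (1e-4, 1e-2), 'dropout': (0.0, 0.5)}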
    pbt = PbtPsoOptimizer(build_member,
                          population_size,
                          parameters,
                          steps_ready=steps_ready,
                          omega=0.5,
                          phi1=0.5,
                          phi2=1.0)
    model, results = pbt.train(en_train_t,
                               de_train_t,
                               en_train_v,
                               de_train_v,
                               steps=steps,
                               eval_every=eval_every,
                               generator_fn=generator_fn)
Example #4
    # model parameters
    hidden_size = 96
    embedding_size = 100
    timesteps = 30

    # hyperparameters
    lr = 0.001
    dropout = 0.2

    model, encoder_model, decoder_model = define_nmt(hidden_size,
                                                     embedding_size, timesteps,
                                                     en_vocab_size,
                                                     de_vocab_size, dropout,
                                                     lr)
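
    # define_nmt itself is not shown here; the sketch below (assumptions
    # throughout) outlines a comparable GRU encoder-decoder built from the
    # same arguments, without attention and without the separate inference
    # models the original also returns.
    def define_nmt_sketch(hidden_size, embedding_size, timesteps,
                          src_vocab_size, tar_vocab_size, dropout, lr):
        from tensorflow.keras.layers import (Input, Embedding, GRU, Dense,
                                             TimeDistributed)
        from tensorflow.keras.models import Model
        from tensorflow.keras.optimizers import Adam

        enc_in = Input(shape=(timesteps,))
        dec_in = Input(shape=(timesteps - 1,))
        enc_emb = Embedding(src_vocab_size, embedding_size)(enc_in)
        dec_emb = Embedding(tar_vocab_size, embedding_size)(dec_in)
        # Encoder: keep only the final hidden state.
        _, enc_state = GRU(hidden_size, return_state=True,
                           dropout=dropout)(enc_emb)
        # Decoder: full output sequence, seeded with the encoder state.
        dec_seq = GRU(hidden_size, return_sequences=True,
                      dropout=dropout)(dec_emb, initial_state=enc_state)
        out = TimeDistributed(Dense(tar_vocab_size,
                                    activation='softmax'))(dec_seq)
        model = Model([enc_in, dec_in], out)
        model.compile(optimizer=Adam(learning_rate=lr),
                      loss='categorical_crossentropy')
        return model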

    train_generator = nmt_train_generator(en_train_t, de_train_t,
                                          de_vocab_size, batch_size)

    steps = en_train.shape[0] // batch_size
    eval_every = 100

    bleu_logger = BleuLogger((en_train_v, de_train_v), eval_every, batch_size,
                             de_vocab_size, encoder_model, decoder_model)

    model.fit_generator(train_generator,
                        steps_per_epoch=steps,
                        callbacks=[bleu_logger])
    model.save_weights('baseline.h5')

    results = pd.DataFrame({
        'loss': bleu_logger.losses,
        'bleu': bleu_logger.scores
    })
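    # A typical follow-up (hypothetical, not part of the original excerpt):
    # persist the logged curves next to the saved baseline weights.
    # results.to_csv('baseline_results.csv', index=False)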