Exemple #1
0
 def test_train_dev_loss_equal(self):
   layer_dim = 512
   batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
   train_args = {}
   train_args['src_file'] = "examples/data/head.ja"
   train_args['trg_file'] = "examples/data/head.en"
   train_args['loss_calculator'] = AutoRegressiveMLELoss()
   train_args['model'] = DefaultTranslator(src_reader=PlainTextReader(),
                                           trg_reader=PlainTextReader(),
                                           src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
                                           encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim),
                                           attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim,
                                                                hidden_dim=layer_dim),
                                           trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
                                           decoder=AutoRegressiveDecoder(input_dim=layer_dim,
                                                                     trg_embed_dim=layer_dim,
                                                                     rnn=UniLSTMSeqTransducer(input_dim=layer_dim,
                                                                                                    hidden_dim=layer_dim,
                                                                                                    decoder_input_dim=layer_dim,
                                                                                                    yaml_path="model.decoder.rnn"),
                                                                     transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim),
                                                                     scorer=Softmax(input_dim=layer_dim, vocab_size=100),
                                                                     bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)),
                                           )
   train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'],
                                           src_file="examples/data/head.ja",
                                           ref_file="examples/data/head.en",
                                           batcher=batcher)]
   train_args['trainer'] = DummyTrainer()
   train_args['batcher'] = batcher
   train_args['run_for_epochs'] = 1
   training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(**train_args)
   training_regimen.run_training(save_fct = lambda: None)
   self.assertAlmostEqual(training_regimen.train_loss_tracker.epoch_loss.sum_factors() / training_regimen.train_loss_tracker.epoch_words,
                          training_regimen.dev_loss_tracker.dev_score.loss, places=5)
Exemple #2
0
 def test_overfitting(self):
     layer_dim = 16
     batcher = SrcBatcher(batch_size=10, break_ties_randomly=False)
     train_args = {}
     train_args['src_file'] = "examples/data/head.ja"
     train_args['trg_file'] = "examples/data/head.en"
     train_args['loss_calculator'] = MLELoss()
     train_args['model'] = DefaultTranslator(
         src_reader=PlainTextReader(),
         trg_reader=PlainTextReader(),
         src_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim),
         encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                     hidden_dim=layer_dim),
         attender=MlpAttender(input_dim=layer_dim,
                              state_dim=layer_dim,
                              hidden_dim=layer_dim),
         trg_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim),
         decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                   trg_embed_dim=layer_dim,
                                   rnn_layer=UniLSTMSeqTransducer(
                                       input_dim=layer_dim,
                                       hidden_dim=layer_dim,
                                       decoder_input_dim=layer_dim,
                                       yaml_path="model.decoder.rnn_layer"),
                                   mlp_layer=MLP(
                                       input_dim=layer_dim,
                                       hidden_dim=layer_dim,
                                       decoder_rnn_dim=layer_dim,
                                       vocab_size=100,
                                       yaml_path="model.decoder.rnn_layer"),
                                   bridge=CopyBridge(dec_dim=layer_dim,
                                                     dec_layers=1)),
     )
     train_args['dev_tasks'] = [
         LossEvalTask(model=train_args['model'],
                      src_file="examples/data/head.ja",
                      ref_file="examples/data/head.en",
                      batcher=batcher)
     ]
     train_args['run_for_epochs'] = 1
     train_args['trainer'] = AdamTrainer(alpha=0.1)
     train_args['batcher'] = batcher
     training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(
         **train_args)
     for _ in range(50):
         training_regimen.run_training(save_fct=lambda: None,
                                       update_weights=True)
     self.assertAlmostEqual(
         0.0,
         training_regimen.train_loss_tracker.epoch_loss.sum() /
         training_regimen.train_loss_tracker.epoch_words,
         places=2)
Exemple #3
0
 def test_train_dev_loss_equal(self):
     layer_dim = 512
     batcher = SrcBatcher(batch_size=5, break_ties_randomly=False)
     train_args = {}
     train_args['src_file'] = "examples/data/head.ja"
     train_args['trg_file'] = "examples/data/head.en"
     train_args['loss_calculator'] = LossCalculator()
     train_args['model'] = DefaultTranslator(
         src_reader=PlainTextReader(),
         trg_reader=PlainTextReader(),
         src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
         encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                     hidden_dim=layer_dim),
         attender=MlpAttender(input_dim=layer_dim,
                              state_dim=layer_dim,
                              hidden_dim=layer_dim),
         trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100),
         decoder=MlpSoftmaxDecoder(input_dim=layer_dim,
                                   lstm_dim=layer_dim,
                                   mlp_hidden_dim=layer_dim,
                                   trg_embed_dim=layer_dim,
                                   vocab_size=100,
                                   bridge=CopyBridge(dec_layers=1,
                                                     dec_dim=layer_dim)),
     )
     train_args['dev_tasks'] = [
         LossEvalTask(model=train_args['model'],
                      src_file="examples/data/head.ja",
                      ref_file="examples/data/head.en",
                      batcher=batcher)
     ]
     train_args['trainer'] = None
     train_args['batcher'] = batcher
     train_args['run_for_epochs'] = 1
     training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(
         **train_args)
     training_regimen.run_training(save_fct=lambda: None,
                                   update_weights=False)
     self.assertAlmostEqual(training_regimen.logger.epoch_loss.sum() /
                            training_regimen.logger.epoch_words,
                            training_regimen.logger.dev_score.loss,
                            places=5)
Exemple #4
0
 def test_overfitting(self):
     self.exp_global = ExpGlobal(
         dynet_param_collection=NonPersistentParamCollection(), dropout=0.0)
     self.exp_global.default_layer_dim = 16
     batcher = SrcBatcher(batch_size=10, break_ties_randomly=False)
     train_args = {}
     train_args['src_file'] = "examples/data/head.ja"
     train_args['trg_file'] = "examples/data/head.en"
     train_args['loss_calculator'] = LossCalculator()
     train_args['model'] = DefaultTranslator(
         src_reader=PlainTextReader(),
         trg_reader=PlainTextReader(),
         src_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
         encoder=BiLSTMSeqTransducer(self.exp_global),
         attender=MlpAttender(self.exp_global),
         trg_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100),
         decoder=MlpSoftmaxDecoder(self.exp_global,
                                   vocab_size=100,
                                   bridge=CopyBridge(
                                       exp_global=self.exp_global,
                                       dec_layers=1)),
     )
     train_args['dev_tasks'] = [
         LossEvalTask(model=train_args['model'],
                      src_file="examples/data/head.ja",
                      ref_file="examples/data/head.en",
                      batcher=batcher)
     ]
     train_args['run_for_epochs'] = 1
     train_args['trainer'] = AdamTrainer(self.exp_global, alpha=0.1)
     train_args['batcher'] = batcher
     training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(
         exp_global=self.exp_global, **train_args)
     training_regimen.exp_global = self.exp_global
     for _ in range(50):
         training_regimen.run_training(save_fct=lambda: None,
                                       update_weights=True)
     self.assertAlmostEqual(0.0,
                            training_regimen.logger.epoch_loss.sum() /
                            training_regimen.logger.epoch_words,
                            places=2)
Exemple #5
0
 def test_overfitting(self):
     self.model_context = ModelContext()
     self.model_context.dynet_param_collection = PersistentParamCollection(
         "some_file", 1)
     self.model_context.default_layer_dim = 16
     train_args = {}
     training_corpus = BilingualTrainingCorpus(
         train_src="examples/data/head.ja",
         train_trg="examples/data/head.en",
         dev_src="examples/data/head.ja",
         dev_trg="examples/data/head.en")
     train_args['corpus_parser'] = BilingualCorpusParser(
         training_corpus=training_corpus,
         src_reader=PlainTextReader(),
         trg_reader=PlainTextReader())
     train_args['training_strategy'] = TrainingStrategy()
     train_args['model'] = DefaultTranslator(
         src_embedder=SimpleWordEmbedder(self.model_context,
                                         vocab_size=100),
         encoder=BiLSTMSeqTransducer(self.model_context),
         attender=MlpAttender(self.model_context),
         trg_embedder=SimpleWordEmbedder(self.model_context,
                                         vocab_size=100),
         decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
     )
     train_args['model_file'] = None
     train_args['save_num_checkpoints'] = 0
     train_args['trainer'] = AdamTrainer(self.model_context, alpha=0.1)
     train_args['batcher'] = SrcBatcher(batch_size=10,
                                        break_ties_randomly=False)
     training_regimen = xnmt.train.TrainingRegimen(
         yaml_context=self.model_context, **train_args)
     training_regimen.model_context = self.model_context
     for _ in range(50):
         training_regimen.one_epoch(update_weights=True)
     self.assertAlmostEqual(
         0.0,
         training_regimen.logger.epoch_loss.loss_values['loss'] /
         training_regimen.logger.epoch_words,
         places=2)
Exemple #6
0
 def test_train_dev_loss_equal(self):
     self.model_context = ModelContext()
     self.model_context.dynet_param_collection = NonPersistentParamCollection(
     )
     train_args = {}
     training_corpus = BilingualTrainingCorpus(
         train_src="examples/data/head.ja",
         train_trg="examples/data/head.en",
         dev_src="examples/data/head.ja",
         dev_trg="examples/data/head.en")
     train_args['corpus_parser'] = BilingualCorpusParser(
         training_corpus=training_corpus,
         src_reader=PlainTextReader(),
         trg_reader=PlainTextReader())
     train_args['loss_calculator'] = LossCalculator()
     train_args['model'] = DefaultTranslator(
         src_embedder=SimpleWordEmbedder(self.model_context,
                                         vocab_size=100),
         encoder=BiLSTMSeqTransducer(self.model_context),
         attender=MlpAttender(self.model_context),
         trg_embedder=SimpleWordEmbedder(self.model_context,
                                         vocab_size=100),
         decoder=MlpSoftmaxDecoder(self.model_context, vocab_size=100),
     )
     train_args['trainer'] = None
     train_args['batcher'] = SrcBatcher(batch_size=5,
                                        break_ties_randomly=False)
     train_args['run_for_epochs'] = 1
     training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(
         yaml_context=self.model_context, **train_args)
     training_regimen.model_context = self.model_context
     training_regimen.run_training(update_weights=False)
     self.assertAlmostEqual(
         training_regimen.logger.epoch_loss.loss_values['loss'] /
         training_regimen.logger.epoch_words,
         training_regimen.logger.dev_score.loss)
Exemple #7
0
EXP_DIR = os.path.dirname(__file__)
EXP = "programmatic"

model_file = f"{EXP_DIR}/models/{EXP}.mod"
log_file = f"{EXP_DIR}/logs/{EXP}.log"

xnmt.tee.set_out_file(log_file)

ParamManager.init_param_col()
ParamManager.param_col.model_file = model_file

src_vocab = Vocab(vocab_file="examples/data/head.ja.vocab")
trg_vocab = Vocab(vocab_file="examples/data/head.en.vocab")

batcher = SrcBatcher(batch_size=64)

inference = SimpleInference(batcher=batcher)

layer_dim = 512

model = DefaultTranslator(
    src_reader=PlainTextReader(vocab=src_vocab),
    trg_reader=PlainTextReader(vocab=trg_vocab),
    src_embedder=SimpleWordEmbedder(emb_dim=layer_dim,
                                    vocab_size=len(src_vocab)),
    encoder=BiLSTMSeqTransducer(input_dim=layer_dim,
                                hidden_dim=layer_dim,
                                layers=1),
    attender=MlpAttender(hidden_dim=layer_dim,
                         state_dim=layer_dim,
Exemple #8
0
    def __init__(self,
                 yaml_context,
                 corpus_parser,
                 model,
                 glob={},
                 dev_every=0,
                 batcher=None,
                 loss_calculator=None,
                 pretrained_model_file="",
                 src_format="text",
                 run_for_epochs=None,
                 lr_decay=1.0,
                 lr_decay_times=3,
                 patience=1,
                 initial_patience=None,
                 dev_metrics="",
                 schedule_metric="loss",
                 restart_trainer=False,
                 reload_command=None,
                 name=None,
                 inference=None):
        """
    :param yaml_context:
    :param corpus_parser: an input.InputReader object
    :param model: a generator.GeneratorModel object
    :param dev_every (int): dev checkpoints every n sentences (0 for only after epoch)
    :param batcher: Type of batcher. Defaults to SrcBatcher of batch size 32.
    :param loss_calculator:
    :param pretrained_model_file: Path of pre-trained model file
    :param src_format: Format of input data: text/contvec
    :param lr_decay (float):
    :param lr_decay_times (int):  Early stopping after decaying learning rate a certain number of times
    :param patience (int): apply LR decay after dev scores haven't improved over this many checkpoints
    :param initial_patience (int): if given, allows adjusting patience for the first LR decay
    :param dev_metrics: Comma-separated list of evaluation metrics (bleu/wer/cer)
    :param schedule_metric: determine learning schedule based on this dev_metric (loss/bleu/wer/cer)
    :param restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
    :param reload_command: Command to change the input data after each epoch.
                           --epoch EPOCH_NUM will be appended to the command.
                           To just reload the data after each epoch set the command to 'true'.
    :param name: will be prepended to log outputs if given
    :param inference: used for inference during dev checkpoints if dev_metrics are specified
    """
        assert yaml_context is not None
        self.yaml_context = yaml_context
        self.model_file = self.yaml_context.dynet_param_collection.model_file
        self.yaml_serializer = YamlSerializer()

        if lr_decay > 1.0 or lr_decay <= 0.0:
            raise RuntimeError(
                "illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")
        self.lr_decay = lr_decay
        self.patience = patience
        self.initial_patience = initial_patience
        self.lr_decay_times = lr_decay_times
        self.restart_trainer = restart_trainer
        self.run_for_epochs = run_for_epochs

        self.early_stopping_reached = False
        # training state
        self.training_state = TrainingState()

        self.evaluators = [
            s.lower() for s in dev_metrics.split(",") if s.strip() != ""
        ]
        if schedule_metric.lower() not in self.evaluators:
            self.evaluators.append(schedule_metric.lower())
        if "loss" not in self.evaluators: self.evaluators.append("loss")
        if dev_metrics:
            self.inference = inference or SimpleInference()

        self.reload_command = reload_command
        if reload_command is not None:
            self._augmentation_handle = None
            self._augment_data_initial()

        self.model = model
        self.corpus_parser = corpus_parser
        self.loss_calculator = loss_calculator or LossCalculator(MLELoss())
        self.pretrained_model_file = pretrained_model_file
        if self.pretrained_model_file:
            self.yaml_context.dynet_param_collection.load_from_data_file(
                self.pretrained_model_file + '.data')

        self.batcher = batcher or SrcBatcher(32)
        if src_format == "contvec":
            self.batcher.pad_token = np.zeros(self.model.src_embedder.emb_dim)
        self.pack_batches()
        self.logger = BatchLossTracker(self, dev_every, name)

        self.schedule_metric = schedule_metric.lower()