def test_train_dev_loss_equal(self): layer_dim = 512 batcher = SrcBatcher(batch_size=5, break_ties_randomly=False) train_args = {} train_args['src_file'] = "examples/data/head.ja" train_args['trg_file'] = "examples/data/head.en" train_args['loss_calculator'] = AutoRegressiveMLELoss() train_args['model'] = DefaultTranslator(src_reader=PlainTextReader(), trg_reader=PlainTextReader(), src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), decoder=AutoRegressiveDecoder(input_dim=layer_dim, trg_embed_dim=layer_dim, rnn=UniLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn"), transform=NonLinear(input_dim=layer_dim*2, output_dim=layer_dim), scorer=Softmax(input_dim=layer_dim, vocab_size=100), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) train_args['dev_tasks'] = [LossEvalTask(model=train_args['model'], src_file="examples/data/head.ja", ref_file="examples/data/head.en", batcher=batcher)] train_args['trainer'] = DummyTrainer() train_args['batcher'] = batcher train_args['run_for_epochs'] = 1 training_regimen = xnmt.training_regimen.SimpleTrainingRegimen(**train_args) training_regimen.run_training(save_fct = lambda: None) self.assertAlmostEqual(training_regimen.train_loss_tracker.epoch_loss.sum_factors() / training_regimen.train_loss_tracker.epoch_words, training_regimen.dev_loss_tracker.dev_score.loss, places=5)
def test_overfitting(self): layer_dim = 16 batcher = SrcBatcher(batch_size=10, break_ties_randomly=False) train_args = {} train_args['src_file'] = "examples/data/head.ja" train_args['trg_file'] = "examples/data/head.en" train_args['loss_calculator'] = MLELoss() train_args['model'] = DefaultTranslator( src_reader=PlainTextReader(), trg_reader=PlainTextReader(), src_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), trg_embedder=SimpleWordEmbedder(vocab_size=100, emb_dim=layer_dim), decoder=MlpSoftmaxDecoder(input_dim=layer_dim, trg_embed_dim=layer_dim, rnn_layer=UniLSTMSeqTransducer( input_dim=layer_dim, hidden_dim=layer_dim, decoder_input_dim=layer_dim, yaml_path="model.decoder.rnn_layer"), mlp_layer=MLP( input_dim=layer_dim, hidden_dim=layer_dim, decoder_rnn_dim=layer_dim, vocab_size=100, yaml_path="model.decoder.rnn_layer"), bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), ) train_args['dev_tasks'] = [ LossEvalTask(model=train_args['model'], src_file="examples/data/head.ja", ref_file="examples/data/head.en", batcher=batcher) ] train_args['run_for_epochs'] = 1 train_args['trainer'] = AdamTrainer(alpha=0.1) train_args['batcher'] = batcher training_regimen = xnmt.training_regimen.SimpleTrainingRegimen( **train_args) for _ in range(50): training_regimen.run_training(save_fct=lambda: None, update_weights=True) self.assertAlmostEqual( 0.0, training_regimen.train_loss_tracker.epoch_loss.sum() / training_regimen.train_loss_tracker.epoch_words, places=2)
def test_train_dev_loss_equal(self): layer_dim = 512 batcher = SrcBatcher(batch_size=5, break_ties_randomly=False) train_args = {} train_args['src_file'] = "examples/data/head.ja" train_args['trg_file'] = "examples/data/head.en" train_args['loss_calculator'] = LossCalculator() train_args['model'] = DefaultTranslator( src_reader=PlainTextReader(), trg_reader=PlainTextReader(), src_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), encoder=BiLSTMSeqTransducer(input_dim=layer_dim, hidden_dim=layer_dim), attender=MlpAttender(input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim), trg_embedder=SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100), decoder=MlpSoftmaxDecoder(input_dim=layer_dim, lstm_dim=layer_dim, mlp_hidden_dim=layer_dim, trg_embed_dim=layer_dim, vocab_size=100, bridge=CopyBridge(dec_layers=1, dec_dim=layer_dim)), ) train_args['dev_tasks'] = [ LossEvalTask(model=train_args['model'], src_file="examples/data/head.ja", ref_file="examples/data/head.en", batcher=batcher) ] train_args['trainer'] = None train_args['batcher'] = batcher train_args['run_for_epochs'] = 1 training_regimen = xnmt.training_regimen.SimpleTrainingRegimen( **train_args) training_regimen.run_training(save_fct=lambda: None, update_weights=False) self.assertAlmostEqual(training_regimen.logger.epoch_loss.sum() / training_regimen.logger.epoch_words, training_regimen.logger.dev_score.loss, places=5)
def test_overfitting(self): self.exp_global = ExpGlobal( dynet_param_collection=NonPersistentParamCollection(), dropout=0.0) self.exp_global.default_layer_dim = 16 batcher = SrcBatcher(batch_size=10, break_ties_randomly=False) train_args = {} train_args['src_file'] = "examples/data/head.ja" train_args['trg_file'] = "examples/data/head.en" train_args['loss_calculator'] = LossCalculator() train_args['model'] = DefaultTranslator( src_reader=PlainTextReader(), trg_reader=PlainTextReader(), src_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100), encoder=BiLSTMSeqTransducer(self.exp_global), attender=MlpAttender(self.exp_global), trg_embedder=SimpleWordEmbedder(self.exp_global, vocab_size=100), decoder=MlpSoftmaxDecoder(self.exp_global, vocab_size=100, bridge=CopyBridge( exp_global=self.exp_global, dec_layers=1)), ) train_args['dev_tasks'] = [ LossEvalTask(model=train_args['model'], src_file="examples/data/head.ja", ref_file="examples/data/head.en", batcher=batcher) ] train_args['run_for_epochs'] = 1 train_args['trainer'] = AdamTrainer(self.exp_global, alpha=0.1) train_args['batcher'] = batcher training_regimen = xnmt.training_regimen.SimpleTrainingRegimen( exp_global=self.exp_global, **train_args) training_regimen.exp_global = self.exp_global for _ in range(50): training_regimen.run_training(save_fct=lambda: None, update_weights=True) self.assertAlmostEqual(0.0, training_regimen.logger.epoch_loss.sum() / training_regimen.logger.epoch_words, places=2)
trg_embed_dim=layer_dim, bridge=CopyBridge(dec_dim=layer_dim, dec_layers=1)), inference=inference) train = SimpleTrainingRegimen( name=f"{EXP}", model=model, batcher=batcher, trainer=AdamTrainer(alpha=0.001), run_for_epochs=2, src_file="examples/data/head.ja", trg_file="examples/data/head.en", dev_tasks=[ LossEvalTask(src_file="examples/data/head.ja", ref_file="examples/data/head.en", model=model, batcher=batcher) ], ) evaluate = [ AccuracyEvalTask(eval_metrics="bleu,wer", src_file="examples/data/head.ja", ref_file="examples/data/head.en", hyp_file=f"examples/output/{EXP}.test_hyp", inference=inference, model=model) ] standard_experiment = Experiment(model=model, train=train, evaluate=evaluate)