Beispiel #1
0
 def test_creation_saving_restoring(self):
     cfg = AAYNBaseConfig()
     cfg.encoder_tokenizer.tokenizer_name = 'yttm'
     cfg.encoder_tokenizer.tokenizer_model = 'tests/.data/yttm.4096.en-de.model'
     cfg.decoder_tokenizer.tokenizer_name = 'yttm'
     cfg.decoder_tokenizer.tokenizer_model = 'tests/.data/yttm.4096.en-de.model'
     cfg.train_ds = None
     cfg.validation_ds = None
     cfg.test_ds = None
     model = MTEncDecModel(cfg=cfg)
     assert isinstance(model, MTEncDecModel)
     # Create a new temporary directory
     with tempfile.TemporaryDirectory() as restore_folder:
         with tempfile.TemporaryDirectory() as save_folder:
             save_folder_path = save_folder
             # Where model will be saved
             model_save_path = os.path.join(
                 save_folder, f"{model.__class__.__name__}.nemo")
             model.save_to(save_path=model_save_path)
             # Where model will be restored from
             model_restore_path = os.path.join(
                 restore_folder, f"{model.__class__.__name__}.nemo")
             shutil.copy(model_save_path, model_restore_path)
         # at this point save_folder should not exist
         assert save_folder_path is not None and not os.path.exists(
             save_folder_path)
         assert not os.path.exists(model_save_path)
         assert os.path.exists(model_restore_path)
         # attempt to restore
         model_copy = model.__class__.restore_from(
             restore_path=model_restore_path)
         assert model.num_weights == model_copy.num_weights
Beispiel #2
0
 def test_creation_saving_restoring(self):
     model = MTEncDecModel(cfg=get_cfg())
     assert isinstance(model, MTEncDecModel)
     # Create a new temporary directory
     with tempfile.TemporaryDirectory() as restore_folder:
         with tempfile.TemporaryDirectory() as save_folder:
             save_folder_path = save_folder
             # Where model will be saved
             model_save_path = os.path.join(save_folder, f"{model.__class__.__name__}.nemo")
             model.save_to(save_path=model_save_path)
             # Where model will be restored from
             model_restore_path = os.path.join(restore_folder, f"{model.__class__.__name__}.nemo")
             shutil.copy(model_save_path, model_restore_path)
         # at this point save_folder should not exist
         assert save_folder_path is not None and not os.path.exists(save_folder_path)
         assert not os.path.exists(model_save_path)
         assert os.path.exists(model_restore_path)
         # attempt to restore
         model_copy = model.__class__.restore_from(restore_path=model_restore_path)
         assert model.num_weights == model_copy.num_weights
Beispiel #3
0
 def test_no_artifact_name_collision(self):
     model = MTEncDecModel(cfg=get_cfg())
     assert isinstance(model, MTEncDecModel)
     with tempfile.TemporaryDirectory() as tmpdir1:
         model.save_to("nmt_model.nemo")
         with tempfile.TemporaryDirectory() as tmpdir:
             model._save_restore_connector._unpack_nemo_file(
                 path2file="nmt_model.nemo", out_folder=tmpdir)
             conf = OmegaConf.load(os.path.join(tmpdir,
                                                "model_config.yaml"))
             # Make sure names now differ in saved config
             assert conf.encoder_tokenizer.tokenizer_model != conf.decoder_tokenizer.tokenizer_model
             # Make sure names in config start with "nemo:" prefix
             assert conf.encoder_tokenizer.tokenizer_model.startswith(
                 "nemo:")
             assert conf.decoder_tokenizer.tokenizer_model.startswith(
                 "nemo:")
             # Check if both tokenizers were included
             assert os.path.exists(
                 os.path.join(tmpdir,
                              conf.encoder_tokenizer.tokenizer_model[5:]))
             assert os.path.exists(
                 os.path.join(tmpdir,
                              conf.decoder_tokenizer.tokenizer_model[5:]))
Beispiel #4
0
    def test_train_eval_loss(self):
        cfg = get_cfg()
        cfg.label_smoothing = 0.1
        model = MTEncDecModel(cfg=cfg)
        assert isinstance(model, MTEncDecModel)
        batch_size = 10
        time = 32
        vocab_size = 32000
        tgt_ids = torch.LongTensor(batch_size, time).random_(1, model.decoder_tokenizer.vocab_size)
        logits = torch.FloatTensor(batch_size, time, vocab_size).random_(-1, 1)
        log_probs = torch.nn.functional.log_softmax(logits, dim=-1)
        train_loss = model.loss_fn(log_probs=log_probs, labels=tgt_ids)
        eval_loss = model.eval_loss_fn(log_probs=log_probs, labels=tgt_ids)
        assert not torch.allclose(train_loss, eval_loss)  # , (train_loss, eval_loss)

        cfg.label_smoothing = 0
        model = MTEncDecModel(cfg=cfg)
        # Train loss == val loss when label smoothing = 0
        train_loss = model.loss_fn(log_probs=log_probs, labels=tgt_ids)
        eval_loss = model.eval_loss_fn(log_probs=log_probs, labels=tgt_ids)
        assert torch.allclose(train_loss, eval_loss)
Beispiel #5
0
 def test_gpu_export_onnx(self):
     model = MTEncDecModel(cfg=get_cfg()).cuda()
     assert isinstance(model, MTEncDecModel)
     export_test(model, ".onnx")
Beispiel #6
0
 def test_cpu_export_onnx(self):
     model = MTEncDecModel(cfg=get_cfg())
     assert isinstance(model, MTEncDecModel)
     export_test(model, ".ts")