Example #1
 def _test_pretrained_masked_lm_for_translation(self, learned_pos_emb,
                                                encoder_only):
     with contextlib.redirect_stdout(StringIO()):
         with tempfile.TemporaryDirectory("test_mlm") as data_dir:
             create_dummy_data(data_dir)
             preprocess_lm_data(data_dir)
             train_legacy_masked_language_model(
                 data_dir,
                 arch="masked_lm",
                 extra_args=('--encoder-learned-pos', )
                 if learned_pos_emb else ())
             with tempfile.TemporaryDirectory(
                     "test_mlm_translation") as translation_dir:
                 create_dummy_data(translation_dir)
                 preprocess_translation_data(
                     translation_dir, extra_flags=["--joined-dictionary"])
                 # Train transformer with data_dir/checkpoint_last.pt
                 train_translation_model(
                     translation_dir,
                     arch="transformer_from_pretrained_xlm",
                     extra_flags=[
                         "--decoder-layers",
                         "1",
                         "--decoder-embed-dim",
                         "32",
                         "--decoder-attention-heads",
                         "1",
                         "--decoder-ffn-embed-dim",
                         "32",
                         "--encoder-layers",
                         "1",
                         "--encoder-embed-dim",
                         "32",
                         "--encoder-attention-heads",
                         "1",
                         "--encoder-ffn-embed-dim",
                         "32",
                         "--pretrained-xlm-checkpoint",
                         "{}/checkpoint_last.pt".format(data_dir),
                         "--activation-fn",
                         "gelu",
                         "--max-source-positions",
                         "500",
                         "--max-target-positions",
                         "500",
                     ] +
                     (["--encoder-learned-pos", "--decoder-learned-pos"]
                      if learned_pos_emb else []) +
                     (['--init-encoder-only'] if encoder_only else []),
                     task="translation_from_pretrained_xlm",
                 )
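This test exercises a two-stage flow: it first trains a legacy masked LM so that checkpoint_last.pt lands in data_dir, then trains a translation model whose weights (encoder only, when encoder_only is set) are initialized from that checkpoint via --pretrained-xlm-checkpoint. The train_* helpers come from fairseq's test utilities; the sketch below is a hypothetical stand-in that simply assembles a fairseq-train command, and the default flags chosen here are assumptions for illustration.

import subprocess
import sys


def train_translation_model_sketch(data_dir, arch, extra_flags=None, task="translation"):
    # Hypothetical wrapper: run a tiny fairseq-train job on the binarized data
    # in data_dir and write checkpoints back into the same directory.
    argv = [
        sys.executable, "-m", "fairseq_cli.train", data_dir,
        "--task", task,
        "--arch", arch,
        "--optimizer", "adam",
        "--lr", "0.0001",
        "--max-epoch", "1",
        "--batch-size", "4",
        "--save-dir", data_dir,
        "--no-progress-bar",
    ] + (extra_flags or [])
    subprocess.check_call(argv)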
Example #2
 def test_lstm_lm_residuals(self):
     with contextlib.redirect_stdout(StringIO()):
         with tempfile.TemporaryDirectory('test_lstm_lm_residuals') as data_dir:
             create_dummy_data(data_dir)
             preprocess_lm_data(data_dir)
             train_language_model(
                 data_dir, 'lstm_lm', ['--add-bos-token', '--residuals'], run_validation=True,
             )
             eval_lm_main(data_dir)
             generate_main(data_dir, [
                 '--task', 'language_modeling',
                 '--sample-break-mode', 'eos',
                 '--tokens-per-sample', '500',
             ])
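eval_lm_main and generate_main are fairseq test helpers that score and decode with the checkpoint produced above. The sketches below are assumptions about what they might look like, shelling out to the fairseq-eval-lm and fairseq-generate CLIs; the real helpers may call the Python entry points directly.

import os
import subprocess
import sys


def eval_lm_main_sketch(data_dir):
    # Hypothetical: score the held-out data with the last checkpoint.
    subprocess.check_call([
        sys.executable, "-m", "fairseq_cli.eval_lm", data_dir,
        "--path", os.path.join(data_dir, "checkpoint_last.pt"),
        "--no-progress-bar",
    ])


def generate_main_sketch(data_dir, extra_flags=None):
    # Hypothetical: decode from the trained model; extra_flags carries
    # task-specific options such as the --task language_modeling flags above.
    subprocess.check_call([
        sys.executable, "-m", "fairseq_cli.generate", data_dir,
        "--path", os.path.join(data_dir, "checkpoint_last.pt"),
        "--beam", "3",
        "--no-progress-bar",
    ] + (extra_flags or []))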
Example #3
 def test_linformer_roberta_sentence_prediction(self):
     num_classes = 3
     with contextlib.redirect_stdout(StringIO()):
         with tempfile.TemporaryDirectory(
                 "test_linformer_roberta_head") as data_dir:
             create_dummy_roberta_head_data(data_dir,
                                            num_classes=num_classes)
             preprocess_lm_data(os.path.join(data_dir, 'input0'))
             preprocess_lm_data(os.path.join(data_dir, 'label'))
             train_roberta_head(
                 data_dir,
                 "linformer_roberta_base",
                 num_classes=num_classes,
                 extra_flags=["--user-dir", "examples/linformer/src"],
             )
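train_roberta_head fine-tunes a classification head on dummy sentence-prediction data. The helper create_dummy_roberta_head_data presumably lays out an input0 directory of sentences and a label directory with one class id per line; the sketch below is an assumed illustration of that layout, not the real helper.

import os
import random


def create_dummy_roberta_head_data_sketch(data_dir, num_classes=3, num_examples=100):
    # Hypothetical layout: input0/<split>.txt holds token sequences,
    # label/<split>.txt holds one class id per example.
    for sub in ("input0", "label"):
        os.makedirs(os.path.join(data_dir, sub), exist_ok=True)
    for split in ("train", "valid"):
        inp_path = os.path.join(data_dir, "input0", f"{split}.txt")
        lbl_path = os.path.join(data_dir, "label", f"{split}.txt")
        with open(inp_path, "w") as inp, open(lbl_path, "w") as lbl:
            for _ in range(num_examples):
                inp.write(" ".join(str(random.randint(4, 20)) for _ in range(10)) + "\n")
                lbl.write(f"{random.randint(0, num_classes - 1)}\n")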
Example #4
 def test_linformer_roberta_masked_lm(self):
     with contextlib.redirect_stdout(StringIO()):
         with tempfile.TemporaryDirectory(
                 "test_linformer_roberta_mlm") as data_dir:
             create_dummy_data(data_dir)
             preprocess_lm_data(data_dir)
             train_masked_lm(
                 data_dir,
                 "linformer_roberta_base",
                 extra_flags=[
                     "--user-dir",
                     "examples/linformer/src",
                     "--encoder-layers",
                     "2",
                 ],
             )
Example #5
 def test_fconv_lm(self):
     with contextlib.redirect_stdout(StringIO()):
         with tempfile.TemporaryDirectory('test_fconv_lm') as data_dir:
             create_dummy_data(data_dir)
             preprocess_lm_data(data_dir)
             train_language_model(data_dir, 'fconv_lm', [
                 '--decoder-layers', '[(850, 3)] * 2 + [(1024,4)]',
                 '--decoder-embed-dim', '280',
                 '--optimizer', 'nag',
                 '--lr', '0.1',
             ])
             eval_lm_main(data_dir)
             generate_main(data_dir, [
                 '--task', 'language_modeling',
                 '--sample-break-mode', 'eos',
                 '--tokens-per-sample', '500',
             ])
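The --decoder-layers value for fconv_lm is a Python-style expression in which each tuple describes one convolutional block, read roughly as (output dimension, kernel width). Evaluated, the string above expands to three blocks:

# '[(850, 3)] * 2 + [(1024,4)]' expands to three conv blocks
layers = [(850, 3)] * 2 + [(1024, 4)]
assert layers == [(850, 3), (850, 3), (1024, 4)]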
Example #6
 def test_linformer_roberta_regression_multiple(self):
     num_classes = 3
     with contextlib.redirect_stdout(StringIO()):
         with tempfile.TemporaryDirectory(
                 "test_linformer_roberta_regression_multiple") as data_dir:
             create_dummy_roberta_head_data(data_dir,
                                            num_classes=num_classes,
                                            regression=True)
             preprocess_lm_data(os.path.join(data_dir, 'input0'))
             train_roberta_head(
                 data_dir,
                 "linformer_roberta_base",
                 num_classes=num_classes,
                 extra_flags=[
                     "--regression-target", "--user-dir",
                     "examples/linformer/src"
                 ],
             )
Example #7
 def test_legacy_masked_lm(self):
     with contextlib.redirect_stdout(StringIO()):
         with tempfile.TemporaryDirectory("test_legacy_mlm") as data_dir:
             create_dummy_data(data_dir)
             preprocess_lm_data(data_dir)
             train_legacy_masked_language_model(data_dir, "masked_lm")
Example #8
 def test_roberta_masked_lm(self):
     with contextlib.redirect_stdout(StringIO()):
         with tempfile.TemporaryDirectory("test_roberta_mlm") as data_dir:
             create_dummy_data(data_dir)
             preprocess_lm_data(data_dir)
             train_masked_lm(data_dir, "roberta_base")
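Most of these tests share the same two setup helpers. The sketches below are assumed implementations for illustration only: create_dummy_data writes small whitespace-tokenized splits, and preprocess_lm_data binarizes them with fairseq-preprocess in --only-source mode; the real fairseq test utilities may differ.

import os
import random
import subprocess
import sys


def create_dummy_data_sketch(data_dir, num_examples=100, maxlen=20):
    # Hypothetical: random token-id "sentences" for each split.
    for split in ("train", "valid", "test"):
        with open(os.path.join(data_dir, f"{split}.txt"), "w") as f:
            for _ in range(num_examples):
                length = random.randint(1, maxlen)
                f.write(" ".join(str(random.randint(4, 20)) for _ in range(length)) + "\n")


def preprocess_lm_data_sketch(data_dir):
    # Hypothetical: binarize the monolingual text with fairseq-preprocess.
    subprocess.check_call([
        sys.executable, "-m", "fairseq_cli.preprocess",
        "--only-source",
        "--trainpref", os.path.join(data_dir, "train.txt"),
        "--validpref", os.path.join(data_dir, "valid.txt"),
        "--testpref", os.path.join(data_dir, "test.txt"),
        "--destdir", data_dir,
    ])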
Example #9
    def _test_resume_multilingual_training(self,
                                           extra_clargs,
                                           arch="transformer_lm_gpt2_tiny"):
        languages = ["en_XX", "fr_XX", "zh_CN"]
        save_interval = 5
        mu = 10
        flags = (self.train_flags(mu) + [
            "--save-interval-updates",
            str(save_interval), "--log-interval", "1"
        ] + extra_clargs)
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fp16") as data_dir:
                log = os.path.join(data_dir, "train.log")
                create_dummy_data(
                    data_dir,
                    num_examples=int(
                        mu * 20 * self.world_size *
                        1.5),  # make sure enough data for max updates
                    languages=languages,
                )
                preprocess_lm_data(data_dir, languages)
                train_language_model(
                    data_dir,
                    arch,
                    flags + ["--log-file", log],
                    task="multilingual_language_modeling",
                    world_size=self.world_size,
                )
                log2 = os.path.join(data_dir, "resume.log")
                ckpt_name = f"checkpoint_1_{save_interval}.pt"
                restore_file = os.path.join(data_dir, ckpt_name)
                train_language_model(
                    data_dir,
                    arch,
                    flags + [
                        "--log-file", log2, "--restore-file", restore_file,
                        "--no-save"
                    ],
                    task="multilingual_language_modeling",
                    world_size=self.world_size,
                )

                l1 = self.parse_logs(log)
                assert (
                    int(l1[-1]["train_num_updates"]) == mu
                ), f"The first run did not complete {mu} updates. Add more data"
                l2 = self.parse_logs(log2)

                if int(l2[0]["num_updates"]) != save_interval + 1:
                    all_ckpt_files = [
                        x for x in os.listdir(data_dir) if x.endswith(".pt")
                    ]
                    import shutil

                    shutil.move(data_dir, "last_failed_resume")
                    raise AssertionError(
                        f"Likely failed to load {ckpt_name}. {all_ckpt_files} \n LOGS: {l1} \n\n {l2}. "
                    )
                for k in [
                        "train_loss",
                        "train_num_updates",
                        "train_ppl",
                        "train_gnorm",
                ]:
                    from_scratch, resumed = float(l1[-1][k]), float(l2[-1][k])
                    # This fails without rounding!
                    assert (from_scratch == resumed
                            ), f"difference at {k} {from_scratch} != {resumed}"