def _test_pretrained_masked_lm_for_translation(self, learned_pos_emb, encoder_only):
    with contextlib.redirect_stdout(StringIO()):
        with tempfile.TemporaryDirectory("test_mlm") as data_dir:
            create_dummy_data(data_dir)
            preprocess_lm_data(data_dir)
            train_legacy_masked_language_model(
                data_dir,
                arch="masked_lm",
                extra_args=("--encoder-learned-pos",) if learned_pos_emb else (),
            )
            with tempfile.TemporaryDirectory(
                "test_mlm_translation"
            ) as translation_dir:
                create_dummy_data(translation_dir)
                preprocess_translation_data(
                    translation_dir, extra_flags=["--joined-dictionary"]
                )
                # Train transformer with data_dir/checkpoint_last.pt
                train_translation_model(
                    translation_dir,
                    arch="transformer_from_pretrained_xlm",
                    extra_flags=[
                        "--decoder-layers",
                        "1",
                        "--decoder-embed-dim",
                        "32",
                        "--decoder-attention-heads",
                        "1",
                        "--decoder-ffn-embed-dim",
                        "32",
                        "--encoder-layers",
                        "1",
                        "--encoder-embed-dim",
                        "32",
                        "--encoder-attention-heads",
                        "1",
                        "--encoder-ffn-embed-dim",
                        "32",
                        "--pretrained-xlm-checkpoint",
                        "{}/checkpoint_last.pt".format(data_dir),
                        "--activation-fn",
                        "gelu",
                        "--max-source-positions",
                        "500",
                        "--max-target-positions",
                        "500",
                    ]
                    + (
                        ["--encoder-learned-pos", "--decoder-learned-pos"]
                        if learned_pos_emb
                        else []
                    )
                    + (["--init-encoder-only"] if encoder_only else []),
                    task="translation_from_pretrained_xlm",
                )
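
# The parametrized helper above is presumably exercised by thin wrapper tests
# that pin down the two flags. The sketch below is illustrative only: the
# method names are assumptions, not necessarily those used in the test suite.
def test_pretrained_masked_lm_for_translation_learned_pos_emb(self):
    # learned positional embeddings, full encoder-decoder initialization
    self._test_pretrained_masked_lm_for_translation(True, False)

def test_pretrained_masked_lm_for_translation_encoder_only(self):
    # learned positional embeddings, initialize the encoder only
    self._test_pretrained_masked_lm_for_translation(True, True)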
def test_lstm_lm_residuals(self):
    with contextlib.redirect_stdout(StringIO()):
        with tempfile.TemporaryDirectory('test_lstm_lm_residuals') as data_dir:
            create_dummy_data(data_dir)
            preprocess_lm_data(data_dir)
            train_language_model(
                data_dir,
                'lstm_lm',
                ['--add-bos-token', '--residuals'],
                run_validation=True,
            )
            eval_lm_main(data_dir)
            generate_main(
                data_dir,
                [
                    '--task',
                    'language_modeling',
                    '--sample-break-mode',
                    'eos',
                    '--tokens-per-sample',
                    '500',
                ],
            )
def test_linformer_roberta_sentence_prediction(self):
    num_classes = 3
    with contextlib.redirect_stdout(StringIO()):
        with tempfile.TemporaryDirectory("test_linformer_roberta_head") as data_dir:
            create_dummy_roberta_head_data(data_dir, num_classes=num_classes)
            preprocess_lm_data(os.path.join(data_dir, 'input0'))
            preprocess_lm_data(os.path.join(data_dir, 'label'))
            train_roberta_head(
                data_dir,
                "linformer_roberta_base",
                num_classes=num_classes,
                extra_flags=["--user-dir", "examples/linformer/src"],
            )
def test_linformer_roberta_masked_lm(self):
    with contextlib.redirect_stdout(StringIO()):
        with tempfile.TemporaryDirectory("test_linformer_roberta_mlm") as data_dir:
            create_dummy_data(data_dir)
            preprocess_lm_data(data_dir)
            train_masked_lm(
                data_dir,
                "linformer_roberta_base",
                extra_flags=[
                    "--user-dir",
                    "examples/linformer/src",
                    "--encoder-layers",
                    "2",
                ],
            )
def test_fconv_lm(self):
    with contextlib.redirect_stdout(StringIO()):
        with tempfile.TemporaryDirectory('test_fconv_lm') as data_dir:
            create_dummy_data(data_dir)
            preprocess_lm_data(data_dir)
            train_language_model(
                data_dir,
                'fconv_lm',
                [
                    '--decoder-layers',
                    '[(850, 3)] * 2 + [(1024,4)]',
                    '--decoder-embed-dim',
                    '280',
                    '--optimizer',
                    'nag',
                    '--lr',
                    '0.1',
                ],
            )
            eval_lm_main(data_dir)
            generate_main(
                data_dir,
                [
                    '--task',
                    'language_modeling',
                    '--sample-break-mode',
                    'eos',
                    '--tokens-per-sample',
                    '500',
                ],
            )
def test_linformer_roberta_regression_multiple(self):
    num_classes = 3
    with contextlib.redirect_stdout(StringIO()):
        with tempfile.TemporaryDirectory(
            "test_linformer_roberta_regression_multiple"
        ) as data_dir:
            create_dummy_roberta_head_data(
                data_dir, num_classes=num_classes, regression=True
            )
            preprocess_lm_data(os.path.join(data_dir, 'input0'))
            train_roberta_head(
                data_dir,
                "linformer_roberta_base",
                num_classes=num_classes,
                extra_flags=[
                    "--regression-target",
                    "--user-dir",
                    "examples/linformer/src",
                ],
            )
def test_legacy_masked_lm(self):
    with contextlib.redirect_stdout(StringIO()):
        with tempfile.TemporaryDirectory("test_legacy_mlm") as data_dir:
            create_dummy_data(data_dir)
            preprocess_lm_data(data_dir)
            train_legacy_masked_language_model(data_dir, "masked_lm")
def test_roberta_masked_lm(self):
    with contextlib.redirect_stdout(StringIO()):
        with tempfile.TemporaryDirectory("test_roberta_mlm") as data_dir:
            create_dummy_data(data_dir)
            preprocess_lm_data(data_dir)
            train_masked_lm(data_dir, "roberta_base")
def _test_resume_multilingual_training(
    self, extra_clargs, arch="transformer_lm_gpt2_tiny"
):
    languages = ["en_XX", "fr_XX", "zh_CN"]
    save_interval = 5
    mu = 10
    flags = (
        self.train_flags(mu)
        + ["--save-interval-updates", str(save_interval), "--log-interval", "1"]
        + extra_clargs
    )
    with contextlib.redirect_stdout(StringIO()):
        with tempfile.TemporaryDirectory("test_fp16") as data_dir:
            log = os.path.join(data_dir, "train.log")
            create_dummy_data(
                data_dir,
                num_examples=int(
                    mu * 20 * self.world_size * 1.5
                ),  # make sure there is enough data for max updates
                languages=languages,
            )
            preprocess_lm_data(data_dir, languages)
            train_language_model(
                data_dir,
                arch,
                flags + ["--log-file", log],
                task="multilingual_language_modeling",
                world_size=self.world_size,
            )
            log2 = os.path.join(data_dir, "resume.log")
            ckpt_name = f"checkpoint_1_{save_interval}.pt"
            restore_file = os.path.join(data_dir, ckpt_name)
            train_language_model(
                data_dir,
                arch,
                flags
                + ["--log-file", log2, "--restore-file", restore_file, "--no-save"],
                task="multilingual_language_modeling",
                world_size=self.world_size,
            )

            l1 = self.parse_logs(log)
            assert (
                int(l1[-1]["train_num_updates"]) == mu
            ), f"The first run did not complete {mu} updates. Add more data"
            l2 = self.parse_logs(log2)

            if int(l2[0]["num_updates"]) != save_interval + 1:
                all_ckpt_files = [
                    x for x in os.listdir(data_dir) if x.endswith(".pt")
                ]
                import shutil

                shutil.move(data_dir, "last_failed_resume")
                raise AssertionError(
                    f"Likely failed to load {ckpt_name}. {all_ckpt_files} \n LOGS: {l1} \n\n {l2}. "
                )
            for k in [
                "train_loss",
                "train_num_updates",
                "train_ppl",
                "train_gnorm",
            ]:
                from_scratch, resumed = float(l1[-1][k]), float(l2[-1][k])
                # This fails without rounding!
                assert (
                    from_scratch == resumed
                ), f"difference at {k} {from_scratch} != {resumed}"
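
# The comparison above relies on a `parse_logs` helper on the test class that
# turns each `--log-file` into a list of per-step stat dicts. The sketch below
# is a minimal illustrative assumption (it presumes one JSON object per log
# line, e.g. when training with a JSON log format), not the canonical helper.
def parse_logs(self, logfile):
    import json

    logs = []
    with open(logfile) as f:
        for line in f:
            try:
                logs.append(json.loads(line))
            except json.JSONDecodeError:
                # skip non-JSON lines such as plain-text warnings or headers
                continue
    return logs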