from parlai.scripts.train_model import TrainModel


def main():
    model_path = '/home/warvisionary/parlai_transfer/from_pretrained/model'
    print(f"Training model, writing output to {model_path}")
    TrainModel.main(
        # use our task on a pre-trained model
        task='empathetic_dialogues_ru',
        model='transformer/generator',
        model_file=model_path,
        # initialize with a pretrained model
        init_model='zoo:tutorial_transformer_generator/model',
        # arguments we get from the pretrained model.
        # Unfortunately, these must be looked up separately for each model.
        n_heads=16,
        n_layers=8,
        n_positions=512,
        text_truncate=512,
        label_truncate=128,
        ffn_size=2048,
        embedding_size=512,
        activation='gelu',
        variant='xlm',
        dict_lower=True,
        dict_tokenizer='bpe',
        dict_file='zoo:tutorial_transformer_generator/model.dict',
        learn_positional_embeddings=True,
        # some training arguments specific to this fine-tuning:
        # use a small learning rate with the Adam optimizer
        lr=1e-5,
        optimizer='adam',
        warmup_updates=100,
        # early stopping on perplexity
        validation_metric='ppl',
        # train for at most 10 minutes, validating every 0.25 epochs
        max_train_time=600,
        validation_every_n_epochs=0.25,
        # gpu-based params
        batchsize=12,
        fp16=True,
        fp16_impl='mem_efficient',
        # speeds up validation
        skip_generation=True,
        # helps us cram more examples onto the gpu at a time
        dynamic_batching='full',
    )


if __name__ == '__main__':
    main()
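# --- Illustrative follow-up (not part of the original snippet) ---
# A minimal sketch of checking the fine-tuned model, assuming training wrote
# its checkpoint to the same model_path used above. EvalModel and Interactive
# are standard ParlAI script entry points; the specific flags shown here are
# one reasonable choice, not the only one.
from parlai.scripts.eval_model import EvalModel
from parlai.scripts.interactive import Interactive

# report perplexity on the validation split of the same task
EvalModel.main(
    task='empathetic_dialogues_ru',
    model_file='/home/warvisionary/parlai_transfer/from_pretrained/model',
    datatype='valid',
    skip_generation=True,
)

# chat with the fine-tuned generator from the terminal
Interactive.main(
    model_file='/home/warvisionary/parlai_transfer/from_pretrained/model',
)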
def test_impatience(self, **kwargs):
    from parlai.scripts.train_model import TrainModel, TrainLoop

    # shallow copy to prevent overwrites
    kwargs = kwargs.copy()
    with testing_utils.tempdir() as tmpdir:
        kwargs['model'] = 'fake_report'
        kwargs['task'] = 'integration_tests'
        kwargs['validation_metric'] = 'loss'
        kwargs['model_file'] = os.path.join(tmpdir, 'model')
        kwargs['dict_file'] = 'zoo:unittest/transformer_generator2/model.dict'
        kwargs['log_every_n_steps'] = 1
        kwargs['validation_every_n_steps'] = 10
        kwargs['max_train_steps'] = 100
        kwargs['save_after_valid'] = True
        opt = TrainModel.setup_args().parse_kwargs(**kwargs)

        logs_first = []
        main_loop = TrainLoop(opt)
        for i, train_step_log in enumerate(main_loop.train_steps()):
            if i % 10 == 1:
                # simulate preemption
                # load from preempted and check variables are the same
                preempt_loop = TrainLoop(opt)
                # assert main_loop.impatience == preempt_loop.impatience
                # assert main_loop.last_valid_epoch == preempt_loop.last_valid_epoch
                # assert main_loop.best_valid == preempt_loop.best_valid
                print(i, preempt_loop.impatience, preempt_loop.best_valid)
                if i == 1:
                    assert preempt_loop.impatience == 0
                    assert preempt_loop.best_valid is None
                elif i == 11:
                    assert preempt_loop.impatience == 0
                    assert preempt_loop.best_valid == 3
                elif i == 21:
                    assert preempt_loop.impatience == 1
                    assert preempt_loop.best_valid == 3
                elif i == 31:
                    assert preempt_loop.impatience == 0
                    assert preempt_loop.best_valid == 2
                else:
                    assert preempt_loop.impatience == (i - 31) // 10
                    assert preempt_loop.best_valid == 2
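# --- Illustrative usage (assumption, not from the original test file) ---
# This is written as a unittest-style test method, so a minimal sketch of
# running just this check with pytest might look like the command below;
# the file path is assumed and should be adjusted to wherever the test lives.
#
#   pytest tests/test_train_model.py -k test_impatience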
def _test_scheduler(self, **kwargs):
    from parlai.scripts.train_model import TrainModel, TrainLoop

    # shallow copy to prevent overwrites
    kwargs = kwargs.copy()
    with testing_utils.tempdir() as tmpdir:
        kwargs['model'] = 'test_agents/unigram'
        kwargs['task'] = 'integration_tests'
        kwargs['skip_generation'] = True
        kwargs['validation_metric'] = 'loss'
        kwargs['model_file'] = os.path.join(tmpdir, 'model')
        kwargs['dict_file'] = 'zoo:unittest/transformer_generator2/model.dict'
        kwargs['log_every_n_steps'] = 1
        kwargs['validation_every_n_steps'] = 10
        kwargs['max_train_steps'] = 100
        kwargs['save_after_valid'] = True
        kwargs['learningrate'] = 1
        opt = TrainModel.setup_args().parse_kwargs(**kwargs)

        logs_first = []
        for i, train_step_log in enumerate(TrainLoop(opt).train_steps(), 1):
            logs_first.append(train_step_log)
            if i >= self.PREEMPT - 2:
                # simulate preemption
                break

        # resume training
        logs_second = []
        for train_step_log in TrainLoop(opt).train_steps():
            logs_second.append(train_step_log)

        # check correctness: the resumed loop picks up exactly where we left off
        assert (
            logs_first[20]['total_train_updates']
            == logs_second[0]['total_train_updates']
        )
        assert logs_first[20]['lr'] == logs_second[0]['lr']

        if 'warmup_updates' in kwargs:
            full_logs = logs_first[:20] + logs_second
            assert full_logs[kwargs['warmup_updates']]['lr'] == 1.0

        return logs_first, logs_second
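# --- Illustrative callers (assumption, not from the original test file) ---
# _test_scheduler is a parameterised helper: concrete tests pass the scheduler
# flags they want to exercise. The wrappers below are a sketch; 'invsqrt' and
# 'reduceonplateau' are standard ParlAI scheduler names, but the warmup value
# shown is made up.
def test_invsqrt_scheduler(self):
    self._test_scheduler(lr_scheduler='invsqrt', warmup_updates=25)

def test_reduceonplateau_scheduler(self):
    self._test_scheduler(lr_scheduler='reduceonplateau')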
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""
Train a model using parlai's standard training loop.

For documentation, see parlai.scripts.train_model.
"""
from parlai.scripts.train_model import TrainModel

if __name__ == '__main__':
    TrainModel.main()
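# --- Illustrative invocation (assumption, not part of the original script) ---
# Because the script simply defers to TrainModel.main(), it accepts the usual
# ParlAI training flags on the command line. A minimal sketch, with
# placeholder task/model-file values:
#
#   python train.py --task convai2 --model transformer/generator \
#       --model-file /tmp/model --batchsize 16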
# fragment from inside a custom teacher's setup_data(): emit (text, label)
# pairs together with a flag marking episode boundaries
if len(splits) == 3:
    yield (
        splits[0].replace('text:', ''),
        splits[1].replace('labels:', ''),
    ), new_episode
    new_episode = True
else:
    yield (
        splits[0].replace('text:', ''),
        splits[1].replace('labels:', ''),
    ), new_episode
    new_episode = False

# show predictions on the custom task before training
DisplayModel.main(task='poly_teacher', model='poly')

# fine-tune a seq2seq model on the custom task
TrainModel.main(
    model='seq2seq',
    model_file='poly-encoder/model',
    dict_file='zoo:dodecadialogue/empathetic_dialogues_ft/model.dict',
    task='poly_teacher',
    batchsize=3,
    validation_every_n_secs=10,
    max_train_time=60,
)

# show a few responses from the trained model
DisplayModel.main(
    task='poly_teacher',
    model_file='poly-encoder/model',
    num_examples=6,
)

# walk a directory tree (`walks` is assumed to come from an earlier os.walk call)
for source, dirs, files in walks:
    print('Directory: ' + source)
    for filename in files:
        # construct the full local path
        local_file = os.path.join(source, filename)
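# --- Illustrative sanity check (assumption, not part of the original snippet) ---
# Before training on the custom 'poly_teacher' task, a quick way to confirm the
# teacher yields sensible (text, label) pairs is ParlAI's display_data script;
# a minimal sketch:
from parlai.scripts.display_data import DisplayData

DisplayData.main(task='poly_teacher', num_examples=5)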
TrainModel.main(
    # similar to before
    task='bankQA',
    model='projects.wizard_of_wikipedia.generator.agents:EndToEndAgent',
    model_file='/from_pretrained_wiki/model',
    # initialize with a pretrained model
    init_model='zoo:wizard_of_wikipedia/end2end_generator/model',
    # arguments we get from the pretrained model.
    # Unfortunately, these must be looked up separately for each model.
    # eps
    dict_file='zoo:wizard_of_wikipedia/end2end_generator/model.dict',
    num_epochs=2,
    dict_lower=True,
    dict_tokenizer='bpe',
    n_layers=5,
    n_heads=2,
    dropout=0.20,
    ffn_size=512,
    embedding_size=256,
    log_every_n_secs=10,
    validation_patience=12,
    validation_metric='ppl',
    validation_metric_mode='min',
    validation_every_n_epochs=0.5,
    n_positions=128,
    truncate=128,
    max_knowledge=32,
    knowledge_alpha=0.95,
    knowledge_truncate=32,
    learningrate=5e-4,
    warmup_updates=5000,
    clip=0.1,
    lr_scheduler='invsqrt',
    embedding_type='fasttext',
    beam_size=1,
    skip_generation=False,
    batchsize=64,
    # fp16=True
)
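# --- Illustrative follow-up (assumption, not part of the original snippet) ---
# A minimal sketch of evaluating the fine-tuned wizard generator on the same
# task; the model_file path matches the training call above, and the batch
# size here is an arbitrary choice.
from parlai.scripts.eval_model import EvalModel

EvalModel.main(
    task='bankQA',
    model_file='/from_pretrained_wiki/model',
    datatype='valid',
    skip_generation=False,
    batchsize=16,
)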
TrainModel.main(
    model_file='model/kvmemnn/self_persona_short',
    task='personachat:self',
    # model_file='model/kvmemnn/self_persona_rv',
    # task='personachat:self_revised',
    # model_file='model/kvmemnn/no_persona',
    # task='personachat:none',
    # model_file='model/kvmemnn/no_persona_rv',
    # task='personachat:none_revised',
    # model_file='model/kvmemnn/other_persona',
    # task='personachat:other',
    # model_file='model/kvmemnn/other_persona_rv',
    # task='personachat:other_revised',
    # model_file='model/kvmemnn/both_persona',
    # task='personachat:both',
    # model_file='model/kvmemnn/both_persona_rv',
    # task='personachat:both_revised',
    model='projects.personachat.kvmemnn.kvmemnn:KvmemnnAgent',
    log_every_n_secs=60,
    validation_max_exs=10000,
    validation_every_n_secs=120,
    hops=1,
    lins=0,
    validation_patience=-1,
    validation_metric='hits@1',
    max_train_time=3600,
    share_embeddings=True,
    batchsize=1,
    learningrate=0.1,
    embeddingsize=500,
    margin=0.1,
    tfidf=False,
    numthreads=10,
    #### FROM: https://github.com/facebookresearch/ParlAI/pull/662#issuecomment-475629794
    # model_file='model/seq2seq/self_persona_2',
    # task='personachat:self',
    # model='seq2seq',
    # log_every_n_secs=300,
    # # validation_every_n_secs=1800,
    # # validation_patience=-1,
    # # validation_metric='ppl',
    # max_train_time=11 * 3600,
    # batchsize=128,
    # learningrate=0.001,
    # embeddingsize=300,
    # numlayers=2,
    # hiddensize=512,
    # dropout=0.5,
    # optimizer='adam',
    # lookuptable='enc_dec',
    # # margin=0.1,
    # # tfidf=False,
    # # numthreads=8,
    ## GPU: https://dailylime.kr/2020/06/wsl2%EC%97%90%EC%84%9C-ubuntu%EC%99%80-cuda-%EC%82%AC%EC%9A%A9%ED%95%98%EA%B8%B0/
    # fp16=True, fp16_impl='mem_efficient',
)
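# --- Illustrative follow-up (assumption, not part of the original snippet) ---
# Once the KV-memory network finishes training, a minimal sketch of chatting
# with it from the terminal; the model_file matches the uncommented choice
# above.
from parlai.scripts.interactive import Interactive

Interactive.main(model_file='model/kvmemnn/self_persona_short')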