def _mock_train(**args):
    outdir = tempfile.mkdtemp()
    parser = setup_args()
    parser.set_defaults(
        model_file=os.path.join(outdir, "model"),
        **args,
    )
    stdout = io.StringIO()
    with contextlib.redirect_stdout(stdout):
        tl = TrainLoop(parser.parse_args(print_args=False))
        valid, test = tl.train()
    shutil.rmtree(outdir)
    return stdout.getvalue(), valid, test
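# Hedged usage sketch (not from the source): the model/task values below are
# illustrative ParlAI settings; any quick-to-train agent exercises the helper
# the same way.
def test_train_with_mock(self):
    stdout, valid, test = _mock_train(
        model='repeat_label',
        task='integration_tests',
        num_epochs=1,
        batchsize=2,
    )
    # _mock_train returns the captured stdout plus the final valid/test reports
    self.assertGreater(valid['exs'], 0, stdout)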
def test_pyt_preprocess_train(self):
    """
    Test that the preprocess functionality works with the PytorchDataTeacher
    with a sample TorchAgent (here, the Seq2seq model).

    This tests whether an agent can train to completion with these
    preprocessed examples.
    """
    # Check that the model can train to completion
    print('Testing test_pyt_preprocess training')
    f = io.StringIO()
    with redirect_stdout(f):
        parser = train_setup_args()
        defaults = parser_defaults.copy()
        set_model_file(defaults)
        defaults['datatype'] = 'train'
        defaults['pytorch_preprocess'] = True
        parser.set_defaults(**defaults)
        TrainLoop(parser.parse_args()).train()
    str_output = f.getvalue()
    self.assertTrue(
        solved_task(str_output),
        'Teacher could not teach seq2seq with preprocessed obs, '
        'output: {}'.format(str_output))
    print('\n------Passed `test_pyt_preprocess_train`------\n')
def test_pyt_train(self):
    """
    Integration test: ensure that the PytorchDataTeacher can successfully
    teach the Seq2Seq model to fully solve the babi:task10k:1 task.

    The Seq2Seq model can solve the babi:task10k:1 task with the normal
    ParlAI setup, and thus should be able to do so with a PytorchDataTeacher.

    This tests the following setups:
        1. -dt train
        2. -dt train:stream
        3. -dt train:stream:ordered
    """
    dts = ['train', 'train:stream', 'train:stream:ordered']
    for dt in dts:
        print('Testing test_pyt_train with dt: {}'.format(dt))
        f = io.StringIO()
        with redirect_stdout(f):
            parser = train_setup_args()
            defaults = parser_defaults.copy()
            set_model_file(defaults)
            defaults['datatype'] = dt
            defaults['shuffle'] = True  # for train:stream
            parser.set_defaults(**defaults)
            TrainLoop(parser.parse_args()).train()
        str_output = f.getvalue()
        self.assertTrue(
            solved_task(str_output),
            'Teacher could not teach seq2seq with args: '
            '{}; here is str_output: {}'.format(defaults, str_output))
    print('\n------Passed `test_pyt_train`------\n')
def test_impatience(self, **kwargs):
    from parlai.scripts.train_model import TrainModel, TrainLoop

    # shallow copy to prevent overwrites
    kwargs = kwargs.copy()
    with testing_utils.tempdir() as tmpdir:
        kwargs['model'] = 'fake_report'
        kwargs['task'] = 'integration_tests'
        kwargs['validation_metric'] = 'loss'
        kwargs['model_file'] = os.path.join(tmpdir, 'model')
        kwargs['dict_file'] = 'zoo:unittest/transformer_generator2/model.dict'
        kwargs['log_every_n_steps'] = 1
        kwargs['validation_every_n_steps'] = 10
        kwargs['max_train_steps'] = 100
        kwargs['save_after_valid'] = True
        opt = TrainModel.setup_args().parse_kwargs(**kwargs)

        main_loop = TrainLoop(opt)
        for i, train_step_log in enumerate(main_loop.train_steps()):
            if i % 10 == 1:
                # simulate preemption: load from the preempted checkpoint and
                # check that impatience/best_valid are restored correctly
                preempt_loop = TrainLoop(opt)
                print(i, preempt_loop.impatience, preempt_loop.best_valid)
                if i == 1:
                    assert preempt_loop.impatience == 0
                    assert preempt_loop.best_valid is None
                elif i == 11:
                    assert preempt_loop.impatience == 0
                    assert preempt_loop.best_valid == 3
                elif i == 21:
                    assert preempt_loop.impatience == 1
                    assert preempt_loop.best_valid == 3
                elif i == 31:
                    assert preempt_loop.impatience == 0
                    assert preempt_loop.best_valid == 2
                else:
                    assert preempt_loop.impatience == (i - 31) // 10
                    assert preempt_loop.best_valid == 2
def _mock_train(outdir=None, keepoutdir=False, override=None, **args):
    if not outdir:
        outdir = tempfile.mkdtemp()
    parser = setup_args()
    parser.set_defaults(
        model_file=os.path.join(outdir, "model"),
        **args,
    )
    stdout = io.StringIO()
    with contextlib.redirect_stdout(stdout):
        opt = parser.parse_args(print_args=False)
        if override:
            opt['override'] = override
        tl = TrainLoop(opt)
        valid, test = tl.train()
    if not keepoutdir:
        shutil.rmtree(outdir)
    return stdout.getvalue(), valid, test
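# Hedged sketch of the override path above (model/task/flag values assumed,
# not from the source): a first run keeps its output dir, then a resumed run
# passes an `override` dict, which is applied on top of the saved options.
def test_train_with_override(self):
    outdir = tempfile.mkdtemp()
    try:
        # first run saves a model file under outdir
        _mock_train(outdir=outdir, keepoutdir=True,
                    model='repeat_label', task='integration_tests',
                    num_epochs=1)
        # second run resumes from the same model file with an overridden option
        stdout, valid, test = _mock_train(
            outdir=outdir, keepoutdir=True,
            model='repeat_label', task='integration_tests', num_epochs=1,
            override={'batchsize': 1},
        )
        self.assertGreater(valid['exs'], 0, stdout)
    finally:
        shutil.rmtree(outdir)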
def test_hogwild_train(self):
    """Test the trainer eval with numthreads > 1 and batchsize in [1, 2, 3]."""
    parser = setup_args()
    NUM_EXS = 500
    parser.set_defaults(
        task='tasks.repeat:RepeatTeacher:1',
        evaltask='tasks.repeat:RepeatTeacher:{}'.format(NUM_EXS),
        model='repeat_label',
        num_examples=-1,
        display_examples=False,
        num_epochs=10,
    )
    old_out = sys.stdout
    output = display_output()
    try:
        sys.stdout = output
        for nt in [2, 5, 10]:
            parser.set_defaults(numthreads=nt)
            for bs in [1, 2, 3]:
                parser.set_defaults(batchsize=bs)
                parser.set_defaults(batch_sort=(bs % 2 == 0))
                tl = TrainLoop(parser)
                report_valid, report_test = tl.train()
                # test final valid and test evals
                self.assertEqual(report_valid['exs'], NUM_EXS)
                self.assertEqual(report_test['exs'], NUM_EXS)
                # a full eval should cover every example
                report_full, _world = run_eval(
                    tl.agent, tl.opt, 'valid', max_exs=-1,
                    valid_world=tl.valid_world)
                self.assertEqual(report_full['exs'], NUM_EXS)
                # a capped eval should stop early (max_exs must be an int)
                report_part, _world = run_eval(
                    tl.agent, tl.opt, 'valid', max_exs=NUM_EXS // 5,
                    valid_world=tl.valid_world)
                self.assertTrue(report_part['exs'] < NUM_EXS)
    finally:
        # restore sys.stdout
        sys.stdout = old_out
def _test_scheduler(self, **kwargs):
    from parlai.scripts.train_model import TrainModel, TrainLoop

    # shallow copy to prevent overwrites
    kwargs = kwargs.copy()
    with testing_utils.tempdir() as tmpdir:
        kwargs['model'] = 'test_agents/unigram'
        kwargs['task'] = 'integration_tests'
        kwargs['skip_generation'] = True
        kwargs['validation_metric'] = 'loss'
        kwargs['model_file'] = os.path.join(tmpdir, 'model')
        kwargs['dict_file'] = 'zoo:unittest/transformer_generator2/model.dict'
        kwargs['log_every_n_steps'] = 1
        kwargs['validation_every_n_steps'] = 10
        kwargs['max_train_steps'] = 100
        kwargs['save_after_valid'] = True
        kwargs['learningrate'] = 1
        opt = TrainModel.setup_args().parse_kwargs(**kwargs)

        logs_first = []
        for i, train_step_log in enumerate(TrainLoop(opt).train_steps(), 1):
            logs_first.append(train_step_log)
            if i >= self.PREEMPT - 2:
                # simulate preemption
                break

        # resume training
        logs_second = []
        for train_step_log in TrainLoop(opt).train_steps():
            logs_second.append(train_step_log)

        # check correctness
        assert (
            logs_first[20]['total_train_updates']
            == logs_second[0]['total_train_updates']
        )
        assert logs_first[20]['lr'] == logs_second[0]['lr']

        if 'warmup_updates' in kwargs:
            full_logs = logs_first[:20] + logs_second
            assert full_logs[kwargs['warmup_updates']]['lr'] == 1.0

        return logs_first, logs_second
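# Hedged usage sketch: the harness above appears designed to be called once
# per scheduler; the flags below are illustrative ParlAI settings (assumed,
# not confirmed by the source) that exercise the warmup branch at the end of
# _test_scheduler.
def test_invsqrt_scheduler(self):
    logs_first, logs_second = self._test_scheduler(
        lr_scheduler='invsqrt',
        warmup_updates=5,
    )
    # with learningrate=1 and warmup, no logged lr should exceed the peak
    assert all(log['lr'] <= 1.0 for log in logs_first + logs_second)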
def profile(opt):
    if opt['torch'] or opt['torch_cuda']:
        with torch.autograd.profiler.profile(
                use_cuda=opt['torch_cuda']) as prof:
            TrainLoop(opt).train()
        key = 'cpu_time_total' if opt['torch'] else 'cuda_time_total'
        print(prof.key_averages().table(sort_by=key, row_limit=25))
        return prof
    else:
        pr = cProfile.Profile()
        pr.enable()
        TrainLoop(opt).train()
        pr.disable()
        s = io.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
def profile(opt):
    if isinstance(opt, ParlaiParser):
        logging.error('profile should be passed opt, not a parser')
        opt = opt.parse_args()
    if opt['torch'] or opt['torch_cuda']:
        with torch.autograd.profiler.profile(use_cuda=opt['torch_cuda']) as prof:
            TrainLoop(opt).train()
        print(prof.total_average())

        sort_cpu = sorted(prof.key_averages(), key=lambda k: k.cpu_time)
        sort_cuda = sorted(prof.key_averages(), key=lambda k: k.cuda_time)

        def cpu():
            for e in sort_cpu:
                print(e)

        def cuda():
            for e in sort_cuda:
                print(e)

        cpu()

        if opt['debug']:
            print(
                '`cpu()` prints out cpu-sorted list, '
                '`cuda()` prints cuda-sorted list'
            )
            pdb.set_trace()
    else:
        pr = cProfile.Profile()
        pr.enable()
        TrainLoop(opt).train()
        pr.disable()
        s = io.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
        if opt['debug']:
            pdb.set_trace()
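# Hedged invocation sketch for profile() above (flag spellings assumed): the
# profiling options are not part of setup_args(), so a wrapper would register
# them before parsing. ParlaiParser accepts type='bool' for flags like these.
if __name__ == '__main__':
    parser = setup_args()
    parser.add_argument('--torch', type='bool', default=False,
                        help='profile the train loop with torch.autograd.profiler')
    parser.add_argument('--torch-cuda', type='bool', default=False,
                        help='profile CUDA time as well')
    parser.add_argument('--debug', type='bool', default=False,
                        help='drop into pdb after profiling')
    profile(parser.parse_args())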
def test_output(self):
    f = io.StringIO()
    with redirect_stdout(f):
        try:
            import torch  # noqa: F401
        except ImportError:
            print('Cannot import torch, skipping test_train_model')
            return
        parser = setup_args()
        parser.set_defaults(
            model='mlb_vqa',
            task='pytorch_teacher',
            pytorch_buildteacher='vqa_v1',
            dataset='parlai.tasks.vqa_v1.agents',
            image_mode='resnet152_spatial',
            image_size=448,
            image_cropsize=448,
            dict_file='/tmp/vqa_v1',
            batchsize=1,
            num_epochs=1,
            no_cuda=True,
            no_hdf5=True,
            pytorch_preprocess=False,
            batch_sort_cache='none',
            numworkers=1,
            unittest=True,
        )
        TrainLoop(parser).train()

    str_output = f.getvalue()
    self.assertTrue(len(str_output) > 0, "Output is empty")
    self.assertTrue("[ training... ]" in str_output,
                    "Did not reach training step")
    self.assertTrue("[ running eval: valid ]" in str_output,
                    "Did not reach validation step")
    self.assertTrue("valid:{'total': 10," in str_output,
                    "Did not complete validation")
    self.assertTrue("[ running eval: test ]" in str_output,
                    "Did not reach evaluation step")
    self.assertTrue("test:{'total': 0}" in str_output,
                    "Did not complete evaluation")
def test_pyt_batchsort_train(self):
    """
    Tests the functionality of training with batchsort under the following
    conditions:
        1. -dt train --pytorch_preprocess False
        2. -dt train:stream --pytorch_preprocess False
        3. -dt train --pytorch_preprocess True --batch_sort_field text_vec
    """
    # Check that training works with batch sort
    dt_and_preprocess = [
        ('train', False),
        ('train:stream', False),
        ('train', True),
    ]
    for dt, preprocess in dt_and_preprocess:
        print('Testing test_pyt_batchsort with -dt {} and '
              '--preprocess {}'.format(dt, preprocess))
        f = io.StringIO()
        with redirect_stdout(f):
            parser = train_setup_args()
            defaults = parser_defaults.copy()
            set_model_file(defaults)
            defaults['datatype'] = dt
            defaults['pytorch_preprocess'] = preprocess
            defaults['pytorch_teacher_batch_sort'] = True
            defaults['batchsize'] = 50
            if preprocess:
                defaults['batch_sort_field'] = 'text_vec'
            parser.set_defaults(**defaults)
            TrainLoop(parser.parse_args()).train()
        str_output = f.getvalue()
        self.assertTrue(
            solved_task(str_output),
            'Teacher could not teach seq2seq with batch sort '
            'and args {} and output {}'.format((dt, preprocess), str_output))
    print('\n------Passed `test_pyt_batchsort_train`------\n')
"""Train a model on the ReDial task.

For documentation, see parlai.scripts.train_model.
"""
from parlai.scripts.train_model import TrainLoop, setup_args

if __name__ == '__main__':
    parser = setup_args()
    parser.set_defaults(
        task='redial',
        model='transformer_rec/generator',
        model_file='saved/transformer_rec',
        dict_tokenizer='nltk',
        dict_lower=True,
        batchsize=64,
        truncate=1024,
        dropout=0.1,
        relu_dropout=0.1,
        n_entity=64368,
        n_relation=214,
        validation_metric='nll_loss',
        validation_metric_mode='min',
        validation_every_n_secs=300,
        validation_patience=5,
        tensorboard_log=True,
        tensorboard_tag='task,model,batchsize,ffn_size,embedding_size,'
                        'n_layers,learningrate,model_file',
        tensorboard_metrics='ppl,nll_loss,token_acc,bleu',
    )
    opt = parser.parse_args()
    TrainLoop(opt).train()
from parlai.scripts.train_model import TrainLoop, setup_args

# assumed preamble: the original fragment begins at parser.set_defaults(...),
# so the import and parser construction above follow the sibling scripts here
parser = setup_args()
parser.set_defaults(
    task='convai2:self',
    model='projects.personachat.persona_seq2seq:PersonachatSeqseqAgentSplit',
    model_file='/tmp/profilememconvai2',
    dict_lower=False,
    dict_include_valid=True,
    dict_maxexs=-1,
    datatype='train',
    batchsize=128,
    encoder='lstm',
    learningrate=0.001,
    numlayers=1,
    hiddensize=1024,
    dropout=0.2,
    attention='general',
    personachat_attnsentlevel=True,
    personachat_sharelt=True,
    personachat_learnreweight=True,
    personachat_reweight='use',
    truncate=100,
    rank_candidates=True,
    validation_every_n_secs=180,
    validation_metric='f1',
    validation_metric_mode='max',
    validation_patience=10,
    log_every_n_secs=10,
    dict_tokenizer='split',
)
TrainLoop(parser.parse_args()).train()
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
"""Train a model using ParlAI's standard training loop.

After training, computes validation and test error.

Run with, e.g.:

    python examples/train_model.py -m ir_baseline -t dialog_babi:Task:1 -mf /tmp/model

..or..

    python examples/train_model.py -m seq2seq -t babi:Task10k:1 -mf '/tmp/model' -bs 32 -lr 0.5 -hs 128

..or..

    python examples/train_model.py -m drqa -t babi:Task10k:1 -mf /tmp/model -bs 10
"""
from parlai.scripts.train_model import TrainLoop, setup_args

if __name__ == '__main__':
    parser = setup_args()
    opt = parser.parse_args()
    TrainLoop(opt).train()
from parlai.scripts.train_model import TrainLoop, setup_args

# assumed preamble: the original fragment begins inside parser.set_defaults(...),
# so the import and parser construction above follow the sibling scripts here
parser = setup_args()
parser.set_defaults(
    task='twitter',
    model='seq2seq',
    model_file='/tmp/twitter_seq2seq_model',
    dict_file=DICT_FILE_30K,  # defined elsewhere in the original script
    dict_lower=True,
    datatype='train',
    batchsize=32,
    hiddensize=1024,
    embeddingsize=300,
    attention='none',
    numlayers=3,
    rnn_class='lstm',
    learningrate=1,
    dropout=0.1,
    gradient_clip=0.1,
    lookuptable='enc_dec',
    optimizer='sgd',
    embedding_type='glove',
    momentum=0.9,
    bidirectional=False,
    batch_sort=True,
    validation_every_n_secs=600,
    validation_metric='ppl',
    validation_metric_mode='min',
    validation_patience=15,
    log_every_n_secs=1,
    numsoftmax=1,
    truncate=150,
)
TrainLoop(parser).train()
def test_output(self):
    class display_output(object):
        def __init__(self):
            self.data = []

        def write(self, s):
            self.data.append(s)

        def flush(self):
            pass

        def __str__(self):
            return "".join(self.data)

    old_out = sys.stdout
    output = display_output()
    try:
        sys.stdout = output
        try:
            import torch  # noqa: F401
        except ImportError:
            print('Cannot import torch, skipping test_train_model')
            return
        parser = setup_args()
        parser.set_defaults(
            model='memnn',
            task='tasks.repeat:RepeatTeacher:10',
            dict_file='/tmp/repeat',
            batchsize=1,
            numthreads=1,
            validation_every_n_epochs=10,
            validation_patience=5,
            embedding_size=8,
            no_cuda=True,
            validation_share_agent=True,
            num_episodes=10,
        )
        opt = parser.parse_args()
        TrainLoop(opt).train()
    finally:
        # restore sys.stdout
        sys.stdout = old_out

    str_output = str(output)
    self.assertTrue(len(str_output) > 0, "Output is empty")
    self.assertTrue("[ training... ]" in str_output,
                    "Did not reach training step")
    self.assertTrue("[ running eval: valid ]" in str_output,
                    "Did not reach validation step")
    self.assertTrue("valid:{'exs': 10," in str_output,
                    "Did not complete validation")
    self.assertTrue("[ running eval: test ]" in str_output,
                    "Did not reach evaluation step")
    self.assertTrue("test:{'exs': 10," in str_output,
                    "Did not complete evaluation")

    list_output = str_output.split("\n")
    for line in list_output:
        if "test:{" in line:
            score = ast.literal_eval(line.split("test:", 1)[1])
            self.assertTrue(
                score['accuracy'] > 0.5,
                'Accuracy not convincing enough, was {}'.format(
                    score['accuracy']))