from parlai.scripts.train_model import TrainModel


def main():
    model_path = '/home/warvisionary/parlai_transfer/from_pretrained/model'
    print(f"Training model, writing output to {model_path}")
    TrainModel.main(
        # use our task on a pre-trained model
        task='empathetic_dialogues_ru', 
        model='transformer/generator',
        model_file=model_path,
        
        # initialize with a pretrained model
        init_model='zoo:tutorial_transformer_generator/model',
        
        # arguments we get from the pretrained model.
        # Unfortunately, these must be looked up separately for each model.
        n_heads=16, n_layers=8, n_positions=512, text_truncate=512,
        label_truncate=128, ffn_size=2048, embedding_size=512,
        activation='gelu', variant='xlm',
        dict_lower=True, dict_tokenizer='bpe',
        dict_file='zoo:tutorial_transformer_generator/model.dict',
        learn_positional_embeddings=True,
        
        # some training arguments, specific to this fine-tuning
        # use a small learning rate with ADAM optimizer
        lr=1e-5, optimizer='adam',
        warmup_updates=100,
        # early stopping on perplexity
        validation_metric='ppl',
        # train at most 10 minutes, and validate every 0.25 epochs
        max_train_time=600, validation_every_n_epochs=0.25,
        
        # gpu-based params
        batchsize=12, fp16=True, fp16_impl='mem_efficient',
        
        # speeds up validation
        skip_generation=True,
        
        # helps us cram more examples into our gpu at a time
        dynamic_batching='full',
    )
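
The architecture arguments above (n_heads, n_layers, ffn_size, ...) have to be looked up separately for each pretrained model. One way to do that, assuming the zoo model has already been downloaded into your ParlAI data directory, is to read the .opt file saved next to the checkpoint. This is a sketch, not part of the original example:

from parlai.core.opt import Opt
from parlai.core.params import ParlaiParser
from parlai.core.build_data import modelzoo_path

# resolve the zoo alias to a local path (this does not download the model)
datapath = ParlaiParser().parse_args([])['datapath']
pretrained_path = modelzoo_path(datapath, 'zoo:tutorial_transformer_generator/model')

# ParlAI checkpoints keep their options in a sibling ``.opt`` file
pretrained_opt = Opt.load(pretrained_path + '.opt')
for key in ('n_heads', 'n_layers', 'n_positions', 'ffn_size', 'embedding_size', 'variant'):
    print(key, pretrained_opt.get(key))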
Example #2
    def test_impatience(self, **kwargs):
        from parlai.scripts.train_model import TrainModel, TrainLoop

        # shallow copy to prevent overwrites
        kwargs = kwargs.copy()
        with testing_utils.tempdir() as tmpdir:
            kwargs['model'] = 'fake_report'
            kwargs['task'] = 'integration_tests'
            kwargs['validation_metric'] = 'loss'
            kwargs['model_file'] = os.path.join(tmpdir, 'model')
            kwargs['dict_file'] = 'zoo:unittest/transformer_generator2/model.dict'
            kwargs['log_every_n_steps'] = 1
            kwargs['validation_every_n_steps'] = 10
            kwargs['max_train_steps'] = 100
            kwargs['save_after_valid'] = True
            opt = TrainModel.setup_args().parse_kwargs(**kwargs)

            logs_first = []
            main_loop = TrainLoop(opt)

            for i, train_step_log in enumerate(main_loop.train_steps()):
                if i % 10 == 1:
                    # simulate preemption
                    # load from preempted and check variables are the same
                    preempt_loop = TrainLoop(opt)
                    # assert main_loop.impatience == preempt_loop.impatience
                    # assert main_loop.last_valid_epoch == preempt_loop.last_valid_epoch
                    # assert main_loop.best_valid == preempt_loop.best_valid
                    print(i, preempt_loop.impatience, preempt_loop.best_valid)
                    if i == 1:
                        assert preempt_loop.impatience == 0
                        assert preempt_loop.best_valid is None
                    elif i == 11:
                        assert preempt_loop.impatience == 0
                        assert preempt_loop.best_valid == 3
                    elif i == 21:
                        assert preempt_loop.impatience == 1
                        assert preempt_loop.best_valid == 3
                    elif i == 31:
                        assert preempt_loop.impatience == 0
                        assert preempt_loop.best_valid == 2
                    else:
                        assert preempt_loop.impatience == (i - 31) // 10
                        assert preempt_loop.best_valid == 2
    def _test_scheduler(self, **kwargs):
        from parlai.scripts.train_model import TrainModel, TrainLoop

        # shallow copy to prevent overwrites
        kwargs = kwargs.copy()
        with testing_utils.tempdir() as tmpdir:
            kwargs['model'] = 'test_agents/unigram'
            kwargs['task'] = 'integration_tests'
            kwargs['skip_generation'] = True
            kwargs['validation_metric'] = 'loss'
            kwargs['model_file'] = os.path.join(tmpdir, 'model')
            kwargs['dict_file'] = 'zoo:unittest/transformer_generator2/model.dict'
            kwargs['log_every_n_steps'] = 1
            kwargs['validation_every_n_steps'] = 10
            kwargs['max_train_steps'] = 100
            kwargs['save_after_valid'] = True
            kwargs['learningrate'] = 1
            opt = TrainModel.setup_args().parse_kwargs(**kwargs)

            logs_first = []
            for i, train_step_log in enumerate(
                    TrainLoop(opt).train_steps(), 1):
                logs_first.append(train_step_log)
                if i >= self.PREEMPT - 2:
                    # simulate preemption
                    break

            # resume training
            logs_second = []
            for train_step_log in TrainLoop(opt).train_steps():
                logs_second.append(train_step_log)

            # check correctness
            assert (logs_first[20]['total_train_updates'] == logs_second[0]
                    ['total_train_updates'])
            assert logs_first[20]['lr'] == logs_second[0]['lr']

            if 'warmup_updates' in kwargs:
                full_logs = logs_first[:20] + logs_second
                assert full_logs[kwargs['warmup_updates']]['lr'] == 1.0

            return logs_first, logs_second
#!/usr/bin/env python3

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
Train a model using ParlAI's standard training loop.

For documentation, see parlai.scripts.train_model.
"""

from parlai.scripts.train_model import TrainModel

if __name__ == '__main__':
    TrainModel.main()
Example #5
                if len(splits) == 3:
                    # yield (text, labels) with the current new_episode flag;
                    # a third field is assumed to mark the end of the episode,
                    # so the next example starts a new one
                    yield (splits[0].replace('text:', ''),
                           splits[1].replace('labels:', '')), new_episode
                    new_episode = True
                else:
                    yield (splits[0].replace('text:', ''),
                           splits[1].replace('labels:', '')), new_episode
                    new_episode = False
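
The fragment above looks like the tail of a DialogTeacher.setup_data generator for the custom 'poly_teacher' task used below. A minimal sketch of how such a teacher could be defined; the module path, data file location, and field format are assumptions, not the original code:

# hypothetical parlai/tasks/poly_teacher/agents.py
import copy
from parlai.core.teachers import DialogTeacher


class DefaultTeacher(DialogTeacher):
    def __init__(self, opt, shared=None):
        opt = copy.deepcopy(opt)
        # DialogTeacher streams examples from opt['datafile'] through setup_data()
        opt['datafile'] = '/path/to/poly_teacher_data.txt'  # assumed data location
        self.id = 'poly_teacher'
        super().__init__(opt, shared)

    def setup_data(self, path):
        new_episode = True
        with open(path) as f:
            for line in f:
                splits = line.rstrip('\n').split('\t')
                if len(splits) < 2:
                    continue
                # yield ((text, labels), new_episode); a third field is assumed
                # to mark the end of the current episode
                yield (splits[0].replace('text:', ''),
                       splits[1].replace('labels:', '')), new_episode
                new_episode = len(splits) == 3

With a module laid out like this, task='poly_teacher' resolves to parlai.tasks.poly_teacher.agents:DefaultTeacher, provided the module lives inside the parlai.tasks package.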


from parlai.scripts.display_model import DisplayModel
from parlai.scripts.train_model import TrainModel

DisplayModel.main(task='poly_teacher', model='poly')
TrainModel.main(
    model='seq2seq',
    model_file='poly-encoder/model',
    dict_file='zoo:dodecadialogue/empathetic_dialogues_ft/model.dicts',
    task='poly_teacher',
    batchsize=3,
    validation_every_n_secs=10,
    max_train_time=60,
)

DisplayModel.main(
    task='poly_teacher',
    model_file='poly-encoder/model',
    num_examples=6,
)
# this fragment appears to come from a separate file-copying helper;
# ``walks`` is assumed to be the output of os.walk(<source_dir>)
for source, dirs, files in walks:
    print('Directory: ' + source)
    for filename in files:
        # construct the full local path
        local_file = os.path.join(source, filename)
from parlai.scripts.train_model import TrainModel

TrainModel.main(
    # similar to before
    task='bankQA',
    model='projects.wizard_of_wikipedia.generator.agents:EndToEndAgent',
    model_file='/from_pretrained_wiki/model',
    # initialize with a pretrained model
    init_model='zoo:wizard_of_wikipedia/end2end_generator/model',

    # arguments we get from the pretrained model.
    # Unfortunately, these must be looked up separately for each model.
    dict_file='zoo:wizard_of_wikipedia/end2end_generator/model.dict',
    num_epochs=2,
    dict_lower=True,
    dict_tokenizer='bpe',
    n_layers=5,
    n_heads=2,
    dropout=0.20,
    ffn_size=512,
    embedding_size=256,
    log_every_n_secs=10,
    validation_patience=12,
    validation_metric='ppl',
    validation_metric_mode='min',
    validation_every_n_epochs=0.5,
    n_positions=128,
    truncate=128,
    max_knowledge=32,
    knowledge_alpha=0.95,
    knowledge_truncate=32,
    learningrate=5e-4,
    warmup_updates=5000,
    clip=0.1,
    lr_scheduler='invsqrt',
    embedding_type='fasttext',
    beam_size=1,
    skip_generation=False,
    batchsize=64,
    # fp16=True
)
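
Once fine-tuning finishes, the generator can be spot-checked with DisplayModel, mirroring the pattern used in Example #5. The values below simply reuse this example's task and model_file, and skip_generation is turned back off so generated text is shown; treat this as a sketch rather than part of the original example:

from parlai.scripts.display_model import DisplayModel

# print a few examples together with the fine-tuned model's responses
DisplayModel.main(
    task='bankQA',
    model_file='/from_pretrained_wiki/model',
    num_examples=4,
    skip_generation=False,
)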
Example #7
from parlai.scripts.train_model import TrainModel

TrainModel.main(
    model_file='model/kvmemnn/self_persona_short',
    task='personachat:self',
    
    # model_file='model/kvmemnn/self_persona_rv',
    # task='personachat:self_revised',

    # model_file='model/kvmemnn/no_persona',
    # task='personachat:none',
    
    # model_file='model/kvmemnn/no_persona_rv',
    # task='personachat:none_revised', 

    # model_file='model/kvmemnn/other_persona',
    # task='personachat:other',
    
    # model_file='model/kvmemnn/other_persona_rv',
    # task='personachat:other_revised',

    # model_file='model/kvmemnn/both_persona',
    # task='personachat:both',

    # model_file='model/kvmemnn/both_persona_rv',
    # task='personachat:both_revised',

    model='projects.personachat.kvmemnn.kvmemnn:KvmemnnAgent',
    log_every_n_secs=60,
    validation_max_exs=10000,
    validation_every_n_secs=120,
    hops=1,
    lins=0,
    validation_patience=-1,
    validation_metric='hits@1',
    max_train_time=3600,
    share_embeddings=True,
    batchsize=1,
    learningrate=0.1,
    embeddingsize=500,
    margin=0.1,
    tfidf=False,
    numthreads=10,

    #### FROM : https://github.com/facebookresearch/ParlAI/pull/662#issuecomment-475629794

    # model_file='model/seq2seq/self_persona_2',
    # task='personachat:self',

    # model='seq2seq',

    # log_every_n_secs=300,
    # # validation_every_n_secs=1800,
    # # validation_patience=-1,
    # # validation_metric='ppl',
    # max_train_time=11 * 3600,
    # batchsize=128,
    # learningrate=0.001,
    # embeddingsize=300,
    # numlayers=2,
    # hiddensize=512,
    # dropout=0.5,
    # optimizer='adam',
    # lookuptable='enc_dec',
    # # margin=0.1,
    # # tfidf=False,
    # #numthreads=8,

    ## GPU : https://dailylime.kr/2020/06/wsl2%EC%97%90%EC%84%9C-ubuntu%EC%99%80-cuda-%EC%82%AC%EC%9A%A9%ED%95%98%EA%B8%B0/
    # fp16=True, fp16_impl='mem_efficient',
)
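
A hedged follow-up, not part of the original snippet: once training stops, the saved checkpoint can be scored on the validation split with EvalModel, reusing the task and model_file from the active configuration above:

from parlai.scripts.eval_model import EvalModel

# evaluate the trained KV-MemNN checkpoint on the validation set
EvalModel.main(
    task='personachat:self',
    model_file='model/kvmemnn/self_persona_short',
    datatype='valid',
    batchsize=1,
)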