Code Example #1
import shutil

from flair.data import Dictionary
from flair.models import LanguageModel
from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus


def test_train_resume_language_model_training(resources_path,
                                              results_base_path,
                                              tasks_base_path):
    dictionary = Dictionary.load('chars')
    language_model = LanguageModel(dictionary,
                                   is_forward_lm=True,
                                   hidden_size=128,
                                   nlayers=1)
    corpus = TextCorpus(resources_path / 'corpora/lorem_ipsum',
                        dictionary,
                        language_model.is_forward_lm,
                        character_level=True)
    trainer = LanguageModelTrainer(language_model, corpus, test_mode=True)
    trainer.train(results_base_path,
                  sequence_length=10,
                  mini_batch_size=10,
                  max_epochs=2,
                  checkpoint=True)
    trainer = LanguageModelTrainer.load_from_checkpoint(
        results_base_path / 'checkpoint.pt', corpus)
    trainer.train(results_base_path,
                  sequence_length=10,
                  mini_batch_size=10,
                  max_epochs=2)
    shutil.rmtree(results_base_path)
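
This test (and Example #3 below, apparently a later revision of the same test) takes its three path arguments as pytest fixtures. A minimal conftest.py sketch under that assumption; the concrete paths are illustrative, not taken from the original test suite:

from pathlib import Path

import pytest


@pytest.fixture
def resources_path():
    # directory containing corpora/lorem_ipsum
    return Path(__file__).parent / 'resources'


@pytest.fixture
def results_base_path(resources_path):
    # scratch directory; the test removes it with shutil.rmtree when done
    return resources_path / 'results'


@pytest.fixture
def tasks_base_path(resources_path):
    return resources_path / 'tasks'
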
Code Example #2
import os
from pathlib import Path

from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus

# download_corpus and create_corpus are helpers defined elsewhere in the
# original script; they are used below but not shown here.


def train_elmo(args):

    if args.finetune and args.checkpoint_path == '':
        # fine-tune an existing pretrained Flair language model
        print("Fine-tuning pretrained language model")
        from flair.embeddings import FlairEmbeddings
        language_model = FlairEmbeddings('he-forward').lm
        corpus: TextCorpus = TextCorpus(args.corpus_path,
                                        language_model.dictionary,
                                        language_model.is_forward_lm,
                                        character_level=True)
        trainer = LanguageModelTrainer(language_model, corpus)

    elif args.checkpoint_path == '' and not args.finetune:

        # Training from scratch
        print('Training from scratch')

        # Download the corpus if it is not present locally
        if not os.path.exists(args.corpus_path):
            print('Corpus path:', args.corpus_path)
            download_corpus(args)

        language_model, corpus = create_corpus(args)
        trainer = LanguageModelTrainer(language_model, corpus)

    else:
        print("Training from checpoint")

        checkpoint = Path(args.checkpoint_path)
        # reuse the pretrained model's dictionary only when fine-tuning
        load_dict_from_lm = args.finetune

        trainer = LanguageModelTrainer.load_from_checkpoint(
            checkpoint,
            create_corpus(args, load_dict_from_lm, return_back='corpus'))

    trainer.train(args.save_model,
                  sequence_length=args.seq_length,
                  mini_batch_size=args.mini_batch,
                  max_epochs=args.epochs,
                  checkpoint=args.checkpoint)
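
Example #2 reads eight attributes off `args`. A minimal argparse sketch matching those accesses; the flag names are inferred from the attribute names above, and the defaults are illustrative, not from any documented CLI:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--corpus_path', default='corpus')         # where the corpus lives (or is downloaded to)
parser.add_argument('--checkpoint_path', default='')           # empty string means "no checkpoint"
parser.add_argument('--finetune', action='store_true')         # fine-tune 'he-forward' instead of training fresh
parser.add_argument('--save_model', default='language_model')  # output directory passed to trainer.train
parser.add_argument('--seq_length', type=int, default=250)
parser.add_argument('--mini_batch', type=int, default=100)
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--checkpoint', action='store_true')       # write checkpoint.pt during training

train_elmo(parser.parse_args())
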
Code Example #3
import shutil

from flair.data import Dictionary
from flair.models import LanguageModel
from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus


def test_train_resume_language_model_training(resources_path,
                                              results_base_path,
                                              tasks_base_path):
    # get default dictionary
    dictionary: Dictionary = Dictionary.load("chars")

    # init forward LM with 128 hidden states and 1 layer
    language_model: LanguageModel = LanguageModel(dictionary,
                                                  is_forward_lm=True,
                                                  hidden_size=128,
                                                  nlayers=1)

    # get the example corpus and process at character level in forward direction
    corpus: TextCorpus = TextCorpus(
        resources_path / "corpora/lorem_ipsum",
        dictionary,
        language_model.is_forward_lm,
        character_level=True,
    )

    # train the language model
    trainer: LanguageModelTrainer = LanguageModelTrainer(language_model,
                                                         corpus,
                                                         test_mode=True)
    trainer.train(
        results_base_path,
        sequence_length=10,
        mini_batch_size=10,
        max_epochs=2,
        checkpoint=True,
    )
    del trainer, language_model

    trainer = LanguageModelTrainer.load_from_checkpoint(
        results_base_path / "checkpoint.pt", corpus)
    trainer.train(results_base_path,
                  sequence_length=10,
                  mini_batch_size=10,
                  max_epochs=2)

    # clean up results directory
    shutil.rmtree(results_base_path)
    del trainer
Code Example #4
    corpus = TextCorpus('/root/.fastai/data/idwiki/',
                        dictionary,
                        is_forward_lm,
                        character_level=True)
    logger.info('serializing corpus')
    joblib.dump(corpus, '../flair_models/backwards/corpus.flair')
    logger.info('corpus saved to ../flair_models/backwards')

logger.info('loading corpus done, now creating language model')
# instantiate your language model, set hidden size and number of layers
language_model = LanguageModel(dictionary,
                               is_forward_lm,
                               hidden_size=2048,
                               nlayers=1)

if (MODEL_PATHLIB / 'checkpoint.pt').is_file():
    logger.info('checkpoint detected, resuming training')
    trainer = LanguageModelTrainer.load_from_checkpoint(
        MODEL_PATHLIB / 'checkpoint.pt', corpus)
else:
    # train your language model
    trainer = LanguageModelTrainer(language_model, corpus)

logger.info('we have lift off, good luck ground control')
trainer.train(MODEL_PATH,
              learning_rate=0.1,
              sequence_length=250,
              mini_batch_size=650,
              max_epochs=100,
              checkpoint=True)
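
Example #4 is a fragment: `dictionary`, `is_forward_lm`, `MODEL_PATH`, and `MODEL_PATHLIB` are defined on earlier, elided lines, and the indented block at the top sits inside an elided conditional. A minimal sketch of the setup the visible lines appear to assume; the names are kept, the values are illustrative guesses:

import logging
from pathlib import Path

import joblib
from flair.data import Dictionary
from flair.models import LanguageModel
from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

is_forward_lm = False                  # 'backwards' in the dump path suggests a backward LM
dictionary = Dictionary.load('chars')  # illustrative; the original may build its own dictionary
MODEL_PATHLIB = Path('../flair_models/backwards')
MODEL_PATH = str(MODEL_PATHLIB)
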
Code Example #5
    # TODO: add possibility for other dictionary!
    # (https://github.com/zalandoresearch/flair/issues/179#issuecomment-433942853)
    print("loading Dictionary")
    dictionary = Dictionary.load('chars')
    # instantiate corpus
    log.info("Making corpus from folder: {}".format(args.corpus_path))
    corpus = TextCorpus(args.corpus_path,
                        dictionary,
                        options['is_forward_lm'],
                        **options['corpus'])

    # TRAINING
    if args.continue_training:
        # load checkpoint
        cp_path = args.train_path + '/checkpoint.pt'
        log.info("Continue training from {}".format(cp_path))
        # load LM-Trainer
        trainer = LanguageModelTrainer.load_from_checkpoint(cp_path, corpus)
    else:
        # instantiate language model
        log.info("Creating language model")
        language_model = LanguageModel(dictionary,
                                       options['is_forward_lm'],
                                       **options['language_model'])
        # instantiate LM Trainer
        trainer = LanguageModelTrainer(language_model, corpus)

    log.info("Starting training. See {}".format(args.train_path))
    trainer.log_interval = 500
    trainer.train(args.train_path, **options['training'])
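
Example #5 drives everything from an `options` mapping that is unpacked into TextCorpus, LanguageModel, and trainer.train. A sketch of the shape that mapping must have, given how it is unpacked above; the keys come from the snippet, the values are illustrative defaults:

options = {
    'is_forward_lm': True,
    # keyword arguments forwarded to TextCorpus(...)
    'corpus': {'character_level': True},
    # keyword arguments forwarded to LanguageModel(...)
    'language_model': {'hidden_size': 1024, 'nlayers': 1},
    # keyword arguments forwarded to trainer.train(...)
    'training': {'sequence_length': 250, 'mini_batch_size': 100, 'max_epochs': 10},
}
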