Example #1
# Excerpt from the project's training script: `options` comes from its
# command-line parser (see the sketch after this example), and adam,
# AdamAnneal and NCELangModel are imported as in Example #2; AdamAnneal is
# presumably the project's learning-rate-annealing Adam variant.
if options.decay:
    # Anneal the learning rate from lr down to lr_min at rate gamma.
    opt = AdamAnneal(lr=options.lr, lr_min=options.lr_min, gamma=options.gamma)
else:
    opt = adam(lr=options.lr)

# An empty string on the command line means "disabled"; map it to None.
log_file = options.log_file or None
save_path = options.save or None

model = NCELangModel(vocab_size=nb_vocab,
                     nb_negative=options.negative,
                     embed_dims=options.embed_size,
                     context_dims=options.context_size,
                     negprob_table=unigram_table,
                     optimizer=opt)
model.compile()
model.train(data_file=DATA_PATH,
            save_path=save_path,
            batch_size=BATCH_SIZE,
            train_nb_words=nb_run_words,
            val_nb_words=nb_evaluate,
            train_val_nb=nb_run_val,
            validation_interval=options.interval,
            log_file=log_file)
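
Note: the `options` object above is produced by the script's command-line
parser, which the excerpt does not include. The following is a hypothetical
sketch of such a parser; every flag name and default is an assumption
inferred from the attributes accessed above, not taken from the project.

# Hypothetical option parser (all flag names and defaults are assumptions).
from optparse import OptionParser

parser = OptionParser()
parser.add_option('--lr', type='float', dest='lr', default=0.01)
parser.add_option('--lr-min', type='float', dest='lr_min', default=0.001)
parser.add_option('--gamma', type='float', dest='gamma', default=0.001)
parser.add_option('--decay', action='store_true', dest='decay', default=False)
parser.add_option('--negative', type='int', dest='negative', default=50)
parser.add_option('--embed-size', type='int', dest='embed_size', default=128)
parser.add_option('--context-size', type='int', dest='context_size', default=128)
parser.add_option('--interval', type='int', dest='interval', default=1200)
parser.add_option('--log-file', dest='log_file', default='')
parser.add_option('--save', dest='save', default='')
parser.add_option('--nb-evaluation', type='int', dest='nb_evaluation', default=5000000)
options, args = parser.parse_args()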
Example #2
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'Yunchuan Chen'

from utils import get_unigram_probtable
from models import NCELangModel
from keras.optimizers import adam

NB_RUN_WORDS = 100000000
NB_VOCAB = 10000
NB_RUN_VAL = 100000
NB_EVALUATE = 5000000
SAVE_PATH = '../data/models/lang/nce0-neg50-e128-c128-lr0.01.pkl'

DATA_PATH = '../data/corpus/wiki-sg-norm-lc-drop-bin.bz2'
BATCH_SIZE = 256
VAL_INTER = 1200

unigram_table = get_unigram_probtable(nb_words=NB_VOCAB)

opt = adam(lr=0.01)
model = NCELangModel(vocab_size=NB_VOCAB, nb_negative=50, embed_dims=128, context_dims=128,
                     negprob_table=unigram_table, optimizer=opt)
model.compile()
model.train(data_file=DATA_PATH,
            save_path=SAVE_PATH,
            batch_size=BATCH_SIZE, train_nb_words=NB_RUN_WORDS,
            val_nb_words=NB_EVALUATE, train_val_nb=NB_RUN_VAL, validation_interval=VAL_INTER)
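
The noise distribution that NCE draws negative samples from is built by
get_unigram_probtable in the project's utils module, whose implementation is
not shown here. As a rough guide, here is a minimal NumPy sketch of what such
a table typically is (a normalized unigram distribution over the nb_words
most frequent types); the word_counts input and the function name are
assumptions:

# Minimal sketch of a unigram probability table for NCE noise sampling.
# Assumes word_counts maps word id -> corpus frequency for ids 0..nb_words-1;
# the project's get_unigram_probtable may differ (smoothing, caching, etc.).
import numpy as np

def unigram_probtable(word_counts, nb_words):
    counts = np.asarray([word_counts.get(i, 0) for i in range(nb_words)],
                        dtype='float64')
    return counts / counts.sum()  # probabilities over the vocabulary sum to 1

# Negatives can then be drawn in proportion to corpus frequency, e.g.:
# np.random.choice(nb_words, size=50, p=table)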
Example #3
File: run_nce0.py  Project: chenych11/lm
nb_evaluate = options.nb_evaluation

unigram_table = get_unigram_probtable(nb_words=nb_vocab,
                                      save_path='../data/wiki-unigram-prob-size%d.pkl' %
                                                nb_vocab)

if options.decay:
    opt = AdamAnneal(lr=options.lr, lr_min=options.lr_min, gamma=options.gamma)
else:
    opt = adam(lr=options.lr)

# As in Example #1: empty-string flags mean "disabled".
log_file = options.log_file or None
save_path = options.save or None

model = NCELangModel(vocab_size=nb_vocab, nb_negative=options.negative, 
                     embed_dims=options.embed_size, context_dims=options.context_size,
                     negprob_table=unigram_table, optimizer=opt)
model.compile()
model.train(data_file=DATA_PATH,
            save_path=save_path,
            batch_size=BATCH_SIZE, train_nb_words=nb_run_words,
            val_nb_words=nb_evaluate, train_val_nb=nb_run_val,
            validation_interval=options.interval, log_file=log_file)
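
All three examples train NCELangModel with noise-contrastive estimation:
each target word is contrasted against nb_negative samples drawn from the
unigram table. For orientation, here is a NumPy sketch of the standard NCE
loss for one target word; the exact loss inside models.NCELangModel is not
shown in these excerpts and may differ in detail:

# Standard NCE loss (Gutmann & Hyvärinen; Mnih & Teh for language models).
# score_true / scores_noise are unnormalized model log-scores, q_true /
# q_noise the noise (unigram) probabilities, k the number of negatives.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def nce_loss(score_true, q_true, scores_noise, q_noise, k):
    # P(data | w) = sigmoid(s(w) - log(k * q(w)))
    loss = -np.log(sigmoid(score_true - np.log(k * q_true)))
    loss -= np.sum(np.log(sigmoid(-(scores_noise - np.log(k * q_noise)))))
    return loss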