#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'Yunchuan Chen'

from utils import get_unigram_probtable
from models import NCELangModel
from keras.optimizers import adam

# Training configuration.
NB_RUN_WORDS = 100000000    # total number of words to train on
NB_VOCAB = 10000            # vocabulary size
NB_RUN_VAL = 100000         # words consumed per validation pass during training
NB_EVALUATE = 5000000       # words used for validation/evaluation
SAVE_PATH = '../data/models/lang/nce0-neg50-e128-c128-lr0.01.pkl'
DATA_PATH = '../data/corpus/wiki-sg-norm-lc-drop-bin.bz2'
BATCH_SIZE = 256
VAL_INTER = 1200            # validation interval

# Unigram distribution over the vocabulary, used as the NCE noise distribution.
unigram_table = get_unigram_probtable(nb_words=NB_VOCAB)
opt = adam(lr=0.01)

model = NCELangModel(vocab_size=NB_VOCAB, nb_negative=50, embed_dims=128,
                     context_dims=128, negprob_table=unigram_table, optimizer=opt)
model.compile()
model.train(data_file=DATA_PATH, save_path=SAVE_PATH, batch_size=BATCH_SIZE,
            train_nb_words=NB_RUN_WORDS, val_nb_words=NB_EVALUATE,
            train_val_nb=NB_RUN_VAL, validation_interval=VAL_INTER)
# Tail of the command-line variant of the training script above. ``options``
# comes from an option parser defined earlier in the original file, as do
# nb_vocab, nb_run_words, nb_run_val, DATA_PATH and BATCH_SIZE; AdamAnneal is
# a learning-rate-annealing Adam variant presumably defined elsewhere in the repo.
nb_evaluate = options.nb_evaluation
# unigram_table = get_unigram_probtable(nb_words=nb_vocab)
unigram_table = get_unigram_probtable(nb_words=nb_vocab,
                                      save_path='../data/wiki-unigram-prob-size%d.pkl' % nb_vocab)

# Anneal the learning rate if requested; otherwise use plain Adam.
if options.decay:
    opt = AdamAnneal(lr=options.lr, lr_min=options.lr_min, gamma=options.gamma)
else:
    opt = adam(lr=options.lr)

# An empty string on the command line means "disabled".
if options.log_file == '':
    log_file = None
else:
    log_file = options.log_file

if options.save == '':
    save_path = None
else:
    save_path = options.save

model = NCELangModel(vocab_size=nb_vocab, nb_negative=options.negative,
                     embed_dims=options.embed_size, context_dims=options.context_size,
                     negprob_table=unigram_table, optimizer=opt)
model.compile()
model.train(data_file=DATA_PATH, save_path=save_path, batch_size=BATCH_SIZE,
            train_nb_words=nb_run_words, val_nb_words=nb_evaluate,
            train_val_nb=nb_run_val, validation_interval=options.interval,
            log_file=log_file)
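# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the fragment above
# assumes an option parser defined earlier. A minimal optparse setup that
# would supply every attribute the fragment reads could look like the
# following. The flag names and the lr_min/gamma defaults are assumptions
# inferred from the ``options.*`` accesses; the other defaults mirror the
# constants in the non-CLI script above.
# ---------------------------------------------------------------------------
from optparse import OptionParser

parser = OptionParser()
parser.add_option('--lr', dest='lr', type='float', default=0.01,
                  help='initial learning rate')
parser.add_option('--lr-min', dest='lr_min', type='float', default=0.001,
                  help='floor for the annealed learning rate (with --decay)')
parser.add_option('--gamma', dest='gamma', type='float', default=0.001,
                  help='annealing rate passed to AdamAnneal (with --decay)')
parser.add_option('--decay', dest='decay', action='store_true', default=False,
                  help='anneal the learning rate with AdamAnneal')
parser.add_option('--negative', dest='negative', type='int', default=50,
                  help='number of negative (noise) samples for NCE')
parser.add_option('--embed-size', dest='embed_size', type='int', default=128,
                  help='word embedding dimensionality')
parser.add_option('--context-size', dest='context_size', type='int', default=128,
                  help='context representation dimensionality')
parser.add_option('--interval', dest='interval', type='int', default=1200,
                  help='validation interval')
parser.add_option('--nb-evaluation', dest='nb_evaluation', type='int', default=5000000,
                  help='number of words used for evaluation')
parser.add_option('--log-file', dest='log_file', default='',
                  help='log file path; empty string disables logging')
parser.add_option('--save', dest='save', default='',
                  help='model save path; empty string disables saving')
options, args = parser.parse_args()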