def load_model(embed_map=None):
    """
    Load all model components + apply vocab expansion
    """
    # Load the worddict
    print 'Loading dictionary...'
    with open(path_to_dictionary, 'rb') as f:
        worddict = pkl.load(f)

    # Create inverted dictionary
    print 'Creating inverted dictionary...'
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'
    worddict['<eos>'] = 0
    worddict['UNK'] = 1

    # Load model options
    print 'Loading model options...'
    with open('%s.pkl' % path_to_model, 'rb') as f:
        options = pkl.load(f)

    # Load parameters
    print 'Loading model parameters...'
    params = init_params(options)
    params = load_params(path_to_model, params)
    tparams = init_tparams(params)

    # Extractor functions
    print 'Compiling encoder...'
    trng = RandomStreams(1234)
    trng, x, x_mask, ctx, emb = build_encoder(tparams, options)
    f_enc = theano.function([x, x_mask], ctx, name='f_enc')
    f_emb = theano.function([x], emb, name='f_emb')
    trng, embedding, x_mask, ctxw2v = build_encoder_w2v(tparams, options)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')

    # Load word2vec, if applicable
    if embed_map is None:
        print 'Loading word2vec embeddings...'
        # embed_map = load_googlenews_vectors(path_to_word2vec)
        embed_map = vocab.load_dictionary("word_dict")

    # Lookup table using vocab expansion trick
    # (expansion disabled here; fall back to the raw dictionary)
    print 'Creating word lookup tables...'
    # table = lookup_table(options, embed_map, worddict, word_idict, f_emb)
    table = worddict

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['options'] = options
    model['table'] = table
    model['f_w2v'] = f_w2v

    return model
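# Hypothetical driver for load_model(), as a sketch only: it assumes the function
# lives in a `tools` module whose globals (path_to_model, path_to_dictionary)
# already point at real files. Names here are illustrative, not part of this file.
import tools

model = tools.load_model()      # compiles f_enc/f_emb/f_w2v once; reuse the dict
print(model['options']['dim'])  # encoder dimensionality from the saved options
print(len(model['table']))      # size of the lookup table (here the raw worddict)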
def __init__(self, loc="./saved_models/skip-best"):
    # Load the best model obtained from earlier training
    print("Preparing the DataLoader. Loading the word dictionary")
    self.d = DataLoader(
        sentences=[''],
        word_dict=load_dictionary('./data/dummy_corpus.txt.pkl'))
    self.encoder = None

    print("Loading encoder from the saved model at {}".format(loc))
    model = UniSkip()  # load the model
    model.load_state_dict(
        torch.load(loc, map_location=lambda storage, loc: storage))
    self.encoder = model.encoder
    if USE_CUDA:
        self.encoder.cuda(CUDA_DEVICE)  # move the encoder to the GPU
def __init__(self, loc, WORD_DICT):
    # BEST_MODEL = "../../dir_HugeFiles/prev_model/skip-best-loss10.237"
    print("Preparing the DataLoader. Loading the word dictionary")
    # WORD_DICT = '../dir_HugeFiles/instructions/skip_inst/skip_instruction.csv.pkl'
    self.d = DataLoader(sentences=[''],
                        word_dict=load_dictionary(WORD_DICT))
    self.encoder = None

    print("Loading encoder from the saved model at {}".format(loc))
    model = UniSkip()
    model.load_state_dict(
        torch.load(loc, map_location=lambda storage, loc: storage))
    self.encoder = model.encoder
    if USE_CUDA:
        self.encoder.cuda(CUDA_DEVICE)
        print('using cuda')
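# Usage sketch for both constructor variants above. The enclosing class name
# `Encoder` is an assumption (the class statement is not shown in this file),
# and the paths are placeholders.
enc = Encoder(loc='./saved_models/skip-best',
              WORD_DICT='./data/dummy_corpus.txt.pkl')
# enc.encoder is the trained UniSkip encoder module, moved to the GPU when
# USE_CUDA is set; it can now be reused for feature extraction.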
def main(data_path, dict_path, save_path, batch_size, reload_, reload_path):
    os.environ["THEANO_FLAGS"] = "floatX=float32"
    file_names = get_file_list(data_path, ['txt'])
    train_sent = load_txt_sent(file_names)

    if not os.path.exists(dict_path):
        print "Dictionary not found, recreating"
        worddict, wordcount = vocab.build_dictionary(train_sent)
        print "Built. Saving to: {}".format(dict_path)
        vocab.save_dictionary(worddict, wordcount, dict_path)
    else:
        print "Found dictionary at {}... Loading...".format(dict_path)
        worddict = vocab.load_dictionary(dict_path)

    print "Beginning Training..."
    train.trainer(train_sent, batch_size=batch_size, reload_=reload_,
                  dictionary=dict_path, saveto=save_path,
                  reload_path=reload_path, saveFreq=10000)
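# A hedged, placeholder invocation of main(); every path here is illustrative
# only. The first run builds and saves the dictionary, later runs reload it.
main(data_path='./corpus',
     dict_path='./corpus/dictionary.pkl',
     save_path='./models/model_iter_{}.npz',  # some trainers below .format() an update index into saveto
     batch_size=64,
     reload_=False,
     reload_path='')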
def trainer(X,
            dim_word=620,  # word vector dimensionality
            dim=2400,  # the number of GRU units
            encoder='gru',
            decoder='gru',
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=474000,
            maxlen_w=30,
            optimizer='adam',
            batch_size=64,
            saveto='/data/embeddingModel.npz',
            dictionary='dictionary.pkl',
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_

    print(model_options)

    # reload options (use the saved options, not a throwaway variable)
    if reload_ and os.path.exists(saveto):
        print('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # load dictionary
    print('Loading dictionary...')
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.items():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print('Building model')
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
        opt_ret, \
        cost = \
        build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print('Building f_log_probs...')
    f_log_probs = theano.function(inps, cost, profile=False)
    print('Done')

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.items():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print('Building f_cost...')
    f_cost = theano.function(inps, cost, profile=False)
    print('Done')

    print('Building f_grad...')
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.items()],
                                    profile=False)

    # Gradient clipping: rescale all gradients when their global norm exceeds grad_clip
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print('Building optimizers...', end='')
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print('Optimization')

    # Each sentence in the minibatch has the same length (for the encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in range(max_epochs):
        n_samples = 0

        print('Epoch ', eidx)

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(
                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print('Minibatch with zero sample under length ', maxlen_w)
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud)

            if numpy.mod(uidx, saveFreq) == 0:
                print('Saving...', end='')
                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print('Done')

        print('Seen %d samples' % n_samples)
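# Standalone numpy sketch of the global-norm clipping rule used in the trainers
# above: if the squared global norm g2 exceeds grad_clip**2, every gradient is
# rescaled by grad_clip / sqrt(g2); otherwise it passes through unchanged.
import numpy as np

def clip_global_norm(grads, grad_clip=5.):
    """Rescale all gradients when their global L2 norm exceeds grad_clip."""
    g2 = sum(float((g ** 2).sum()) for g in grads)
    if g2 > grad_clip ** 2:
        scale = grad_clip / np.sqrt(g2)
        return [g * scale for g in grads]
    return grads

grads = [np.full((2, 2), 10.), np.full((3,), 10.)]
clipped = clip_global_norm(grads)
print(np.sqrt(sum(float((g ** 2).sum()) for g in clipped)))  # -> 5.0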
def trainer(X, C, stmodel,
            dimctx=4800,  # vector dimensionality
            dim_word=620,  # word vector dimensionality
            dim=1600,  # the number of GRU units
            encoder='gru',
            decoder='gru',
            doutput=False,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=40000,
            maxlen_w=100,
            optimizer='adam',
            batch_size=16,
            saveto='adventuremodel.npz',
            dictionary='/home/jm7432/tell-tall-tales/decoding/adventure_dict_final.pkl',
            embeddings=None,
            saveFreq=1000,
            sampleFreq=100,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dimctx'] = dimctx
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['doutput'] = doutput
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['sampleFreq'] = sampleFreq
    model_options['reload_'] = reload_
    print model_options

    # reload options (use the saved options, not a throwaway variable)
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings is not None:
        print 'Loading embeddings...'
        with open(embeddings, 'rb') as f:
            embed_map = pkl.load(f)
        dim_word = len(embed_map.values()[0])
        model_options['dim_word'] = dim_word
        preemb = norm_weight(n_words, dim_word)
        pz = defaultdict(lambda: 0)
        for w in embed_map.keys():
            pz[w] = 1
        for w in worddict.keys()[:n_words - 2]:
            if pz[w] > 0:
                preemb[worddict[w]] = embed_map[w]
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()],
                                    profile=False)

    # Gradient clipping on the global norm
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    train_iter = homogeneous_data.HomogeneousData([X, C],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, c in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, ctx = homogeneous_data.prepare_data(x, c, worddict, stmodel,
                                                         maxlen=maxlen_w,
                                                         n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                x_s = x
                mask_s = mask
                ctx_s = ctx
                for jj in xrange(numpy.minimum(10, len(ctx_s))):
                    sample, score = gen_sample(tparams, f_init, f_next,
                                               ctx_s[jj].reshape(1, model_options['dimctx']),
                                               model_options,
                                               trng=trng, k=1, maxlen=100,
                                               stochastic=False, use_unk=False)
                    print 'Truth ', jj, ': ',
                    for vv in x_s[:, jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk, ') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                        print

        print 'Seen %d samples' % n_samples
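# The truth/sample printing loops above decode index sequences by hand. An
# equivalent helper, as a sketch (index 0 is <eos>; unknown ids map to 'UNK'):
def indices_to_words(seq, word_idict):
    """Decode a word-index sequence the same way the loops above do."""
    words = []
    for vv in seq:
        if vv == 0:  # <eos> terminates the sentence
            break
        words.append(word_idict.get(vv, 'UNK'))
    return ' '.join(words)

print(indices_to_words([5, 9, 3, 0, 7], {5: 'the', 9: 'cat', 3: 'sat'}))  # -> the cat sat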
def trainer(Xs, Xs_val,
            dim_word=620,  # word vector dimensionality
            dim=2400,  # the number of GRU units
            encoder='gru',
            decoder='gru',
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size=64,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            embeddings=None,
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_
    print model_options

    # reload options (use the saved options, not a throwaway variable)
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings:
        print 'Loading embeddings...'
        from gensim.models import Word2Vec as word2vec
        embed_map = word2vec.load_word2vec_format(embeddings, binary=True)
        model_options['dim_word'] = dim_word = embed_map.vector_size
        preemb = norm_weight(n_words, dim_word)
        preemb_mask = numpy.ones((n_words, 1), dtype='float32')
        for w, i in worddict.items()[:n_words - 2]:
            if w in embed_map:
                preemb[i] = embed_map[w]
                preemb_mask[i] = 0  # don't propagate gradients into pretrained embs
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
        opt_ret, \
        cost = \
        build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()],
                                    profile=False)

    # Gradient clipping on the global norm
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    # zero the embedding gradient rows that were initialized from word2vec
    if embeddings:
        param_preemb_mask = theano.shared(preemb_mask, name='preemb_mask',
                                          broadcastable=(False, True))
        grads[0] *= param_preemb_mask

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    if type(Xs[0]) is not list:
        Xs = [Xs]
    if type(Xs_val[0]) is not list:
        Xs_val = [Xs_val]
    trainXs = map(hd.grouper, Xs)
    valXs = map(hd.grouper, Xs_val)
    train_iters = [hd.HomogeneousData(trainX, batch_size=batch_size, maxlen=maxlen_w)
                   for trainX in trainXs]
    val_iters = [hd.HomogeneousData(valX, batch_size=batch_size, maxlen=maxlen_w)
                 for valX in valXs]

    f_progress = open('%s_progress.txt' % saveto, 'w', 1)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for train_iter in train_iters:
            for x, y, z in train_iter:
                n_samples += len(x)
                uidx += 1

                x, x_mask, y, y_mask, z, z_mask = hd.prepare_data(
                    x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

                if x is None:
                    print 'Minibatch with zero sample under length ', maxlen_w
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
                f_update(lrate)
                ud = time.time() - ud_start

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

                if numpy.mod(uidx, saveFreq) == 0:
                    val_logprob = n_val_samples = 0
                    for val_iter in val_iters:
                        for x, y, z in val_iter:
                            n_val_samples += len(x)
                            x, x_mask, y, y_mask, z, z_mask = hd.prepare_data(
                                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)
                            val_logprob += f_log_probs(x, x_mask, y, y_mask, z, z_mask)
                    val_logprob /= n_val_samples
                    print 'LOGPROB: %s' % val_logprob
                    f_progress.write('%s\n' % val_logprob)

                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez('%s_%.3f' % (saveto, val_logprob),
                                history_errs=[], **params)
                    pkl.dump(model_options,
                             open('%s_%.3f.pkl' % (saveto, val_logprob), 'wb'))
                    print 'Done'

        print 'Seen %d samples' % n_samples
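# Small numpy illustration of the preemb_mask trick above: rows initialized from
# word2vec get mask 0, so their gradient is zeroed before the optimizer sees it
# and only the randomly initialized rows keep training. Shapes are toy values.
import numpy as np

n_words, dim_word = 5, 3
preemb_mask = np.ones((n_words, 1), dtype='float32')
preemb_mask[[2, 4]] = 0.           # pretend rows 2 and 4 came from word2vec
grad_emb = np.ones((n_words, dim_word), dtype='float32')
grad_emb *= preemb_mask            # broadcasting zeroes the frozen rows
print(grad_emb.sum(axis=1))        # -> [3. 3. 0. 3. 0.]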
""" all_sent = [] for txt_file in flist_txt: print "Reading file: {}".format(txt_file) with open(txt_file, 'r') as f: data = f.read() sent = data.split('\n') all_sent += sent print "File loading complete. Cleaning..." #all_sent = map(clean_string, all_sent) return all_sent if __name__ == "__main__": os.environ["THEANO_FLAGS"] = "floatX=float32" file_names = get_file_list(data_path, ['txt']) train_sent = load_txt_sent(file_names) if not os.path.exists(dict_path): print "Dictionary not found, recreating" worddict, wordcount = vocab.build_dictionary(train_sent) print "Built. Saving to: {}".format(dict_path) vocab.save_dictionary(worddict, wordcount, dict_path) else: print "Found dictionary at {}... Loading...".format(dict_path) worddict = vocab.load_dictionary(dict_path) print "Beginning Training..." train.trainer(train_sent, n_words=20000, dim=2400, batch_size=128, reload_=False, dictionary=dict_path, saveto=save_path)
def trainer(X, C, stmodel,
            dimctx=4800,  # vector dimensionality
            dim_word=620,  # word vector dimensionality
            dim=1600,  # the number of GRU units
            encoder='gru',
            decoder='gru',
            doutput=False,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=40000,
            maxlen_w=100,
            optimizer='adam',
            batch_size=16,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            embeddings=None,
            saveFreq=1000,
            sampleFreq=100,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dimctx'] = dimctx
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['doutput'] = doutput
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['embeddings'] = embeddings
    model_options['saveFreq'] = saveFreq
    model_options['sampleFreq'] = sampleFreq
    model_options['reload_'] = reload_
    print model_options

    # reload options (use the saved options, not a throwaway variable)
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Load pre-trained embeddings, if applicable
    if embeddings is not None:
        print 'Loading embeddings...'
        with open(embeddings, 'rb') as f:
            embed_map = pkl.load(f)
        dim_word = len(embed_map.values()[0])
        model_options['dim_word'] = dim_word
        preemb = norm_weight(n_words, dim_word)
        pz = defaultdict(lambda: 0)
        for w in embed_map.keys():
            pz[w] = 1
        for w in worddict.keys()[:n_words - 2]:
            if pz[w] > 0:
                preemb[worddict[w]] = embed_map[w]
    else:
        preemb = None

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options, preemb=preemb)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()],
                                    profile=False)

    # Gradient clipping on the global norm
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    train_iter = homogeneous_data.HomogeneousData([X, C],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, c in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, ctx = homogeneous_data.prepare_data(x, c, worddict, stmodel,
                                                         maxlen=maxlen_w,
                                                         n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, mask, ctx)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                x_s = x
                mask_s = mask
                ctx_s = ctx
                for jj in xrange(numpy.minimum(10, len(ctx_s))):
                    sample, score = gen_sample(tparams, f_init, f_next,
                                               ctx_s[jj].reshape(1, model_options['dimctx']),
                                               model_options,
                                               trng=trng, k=1, maxlen=100,
                                               stochastic=False, use_unk=False)
                    print 'Truth ', jj, ': ',
                    for vv in x_s[:, jj]:
                        if vv == 0:
                            break
                        if vv in word_idict:
                            print word_idict[vv],
                        else:
                            print 'UNK',
                    print
                    for kk, ss in enumerate([sample[0]]):
                        print 'Sample (', kk, ') ', jj, ': ',
                        for vv in ss:
                            if vv == 0:
                                break
                            if vv in word_idict:
                                print word_idict[vv],
                            else:
                                print 'UNK',
                        print

        print 'Seen %d samples' % n_samples
def trainer(X,
            dim_word=620,  # word vector dimensionality
            dim=2400,  # the number of GRU units
            encoder='gru',
            decoder='gru',
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size=64,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_
    print model_options

    # reload options
    # TODO: if loading old parameters you need to make sure you are using them
    # in the rest of the code
    # if reload_ and os.path.exists(saveto):
    #     print 'reloading...' + saveto
    #     with open('%s.pkl' % saveto, 'rb') as f:
    #         model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto + '.npz', params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
        opt_ret, \
        cost = \
        build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()],
                                    profile=False)

    # Gradient clipping on the global norm
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(
                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                params = unzip(tparams)
                numpy.savez(saveto, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'

        print 'Seen %d samples' % n_samples
def trainer(X,
            dim_word=620,  # word vector dimensionality
            dim=2400,  # the number of GRU units
            encoder='gru',
            num_neg=4,
            gamma=1.0,
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size=64,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            saveFreq=1000,
            reload_=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['num_neg'] = num_neg
    model_options['gamma'] = gamma
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_
    print model_options

    # reload options (use the saved options, not a throwaway variable)
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, x, x_mask, p_f, p_f_mask, p_b, p_b_mask, \
        ns_list, ns_masks, \
        opt_ret, \
        cost = \
        build_model(tparams, model_options)
    inps = [x, x_mask, p_f, p_f_mask, p_b, p_b_mask] + ns_list + ns_masks

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()],
                                    profile=False)

    # Gradient clipping on the global norm
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  num_neg=num_neg,
                                                  maxlen=maxlen_w)

    uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, p_f, p_b, ns in train_iter:
            n_samples += len(x)
            uidx += 1

            # ns input is a list of num_neg negative sentences;
            # output ns is a list of num_neg (batchsize, neg_len) negative sentences
            x, x_mask, p_f, p_f_mask, p_b, p_b_mask, ns_list, ns_masks = \
                homogeneous_data.prepare_data(x, p_f, p_b, ns, worddict,
                                              maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            args = [x, x_mask, p_f, p_f_mask, p_b, p_b_mask] + ns_list + ns_masks
            cost = f_grad_shared(*args)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                saveto_idx = saveto.format(uidx)
                params = unzip(tparams)
                numpy.savez(saveto_idx, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto_idx, 'wb'))
                print 'Done'

        print 'Seen %d samples' % n_samples
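# Generic sketch only: the option names `num_neg` and `gamma` in the trainer
# above suggest a margin-based ranking loss over negative samples, but the real
# loss lives inside build_model, which is not shown here. This illustrates the
# general family of such losses, not this repository's exact objective.
import numpy as np

def margin_rank_loss(pos_score, neg_scores, gamma=1.0):
    # hinge on each negative: max(0, gamma - pos + neg), summed over negatives
    return float(np.maximum(0., gamma - pos_score + np.asarray(neg_scores)).sum())

print(margin_rank_loss(0.9, [0.2, 0.8, 1.1], gamma=1.0))  # -> 0.3 + 0.9 + 1.2 = 2.4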
def trainer(X,
            dim_word=620,  # word vector dimensionality
            dim=2400,  # the number of GRU units
            encoder='gru',
            decoder='gru',
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size=512,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            saveFreq=5000,
            reload_=False,
            reload_path='output_books_full/model_ae_full_bsz_64_iter_313000.npz',
            SICK_eval=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_
    model_options['reload_path'] = reload_path
    print model_options

    # reload options (use the saved options, not a throwaway variable)
    if reload_ and os.path.exists(reload_path):
        print 'reloading...' + reload_path
        with open('%s.pkl' % reload_path, 'rb') as f:
            model_options = pkl.load(f)
        reload_idx = int(reload_path.split('_')[-1].split('.')[0])

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(reload_path):
        params = load_params(reload_path, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, \
        opt_ret, \
        cost = \
        build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()],
                                    profile=False)

    # Gradient clipping on the global norm
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    # resume the update counter from the checkpoint index, if one was loaded
    if reload_ and os.path.exists(reload_path):
        uidx = reload_idx
    else:
        uidx = 0
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(
                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                saveto_iternum = saveto.format(uidx)
                params = unzip(tparams)
                numpy.savez(saveto_iternum, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto_iternum, 'wb'))
                print 'Done'

                if SICK_eval:
                    print "Evaluating SICK Test performance"
                    embed_map = tools.load_googlenews_vectors()
                    model = tools.load_model(path_to_model=saveto_iternum,
                                             embed_map=embed_map)
                    yhat, pr, sr, mse = eval_sick.evaluate(model, evaltest=True)
                    del model
                    del embed_map
                    print pr, sr, mse

                    res_save_file = saveto.format('ALL').split('.')[0] + '_SICK_EVAL.txt'
                    with open(res_save_file, 'a') as rsf:
                        cur_time = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
                        rsf.write('\n \n {}'.format(cur_time))
                        rsf.write('\n{}, {}, {}, {}'.format(uidx, pr, sr, mse))
                    print "Done"

        print 'Seen %d samples' % n_samples
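# The iteration-checkpointing trainers above treat `saveto` as a format
# template, so the path passed in should contain a '{}' placeholder:
saveto = 'output_books_full/model_ae_full_bsz_64_iter_{}.npz'
print(saveto.format(313000))
# -> output_books_full/model_ae_full_bsz_64_iter_313000.npz
# which matches the default reload_path above, so
# reload_idx = int(reload_path.split('_')[-1].split('.')[0]) recovers 313000.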