def train_encoder(name_of_data, sentences, max_epochs=5, save_frequency=1000):
    if not os.path.exists('data/'):
        os.makedirs('data')
    sys.path.insert(0, 'training/')
    import vocab
    # Build and save the vocabulary for this corpus
    worddict, wordcount = vocab.build_dictionary(sentences)
    vocab.save_dictionary(worddict, wordcount,
                          'data/' + name_of_data + '_dictionary.pkl')
    pickle.dump(sentences, open('data/' + name_of_data + '_sen.p', 'wb'))
    # Patch the reference training script with the local paths and hyperparameters
    with open('training/train.py', 'r') as f:
        text = f.read()
    text = text.replace('max_epochs=5', 'max_epochs=' + str(max_epochs))
    text = text.replace('saveto=\'/u/rkiros/research/semhash/models/toy.npz\'',
                        'saveto=\'data/' + name_of_data + '_encoder.npz\'')
    text = text.replace('dictionary=\'/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl\'',
                        'dictionary=\'data/' + name_of_data + '_dictionary.pkl\'')
    text = text.replace('n_words=20000', 'n_words=' + str(len(wordcount)))
    text = text.replace('saveFreq=1000', 'saveFreq=' + str(save_frequency))
    with open('training/train_temp.py', 'w') as g:
        g.write(text)
    import train_temp
    train_temp.trainer(sentences)
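# A minimal usage sketch of train_encoder above. The corpus file name and the
# line-by-line sentence splitting are assumptions for illustration; any list of
# sentence strings works, provided the module-level imports (os, sys, pickle)
# that train_encoder relies on are present.
sentences = []
with open('my_corpus.txt') as f:  # hypothetical corpus, one sentence per line
    for line in f:
        line = line.strip()
        if line:
            sentences.append(line)
train_encoder('my_corpus', sentences, max_epochs=5, save_frequency=1000)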
def main(data_path, dict_path, save_path, batch_size, reload_, reload_path):
    os.environ["THEANO_FLAGS"] = "floatX=float32"
    file_names = get_file_list(data_path, ['txt'])
    train_sent = load_txt_sent(file_names)
    if not os.path.exists(dict_path):
        print "Dictionary not found, recreating"
        worddict, wordcount = vocab.build_dictionary(train_sent)
        print "Built. Saving to: {}".format(dict_path)
        vocab.save_dictionary(worddict, wordcount, dict_path)
    else:
        print "Found dictionary at {}... Loading...".format(dict_path)
        worddict = vocab.load_dictionary(dict_path)
    print "Beginning Training..."
    train.trainer(train_sent, batch_size=batch_size, reload_=reload_,
                  dictionary=dict_path, saveto=save_path,
                  reload_path=reload_path, saveFreq=10000)
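# get_file_list is called above and in the __main__ block further down but its
# body is not shown. A minimal sketch of what such a helper might look like
# (assumption: it walks data_path and keeps files whose extension is in the
# given list):
def get_file_list(data_path, extensions):
    file_names = []
    for root, _dirs, files in os.walk(data_path):
        for fname in files:
            if fname.split('.')[-1] in extensions:
                file_names.append(os.path.join(root, fname))
    return file_names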
def create_dictionaries(data, load_test, langs, dictionaries):
    train, dev, test = [], [], []
    for d, t in zip(data, load_test):
        train_, dev_, test_ = [], [], []
        for lang in langs:
            train_.append("%s/train.%s" % (d, lang))
            dev_.append("%s/dev.%s" % (d, lang))
            test_.append("%s/test.%s" % (d, lang))
        train.append(train_)
        dev.append(dev_)
        if t:
            test.append(test_)

    # debugging: make sure data set files exist
    assert all([os.path.isfile(t) for t_ in train for t in t_]), \
        "Could not find train files.\n%s" % train[0][0]
    assert all([os.path.isfile(d) for d_ in dev for d in d_]), \
        "Could not find dev files.\n%s" % dev[0][0]
    assert all([os.path.isfile(t) for t_ in test for t in t_]), \
        "Could not find test files.\n%s" % test[0][0]

    # Load training and development sets, alternatively also the test set
    print 'Loading dataset'
    wordslang = []
    for d, t in zip(data, load_test):
        train, dev, test = load_multilingual_dataset(path_to_data=d,
                                                     langs=langs,
                                                     load_test=t,
                                                     load_images=False)
        for lidx, lang in enumerate(langs):
            wordslang.append(train[0][lidx] + dev[0][lidx] + test[0][lidx])

    worddicts = []
    iworddicts = []
    # Create and save dictionaries
    print 'Creating and saving multilingual dictionaries %s ...' % (", ".join(langs))
    for lidx, (lang, saveto) in enumerate(zip(langs, dictionaries)):
        worddict = build_dictionary(wordslang[lidx])[0]
        n_words_dict = len(worddict)
        print '%s dictionary size: %s' % (lang, str(n_words_dict))
        with open('%s.dictionary-%s.pkl' % (saveto, lang), 'wb') as f:
            pkl.dump(worddict, f)
    print 'Done.'
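# A minimal usage sketch of create_dictionaries. The dataset path and save
# prefixes below are assumptions for illustration only; note that the function
# zips langs with dictionaries, so one save prefix is needed per language.
create_dictionaries(data=['./data/f30k-comparable'],
                    load_test=[True],
                    langs=['en', 'de'],
                    dictionaries=['./models/f30k-comparable',
                                  './models/f30k-comparable'])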
def train_decoder(name_of_data, sentences, model, p, max_epochs=5,
                  save_frequency=1000, n_words=20000, maxlen_w=30, reload_=False):
    if not os.path.exists('data/'):
        os.makedirs('data')
    sys.path.insert(1, 'decoding/')
    import vocab
    reload(vocab)
    # Build and save the (size-capped) vocabulary for this corpus
    worddict, wordcount = vocab.build_dictionary(sentences, n_words)
    vocab.save_dictionary(worddict, wordcount,
                          'data/' + name_of_data + '_dictionary.pkl')
    # Patch the reference decoder training script with the local paths and hyperparameters
    with open('decoding/train.py', 'r') as f:
        text = f.read()
    text = text.replace('max_epochs=5', 'max_epochs=' + str(max_epochs))
    text = text.replace('saveto=\'/u/rkiros/research/semhash/models/toy.npz\'',
                        'saveto=\'data/' + name_of_data + '_decoder.npz\'')
    text = text.replace('dictionary=\'/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl\'',
                        'dictionary=\'data/' + name_of_data + '_dictionary.pkl\'')
    text = text.replace('n_words=40000', 'n_words=' + str(len(wordcount)))
    text = text.replace('saveFreq=1000', 'saveFreq=' + str(save_frequency))
    with open('decoding/train_temp.py', 'w') as g:
        g.write(text)
    import train_temp
    reload(train_temp)
    return train_temp.trainer(sentences, sentences, model, p,
                              maxlen_w=maxlen_w, reload_=reload_)
def main():
    parser = argparse.ArgumentParser(
        description='Pass target style genre to train decoder')
    parser.add_argument('-s', '--style_genre',
                        help='the name of style corpus',
                        required=True, default='localhost')
    flag = parser.parse_args()
    style_corpus_path = "/media/VSlab3/kuanchen_arxiv/artistic_style_corpora/{}".format(
        flag.style_genre)
    style_genre = flag.style_genre.split(".")[0]
    X = []
    with open(style_corpus_path, 'r') as handle:
        for line in handle.readlines():
            X.append(line.strip())
    C = X
    if not os.path.isfile("./vocab_save/{}.pkl".format(style_genre)):
        print "Get vocabulary..."
        worddict, wordcount = vocab.build_dictionary(X)
        vocab.save_dictionary(worddict=worddict,
                              wordcount=wordcount,
                              loc="vocab_save/{}.pkl".format(style_genre))
    savepath = "./logs_{}".format(style_genre)
    if not os.path.exists(savepath):
        os.mkdir(savepath)
    skmodel = skipthoughts.load_model()
    train.trainer(X, C, skmodel,
                  dictionary="vocab_save/{}.pkl".format(style_genre),
                  savepath=savepath, saveto="model.npz")
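# Example invocation of the script above (assumptions: the module calls main()
# under "if __name__ == '__main__'" and is saved as train_style_decoder.py;
# the corpus file must exist in the hard-coded corpora directory):
#   python train_style_decoder.py -s shakespeare.txt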
def trainer(load_from=None, save_dir="snapshots", name="anon", **kwargs): """ :param load_from: location to load parameters + options from :param name: name of model, used as location to save parameters + options """ curr_model = dict() # load old model, including parameters, but overwrite with new options if load_from: print "reloading..." + load_from with open("%s.pkl" % load_from, "rb") as f: curr_model = pkl.load(f) else: curr_model["options"] = {} for k, v in kwargs.iteritems(): curr_model["options"][k] = v model_options = curr_model["options"] # initialize logger import datetime timestampedName = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + "_" + name from logger import Log log = Log( name=timestampedName, hyperparams=model_options, saveDir="vis/training", xLabel="Examples Seen", saveFrequency=1 ) print curr_model["options"] # Load training and development sets print "Loading dataset" dataset = load_dataset(model_options["data"], cnn=model_options["cnn"], load_train=True) train = dataset["train"] dev = dataset["dev"] # Create dictionary print "Creating dictionary" worddict = build_dictionary(train["caps"] + dev["caps"]) print "Dictionary size: " + str(len(worddict)) curr_model["worddict"] = worddict curr_model["options"]["n_words"] = len(worddict) + 2 # save model pkl.dump(curr_model, open("%s/%s.pkl" % (save_dir, name), "wb")) print "Loading data" train_iter = datasource.Datasource(train, batch_size=model_options["batch_size"], worddict=worddict) dev = datasource.Datasource(dev, worddict=worddict) dev_caps, dev_ims = dev.all() print "Building model" params = init_params(model_options) # reload parameters if load_from is not None and os.path.exists(load_from): params = load_params(load_from, params) tparams = init_tparams(params) inps, cost = build_model(tparams, model_options) print "Building sentence encoder" inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print "Building image encoder" inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print "Building f_grad...", grads = tensor.grad(cost, wrt=itemlist(tparams)) print "Building errors.." inps_err, errs = build_errors(model_options) f_err = theano.function(inps_err, errs, profile=False) curr_model["f_senc"] = f_senc curr_model["f_ienc"] = f_ienc curr_model["f_err"] = f_err if model_options["grad_clip"] > 0.0: grads = [maxnorm(g, model_options["grad_clip"]) for g in grads] lr = tensor.scalar(name="lr") print "Building optimizers...", # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(model_options["optimizer"])(lr, tparams, grads, inps, cost) print "Optimization" uidx = 0 curr = 0 n_samples = 0 for eidx in xrange(model_options["max_epochs"]): print "Epoch ", eidx for x, mask, im in train_iter: n_samples += x.shape[1] uidx += 1 # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(model_options["lrate"]) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print "NaN detected" return 1.0, 1.0, 1.0 if numpy.mod(uidx, model_options["dispFreq"]) == 0: print "Epoch ", eidx, "Update ", uidx, "Cost ", cost, "UD ", ud log.update({"Error": float(cost)}, n_samples) if numpy.mod(uidx, model_options["validFreq"]) == 0: print "Computing results..." 
# encode sentences efficiently dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options["batch_size"]) dev_i = encode_images(curr_model, dev_ims) # compute errors dev_errs = compute_errors(curr_model, dev_s, dev_i) # compute ranking error (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True) (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs) print "Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr) log.update({"R@1": r1, "R@5": r5, "R@10": r10, "median_rank": medr, "mean_rank": meanr}, n_samples) print "Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri) log.update( { "Image2Caption_R@1": r1i, "Image2Caption_R@5": r5i, "Image2CaptionR@10": r10i, "Image2Caption_median_rank": medri, "Image2Caption_mean_rank": meanri, }, n_samples, ) tot = r1 + r5 + r10 if tot > curr: curr = tot # Save parameters print "Saving...", numpy.savez("%s/%s" % (save_dir, name), **unzip(tparams)) print "Done" vis_details["hyperparams"] = model_options # Save visualization details with open("vis/roc/%s/%s.json" % (model_options["data"], timestampedName), "w") as f: json.dump(vis_details, f) # Add the new model to the index try: index = json.load(open("vis/roc/index.json", "r")) except IOError: index = {model_options["data"]: []} models = index[model_options["data"]] if timestampedName not in models: models.append(timestampedName) with open("vis/roc/index.json", "w") as f: json.dump(index, f) print "Seen %d samples" % n_samples
def main(): model_config = configuration.ModelConfig() model_config.data = FLAGS.input_dataset_name #loading dataset print('Loading dataset ...') (train_caps, train_ims), (test_caps, test_ims), _ = load_dataset(name=model_config.data, load_train=True) train_nic_ims = train_ims[:, 1536:] test_nic_ims = test_ims[:, 1536:] train_ims[:, 1536:] = preprocessing.scale(train_nic_ims) test_ims[:, 1536:] = preprocessing.scale(test_nic_ims) test_vgg_feature = test_ims[:, :1536] test_NIC_feature = test_ims[:, 1536:] #create and save dictionary print('creating dictionary') worddict = build_dictionary(train_caps + test_caps)[0] n_words = len(worddict) model_config.n_words = n_words model_config.worddict = worddict print('dictionary size: ' + str(n_words)) with open('f8k.dictionary.pkl', 'wb') as f: pkl.dump(worddict, f) #Building the model print('Building the model ...') model = LTS(model_config) model.build() config = tf.ConfigProto() config.gpu_options.allow_growth = True saver = tf.train.Saver(max_to_keep=model_config.max_checkpoints_to_keep) #sess = tf.Session(config=config) print('start embedding training') curr = 0. uidx = 0. train_iter = homogeneous_data.HomogeneousData( data=[train_caps, train_ims], batch_size=model_config.batch_size, maxlen=model_config.maxlen_w) with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) for epoch in range(model_config.max_epochs): # Train G print('Epoch ', epoch) if epoch == 15: model_config.lrate = model_config.lrate / 10 for x, im in train_iter: uidx += 1 ls, mask, im = homogeneous_data.prepare_data( caps=x, features=im, worddict=worddict, maxlen=model_config.maxlen_w, n_words=model_config.n_words) vgg_feature = im[:, :1536] NIC_feature = im[:, 1536:] #embedding training _, cost = sess.run( [model.updates, model.embedding_loss], feed_dict={ model.VGG_pred_data: vgg_feature, model.NIC_pred_data: NIC_feature, model.ls_pred_data: ls.T, model.input_mask: mask.T, model.keep_prob: 0.5, model.phase: 1, model.learning_rate: model_config.lrate }) if np.mod(uidx, 10) == 0: print('Epoch ', epoch, 'Update ', uidx, 'Cost ', cost) if np.mod(uidx, 100) == 0: print('test ...') # encode images into the text embedding space images = getTestImageFeature(sess, model, test_vgg_feature, test_NIC_feature) features = getTestTextFeature(sess, model, model_config, test_caps) (r1, r5, r10, medr) = recall.i2t(images, features) print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)) (r1i, r5i, r10i, medri) = recall.t2i(images, features) print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)) currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr: curr = currscore # Save model print('Saving...') saver.save(sess, "checkpoint_files/model.ckpt", global_step=int(uidx + 1)) print('done.') sess = tf.Session() model_path = tf.train.latest_checkpoint("checkpoint_files/") if not model_path: print("Skipping testing. 
No checkpoint found in: %s" % FLAGS.checkpoint_dir)
        return
    print("Loading model from checkpoint: %s" % model_path)
    saver.restore(sess, model_path)
    print("Successfully loaded checkpoint: %s" % model_path)
    images = getTestImageFeature(sess, model, test_vgg_feature, test_NIC_feature)
    # encode sentences into the text embedding space
    features = getTestTextFeature(sess, model, model_config, test_caps)
    (r1, r5, r10, medr) = recall.i2t(images, features)
    print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))
    (r1i, r5i, r10i, medri) = recall.t2i(images, features)
    print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))
                    default='./target_text/toy_story.txt')
args = parser.parse_args()
target_name = args.targe_text.split("/")[-1].split(".")[0]  # Target text file name without extension, e.g. "speeches" from "speeches.txt"

download_model()
print("Loading Skip-Vector Model...")
skmodel = skipthoughts.load_model()
print("Done!")

""" Step 1: Generating dictionary for the target text. """
print("Generating dictionary for the target text...")
X = load_text(args.targe_text)
worddict, wordcount = vocab.build_dictionary(X)
#vocab.save_dictionary(worddict, wordcount, './target_dict/%s_dict.pkl'%target_name)
#print("Done! Saved dictionary under ./target_dict/ as %s_dict.pkl"%target_name)
vocab.save_dictionary(worddict, wordcount,
                      './%s/%s_dict.pkl' % (target_name, target_name))
print("Done! Saved dictionary under ./%s/ as %s_dict.pkl" % (target_name, target_name))

""" Step 2: Generating style vector for the target text. """
print("Generating style vector for the target text...")
nltk.download('punkt')  # Natural Language Toolkit tokenizer data for the skipthoughts encoder.
print("The length of X is:")
print(len(X))
skip_vector = skipthoughts.encode(skmodel, X)
""" all_sent = [] for txt_file in flist_txt: print "Reading file: {}".format(txt_file) with open(txt_file, 'r') as f: data = f.read() sent = data.split('\n') all_sent += sent print "File loading complete. Cleaning..." #all_sent = map(clean_string, all_sent) return all_sent if __name__ == "__main__": os.environ["THEANO_FLAGS"] = "floatX=float32" file_names = get_file_list(data_path, ['txt']) train_sent = load_txt_sent(file_names) if not os.path.exists(dict_path): print "Dictionary not found, recreating" worddict, wordcount = vocab.build_dictionary(train_sent) print "Built. Saving to: {}".format(dict_path) vocab.save_dictionary(worddict, wordcount, dict_path) else: print "Found dictionary at {}... Loading...".format(dict_path) worddict = vocab.load_dictionary(dict_path) print "Beginning Training..." train.trainer(train_sent, n_words=20000, dim=2400, batch_size=128, reload_=False, dictionary=dict_path, saveto=save_path)
def trainer(data='coco', margin=0.2, dim=1024, dim_image=4096, dim_word=300, encoder='gru', max_epochs=15, dispFreq=10, decay_c=0.0, grad_clip=2.0, maxlen_w=150, batch_size=128, saveto='vse/coco', validFreq=100, lrate=0.0002, concat=True, reload_=False): hyper_params = { 'data': data, 'encoder': encoder, 'batch_size': batch_size, 'time': cur_time, 'lrate': lrate, 'concat': concat, } i2t_r1 = dict([('i2t_recall', 'r1')] + hyper_params.items()) i2t_r5 = dict([('i2t_recall', 'r5')] + hyper_params.items()) i2t_r10 = dict([('i2t_recall', 'r10')] + hyper_params.items()) t2i_r1 = dict([('t2i_recall', 'r1')] + hyper_params.items()) t2i_r5 = dict([('t2i_recall', 'r5')] + hyper_params.items()) t2i_r10 = dict([('t2i_recall', 'r10')] + hyper_params.items()) i2t_med = dict([('i2t_med', 'i2t_med')] + hyper_params.items()) t2i_med = dict([('t2i_med', 't2i_med')] + hyper_params.items()) agent = Agent(port=5020) i2t_r1_agent = agent.register(i2t_r1, 'recall', overwrite=True) i2t_r5_agent = agent.register(i2t_r5, 'recall', overwrite=True) i2t_r10_agent = agent.register(i2t_r10, 'recall', overwrite=True) t2i_r1_agent = agent.register(t2i_r1, 'recall', overwrite=True) t2i_r5_agent = agent.register(t2i_r5, 'recall', overwrite=True) t2i_r10_agent = agent.register(t2i_r10, 'recall', overwrite=True) i2t_med_agent = agent.register(i2t_med, 'median', overwrite=True) t2i_med_agent = agent.register(t2i_med, 'median', overwrite=True) # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['encoder'] = encoder model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ model_options['concat'] = concat print model_options # reload options if reload_ and os.path.exists(saveto): print 'reloading...' 
+ saveto with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) # Load training and development sets print 'loading dataset' train, dev = load_dataset(data)[:2] # Create and save dictionary print 'Create dictionary' worddict = build_dictionary(train[0] + dev[0])[0] n_words = len(worddict) model_options['n_words'] = n_words print 'Dictionary size: ' + str(n_words) with open('%s.dictionary.pkl' % saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' model_options['worddict'] = worddict model_options['word_idict'] = word_idict # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) img_sen_model = ImgSenRanking(model_options) img_sen_model = img_sen_model.cuda() loss_fn = PairwiseRankingLoss(margin=margin) loss_fn = loss_fn.cuda() params = filter(lambda p: p.requires_grad, img_sen_model.parameters()) optimizer = torch.optim.Adam(params, lrate) uidx = 0 curr = 0.0 n_samples = 0 for eidx in xrange(max_epochs): print 'Epoch ', eidx for x, im in train_iter: n_samples += len(x) uidx += 1 x_id, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words) if x_id is None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue x_id = Variable(torch.from_numpy(x_id).cuda()) im = Variable(torch.from_numpy(im).cuda()) # Update ud_start = time.time() x, im = img_sen_model(x_id, im, x) cost = loss_fn(im, x) optimizer.zero_grad() cost.backward() torch.nn.utils.clip_grad_norm(params, grad_clip) optimizer.step() ud = time.time() - ud_start if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost.data.cpu( ).numpy()[0], 'UD ', ud if numpy.mod(uidx, validFreq) == 0: print 'Computing results...' curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = worddict curr_model['word_idict'] = word_idict curr_model['img_sen_model'] = img_sen_model ls, lim = encode_sentences(curr_model, dev[0]), encode_images( curr_model, dev[1]) r1, r5, r10, medr = 0.0, 0.0, 0.0, 0 r1i, r5i, r10i, medri = 0.0, 0.0, 0.0, 0 r_time = time.time() if data == 'arch' or data == 'arch_small': (r1, r5, r10, medr) = i2t_arch(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i_arch(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % ( r1i, r5i, r10i, medri) else: (r1, r5, r10, medr) = i2t(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % ( r1i, r5i, r10i, medri) print "Cal Recall@K using %ss" % (time.time() - r_time) record_num = uidx / validFreq agent.append(i2t_r1_agent, record_num, r1) agent.append(i2t_r5_agent, record_num, r5) agent.append(i2t_r10_agent, record_num, r10) agent.append(t2i_r1_agent, record_num, r1i) agent.append(t2i_r5_agent, record_num, r5i) agent.append(t2i_r10_agent, record_num, r10i) agent.append(i2t_med_agent, record_num, medr) agent.append(t2i_med_agent, record_num, medri) currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr: curr = currscore # Save model print 'Saving model...', pkl.dump( model_options, open('%s_params_%s.pkl' % (saveto, encoder), 'wb')) torch.save(img_sen_model.state_dict(), '%s_model_%s.pkl' % (saveto, encoder)) print 'Done' print 'Seen %d samples' % n_samples
def trainer( data='f30k-comparable-full', path_to_data='./data/', langs=['en', 'de'], margin=0.2, dim=100, dim_multimodal=100, dim_image=4096, dim_word=100, encoders={ 'en': 'gru', 'de': 'gru' }, # gru OR bow max_epochs=15, dispFreq=10, decay_c=0., grad_clip=2., maxlen_w=100, optimizer='adam', batch_size=128, saveto='./models/f30k-comparable-full.npz', validFreq=100, testFreq=100, lrate=0.0002, reload_=False, # new parameters max_words={ 'en': 0, 'de': 0 }, # integer, zero means unlimited debug=False, use_dropout=False, dropout_embedding=0.2, # dropout for input embeddings (0: no dropout) dropout_hidden=0.2, # dropout for hidden layers (0: no dropout) dropout_source=0.0, # dropout source words (0: no dropout) #dropout_prob=0.5, load_test=False, lambda_img_sent=0.5, lambda_sent_sent=0.5, bidirectional_enc=False, n_enc_hidden_layers=1, use_all_costs=False, create_dictionaries=False, attention_type='dot', # one of 'general', 'dot' decay_c_general_attention=0.0, # L2 regularisation for the attention matrices dictionaries_min_freq=0): # Model options model_options = {} model_options['data'] = data model_options['langs'] = langs for lang in langs: model_options['encoder_%s' % lang] = encoders[lang] model_options['max_words_%s' % lang] = max_words[lang] model_options['margin'] = margin model_options['dim'] = dim model_options['dim_multimodal'] = dim_multimodal model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['optimizer'] = optimizer model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['testFreq'] = testFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ model_options['use_dropout'] = use_dropout model_options['dropout_embedding'] = dropout_embedding model_options['dropout_hidden'] = dropout_hidden model_options['dropout_source'] = dropout_source #model_options['dropout_prob'] = dropout_prob model_options['bidirectional_enc'] = bidirectional_enc model_options['n_enc_hidden_layers'] = n_enc_hidden_layers model_options['load_test'] = load_test model_options['lambda_img_sent'] = lambda_img_sent model_options['lambda_sent_sent'] = lambda_sent_sent model_options['use_all_costs'] = use_all_costs model_options['use_all_costs'] = use_all_costs model_options['create_dictionaries'] = create_dictionaries model_options['dictionaries_min_freq'] = dictionaries_min_freq model_options['attention_type'] = attention_type model_options['decay_c_general_attention'] = decay_c_general_attention assert (n_enc_hidden_layers >= 1) # reload options if reload_ and os.path.exists(saveto): print 'reloading...' 
+ saveto with open('%s.pkl' % saveto, 'rb') as f: models_options = pkl.load(f) # Load training and development sets, alternatively also test set print 'Loading dataset' train, dev, test = load_multilingual_dataset(data, langs, load_test=load_test) worddicts = [] iworddicts = [] if create_dictionaries: # Create and save dictionaries print 'Creating and saving multilingual dictionaries %s' % (", ".join( model_options['langs'])) for lang_idx, lang in enumerate(langs): if load_test: worddict = build_dictionary( train[0][lang_idx] + dev[0][lang_idx] + test[0][lang_idx], dictionaries_min_freq)[0] else: worddict = build_dictionary( train[0][lang_idx] + dev[0][lang_idx], dictionaries_min_freq)[0] n_words_dict = len(worddict) print 'minimum word frequency: %i' % dictionaries_min_freq print '%s dictionary size: %s' % (lang, str(n_words_dict)) with open('%s.dictionary-%s.pkl' % (saveto, lang), 'wb') as f: pkl.dump(worddict, f) # Inverse dictionaries iworddict = dict() for kk, vv in worddict.iteritems(): iworddict[vv] = kk iworddict[0] = '<eos>' iworddict[1] = 'UNK' worddicts.append(worddict) iworddicts.append(iworddict) model_options["n_words_%s" % lang] = n_words_dict if max_words[ lang] == 0 else max_words[lang] else: # load dictionaries print 'Loading multilingual dictionaries %s' % (", ".join( model_options['langs'])) for lang_idx, lang in enumerate(langs): with open('%s.dictionary-%s.pkl' % (saveto, lang), 'wb') as f: worddict = pkl.load(f) # Inverse dictionaries iworddict = dict() for kk, vv in worddict.iteritems(): iworddict[vv] = kk iworddict[0] = '<eos>' iworddict[1] = 'UNK' worddicts.append(worddict) iworddicts.append(iworddict) model_options["n_words_%s" % lang] = n_words_dict if max_words[ lang] == 0 else max_words[lang] # assert all max_words per language are equal assert (all(x == max_words.values()[0] for x in max_words.values())) print model_options print 'Building model' params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto): params = load_params(saveto, params) tparams = init_tparams(params) trng, inps, cost = build_model(tparams, model_options) # before any regularizer print 'Building f_log_probs...', f_log_probs = theano.function(inps, cost, profile=False) print 'Done' # weight decay, if applicable if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): # all parameters but general attention, if any if not kk.endswith('mapping'): weight_decay += (vv**2).sum() weight_decay *= decay_c cost += weight_decay # weight decay for the general attention, if applicable if decay_c_general_attention > 0. and attention_type == 'general': decay_g = theano.shared(numpy.float32(decay_c_general_attention), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): if kk.endswith('mapping'): print 'Adding L2 for %s ...' 
% kk weight_decay += (vv**2).sum() weight_decay *= decay_g cost += weight_decay # after any regularizer print 'Building f_cost...', f_cost = theano.function(inps, cost, profile=False) print 'Done' print 'Building multilingual sentence encoders' trng, alls_se = build_sentence_encoders(tparams, model_options) f_sencs = [] for inps_se in alls_se: #print "sentence encoder input", inps_se inp_se, sentences = inps_se f_senc = theano.function(inp_se, sentences, profile=False) f_sencs.append(f_senc) print 'Building image encoder' trng, inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', sys.stdout.flush() grads = tensor.grad(cost, wrt=itemlist(tparams)) f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False) f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()], profile=False) if grad_clip > 0.: g2 = 0. for g in grads: g2 += (g**2).sum() new_grads = [] for g in grads: new_grads.append( tensor.switch(g2 > (grad_clip**2), g / tensor.sqrt(g2) * grad_clip, g)) grads = new_grads lr = tensor.scalar(name='lr') print 'Building optimizers...', sys.stdout.flush() # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Optimization' # create training set iterator where # a heuristic tries to make sure sentences in minibatch have a similar size train_iter = homogeneous_data_multilingual.HomogeneousDataMultilingual( train, batch_size=batch_size, maxlen=maxlen_w) uidx = 0 curr_best_model = None best_model_changed = True curr_best_score = 0. curr_best_rank = 1e10 curr_ranks_langs = [1e10] * len(model_options['langs']) curr_scores_langs = [0.] * len(model_options['langs']) n_samples = 0 ep_start = time.time() ep_times = [ep_start] for eidx in xrange(max_epochs): print 'Epoch ', eidx for xs, im in train_iter: uidx += 1 xs, masks, im = homogeneous_data_multilingual.prepare_data(xs, im, \ worddicts, \ model_options=model_options, \ maxlen=maxlen_w) if xs[0] is None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue # do not train on certain small sentences (less than 3 words) #if not x_src.shape[0]>=minlen_w and x_tgt.shape[0]>= minlen_w: #if not all( x.shape[0]>=minlen_w for x in xs ): # print "At least one minibatch (in one of the languages in the model)", # print "has less words than %i. Skipping..."%minlen_w # skipped_samples += xs[0].shape[1] # uidx -= 1 # continue n_samples += len(xs[0]) # Update ud_start = time.time() # flatten inputs for theano function inps_ = [] inps_.extend(xs) inps_.extend(masks) inps_.append(im) #cost = f_grad_shared(xs, masks, im) cost = f_grad_shared(*inps_) f_update(lrate) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud if numpy.mod(uidx, validFreq) == 0: print 'Computing results...' 
curr_model = {} curr_model['options'] = model_options # store model's language dependent parameters for lang, worddict, iworddict, f_senc in zip( langs, worddicts, iworddicts, f_sencs): curr_model['worddict_%s' % lang] = worddict curr_model['wordidict_%s' % lang] = iworddict curr_model['f_senc_%s' % lang] = f_senc curr_model['f_ienc'] = f_ienc # up-to-date model parameters params_ = unzip(tparams) # encode sentences lss = [] for lang_idx, lang in enumerate(model_options['langs']): ls = encode_multilingual_sentences(curr_model, dev[0][lang_idx], lang=lang) lss.append(ls) # encode images n_sentences_per_image = int(dev[2]) if len(dev) == 3 else 1 dev_img_feats = numpy.repeat(dev[1], n_sentences_per_image, axis=0).astype('float32') lim = encode_images(curr_model, dev_img_feats) # print scores for each language pair for lang_idx1, lang1 in enumerate(model_options['langs']): for lang_idx2, lang2 in enumerate(model_options['langs']): if lang_idx1 == lang_idx2 or lang_idx2 <= lang_idx1: continue sent_sent = None # if attention type is general, pass on the mapping matrix if attention_type == 'general': sent_sent = params_[ 'sentence_%i_sentence_%i_mapping' % (lang_idx1, lang_idx2)] # text en to text de and vice-versa (r1, r5, r10, medr, medr_double) = \ t2t(lss[ lang_idx1 ], lss[ lang_idx2 ], n_sentences_per_image=n_sentences_per_image, attention_type=attention_type, sent_sent=sent_sent) print "%s text to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % ( lang1, lang2, r1, r5, r10, medr, medr_double) (r1, r5, r10, medr, medr_double) = \ t2t(lss[ lang_idx2 ], lss[ lang_idx1 ], n_sentences_per_image=n_sentences_per_image, attention_type=attention_type, sent_sent=(sent_sent.T if sent_sent is not None else sent_sent)) print "%s text to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % ( lang2, lang1, r1, r5, r10, medr, medr_double) # compute scores currranks = 0. # the lower the better currscore = 0. # the higher the better for lang_idx1, lang1 in enumerate(model_options['langs']): sent_img = None # if attention type is general, pass on the mapping matrix if attention_type == 'general': sent_img = params_['image_sentence_%i_mapping' % lang_idx1] (r1, r5, r10, medr, medr_double) = \ i2t(lim, lss[ lang_idx1 ], n_sentences_per_image=n_sentences_per_image, attention_type=attention_type, sent_img=sent_img) print "Image to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % ( lang1, r1, r5, r10, medr, medr_double) (r1i, r5i, r10i, medri, medr_doublei) = \ t2i(lim, lss[ lang_idx1 ], n_sentences_per_image=n_sentences_per_image, attention_type=attention_type, sent_img=sent_img) print "%s text to image: %.1f, %.1f, %.1f, %d (%.2f)" % ( lang1, r1i, r5i, r10i, medri, medr_doublei) # adjust current overall score including all languages currranks += medr_double + medr_doublei currscore += r1 + r5 + r10 + r1i + r5i + r10i # best current score for individual language/image pair currranks_lang = medr_double + medr_doublei currscore_lang = r1 + r5 + r10 + r1i + r5i + r10i # first, we select the model that ranks best (median rank). # second, if there is a tie, we select the model that has best sum of scores (recall@k). if currranks_lang < curr_ranks_langs[lang_idx1]: curr_ranks_langs[lang_idx1] = currranks_lang curr_scores_langs[lang_idx1] = currscore_lang # save model print 'saving best %s...' 
% lang1, #params = unzip(tparams) numpy.savez('%s.best-%s' % (saveto, lang1), **params_) pkl.dump( model_options, open('%s.best-%s.pkl' % (saveto, lang1), 'wb')) print 'done' elif currranks_lang == curr_ranks_langs[lang_idx1]: print '%s ranks are equal the current best (=%i) ...' % ( lang1, currranks_lang) if currscore_lang > curr_scores_langs[lang_idx1]: curr_scores_langs[lang_idx1] = currscore_lang # save model print 'saving best %s...' % lang1, #params = unzip(tparams) numpy.savez('%s.best-%s' % (saveto, lang1), **params_) pkl.dump( model_options, open('%s.best-%s.pkl' % (saveto, lang1), 'wb')) print 'done' if currranks < curr_best_rank: curr_best_rank = currranks curr_best_score = currscore best_model_changed = True # Save model print 'Saving best overall model (%s)...' % str("-".join( model_options['langs'])), params = unzip(tparams) numpy.savez(saveto, **params) pkl.dump(model_options, open('%s.pkl' % saveto, 'wb')) print 'Done' elif currranks == curr_best_rank: print 'global ranks are equal to the current best (=%i)...' % int( currranks) if currscore > curr_best_score: # adjust current best overall score if needed curr_best_score = currscore best_model_changed = True # Save model print 'Saving best overall model (%s)...' % str( "-".join(model_options['langs'])), params = unzip(tparams) numpy.savez(saveto, **params) pkl.dump(model_options, open('%s.pkl' % saveto, 'wb')) print 'Done' if numpy.mod(uidx, testFreq) == 0: if not best_model_changed: print '.. Best model on valid set did not change from previous evaluation on test set...' print '' else: print '.. Computing results on test set...' # update current best model (on the valid set) best_model_dev = load_model(saveto, verbose=False) # encode sentences lss = [] for lang_idx, lang in enumerate(model_options['langs']): ls = encode_multilingual_sentences(best_model_dev, test[0][lang_idx], lang=lang) lss.append(ls) #ls = encode_multilingual_sentences(best_model_dev, test[0]) n_sentences_per_image = test[2] if len(test) == 3 else 1 test_img_feats = numpy.repeat(test[1], n_sentences_per_image, axis=0).astype('float32') lim = encode_images(best_model_dev, test_img_feats) for lang_idx1, lang1 in enumerate(model_options['langs']): for lang_idx2, lang2 in enumerate( model_options['langs']): if lang_idx1 == lang_idx2 or lang_idx2 <= lang_idx1: continue sent_sent = None # if attention type is general, pass on the mapping matrix if attention_type == 'general': sent_sent = best_model_dev['tparams'][ 'sentence_%i_sentence_%i_mapping' % (lang_idx1, lang_idx2)] (r1, r5, r10, medr, medr_double) = \ t2t(lss[ lang_idx1 ], lss[ lang_idx2 ], n_sentences_per_image=n_sentences_per_image, attention_type=attention_type, sent_sent=sent_sent) print ".. %s text to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % ( lang1, lang2, r1, r5, r10, medr, medr_double) (r1, r5, r10, medr, medr_double) = \ t2t(lss[ lang_idx2 ], lss[ lang_idx1 ], n_sentences_per_image=n_sentences_per_image, attention_type=attention_type, sent_sent=(sent_sent.T if sent_sent is not None else sent_sent)) print ".. 
%s text to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % ( lang2, lang1, r1, r5, r10, medr, medr_double) #for i in range(len(lss)): for lang_idx1, lang1 in enumerate(model_options['langs']): sent_img = None # if attention type is general, pass on the mapping matrix if attention_type == 'general': sent_img = best_model_dev['tparams'][ 'image_sentence_%i_mapping' % lang_idx1] (r1, r5, r10, medr, medr_double) = i2t( lim, lss[lang_idx1], n_sentences_per_image=n_sentences_per_image, attention_type=attention_type, sent_img=sent_img) print ".. Image to %s text: %.1f, %.1f, %.1f, %d (%.2f)" % ( lang1, r1, r5, r10, medr, medr_double) (r1i, r5i, r10i, medri, medr_doublei) = t2i( lim, lss[lang_idx1], n_sentences_per_image=n_sentences_per_image, attention_type=attention_type, sent_img=sent_img) print ".. %s text to image: %.1f, %.1f, %.1f, %d (%.2f)" % ( lang1, r1i, r5i, r10i, medri, medr_doublei) best_model_changed = False print '' ep_end = time.time() ep_times.append(ep_end) #print 'Seen %d samples'%n_samples seconds = ep_times[-1] - ep_times[0] m, s = divmod(seconds, 60) h, m = divmod(m, 60) print "Seen %i epoch(s) (%i samples) in %d:%02d:%02d" % ( eidx, n_samples, h, m, s) seconds = ep_times[-1] - ep_times[0] m, s = divmod(seconds, 60) h, m = divmod(m, 60) print "Finished execution in %d:%02d:%02d" % (h, m, s)
def train(margin=0.2, dim=300, dim_word=300, max_epochs=100, dispFreq=50, validFreq=200, grad_clip=2.0, maxlen_w=150, batch_size=300, early_stop=20, lrate=0.001, reload_=False, load_dict=False): # Model options model_options = {} model_options['UM_Corpus'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_word'] = dim_word model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ print(model_options) # reload options if reload_ and os.path.exists(saveto): print('reloading...' + saveto) with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) # Load training and development sets print('loading dataset') train, dev = load_dataset(data) test = load_dataset(data, load_test=True) if load_dict: with open('%s.dictionary.pkl' % saveto, 'rb') as f: worddict = pkl.load(f) n_words = len(worddict) model_options['n_words'] = len(worddict) else: # Create and save dictionary print('Create dictionary') worddict = build_dictionary(train[0] + train[1] + dev[0] + dev[1]) n_words = len(worddict) model_options['n_words'] = n_words print('Dictionary size: ' + str(n_words)) with open('%s.dictionary_%s.pkl' % (saveto, run), 'wb') as f: pkl.dump(worddict, f) # # Inverse dictionary # word_idict = dict() # for kk, vv in worddict.iteritems(): # word_idict[vv] = kk # word_idict[0] = '<eos>' # word_idict[1] = 'UNK' model_options['worddict'] = worddict # model_options['word_idict'] = word_idict # # Each sentence in the minibatch have same length (for encoder) # train_iter = HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) share_model = LIUMCVC_Encoder(model_options) # gpus = [0, 1, 2, 3] # share_model = torch.nn.DataParallel(share_model, device_ids=gpus) share_model = share_model.cuda() loss_fn = PairwiseRankingLoss(margin=margin) loss_fn = loss_fn.cuda() params = filter(lambda p: p.requires_grad, share_model.parameters()) optimizer = torch.optim.Adam(params, lrate) # decrease learning rate scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=10) uidx = 0 curr = 1e10 n_samples = 0 # For Early-stopping best_step = 0 for eidx in xrange(1, max_epochs + 1): print('Epoch ', eidx) train_data_index = prepare_data(train, worddict, n_words) for en, cn, en_lengths, cn_lengths, en_index, cn_index in data_generator( train_data_index, batch_size): uidx += 1 n_samples += len(en) en, cn = share_model(en, en_lengths, en_index, cn, cn_lengths, cn_index) loss = loss_fn(en, cn) optimizer.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm(params, grad_clip) optimizer.step() if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, '\tUpdate ', uidx, '\tCost ', loss.data.cpu( ).numpy()[0] if numpy.mod(uidx, validFreq) == 0: print 'Computing results...' 
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                # curr_model['word_idict'] = word_idict
                curr_model['en_cn_model'] = share_model

                r_time = time.time()
                fen, fcn = encode_sentences(curr_model, dev)
                score = devloss(fen, fcn, margin=margin)
                fen, fcn = encode_sentences(curr_model, test, test=True)
                test_score = devloss(fen, fcn, margin=margin)
                print "Cal Recall@K using %ss" % (time.time() - r_time)

                curr_step = uidx / validFreq
                #scheduler.step(score)
                currscore = score
                print 'loss on dev', score
                print 'loss on test', test_score

                if currscore < curr:
                    curr = currscore
                    # best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr
                    # best_r1i, best_r5i, best_r10i, best_medri = r1i, r5i, r10i, medri
                    best_step = curr_step

                    # Save model
                    print 'Saving model...',
                    pkl.dump(model_options,
                             open('%s_params_%s.pkl' % (saveto, run), 'wb'))
                    torch.save(share_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, run))
                    print 'Done'

                if curr_step - best_step > early_stop:
                    print 'Early stopping ...'
                    # print "cn to en: %.1f, %.1f, %.1f, %.1f" % (best_r1, best_r5, best_r10, best_medr)
                    # print "en to cn: %.1f, %.1f, %.1f, %.1f" % (best_r1i, best_r5i, best_r10i, best_medri)
                    return

        print 'Seen %d samples' % n_samples
def trainer(data='coco', #f8k, f30k, coco margin=0.2, dim=1024, dim_image=4096, dim_word=300, encoder='gru', # gru OR bow max_epochs=15, dispFreq=10, decay_c=0., grad_clip=2., maxlen_w=100, optimizer='adam', batch_size = 128, saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz', validFreq=100, lrate=0.0002, reload_=False): # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['encoder'] = encoder model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['optimizer'] = optimizer model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ print model_options # reload options if reload_ and os.path.exists(saveto): print 'reloading...' + saveto with open('%s.pkl'%saveto, 'rb') as f: models_options = pkl.load(f) # Load training and development sets print 'Loading dataset' train, dev = load_dataset(data)[:2] # Create and save dictionary print 'Creating dictionary' worddict = build_dictionary(train[0]+dev[0])[0] n_words = len(worddict) model_options['n_words'] = n_words print 'Dictionary size: ' + str(n_words) with open('%s.dictionary.pkl'%saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' print 'Building model' params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto): params = load_params(saveto, params) tparams = init_tparams(params) trng, inps, cost = build_model(tparams, model_options) # before any regularizer print 'Building f_log_probs...', f_log_probs = theano.function(inps, cost, profile=False) print 'Done' # weight decay, if applicable if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv ** 2).sum() weight_decay *= decay_c cost += weight_decay # after any regularizer print 'Building f_cost...', f_cost = theano.function(inps, cost, profile=False) print 'Done' print 'Building sentence encoder' trng, inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print 'Building image encoder' trng, inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False) f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False) if grad_clip > 0.: g2 = 0. for g in grads: g2 += (g**2).sum() new_grads = [] for g in grads: new_grads.append(tensor.switch(g2 > (grad_clip**2), g / tensor.sqrt(g2) * grad_clip, g)) grads = new_grads lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Optimization' # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) uidx = 0 curr = 0. 
    n_samples = 0

    for eidx in xrange(max_epochs):
        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x, im, worddict,
                                                        maxlen=maxlen_w,
                                                        n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:
                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                    print 'Done'

        print 'Seen %d samples' % n_samples
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs): """ :param load_from: location to load parameters + options from :param name: name of model, used as location to save parameters + options """ curr_model = dict() # load old model, including parameters, but overwrite with new options if load_from: print 'reloading...' + load_from with open('%s.pkl' % load_from, 'rb') as f: curr_model = pkl.load(f) else: curr_model['options'] = {} for k, v in kwargs.iteritems(): curr_model['options'][k] = v model_options = curr_model['options'] # initialize logger import datetime timestampedName = datetime.datetime.now().strftime( '%Y_%m_%d_%H_%M_%S') + '_' + name from logger import Log log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training', xLabel='Examples Seen', saveFrequency=1) print curr_model['options'] # Load training and development sets print 'Loading dataset' dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True) train = dataset['train'] dev = dataset['dev'] # Create dictionary print 'Creating dictionary' worddict = build_dictionary(train['caps'] + dev['caps']) print 'Dictionary size: ' + str(len(worddict)) curr_model['worddict'] = worddict curr_model['options']['n_words'] = len(worddict) + 2 # save model pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb')) print 'Loading data' train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict) dev = datasource.Datasource(dev, worddict=worddict) dev_caps, dev_ims = dev.all() print 'Building model' params = init_params(model_options) # reload parameters if load_from is not None and os.path.exists(load_from): params = load_params(load_from, params) tparams = init_tparams(params) inps, cost = build_model(tparams, model_options) print 'Building sentence encoder' inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print 'Building image encoder' inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) print 'Building errors..' inps_err, errs = build_errors(model_options) f_err = theano.function(inps_err, errs, profile=False) curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc curr_model['f_err'] = f_err if model_options['grad_clip'] > 0.: grads = [maxnorm(g, model_options['grad_clip']) for g in grads] lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost) print 'Optimization' uidx = 0 curr = 0 n_samples = 0 for eidx in xrange(model_options['max_epochs']): print 'Epoch ', eidx for x, mask, im in train_iter: n_samples += x.shape[1] uidx += 1 # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(model_options['lrate']) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, model_options['dispFreq']) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud log.update({'Error': float(cost)}, n_samples) if numpy.mod(uidx, model_options['validFreq']) == 0: print 'Computing results...' 
# encode sentences efficiently dev_s = encode_sentences( curr_model, dev_caps, batch_size=model_options['batch_size']) dev_i = encode_images(curr_model, dev_ims) # compute errors dev_errs = compute_errors(curr_model, dev_s, dev_i) # compute ranking error (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True) (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs) print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % ( r1, r5, r10, medr, meanr) log.update( { 'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr }, n_samples) print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % ( r1i, r5i, r10i, medri, meanri) log.update( { 'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2CaptionR@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri }, n_samples) tot = r1 + r5 + r10 if tot > curr: curr = tot # Save parameters print 'Saving...', numpy.savez('%s/%s' % (save_dir, name), **unzip(tparams)) print 'Done' vis_details['hyperparams'] = model_options # Save visualization details with open( 'vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f: json.dump(vis_details, f) # Add the new model to the index try: index = json.load(open('vis/roc/index.json', 'r')) except IOError: index = {model_options['data']: []} models = index[model_options['data']] if timestampedName not in models: models.append(timestampedName) with open('vis/roc/index.json', 'w') as f: json.dump(index, f) print 'Seen %d samples' % n_samples
# coding: utf-8
import vocab
import train
import tools
import numpy as np

with open("../../wikipedia_txt/result_wakati.txt") as f:
    fdata = [line.rstrip() for i, line in enumerate(f)]
print '# lines: ', len(fdata)

# Build and save the vocabulary, then train the skip-thought model
worddict, wordcount = vocab.build_dictionary(fdata)
vocab.save_dictionary(worddict, wordcount, "word_dict")
print '# vocab: ', len(worddict)

train.trainer(fdata, dictionary="word_dict", saveFreq=100, saveto="model",
              reload_=True, n_words=40000)

# Encode the corpus with the trained model and save the sentence vectors
model = tools.load_model()
vectors = tools.encode(model, fdata, use_norm=False)
np.savez('vecs.npz', vectors)
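# np.savez above stores the encoded sentence vectors under the default key
# 'arr_0'. A minimal sketch of loading them back (assumption: nothing else was
# added to the archive):
vecs = np.load('vecs.npz')['arr_0']
print '# vectors: ', len(vecs)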
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs): """ :param load_from: location to load parameters + options from :param name: name of model, used as location to save parameters + options """ curr_model = dict() # load old model, including parameters, but overwrite with new options if load_from: print 'reloading...' + load_from with open('%s.pkl'%load_from, 'rb') as f: curr_model = pkl.load(f) else: curr_model['options'] = {} for k, v in kwargs.iteritems(): curr_model['options'][k] = v model_options = curr_model['options'] # initialize logger import datetime timestampedName = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '_' + name from logger import Log log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training', xLabel='Examples Seen', saveFrequency=1) print curr_model['options'] # Load training and development sets print 'Loading dataset' dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True) train = dataset['train'] dev = dataset['dev'] # Create dictionary print 'Creating dictionary' worddict = build_dictionary(train['caps']+dev['caps']) print 'Dictionary size: ' + str(len(worddict)) curr_model['worddict'] = worddict curr_model['options']['n_words'] = len(worddict) + 2 # save model pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb')) print 'Loading data' train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict) dev = datasource.Datasource(dev, worddict=worddict) dev_caps, dev_ims = dev.all() print 'Building model' params = init_params(model_options) # reload parameters if load_from is not None and os.path.exists(load_from): params = load_params(load_from, params) tparams = init_tparams(params) inps, cost = build_model(tparams, model_options) print 'Building sentence encoder' inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print 'Building image encoder' inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) print 'Building errors..' inps_err, errs = build_errors(model_options) f_err = theano.function(inps_err, errs, profile=False) curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc curr_model['f_err'] = f_err if model_options['grad_clip'] > 0.: grads = [maxnorm(g, model_options['grad_clip']) for g in grads] lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost) print 'Optimization' uidx = 0 curr = 0 n_samples = 0 for eidx in xrange(model_options['max_epochs']): print 'Epoch ', eidx for x, mask, im in train_iter: n_samples += x.shape[1] uidx += 1 # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(model_options['lrate']) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, model_options['dispFreq']) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud log.update({'Error': float(cost)}, n_samples) if numpy.mod(uidx, model_options['validFreq']) == 0: print 'Computing results...' 
# encode sentences efficiently dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size']) dev_i = encode_images(curr_model, dev_ims) # compute errors dev_errs = compute_errors(curr_model, dev_s, dev_i) # compute ranking error (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True) (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs) print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr) log.update({'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr}, n_samples) print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri) log.update({'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2CaptionR@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri}, n_samples) tot = r1 + r5 + r10 if tot > curr: curr = tot # Save parameters print 'Saving...', numpy.savez('%s/%s'%(save_dir, name), **unzip(tparams)) print 'Done' vis_details['hyperparams'] = model_options # Save visualization details with open('vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f: json.dump(vis_details, f) # Add the new model to the index index = json.load(open('vis/roc/index.json', 'r')) models = index[model_options['data']] if timestampedName not in models: models.append(timestampedName) with open('vis/roc/index.json', 'w') as f: json.dump(index, f) print 'Seen %d samples'%n_samples
def trainer(data='f30k', margin=0.2, dim=1024, dim_image=4096, dim_word=300, max_epochs=15, encoder='lstm', dispFreq=10, grad_clip=2.0, maxlen_w=150, batch_size=128, saveto='vse/f30K', validFreq=100, early_stop=20, lrate=1e-3, reload_=False): # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ logging.info(model_options) # reload options if reload_ and os.path.exists(saveto): logging.info('reloading...' + saveto) with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) # Load training and development sets logging.info('loading dataset') titles, album_ims, artist, genre = load_dataset(data) artist_string = artist genre_string = genre # Create and save dictionary if os.path.exists('%s.dictionary.pkl' % saveto): logging.info('loading dict from...' + saveto) with open('%s.dictionary.pkl' % saveto, 'rb') as wdict: worddict = pkl.load(wdict) n_words = len(worddict) model_options['n_words'] = n_words logging.info('Dictionary size: ' + str(n_words)) else: logging.info('Create dictionary') worddict = build_dictionary(titles + artist + genre)[0] n_words = len(worddict) model_options['n_words'] = n_words logging.info('Dictionary words: ' + str(n_words)) with open('%s.dictionary.pkl' % saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.items(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' model_options['worddict'] = worddict model_options['word_idict'] = word_idict # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData( [titles, album_ims, artist, genre], batch_size=batch_size, maxlen=maxlen_w) img_sen_model = Img_Sen_Artist_Ranking(model_options) # todo code to load saved model dict if os.path.exists('%s_model_%s.pkl' % (saveto, encoder)): logging.info('Loading model...') # pkl.dump(model_options, open('%s_params_%s.pkl' % (saveto, encoder), 'wb')) img_sen_model.load_state_dict( torch.load('%s_model_%s.pkl' % (saveto, encoder))) logging.info('Done') img_sen_model = img_sen_model.cuda() loss_fn = PairwiseRankingLoss(margin=margin).cuda() params = filter(lambda p: p.requires_grad, img_sen_model.parameters()) optimizer = torch.optim.Adam(params, lr=lrate) scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=40, mode='min', verbose=True, threshold=1e-8) uidx = 0 curr = 0.0 n_samples = 0 # For Early-stopping best_r1, best_r5, best_r10, best_medr = 0.0, 0.0, 0.0, 0 best_step = 0 writer = SummaryWriter() for eidx in range(max_epochs): for x, im, artist, genre in train_iter: n_samples += len(x) uidx += 1 x, im, artist, genre = homogeneous_data.prepare_data( x, im, artist, genre, worddict, maxlen=maxlen_w, n_words=n_words) if x is None: logging.info('Minibatch with zero sample under length ', maxlen_w) uidx -= 1 continue x = Variable(torch.from_numpy(x).cuda()) im = Variable(torch.from_numpy(im).cuda()) artist = Variable(torch.from_numpy(artist).cuda()) genre = Variable(torch.from_numpy(genre).cuda()) # Update x1, im1, artist, genre = img_sen_model(x, im, artist, genre) #make validation 
on input before the trainer sees it if numpy.mod(uidx, validFreq) == 0: img_sen_model.eval() with torch.no_grad(): print('Epoch ', eidx, '\tUpdate@ ', uidx, '\tCost ', cost.data.item()) writer.add_scalar('Evaluation/Validation_Loss', cost.data.item(), uidx) (r1, r5, r10, medr) = i2t(im1, x) #distances with l2norm logging.info("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)) (r1g, r5g, r10g, medrg) = i2t(im1, genre) logging.info("Image to genre: %.1f, %.1f, %.1f, %.1f" % (r1g, r5g, r10g, medrg)) (r1a, r5a, r10a, medra) = i2t(im1, artist) logging.info("Image to Artist: %.1f, %.1f, %.1f, %.1f" % (r1a, r5a, r10a, medra)) logging.info("Calculating Recall@K") writer.add_scalars('Validation Recall/Image2Album', { 'r@1': r1, 'r@5': r5, 'r@10': r10 }, uidx) writer.add_scalars('Validation Recall/Image2Genres', { 'r@1': r1g, 'r@5': r5g, 'r@10': r10g }, uidx) writer.add_scalars('Validation Recall/Image2Artist', { 'r@1': r1a, 'r@5': r5a, 'r@10': r10a }, uidx) curr_step = uidx / validFreq currscore = r1 + r5 + r10 + r1a + r5a + r10a + r1g + r5g + r10g - medr - medrg - medra if currscore > curr: curr = currscore best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr best_r1g, best_r5g, best_r10g, best_medrg = r1g, r5g, r10g, medrg best_step = curr_step # Save model logging.info('Saving model...') pkl.dump( model_options, open('%s_params_%s.pkl' % (saveto, encoder), 'wb')) torch.save(img_sen_model.state_dict(), '%s_model_%s.pkl' % (saveto, encoder)) logging.info('Done') if curr_step - best_step > early_stop: logging.info('No improvement for %d validation steps, lowering learning rate...' % early_stop) logging.info("Image to text: %.1f, %.1f, %.1f, %.1f" % (best_r1, best_r5, best_r10, best_medr)) logging.info( "Image to genre: %.1f, %.1f, %.1f, %.1f" % (best_r1g, best_r5g, best_r10g, best_medrg)) #return 0 lrate = 1e-4 for param_group in optimizer.param_groups: param_group['lr'] = lrate img_sen_model.train() cost = loss_fn(im1, x1, artist, genre) writer.add_scalar('Evaluation/training_Loss', cost, uidx) optimizer.zero_grad() cost.backward() torch.nn.utils.clip_grad_norm_(params, grad_clip) optimizer.step() scheduler.step(cost.data.item()) logging.info('Seen %d samples' % n_samples)
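# The album/artist/genre trainer above uses a PairwiseRankingLoss imported from
# elsewhere. The sketch below is a plausible minimal PyTorch version of such a
# margin-based ranking loss for one pair of embedding matrices; the class name
# and the way the real loss weights the extra artist/genre terms are
# assumptions, not taken from this code.
import torch
import torch.nn as nn

class PairwiseRankingSketch(nn.Module):
    def __init__(self, margin=0.2):
        super(PairwiseRankingSketch, self).__init__()
        self.margin = margin

    def forward(self, im, s):
        # im, s: (batch, dim) embeddings; similarity is the dot product,
        # and matching pairs sit on the diagonal of the score matrix.
        scores = im.mm(s.t())
        diag = scores.diag().view(-1, 1)
        cost_s = (self.margin + scores - diag).clamp(min=0)        # image as query
        cost_im = (self.margin + scores - diag.t()).clamp(min=0)   # sentence as query
        mask = (torch.eye(scores.size(0)) > 0.5).to(scores.device)
        return cost_s.masked_fill(mask, 0).sum() + cost_im.masked_fill(mask, 0).sum()

# loss = PairwiseRankingSketch(margin=0.2)(torch.randn(8, 1024), torch.randn(8, 1024))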
def trainer( data='coco', #f8k, f30k, coco margin=0.2, dim=1024, dim_image=4096, dim_word=300, encoder='gru', # gru OR bow max_epochs=15, dispFreq=10, decay_c=0., grad_clip=2., maxlen_w=100, optimizer='adam', batch_size=128, saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz', validFreq=100, lrate=0.0002, reload_=False): # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['encoder'] = encoder model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['optimizer'] = optimizer model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ print(model_options) # reload options if reload_ and os.path.exists(saveto): print('reloading...' + saveto) with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) # Load training and development sets print('Loading dataset') train, dev = load_dataset(data)[:2] # Create and save dictionary print('Creating dictionary') worddict = build_dictionary(train[0] + dev[0])[0] n_words = len(worddict) model_options['n_words'] = n_words print('Dictionary size: ' + str(n_words)) with open('%s.dictionary.pkl' % saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' print('Building model') params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto): params = load_params(saveto, params) tparams = init_tparams(params) trng, inps, cost = build_model(tparams, model_options) # before any regularizer print('Building f_log_probs...', ) f_log_probs = theano.function(inps, cost, profile=False) print('Done') # weight decay, if applicable if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv**2).sum() weight_decay *= decay_c cost += weight_decay # after any regularizer print('Building f_cost...', ) f_cost = theano.function(inps, cost, profile=False) print('Done') print('Building sentence encoder') trng, inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print('Building image encoder') trng, inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print('Building f_grad...', ) grads = tensor.grad(cost, wrt=itemlist(tparams)) f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False) f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()], profile=False) if grad_clip > 0.: g2 = 0.
for g in grads: g2 += (g**2).sum() new_grads = [] for g in grads: new_grads.append( tensor.switch(g2 > (grad_clip**2), g / tensor.sqrt(g2) * grad_clip, g)) grads = new_grads lr = tensor.scalar(name='lr') print('Building optimizers...', ) # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print('Optimization') # Each sentence in the minibatch has the same length (for encoder) train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) uidx = 0 curr = 0. n_samples = 0 for eidx in xrange(max_epochs): print('Epoch ', eidx) for x, im in train_iter: n_samples += len(x) uidx += 1 x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words) if x is None: print('Minibatch with zero sample under length ', maxlen_w) uidx -= 1 continue # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(lrate) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print('NaN detected') return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud) if numpy.mod(uidx, validFreq) == 0: print('Computing results...') curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = worddict curr_model['word_idict'] = word_idict curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc ls = encode_sentences(curr_model, dev[0]) lim = encode_images(curr_model, dev[1]) (r1, r5, r10, medr) = i2t(lim, ls) print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)) (r1i, r5i, r10i, medri) = t2i(lim, ls) print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)) currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr: curr = currscore # Save model print('Saving...', ) params = unzip(tparams) numpy.savez(saveto, **params) pkl.dump(model_options, open('%s.pkl' % saveto, 'wb')) print('Done') print('Seen %d samples' % n_samples)
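# The Theano trainers clip gradients by their joint L2 norm with tensor.switch:
# when the global norm exceeds grad_clip, every gradient is rescaled by
# grad_clip / norm, otherwise it is left untouched. The same rule in plain
# NumPy, as a sketch (the function name is ours, not from this code):
import numpy

def clip_by_global_norm(grads, clip):
    g2 = sum((g ** 2).sum() for g in grads)
    norm = numpy.sqrt(g2)
    if clip <= 0. or norm <= clip:
        return grads
    return [g * (clip / norm) for g in grads]

# Example: gradients [3.0] and [4.0] have global norm 5.0; clipping to 2.0
# rescales them to [1.2] and [1.6], preserving their direction.
# clip_by_global_norm([numpy.array([3.0]), numpy.array([4.0])], 2.0)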
def trainer(**kwargs): """ Train the model according to input params Info about input params is available in parameters.py """ # Timing print('Starting time:', datetime.now()) sys.stdout.flush() t_start_train = time.time() # Model options # load old model, including parameters, but overwrite with new options # Extract model options from arguments model_options = {} for k, v in kwargs.iteritems(): model_options[k] = v # Print input options print('PARAMETERS BEFORE LOADING:') for k, v in model_options.items(): print('{:>26}: {}'.format(k, v)) sys.stdout.flush() # Reload options if required curr_model = dict() if model_options['reload_']: # Reload model parameters opt_filename_reload = get_opt_filename(model_options, previous=True) print('reloading...', opt_filename_reload) sys.stdout.flush() try: with open(opt_filename_reload, 'rb') as f: curr_model = pkl.load(f) except: print( 'Failed to reload parameters, try to use only feeded parameters' ) curr_model['options'] = {} # Check if we reload from best model or last model if model_options['load_from'] in ['Best', 'best', 'B', 'b']: load_from_best = True print('Loading from Best saved model in validation results') elif model_options['load_from'] in ['Last', 'last', 'L', 'l']: load_from_best = False print('Loading from Last saved model') else: print('Unkown choice for "load_from" parameter', model_options['load_from']) print('Please choose one of:', ['Best', 'best', 'B', 'b'], ['Last', 'last', 'L', 'l']) print('Using Last as default') load_from_best = False # Reload end-point parameters state_filename = get_sol_filename(model_options, best=load_from_best, previous=True) print('reloading...', state_filename) sys.stdout.flush() try: with open(state_filename, 'rb') as f: state_params = pkl.load(f) if load_from_best: init_epoch = state_params['epoch'] solution = state_params else: init_epoch = state_params['epoch_done'] + 1 solution = state_params['solution'] best_val_score = solution['best_val_score'] n_samples = solution['samples_seen'] except: print('Failed to reload state parameters, starting from 0') init_epoch = 0 best_val_score = 0 n_samples = 0 else: curr_model['options'] = {} init_epoch = 0 best_val_score = 0 n_samples = 0 # Overwrite loaded options with input options for k, v in kwargs.iteritems(): curr_model['options'][k] = v model_options = curr_model['options'] # Print final options loaded if model_options['reload_']: print('PARAMETERS AFTER LOADING:') for k, v in model_options.items(): print('{:>26}: {}'.format(k, v)) sys.stdout.flush() # Load training and development sets print('Loading dataset') sys.stdout.flush() dataset = load_dataset(dataset_name=model_options['data'], embedding=model_options['embedding'], path_to_data=model_options['data_path'], test_subset=model_options['test_subset'], load_train=True, fold=0) train = dataset['train'] dev = dataset['val'] # Create word dictionary print('Creating dictionary') sys.stdout.flush() worddict = build_dictionary(train['caps'] + dev['caps']) print('Dictionary size: ' + str(len(worddict))) sys.stdout.flush() curr_model['worddict'] = worddict curr_model['options']['n_words'] = len(worddict) + 2 # save model opt_filename_save = get_opt_filename(model_options, previous=False) print('Saving model parameters in', opt_filename_save) sys.stdout.flush() try: os.makedirs(os.path.dirname(opt_filename_save)) except: pass pkl.dump(curr_model, open(opt_filename_save, 'wb')) # Load data from dataset print('Loading data') sys.stdout.flush() train_iter = datasource.Datasource(train, 
batch_size=model_options['batch_size'], worddict=worddict) dev = datasource.Datasource(dev, worddict=worddict) dev_caps, dev_ims = dev.all() print('Building model') sys.stdout.flush() params = init_params(model_options) # reload network parameters, ie. weights if model_options['reload_']: params_filename = get_npz_filename(model_options, best=load_from_best, previous=True) params = load_params(params_filename, params) tparams = init_tparams(params) inps, cost = build_model(tparams, model_options) print('Building sentence encoder') sys.stdout.flush() inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print('Building image encoder') sys.stdout.flush() inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print('Building f_grad...') sys.stdout.flush() grads = tensor.grad(cost, wrt=itemlist(tparams)) print('Building errors...') sys.stdout.flush() inps_err, errs = build_errors(model_options) f_err = theano.function(inps_err, errs, profile=False) curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc curr_model['f_err'] = f_err if model_options['grad_clip'] > 0.: grads = [maxnorm(g, model_options['grad_clip']) for g in grads] lr = tensor.scalar(name='lr') print('Building optimizers...') sys.stdout.flush() # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost) # Get names for the files to save model and solution sol_filename_best = get_sol_filename(model_options, best=True, previous=False) sol_filename_last = get_sol_filename(model_options, best=False, previous=False) params_filename_best = get_npz_filename(model_options, best=True, previous=False) params_filename_last = get_npz_filename(model_options, best=False, previous=False) print('PATHS TO MODELS:') for filename in [ sol_filename_best, sol_filename_last, params_filename_best, params_filename_last ]: print(filename) sys.stdout.flush() try: os.makedirs(os.path.dirname(filename)) except: pass # Start optimization print('Optimization') sys.stdout.flush() uidx = 0 # Timing t_start = time.time() print('Starting time:', datetime.now()) for eidx in range(init_epoch, model_options['max_epochs']): t_start_epoch = time.time() print('Epoch ', eidx) sys.stdout.flush() for x, mask, im in train_iter: n_samples += x.shape[1] uidx += 1 # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(model_options['lrate']) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print('NaN detected') sys.stdout.flush() return 1., 1., 1. 
if numpy.mod(uidx, model_options['dispFreq']) == 0: print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud) sys.stdout.flush() if numpy.mod(uidx, model_options['validFreq']) == 0: print('Computing results...') sys.stdout.flush() # encode sentences efficiently dev_s = encode_sentences( curr_model, dev_caps, batch_size=model_options['batch_size']) dev_i = encode_images(curr_model, dev_ims) # compute errors dev_errs = compute_errors(curr_model, dev_s, dev_i) # compute ranking error (r1, r5, r10, medr, meanr) = i2t(dev_errs) (r1i, r5i, r10i, medri, meanri) = t2i(dev_errs) print("Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)) sys.stdout.flush() print("Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)) sys.stdout.flush() # Score val_score = r1 + r5 + r10 + r1i + r5i + r10i if val_score > best_val_score: print('BEST MODEL FOUND') print('Score:', val_score) print('Previous best score:', best_val_score) best_val_score = val_score # Join in a results dict results_dict = build_results_dict(r1, r5, r10, medr, r1i, r5i, r10i, medri) # Save parameters print('Saving...', end=' ') sys.stdout.flush() numpy.savez(params_filename_best, **unzip(tparams)) print('Done') sys.stdout.flush() # Update solution solution = OrderedDict([ ('epoch', eidx), ('update', uidx), ('samples_seen', n_samples), ('best_val_score', best_val_score), ('best_val_res', results_dict), ('time_until_results', str(timedelta(seconds=(time.time() - t_start_train)))) ]) pkl.dump(solution, open(sol_filename_best, 'wb')) print('Seen %d samples' % n_samples) sys.stdout.flush() # Timing t_epoch = time.time() - t_start_epoch t_epoch_avg = (time.time() - t_start) / (eidx + 1 - (init_epoch)) print('Time for this epoch:', str(timedelta(seconds=t_epoch)), 'Average:', str(timedelta(seconds=t_epoch_avg))) t_2_complete = t_epoch_avg * (model_options['max_epochs'] - (eidx + 1)) print('Time since start session:', str(timedelta(seconds=time.time() - t_start)), 'Estimated time to complete training:', str(timedelta(seconds=t_2_complete))) print('Current time:', datetime.now()) sys.stdout.flush() # Save current model try: state_params = OrderedDict([('epoch_done', eidx), ('solution', solution)]) except: solution = OrderedDict([ ('epoch', eidx), ('update', uidx), ('samples_seen', n_samples), ('best_val_score', best_val_score), ('time_until_results', str(timedelta(seconds=(time.time() - t_start_train)))) ]) state_params = OrderedDict([('epoch_done', eidx), ('solution', solution)]) pkl.dump(state_params, open(sol_filename_last, 'wb')) # Save parameters print('Saving LAST npz...', end=' ') sys.stdout.flush() numpy.savez(params_filename_last, **unzip(tparams)) print('Done') sys.stdout.flush() return solution
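# This trainer can resume: the "last" solution pickle stores epoch_done plus a
# nested solution dict (best_val_score, samples_seen, ...), which is how
# init_epoch, best_val_score and n_samples are restored above. A compact sketch
# of that resume step, with a hypothetical file name (the real code builds the
# path with get_sol_filename):
import pickle

def load_resume_state(sol_filename_last):
    try:
        with open(sol_filename_last, 'rb') as f:
            state_params = pickle.load(f)
        solution = state_params['solution']
        return (state_params['epoch_done'] + 1,
                solution['best_val_score'],
                solution['samples_seen'])
    except (IOError, KeyError):
        return 0, 0, 0   # nothing usable on disk: start from scratch

# init_epoch, best_val_score, n_samples = load_resume_state('model.sol_last.pkl')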
def trainer(data='coco', margin=0.2, dim=1024, dim_image=4096, dim_word=300, max_epochs=15, encoder='lstm', dispFreq=10, grad_clip=2.0, maxlen_w=150, batch_size=128, saveto='vse/coco', validFreq=100, early_stop=20, lrate=0.0002, reload_=False): # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ print model_options # reload options if reload_ and os.path.exists(saveto): print 'reloading...' + saveto with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) # Load training and development sets print 'loading dataset' train, dev = load_dataset(data) # Create and save dictionary print 'Create dictionary' worddict = build_dictionary(train[0] + dev[0])[0] n_words = len(worddict) model_options['n_words'] = n_words print 'Dictionary size: ' + str(n_words) with open('%s.dictionary.pkl' % saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' model_options['worddict'] = worddict model_options['word_idict'] = word_idict # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) img_sen_model = ImgSenRanking(model_options) img_sen_model = img_sen_model.cuda() loss_fn = PairwiseRankingLoss(margin=margin) loss_fn = loss_fn.cuda() params = filter(lambda p: p.requires_grad, img_sen_model.parameters()) optimizer = torch.optim.Adam(params, lrate) uidx = 0 curr = 0.0 n_samples = 0 # For Early-stopping best_r1, best_r5, best_r10, best_medr = 0.0, 0.0, 0.0, 0 best_r1i, best_r5i, best_r10i, best_medri = 0.0, 0.0, 0.0, 0 best_step = 0 for eidx in xrange(max_epochs): print 'Epoch ', eidx for x, im in train_iter: n_samples += len(x) uidx += 1 x, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words) if x is None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue x = Variable(torch.from_numpy(x).cuda()) im = Variable(torch.from_numpy(im).cuda()) # Update x, im = img_sen_model(x, im) cost = loss_fn(im, x) optimizer.zero_grad() cost.backward() torch.nn.utils.clip_grad_norm(params, grad_clip) optimizer.step() if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, '\tUpdate ', uidx, '\tCost ', cost.data.cpu( ).numpy()[0] if numpy.mod(uidx, validFreq) == 0: print 'Computing results...' 
curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = worddict curr_model['word_idict'] = word_idict curr_model['img_sen_model'] = img_sen_model ls, lim = encode_sentences(curr_model, dev[0]), encode_images( curr_model, dev[1]) r_time = time.time() (r1, r5, r10, medr) = i2t(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri) print "Cal Recall@K using %ss" % (time.time() - r_time) curr_step = uidx / validFreq currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr: curr = currscore best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr best_r1i, best_r5i, best_r10i, best_medri = r1i, r5i, r10i, medri best_step = curr_step # Save model print 'Saving model...', pkl.dump( model_options, open('%s_params_%s.pkl' % (saveto, encoder), 'wb')) torch.save(img_sen_model.state_dict(), '%s_model_%s.pkl' % (saveto, encoder)) print 'Done' if curr_step - best_step > early_stop: print 'Early stopping ...' print "Image to text: %.1f, %.1f, %.1f, %.1f" % ( best_r1, best_r5, best_r10, best_medr) print "Text to image: %.1f, %.1f, %.1f, %.1f" % ( best_r1i, best_r5i, best_r10i, best_medri) return 0 print 'Seen %d samples' % n_samples
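# The PyTorch trainers above early-stop on validation steps rather than epochs:
# the step counter is uidx / validFreq, and training stops once the best
# Recall@K sum has not improved for early_stop consecutive validation steps.
# A stripped-down sketch of that bookkeeping (names and numbers are
# illustrative):
def should_stop(curr_step, best_step, patience):
    return curr_step - best_step > patience

best_score, best_step = 0.0, 0
for curr_step, score in enumerate([310.0, 325.0, 325.0, 320.0, 318.0], start=1):
    if score > best_score:
        best_score, best_step = score, curr_step
    if should_stop(curr_step, best_step, patience=2):
        break   # triggers at step 5: no improvement since step 2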
def trainer( data=['f30k-comparable', 'f30k-translational'], langs=['en', 'de'], margin=1, dim=1600, # 800 forward, 800 backward dim_image=4096, dim_word=300, encoders={ 'en': 'gru', 'de': 'gru' }, # gru OR bow max_epochs=80, dispFreq=50, decay_c=0, grad_clip=2., maxlen_w=100, optimizer='adam', batch_size=128, saveto='./f30k-half-comparable-and-translational.npz', validFreq=100, lrate=0.0002, reload_=False, # new parameters minlen_w=10, max_words={ 'en': 0, 'de': 0 }, # integer, zero means unlimited debug=False, use_dropout=True, dropout_prob=0.3, load_test=False, lambda_img_sent=0.75, lambda_sent_sent=0.25, bidirectional_enc=True, n_enc_hidden_layers=1): #use_all_costs=True): # Model options model_options = {} model_options['data'] = data model_options['langs'] = langs for lang in langs: model_options['encoder_%s' % lang] = encoders[lang] model_options['max_words_%s' % lang] = max_words[lang] model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['optimizer'] = optimizer model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ model_options['minlen_w'] = minlen_w model_options['use_dropout'] = use_dropout model_options['dropout_prob'] = dropout_prob model_options['bidirectional_enc'] = bidirectional_enc model_options['n_enc_hidden_layers'] = n_enc_hidden_layers model_options['load_test'] = load_test model_options['lambda_img_sent'] = lambda_img_sent model_options['lambda_sent_sent'] = lambda_sent_sent #model_options['use_all_costs'] = use_all_costs assert (n_enc_hidden_layers >= 1) # reload options if reload_ and os.path.exists(saveto): print 'reloading...' 
+ saveto with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) # Load training and development sets print 'Loading dataset' train, dev = load_multilingual_dataset(data, langs, load_test=load_test)[:2] # Create and save dictionaries print 'Creating and saving multilingual dictionaries %s' % (", ".join( model_options['langs'])) worddicts = [] iworddicts = [] for lang_idx, lang in enumerate(langs): # build dictionaries including all comparable and translational vocab worddict = build_dictionary(train[0][0][lang_idx] + train[1][0][lang_idx] + dev[0][0][lang_idx] + dev[1][0][lang_idx])[0] n_words_dict = len(worddict) #print '%s dictionary size: %s'%(lang,str(n_words_dict)) with open('%s.dictionary-%s.pkl' % (saveto, lang), 'wb') as f: pkl.dump(worddict, f) # Inverse dictionaries iworddict = dict() for kk, vv in worddict.iteritems(): iworddict[vv] = kk iworddict[0] = '<eos>' iworddict[1] = 'UNK' worddicts.append(worddict) iworddicts.append(iworddict) model_options[ "n_words_%s" % lang] = n_words_dict if max_words[lang] == 0 else max_words[lang] # assert all max_words per language are equal assert (all(x == max_words.values()[0] for x in max_words.values())) print model_options print 'Building model' params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto): params = load_params(saveto, params) tparams = init_tparams(params) trng, inps, is_translational, cost = build_model(tparams, model_options) # before any regularizer print 'Building f_log_probs...', f_log_probs = theano.function(inps, cost, profile=False) print 'Done' # weight decay, if applicable if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv**2).sum() weight_decay *= decay_c cost += weight_decay # after any regularizer print 'Building f_cost...', f_cost = theano.function(inps, cost, profile=False) print 'Done' print 'Building multilingual sentence encoders' trng, alls_se = build_sentence_encoders(tparams, model_options) f_sencs = [] for inps_se in alls_se: #print "sentence encoder input", inps_se inp_se, sentences = inps_se f_senc = theano.function(inp_se, sentences, profile=False) f_sencs.append(f_senc) print 'Building image encoder' trng, inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', sys.stdout.flush() grads = tensor.grad(cost, wrt=itemlist(tparams)) f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False) f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()], profile=False) if grad_clip > 0.: g2 = 0.
for g in grads: g2 += (g**2).sum() new_grads = [] for g in grads: new_grads.append( tensor.switch(g2 > (grad_clip**2), g / tensor.sqrt(g2) * grad_clip, g)) grads = new_grads lr = tensor.scalar(name='lr') print 'Building optimizers...', sys.stdout.flush() # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Optimization' # train: # [(c_train_caps_list, c_train_ims, 0), (t_train_caps_list, t_train_ims, 1)] # create training set iterator where # a heuristic tries to make sure sentences in minibatch have a similar size train_comparable_iter = HomogeneousDataMultilingualWithTranslationalEvidence( train[0], batch_size=batch_size, maxlen=maxlen_w, minlen=minlen_w) train_translational_iter = HomogeneousDataMultilingualWithTranslationalEvidence( train[1], batch_size=batch_size, maxlen=maxlen_w, minlen=minlen_w) uidx = 0 curr = 0. curr_langs = [0.] * len(model_options['langs']) n_samples = 0 skipped_samples = 0 ep_start = time.time() ep_times = [ep_start] for eidx in xrange(max_epochs): print 'Epoch ', eidx for xs, im, is_translational_ in itertools.chain( train_comparable_iter, train_translational_iter): uidx += 1 xs, masks, im = prepare_data(xs, im, is_translational_, \ worddicts, \ model_options=model_options, \ maxlen=maxlen_w) is_translational.set_value(is_translational_) if xs[0] is None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue # do not train on minibatches whose sentences are shorter than minlen_w words #if not x_src.shape[0]>=minlen_w and x_tgt.shape[0]>= minlen_w: if not all(x.shape[0] >= minlen_w for x in xs): print "At least one minibatch (in one of the languages in the model)", print "has fewer words than %i. Skipping..." % minlen_w skipped_samples += xs[0].shape[1] uidx -= 1 continue n_samples += len(xs[0]) # Update ud_start = time.time() # flatten inputs for theano function inps_ = [] inps_.extend(xs) inps_.extend(masks) inps_.append(im) #inps_.append(is_translational_) #cost = f_grad_shared(xs, masks, im) cost = f_grad_shared(*inps_) f_update(lrate) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'Translational ', is_translational_, 'UD ', ud if numpy.mod(uidx, validFreq) == 0: print 'Computing results...'
curr_model = {} curr_model['options'] = model_options # store model's language dependent parameters for lang, worddict, iworddict, f_senc in zip( langs, worddicts, iworddicts, f_sencs): curr_model['worddict_%s' % lang] = worddict curr_model['wordidict_%s' % lang] = iworddict curr_model['f_senc_%s' % lang] = f_senc #curr_model['worddicts'] = worddicts #curr_model['wordidicts'] = iworddicts #curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc # encode sentences lss = [] for lang_idx, lang in enumerate(model_options['langs']): # dev: # ((c_dev_caps_list, c_dev_ims, 0), (t_dev_caps_list, t_dev_ims, 1)) dev_set = dev[is_translational_] #ls = encode_multilingual_sentences(curr_model, dev[0][lang_idx], lang=lang) ls = encode_multilingual_sentences(curr_model, dev_set[0][lang_idx], lang=lang) lss.append(ls) lim = encode_images(curr_model, dev_set[1].astype('float32')) #lim = encode_images(curr_model, dev[1].astype('float32')) # compute scores currscore = 0 for i in range(len(lss)): (r1, r5, r10, medr) = i2t(lim, lss[i]) print "Image to %s text: %.1f, %.1f, %.1f, %.1f" % ( model_options['langs'][i], r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i(lim, lss[i]) print "%s text to image: %.1f, %.1f, %.1f, %.1f" % ( model_options['langs'][i], r1i, r5i, r10i, medri) # adjust current overall score #currscore += r1 + r5 + r10 + r1i + r5i + r10i currscore += r1 + (r5 / 1.5) + (r10 / 2) + r1i + ( r5i / 1.5) + (r10i / 2) # best current score for individual language/image pair #currscore_lang = r1 + r5 + r10 + r1i + r5i + r10i currscore_lang = r1 + (r5 / 1.5) + (r10 / 2) + r1i + ( r5i / 1.5) + (r10i / 2) if currscore_lang > curr_langs[i]: curr_langs[i] = currscore_lang # save model print 'saving best %s...' % model_options['langs'][i], params = unzip(tparams) numpy.savez( '%s.best-%s' % (saveto, model_options['langs'][i]), **params) pkl.dump( model_options, open( '%s.best-%s.pkl' % (saveto, model_options['langs'][i]), 'wb')) print 'done' # adjust current best overall score if needed if currscore > curr: curr = currscore # Save model print 'Saving best overall model (%s)...' % str("-".join( model_options['langs'])), params = unzip(tparams) numpy.savez(saveto, **params) pkl.dump(model_options, open('%s.pkl' % saveto, 'wb')) print 'Done' ep_end = time.time() ep_times.append(ep_end) print 'Seen %d samples' % n_samples seconds = ep_times[-1] - ep_times[0] m, s = divmod(seconds, 60) h, m = divmod(m, 60) print "Finished execution in %d:%02d:%02d" % (h, m, s)
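# Unlike the plain R@K sums used by most of the trainers above, the multilingual
# trainer selects checkpoints with a weighted recall sum, r1 + r5/1.5 + r10/2
# per retrieval direction and per language, so gains at R@1 count for more than
# gains at R@10. A small sketch of that selection score with made-up numbers:
def weighted_ranking_score(r1, r5, r10, r1i, r5i, r10i):
    return (r1 + r5 / 1.5 + r10 / 2.0 +
            r1i + r5i / 1.5 + r10i / 2.0)

# Two hypothetical checkpoints with identical plain recall sums (376.0 each):
a = weighted_ranking_score(40.0, 70.0, 80.0, 38.0, 69.0, 79.0)  # stronger R@1
b = weighted_ranking_score(35.0, 75.0, 80.0, 33.0, 74.0, 79.0)  # stronger R@5
# a > b, so the checkpoint with the better R@1 wins both the per-language and
# the overall comparison.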