def regularities(model, net, captions, imvecs, file_name, negword, posword, k=5, rerank=False):
    """
    This is an example of how the 'Multimodal Linguistic Regularities' experiments were done.
    Returns nearest neighbours to 'image - negword + posword'

    model: the embedding model, with encoder='bow'
    net: VGG ConvNet
    captions: a list of sentences
    imvecs: the corresponding image embeddings to each sentence in 'captions'
    file_name: location of the query image
    negword: the word to subtract
    posword: the word to add
    k: number of results to return
    rerank: whether to rerank results based on their mean (to push down outliers)

    'captions' is used only as a reference, to avoid loading/displaying images.

    Returns:
    The top k closest sentences in captions
    The indices of the top k captions

    Note that in our paper we used the SBU dataset (not COCO)
    """
    # Load the image
    im = load_image(file_name)

    # Run image through convnet
    query = compute_features(net, im).flatten()
    query /= norm(query)

    # Embed words
    pos = tools.encode_sentences(model, [posword], verbose=False)
    neg = tools.encode_sentences(model, [negword], verbose=False)

    # Embed image
    query = tools.encode_images(model, query[None,:])

    # Transform
    feats = query - neg + pos
    feats /= norm(feats)

    # Compute nearest neighbours
    scores = numpy.dot(feats, imvecs.T).flatten()
    sorted_args = numpy.argsort(scores)[::-1]
    sentences = [captions[a] for a in sorted_args[:k]]

    # Re-rank based on the mean of the returned results
    if rerank:
        nearest = imvecs[sorted_args[:k]]
        meanvec = numpy.mean(nearest, 0)[None,:]
        scores = numpy.dot(nearest, meanvec.T).flatten()
        sargs = numpy.argsort(scores)[::-1]
        sentences = [sentences[a] for a in sargs[:k]]
        sorted_args = [sorted_args[a] for a in sargs[:k]]

    return sentences, sorted_args[:k]
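# A minimal usage sketch for regularities() above, assuming the demo-style helpers
# tools.load_model() and demo.build_convnet() from this codebase are importable.
# load_caption_dump() is a hypothetical loader standing in for whatever produces the
# (captions, imvecs) pair; the image path and the day/night word pair are illustrative.
if __name__ == '__main__':
    model = tools.load_model()                  # embedding model with encoder='bow'
    net = demo.build_convnet()                  # VGG ConvNet used by compute_features
    captions, imvecs = load_caption_dump()      # hypothetical: sentences + their image embeddings
    sents, inds = regularities(model, net, captions, imvecs, 'query.jpg',
                               negword='day', posword='night', k=5, rerank=True)
    for s in sents:
        print s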
def ranking_eval_5fold(model, split='dev'): """ Evaluate a trained model on either dev or test of the dataset it was trained on Evaluate separately on 5 1000-image splits, and average the metrics """ data = model['options']['data'] cnn = model['options']['cnn'] results = [] for fold in range(5): print 'Loading fold ' + str(fold) dataset = datasets.load_dataset(data, cnn, load_train=False, fold=fold) caps, ims = Datasource(dataset[split], model['worddict']).all() print 'Computing results...' c_emb = tools.encode_sentences(model, caps) i_emb = tools.encode_images(model, ims) errs = tools.compute_errors(model, c_emb, i_emb) r = t2i(errs) print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(r) ri = i2t(errs) print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(ri) results.append(r + ri) print("-----------------------------------") print("Mean metrics: ") mean_metrics = numpy.array(results).mean(axis=0).flatten() print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(mean_metrics[:5]) print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(mean_metrics[5:])
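# Hedged usage sketch for ranking_eval_5fold() above: tools.load_model() is assumed to
# rebuild the model dict (options, worddict, encoder functions) from a saved snapshot;
# the snapshot path is illustrative.
if __name__ == '__main__':
    model = tools.load_model('snapshots/order')   # illustrative path
    ranking_eval_5fold(model, split='test')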
def embed(self, text1, text2): p = tokenizer() text1 = [p.tokenize(t, cn=False) for t in text1] text2 = [p.tokenize(t, cn=False) for t in text2] feats1, feats2 = encode_sentences(self.model, (text1, text2), test=True) return feats1, feats2
def evalrank(model, data, split='dev'):
    """
    Evaluate a trained model on either dev or test
    """
    print 'Loading dataset'
    if split == 'dev':
        X = load_dataset(data)[1]
    else:
        X = load_dataset(data, load_test=True)

    print 'Computing results...'
    ls = encode_sentences(model, X[0])
    lim = encode_images(model, X[1])

    if data == 'arch':
        # Find the good cases in the test dataset
        (r1, r5, r10, medr) = i2t_arch_case(lim, ls, X[0])
        print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
        (r1i, r5i, r10i, medri) = t2i_arch_case(lim, ls, X[0])
        print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)
    else:
        (r1, r5, r10, medr) = i2t(lim, ls)
        print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
        (r1i, r5i, r10i, medri) = t2i(lim, ls)
        print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)
def evalrank(model, data, split='dev'): """ Evaluate a trained model on either dev or test data options: f8k, f30k, coco """ print 'Loading dataset' if split == 'dev': X = load_dataset(data, load_train=False)[1] else: X = load_dataset(data, load_train=False)[2] print 'Computing results...' train = load_dataset('CAD', load_train=True)[0] vectors = encode_sentences(model, train[0], verbose=False) # demo.retrieve_captions(model, net, train[0], vectors, 'image.jpg', k=5) ls = encode_sentences(model, X[0]) lim = encode_images(model, X[1]) (r1, r5, r10) = i2t(lim, X[0], train[0], vectors) print "Image to text: %.1f, %.1f, %.1f" % (r1, r5, r10)
def get_sentence_embedding(self, sentences):
    """
    Encode a list of sentences (or a single string) with the loaded model
    and return their embeddings.
    """
    # check input type
    if not hasattr(sentences, '__iter__'):
        if isinstance(sentences, str):
            sentences = [sentences]
        else:
            raise ValueError('Sentences must be an iterable of strings!')
    Z = tools.encode_sentences(self.model, sentences)
    return Z
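# Hedged usage sketch for get_sentence_embedding() above. It is an instance method, so
# an enclosing wrapper object is assumed; SentenceEmbedder is a hypothetical class name
# whose __init__ is expected to load the embedding model into self.model.
embedder = SentenceEmbedder()                   # hypothetical wrapper class
Z = embedder.get_sentence_embedding(['a dog chases a ball',
                                     'two people ride bikes'])
print(Z.shape)                                  # (2, embedding_dim)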
def evalRank(model, data, batchsize=64, transforms_list=None, use_gpu=True, verbose=False):
    """
    Evaluate a trained model on the val or test dataset
    """
    model['imagecnn'].eval()
    model['textcnn'].eval()

    imgs, labels_i, caps, labels_c = data.get_data()

    images = Variable(torch.empty((len(imgs), 3, 224, 224)))
    captions = Variable(torch.empty(len(caps), 32, dtype=torch.long))

    for i, cap in enumerate(caps):
        cap = torch.Tensor(cap)
        captions[i] = cap

    for i, img in enumerate(imgs):
        img = Image.open(img)
        if transforms_list:
            img = transforms_list(img)
        images[i] = img

    del imgs
    del caps

    with torch.no_grad():
        imgs_codes = encode_images(model, images, batch_size=batchsize, use_gpu=use_gpu)
        captions_codes = encode_sentences(model, captions, batch_size=batchsize, use_gpu=use_gpu)

    (r1, r5, r10, medr) = image2txt(imgs_codes, captions_codes)
    if verbose:
        print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))

    (r1i, r5i, r10i, medri) = txt2image(captions_codes, imgs_codes)
    if verbose:
        print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))

    model['imagecnn'].train()
    model['textcnn'].train()

    return (r1, r5, r10, medr), (r1i, r5i, r10i, medri)
def evalrank(model, data, split='test'): """ Evaluate a trained model on either dev or test """ print('Loading dataset') if split == 'dev': _, X = load_dataset(data) else: X = load_dataset(data, load_test=True) print('Computing results...') en, cn = encode_sentences(model, X, test=True) score = devloss(en, cn, margin=model['options']['margin']) print(split + ' loss: ', score)
def query(): query_sen = request.form.get('query_sentence', '') k_input = int(request.form.get('k_input', '')) query_img = request.files['query_image'] img_name = query_img.filename upload_img = os.path.join(app.config['UPLOAD_FOLDER'], img_name) sim_images, sim_image_degree = [], [] sim_texts, sim_text_degree = [], [] if img_name: query_img.save(upload_img) img_vec = image_transform( Image.open(upload_img).convert('RGB')).unsqueeze(0) image_emb = encode_images( curr_model, resnet(Variable(img_vec.cuda())).data.cpu().numpy()) d = torch.mm(image_emb, texts_dump.t()) d_sorted, inds = torch.sort(d, descending=True) inds = inds.data.squeeze(0).cpu().numpy() # sim_text_degree = 1-distance[0][:k_input]/distance[0][-1] sim_texts = np.array(texts_orig)[inds[:k_input]] # sim_texts, sim_text_degree = sim_texts.tolist(), sim_text_degree.tolist() sim_texts, sim_text_degree = sim_texts.tolist(), sim_text_degree if query_sen: query_sen = ' '.join( jieba.analyse.extract_tags(query_sen, topK=100, withWeight=False, allowPOS=())) query_sen = [query_sen] sentence = encode_sentences(curr_model, query_sen) # d = torch.mm(sentence, images_dump.t()) d = torch.mm(sentence, images_dump.t()) d_sorted, inds = torch.sort(d, descending=True) inds = inds.data.squeeze(0).cpu().numpy() # sim_image_degree = 1-distance[0][:k_input]/distance[0][-1] sim_images = np.array(images_path)[inds[:k_input]] # sim_images, sim_image_degree = sim_images.tolist(), sim_image_degree.tolist() sim_images, sim_image_degree = sim_images.tolist(), sim_image_degree upload_img = upload_img if img_name else 'no_upload_img' return jsonify(sim_images=sim_images, sim_image_degree=sim_image_degree, upload_img=upload_img, sim_texts=sim_texts, sim_text_degree=sim_text_degree)
def evalrank(model, data, split='dev'):
    """
    Evaluate a trained model on either dev or test
    """
    print('Loading dataset')
    if split == 'dev':
        X = load_dataset(data)[1]
    else:
        X = load_dataset(data, load_test=True)

    print('Computing results...')
    ls = encode_sentences(model, X[0])
    lim = encode_images(model, X[1])

    (r1, r5, r10, medr) = i2t(lim, ls)
    print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))
    (r1i, r5i, r10i, medri) = t2i(lim, ls)
    print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))
def evalrank(model, data, split='dev'): """ Evaluate a trained model on either dev or test data options: euronews, f30k, coco """ print 'Loading dataset' if split == 'dev': X = load_dataset(data, load_train=False)[1] else: X = load_dataset(data, load_train=False)[2] print 'Computing results...' ls = encode_sentences(model, X[0]) lim = encode_images(model, X[1]) (r1, r5, r10, medr) = i2t(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)
def evalrank(model, data, split='dev'): """ Evaluate a trained model on either dev or test data options: f8k, f30k, coco """ print 'Loading dataset' if split == 'dev': X = load_dataset(data, load_train=False)[1] else: X = load_dataset(data, load_train=False)[2] print 'Computing results...' ls = encode_sentences(model, X[0]) lim = encode_images(model, X[1]) (r1, r5, r10, medr) = i2t(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)
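# Hedged usage sketch for evalrank() above, following the calling pattern shown later in
# this collection (tools.load_model on a saved .npz snapshot, then evalrank on a split);
# the snapshot path is illustrative.
if __name__ == '__main__':
    model = tools.load_model('data/f8k.npz')    # illustrative path to a trained model
    evalrank(model, data='f8k', split='test')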
def main(args):
    # read in the data
    with open(args.result_path, 'r') as f:
        data = json.load(f)
    with open(args.data_json, 'r') as f:
        vocab = json.load(f)

    options = data['opt']
    captions = data['captions']
    vocab_size = options['vocab_size']
    token_to_idx = vocab['token_to_idx']
    idx_to_token = vocab['idx_to_token']

    deviation = [0] * len(captions)
    all_vecs = np.zeros((0, 1024), 'float32')

    for iid, img in enumerate(captions):
        print 'collecting captions (%d/%d)' % (iid, len(captions))
        num_of_box = int((np.sqrt(4 * len(img) + 1) + 1) / 2)
        per_img_hist = np.array([0] * vocab_size)  # hist among imgs
        per_box_hist = np.array([([0] * vocab_size)] * num_of_box)
        words_per_box = [0] * num_of_box
        # pdb.set_trace()
        vectors = tools.encode_sentences(model, img, verbose=False)  # sentence embedding
        # pdb.set_trace()
        all_vecs = np.concatenate((all_vecs, vectors), axis=0)
        deviation[iid] = np.std(np.array(vectors))
        # pdb.set_trace()

    # captions
    mean_deviation = np.mean(deviation)
    print 'mean standard deviation=%.3f' % (mean_deviation)
    print 'total standard deviation=%.3f' % np.std(np.array(all_vecs))
    pdb.set_trace()
    return mean_deviation
def evalrank(model, data, split='dev'): """ Evaluate a trained model on either dev or test data options: f8k, f30k, coco """ print('Loading dataset') if split == 'dev': X = load_dataset(data, load_train=False)[1] else: X = load_dataset(data, load_train=False)[2] print('Computing results...') ls = encode_sentences(model, X[0]) lim = encode_images(model, X[1]) #(r1, r5, r10, medr) = i2t(lim, ls) (r1, r5, r10, medr, meanr) = i2t(lim, ls, return_ranks=False) print(("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr))) #(r1i, r5i, r10i, medri) = t2i(lim, ls) (r1i, r5i, r10i, medri, meanri) = t2i(lim, ls, return_ranks=False) print(("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)))
def ranking_eval_Nfold(model, n_fold=1, subset='val'):
    """
    Evaluate a trained model on either val or test of the dataset it was trained on.
    Evaluate separately on n_fold image splits, and average the metrics.

    Parameters:
    -----------
    model: dict
        Dictionary containing the parameters of the current model
    n_fold: int
        Number of image splits to be evaluated on. Only n_fold=1 is supported
        with the provided datasets.
    subset: str
        Subset to perform the evaluation on. One of: 'val', 'test'

    Returns:
    --------
    results_dict: dict
        Dictionary containing the evaluation results. Structured as
        results_dict['cap_ret', 'img_ret']['r1', 'r5', 'r10', 'medr']
    score: float
        Score obtained, the sum of recalls for both problems: caption retrieval
        and image retrieval.
    """
    results = []

    for fold in range(n_fold):
        print 'Loading fold ' + str(fold)
        dataset = load_dataset(dataset_name=model['options']['data'],
                               embedding=model['options']['embedding'],
                               path_to_data=model['options']['data_path'],
                               test_subset=model['options']['test_subset'],
                               load_train=False, fold=fold)
        caps, ims = Datasource(dataset[subset], model['worddict']).all()

        print 'Computing results...'
        c_emb = tools.encode_sentences(model, caps)
        i_emb = tools.encode_images(model, ims)

        errs = tools.compute_errors(model, c_emb, i_emb)

        r = t2i(errs)
        print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(r)

        ri = i2t(errs)
        print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(ri)
        results.append(r + ri)

    print("-----------------------------------")
    print("Mean metrics: ")
    mean_metrics = numpy.array(results).mean(axis=0).flatten()
    print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(mean_metrics[:5])
    print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(mean_metrics[5:])

    # Join everything in a dict
    results_dict = OrderedDict([('cap_ret', OrderedDict([])),
                                ('img_ret', OrderedDict([]))])

    # Caption retrieval (image to text)
    results_dict["cap_ret"]["r1"] = mean_metrics[5]
    results_dict["cap_ret"]["r5"] = mean_metrics[6]
    results_dict["cap_ret"]["r10"] = mean_metrics[7]
    results_dict["cap_ret"]["medr"] = mean_metrics[8]

    # Image retrieval (text to image)
    results_dict["img_ret"]["r1"] = mean_metrics[0]
    results_dict["img_ret"]["r5"] = mean_metrics[1]
    results_dict["img_ret"]["r10"] = mean_metrics[2]
    results_dict["img_ret"]["medr"] = mean_metrics[3]

    score = mean_metrics[0:3].sum() + mean_metrics[5:8].sum()

    return results_dict, score
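# Hedged usage sketch for ranking_eval_Nfold() above. How the model dict is restored is
# an assumption (shown here as tools.load_model with an illustrative path); the printed
# keys follow the results_dict layout built by the function.
if __name__ == '__main__':
    model = tools.load_model('snapshots/model_best.pkl')   # illustrative path
    results_dict, score = ranking_eval_Nfold(model, n_fold=1, subset='test')
    print 'caption retrieval R@1:', results_dict['cap_ret']['r1']
    print 'image retrieval medr:', results_dict['img_ret']['medr']
    print 'summed recall score:', score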
def trainer(data='coco', margin=0.2, dim=1024, dim_image=4096, dim_word=300, max_epochs=15, encoder='lstm', dispFreq=10, grad_clip=2.0, maxlen_w=150, batch_size=128, saveto='vse/coco', validFreq=100, early_stop=20, lrate=0.0002, reload_=False): # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ print model_options # reload options if reload_ and os.path.exists(saveto): print 'reloading...' + saveto with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) # Load training and development sets print 'loading dataset' train, dev = load_dataset(data) # Create and save dictionary print 'Create dictionary' worddict = build_dictionary(train[0] + dev[0])[0] n_words = len(worddict) model_options['n_words'] = n_words print 'Dictionary size: ' + str(n_words) with open('%s.dictionary.pkl' % saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' model_options['worddict'] = worddict model_options['word_idict'] = word_idict # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) img_sen_model = ImgSenRanking(model_options) img_sen_model = img_sen_model.cuda() loss_fn = PairwiseRankingLoss(margin=margin) loss_fn = loss_fn.cuda() params = filter(lambda p: p.requires_grad, img_sen_model.parameters()) optimizer = torch.optim.Adam(params, lrate) uidx = 0 curr = 0.0 n_samples = 0 # For Early-stopping best_r1, best_r5, best_r10, best_medr = 0.0, 0.0, 0.0, 0 best_r1i, best_r5i, best_r10i, best_medri = 0.0, 0.0, 0.0, 0 best_step = 0 for eidx in xrange(max_epochs): print 'Epoch ', eidx for x, im in train_iter: n_samples += len(x) uidx += 1 x, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words) if x is None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue x = Variable(torch.from_numpy(x).cuda()) im = Variable(torch.from_numpy(im).cuda()) # Update x, im = img_sen_model(x, im) cost = loss_fn(im, x) optimizer.zero_grad() cost.backward() torch.nn.utils.clip_grad_norm(params, grad_clip) optimizer.step() if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, '\tUpdate ', uidx, '\tCost ', cost.data.cpu( ).numpy()[0] if numpy.mod(uidx, validFreq) == 0: print 'Computing results...' 
curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = worddict curr_model['word_idict'] = word_idict curr_model['img_sen_model'] = img_sen_model ls, lim = encode_sentences(curr_model, dev[0]), encode_images( curr_model, dev[1]) r_time = time.time() (r1, r5, r10, medr) = i2t(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri) print "Cal Recall@K using %ss" % (time.time() - r_time) curr_step = uidx / validFreq currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr: curr = currscore best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr best_r1i, best_r5i, best_r10i, best_medri = r1i, r5i, r10i, medri best_step = curr_step # Save model print 'Saving model...', pkl.dump( model_options, open('%s_params_%s.pkl' % (saveto, encoder), 'wb')) torch.save(img_sen_model.state_dict(), '%s_model_%s.pkl' % (saveto, encoder)) print 'Done' if curr_step - best_step > early_stop: print 'Early stopping ...' print "Image to text: %.1f, %.1f, %.1f, %.1f" % ( best_r1, best_r5, best_r10, best_medr) print "Text to image: %.1f, %.1f, %.1f, %.1f" % ( best_r1i, best_r5i, best_r10i, best_medri) return 0 print 'Seen %d samples' % n_samples
def trainer(data='coco', #f8k, f30k, coco margin=0.2, dim=1024, dim_image=4096, dim_word=300, encoder='gru', # gru OR bow max_epochs=15, dispFreq=10, decay_c=0., grad_clip=2., maxlen_w=100, optimizer='adam', batch_size = 128, saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz', validFreq=100, lrate=0.0002, reload_=False): # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['encoder'] = encoder model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['optimizer'] = optimizer model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ print model_options # reload options if reload_ and os.path.exists(saveto): print 'reloading...' + saveto with open('%s.pkl'%saveto, 'rb') as f: models_options = pkl.load(f) # Load training and development sets print 'Loading dataset' train, dev = load_dataset(data)[:2] # Create and save dictionary print 'Creating dictionary' worddict = build_dictionary(train[0]+dev[0])[0] n_words = len(worddict) model_options['n_words'] = n_words print 'Dictionary size: ' + str(n_words) with open('%s.dictionary.pkl'%saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' print 'Building model' params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto): params = load_params(saveto, params) tparams = init_tparams(params) trng, inps, cost = build_model(tparams, model_options) # before any regularizer print 'Building f_log_probs...', f_log_probs = theano.function(inps, cost, profile=False) print 'Done' # weight decay, if applicable if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv ** 2).sum() weight_decay *= decay_c cost += weight_decay # after any regularizer print 'Building f_cost...', f_cost = theano.function(inps, cost, profile=False) print 'Done' print 'Building sentence encoder' trng, inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print 'Building image encoder' trng, inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False) f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False) if grad_clip > 0.: g2 = 0. for g in grads: g2 += (g**2).sum() new_grads = [] for g in grads: new_grads.append(tensor.switch(g2 > (grad_clip**2), g / tensor.sqrt(g2) * grad_clip, g)) grads = new_grads lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Optimization' # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) uidx = 0 curr = 0. 
n_samples = 0 for eidx in xrange(max_epochs): print 'Epoch ', eidx for x, im in train_iter: n_samples += len(x) uidx += 1 x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words) if x == None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(lrate) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud if numpy.mod(uidx, validFreq) == 0: print 'Computing results...' curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = worddict curr_model['word_idict'] = word_idict curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc ls = encode_sentences(curr_model, dev[0]) lim = encode_images(curr_model, dev[1]) (r1, r5, r10, medr) = i2t(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri) currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr: curr = currscore # Save model print 'Saving...', params = unzip(tparams) numpy.savez(saveto, **params) pkl.dump(model_options, open('%s.pkl'%saveto, 'wb')) print 'Done' print 'Seen %d samples'%n_samples
def trainer( data='coco', #f8k, f30k, coco margin=0.2, dim=1024, dim_image=4096, dim_word=300, encoder='gru', # gru OR bow max_epochs=15, dispFreq=10, decay_c=0., grad_clip=2., maxlen_w=100, optimizer='adam', batch_size=128, saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz', validFreq=100, lrate=0.0002, reload_=False): # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['encoder'] = encoder model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['optimizer'] = optimizer model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ print(model_options) # reload options if reload_ and os.path.exists(saveto): print('reloading...' + saveto) with open('%s.pkl' % saveto, 'rb') as f: models_options = pkl.load(f) # Load training and development sets print('Loading dataset') train, dev = load_dataset(data)[:2] # Create and save dictionary print('Creating dictionary') worddict = build_dictionary(train[0] + dev[0])[0] n_words = len(worddict) model_options['n_words'] = n_words print('Dictionary size: ' + str(n_words)) with open('%s.dictionary.pkl' % saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' print('Building model') params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto): params = load_params(saveto, params) tparams = init_tparams(params) trng, inps, cost = build_model(tparams, model_options) # before any regularizer print('Building f_log_probs...', ) f_log_probs = theano.function(inps, cost, profile=False) print('Done') # weight decay, if applicable if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv**2).sum() weight_decay *= decay_c cost += weight_decay # after any regularizer print('Building f_cost...', ) f_cost = theano.function(inps, cost, profile=False) print('Done') print('Building sentence encoder') trng, inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print('Building image encoder') trng, inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print('Building f_grad...', ) grads = tensor.grad(cost, wrt=itemlist(tparams)) f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False) f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()], profile=False) if grad_clip > 0.: g2 = 0. 
for g in grads: g2 += (g**2).sum() new_grads = [] for g in grads: new_grads.append( tensor.switch(g2 > (grad_clip**2), g / tensor.sqrt(g2) * grad_clip, g)) grads = new_grads lr = tensor.scalar(name='lr') print('Building optimizers...', ) # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print('Optimization') # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) uidx = 0 curr = 0. n_samples = 0 for eidx in xrange(max_epochs): print('Epoch ', eidx) for x, im in train_iter: n_samples += len(x) uidx += 1 x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words) if x == None: print('Minibatch with zero sample under length ', maxlen_w) uidx -= 1 continue # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(lrate) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print('NaN detected') return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud) if numpy.mod(uidx, validFreq) == 0: print('Computing results...') curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = worddict curr_model['word_idict'] = word_idict curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc ls = encode_sentences(curr_model, dev[0]) lim = encode_images(curr_model, dev[1]) (r1, r5, r10, medr) = i2t(lim, ls) print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)) (r1i, r5i, r10i, medri) = t2i(lim, ls) print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)) currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr: curr = currscore # Save model print('Saving...', ) params = unzip(tparams) numpy.savez(saveto, **params) pkl.dump(model_options, open('%s.pkl' % saveto, 'wb')) print('Done') print('Seen %d samples' % n_samples)
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs): """ :param load_from: location to load parameters + options from :param name: name of model, used as location to save parameters + options """ curr_model = dict() # load old model, including parameters, but overwrite with new options if load_from: print 'reloading...' + load_from with open('%s.pkl' % load_from, 'rb') as f: curr_model = pkl.load(f) else: curr_model['options'] = {} for k, v in kwargs.iteritems(): curr_model['options'][k] = v model_options = curr_model['options'] # initialize logger import datetime timestampedName = datetime.datetime.now().strftime( '%Y_%m_%d_%H_%M_%S') + '_' + name from logger import Log log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training', xLabel='Examples Seen', saveFrequency=1) print curr_model['options'] # Load training and development sets print 'Loading dataset' dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True) train = dataset['train'] dev = dataset['dev'] # Create dictionary print 'Creating dictionary' worddict = build_dictionary(train['caps'] + dev['caps']) print 'Dictionary size: ' + str(len(worddict)) curr_model['worddict'] = worddict curr_model['options']['n_words'] = len(worddict) + 2 # save model pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb')) print 'Loading data' train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict) dev = datasource.Datasource(dev, worddict=worddict) dev_caps, dev_ims = dev.all() print 'Building model' params = init_params(model_options) # reload parameters if load_from is not None and os.path.exists(load_from): params = load_params(load_from, params) tparams = init_tparams(params) inps, cost = build_model(tparams, model_options) print 'Building sentence encoder' inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print 'Building image encoder' inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) print 'Building errors..' inps_err, errs = build_errors(model_options) f_err = theano.function(inps_err, errs, profile=False) curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc curr_model['f_err'] = f_err if model_options['grad_clip'] > 0.: grads = [maxnorm(g, model_options['grad_clip']) for g in grads] lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost) print 'Optimization' uidx = 0 curr = 0 n_samples = 0 for eidx in xrange(model_options['max_epochs']): print 'Epoch ', eidx for x, mask, im in train_iter: n_samples += x.shape[1] uidx += 1 # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(model_options['lrate']) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, model_options['dispFreq']) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud log.update({'Error': float(cost)}, n_samples) if numpy.mod(uidx, model_options['validFreq']) == 0: print 'Computing results...' 
# encode sentences efficiently dev_s = encode_sentences( curr_model, dev_caps, batch_size=model_options['batch_size']) dev_i = encode_images(curr_model, dev_ims) # compute errors dev_errs = compute_errors(curr_model, dev_s, dev_i) # compute ranking error (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True) (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs) print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % ( r1, r5, r10, medr, meanr) log.update( { 'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr }, n_samples) print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % ( r1i, r5i, r10i, medri, meanri) log.update( { 'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2CaptionR@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri }, n_samples) tot = r1 + r5 + r10 if tot > curr: curr = tot # Save parameters print 'Saving...', numpy.savez('%s/%s' % (save_dir, name), **unzip(tparams)) print 'Done' vis_details['hyperparams'] = model_options # Save visualization details with open( 'vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f: json.dump(vis_details, f) # Add the new model to the index try: index = json.load(open('vis/roc/index.json', 'r')) except IOError: index = {model_options['data']: []} models = index[model_options['data']] if timestampedName not in models: models.append(timestampedName) with open('vis/roc/index.json', 'w') as f: json.dump(index, f) print 'Seen %d samples' % n_samples
def trainer(load_from=None, save_dir="snapshots", name="anon", **kwargs): """ :param load_from: location to load parameters + options from :param name: name of model, used as location to save parameters + options """ curr_model = dict() # load old model, including parameters, but overwrite with new options if load_from: print "reloading..." + load_from with open("%s.pkl" % load_from, "rb") as f: curr_model = pkl.load(f) else: curr_model["options"] = {} for k, v in kwargs.iteritems(): curr_model["options"][k] = v model_options = curr_model["options"] # initialize logger import datetime timestampedName = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + "_" + name from logger import Log log = Log( name=timestampedName, hyperparams=model_options, saveDir="vis/training", xLabel="Examples Seen", saveFrequency=1 ) print curr_model["options"] # Load training and development sets print "Loading dataset" dataset = load_dataset(model_options["data"], cnn=model_options["cnn"], load_train=True) train = dataset["train"] dev = dataset["dev"] # Create dictionary print "Creating dictionary" worddict = build_dictionary(train["caps"] + dev["caps"]) print "Dictionary size: " + str(len(worddict)) curr_model["worddict"] = worddict curr_model["options"]["n_words"] = len(worddict) + 2 # save model pkl.dump(curr_model, open("%s/%s.pkl" % (save_dir, name), "wb")) print "Loading data" train_iter = datasource.Datasource(train, batch_size=model_options["batch_size"], worddict=worddict) dev = datasource.Datasource(dev, worddict=worddict) dev_caps, dev_ims = dev.all() print "Building model" params = init_params(model_options) # reload parameters if load_from is not None and os.path.exists(load_from): params = load_params(load_from, params) tparams = init_tparams(params) inps, cost = build_model(tparams, model_options) print "Building sentence encoder" inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print "Building image encoder" inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print "Building f_grad...", grads = tensor.grad(cost, wrt=itemlist(tparams)) print "Building errors.." inps_err, errs = build_errors(model_options) f_err = theano.function(inps_err, errs, profile=False) curr_model["f_senc"] = f_senc curr_model["f_ienc"] = f_ienc curr_model["f_err"] = f_err if model_options["grad_clip"] > 0.0: grads = [maxnorm(g, model_options["grad_clip"]) for g in grads] lr = tensor.scalar(name="lr") print "Building optimizers...", # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(model_options["optimizer"])(lr, tparams, grads, inps, cost) print "Optimization" uidx = 0 curr = 0 n_samples = 0 for eidx in xrange(model_options["max_epochs"]): print "Epoch ", eidx for x, mask, im in train_iter: n_samples += x.shape[1] uidx += 1 # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(model_options["lrate"]) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print "NaN detected" return 1.0, 1.0, 1.0 if numpy.mod(uidx, model_options["dispFreq"]) == 0: print "Epoch ", eidx, "Update ", uidx, "Cost ", cost, "UD ", ud log.update({"Error": float(cost)}, n_samples) if numpy.mod(uidx, model_options["validFreq"]) == 0: print "Computing results..." 
# encode sentences efficiently dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options["batch_size"]) dev_i = encode_images(curr_model, dev_ims) # compute errors dev_errs = compute_errors(curr_model, dev_s, dev_i) # compute ranking error (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True) (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs) print "Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr) log.update({"R@1": r1, "R@5": r5, "R@10": r10, "median_rank": medr, "mean_rank": meanr}, n_samples) print "Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri) log.update( { "Image2Caption_R@1": r1i, "Image2Caption_R@5": r5i, "Image2CaptionR@10": r10i, "Image2Caption_median_rank": medri, "Image2Caption_mean_rank": meanri, }, n_samples, ) tot = r1 + r5 + r10 if tot > curr: curr = tot # Save parameters print "Saving...", numpy.savez("%s/%s" % (save_dir, name), **unzip(tparams)) print "Done" vis_details["hyperparams"] = model_options # Save visualization details with open("vis/roc/%s/%s.json" % (model_options["data"], timestampedName), "w") as f: json.dump(vis_details, f) # Add the new model to the index try: index = json.load(open("vis/roc/index.json", "r")) except IOError: index = {model_options["data"]: []} models = index[model_options["data"]] if timestampedName not in models: models.append(timestampedName) with open("vis/roc/index.json", "w") as f: json.dump(index, f) print "Seen %d samples" % n_samples
'$exists': True }, 'name': { '$exists': True } }, limit=10000, projection=['description', 'name']) words = map(lambda i: ((i['name'] + ' ' + i['description']).lower()), items) # # load the model # model = tools.load_model() sentence_vectors = tools.encode_sentences(model, words, verbose=True) print sentence_vectors.shape from sklearn.neighbors import BallTree print 'building ball tree' tree = BallTree(sentence_vectors) print 'finding nearest neighbor for ' + words[1] dist, ind = tree.query(sentence_vectors[1], k=3) print ind print 'was ' + words[ind[0][0]]
# -*- coding: utf-8 -*- """ Created on Sun Mar 19 09:22:17 2017 @author: chahak """ import demo, tools, datasets net = demo.build_convnet() model = tools.load_model() train = datasets.load_dataset('f8k', load_train=True)[0] vectors = tools.encode_sentences(model, train[0], verbose=False) demo.retrieve_captions(model, net, train[0], vectors, 'child.jpg', k=5)
model_params = '%s_model.pkl' % loadfrom print 'Building model ... ', model_options = pkl.load(open(hyper_params, 'r')) model = ImgSenRanking(model_options).cuda() model.load_state_dict(torch.load(model_params)) print 'Done' test = load_dataset(data, load_test=True) print 'Dumping data ... ' curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = model_options['worddict'] curr_model['word_idict'] = model_options['word_idict'] curr_model['img_sen_model'] = model ls, lim = encode_sentences(curr_model, test[0]), encode_images(curr_model, test[1]) # save the using params and model when dumping data torch.save(ls, '%s_ls.pkl' % saveto) torch.save(lim, '%s_lim.pkl' % saveto) pkl.dump(model_options, open('%s_params_dump.pkl' % saveto, 'wb')) torch.save(model.state_dict(), '%s_model_dump.pkl' % saveto) json.dump(test[0], open('%s_caps.json' % saveto, 'w')) print 'ls: ', ls.data.size() print 'lim: ', lim.data.size()
import tools, evaluation, os # Hey Kipster! For this to work, use a python virtualenv # and pip install -r requirements.txt in IF-root # you might also need to install numpy or gfortran with your os pkg manager # First lets make sure the model kinda works __dirname = os.path.dirname(os.path.realpath(__file__)) model = tools.load_model(__dirname + '/data/coco.npz') evaluation.evalrank(model, data='coco', split='test') # Now lets compute sentence vecs for something specific example_sentences = [ 'black tie women', 'warm winter coat', 'long dressi gown tuxedo cocktail black_ti' ] sentence_vectors = tools.encode_sentences(model, example_sentences, verbose=True) print sentence_vectors.shape print sentence_vectors[0].shape
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs): """ :param load_from: location to load parameters + options from :param name: name of model, used as location to save parameters + options """ curr_model = dict() # load old model, including parameters, but overwrite with new options if load_from: print 'reloading...' + load_from with open('%s.pkl'%load_from, 'rb') as f: curr_model = pkl.load(f) else: curr_model['options'] = {} for k, v in kwargs.iteritems(): curr_model['options'][k] = v model_options = curr_model['options'] # initialize logger import datetime timestampedName = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '_' + name from logger import Log log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training', xLabel='Examples Seen', saveFrequency=1) print curr_model['options'] # Load training and development sets print 'Loading dataset' dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True) train = dataset['train'] dev = dataset['dev'] # Create dictionary print 'Creating dictionary' worddict = build_dictionary(train['caps']+dev['caps']) print 'Dictionary size: ' + str(len(worddict)) curr_model['worddict'] = worddict curr_model['options']['n_words'] = len(worddict) + 2 # save model pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb')) print 'Loading data' train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict) dev = datasource.Datasource(dev, worddict=worddict) dev_caps, dev_ims = dev.all() print 'Building model' params = init_params(model_options) # reload parameters if load_from is not None and os.path.exists(load_from): params = load_params(load_from, params) tparams = init_tparams(params) inps, cost = build_model(tparams, model_options) print 'Building sentence encoder' inps_se, sentences = build_sentence_encoder(tparams, model_options) f_senc = theano.function(inps_se, sentences, profile=False) print 'Building image encoder' inps_ie, images = build_image_encoder(tparams, model_options) f_ienc = theano.function(inps_ie, images, profile=False) print 'Building f_grad...', grads = tensor.grad(cost, wrt=itemlist(tparams)) print 'Building errors..' inps_err, errs = build_errors(model_options) f_err = theano.function(inps_err, errs, profile=False) curr_model['f_senc'] = f_senc curr_model['f_ienc'] = f_ienc curr_model['f_err'] = f_err if model_options['grad_clip'] > 0.: grads = [maxnorm(g, model_options['grad_clip']) for g in grads] lr = tensor.scalar(name='lr') print 'Building optimizers...', # (compute gradients), (updates parameters) f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost) print 'Optimization' uidx = 0 curr = 0 n_samples = 0 for eidx in xrange(model_options['max_epochs']): print 'Epoch ', eidx for x, mask, im in train_iter: n_samples += x.shape[1] uidx += 1 # Update ud_start = time.time() cost = f_grad_shared(x, mask, im) f_update(model_options['lrate']) ud = time.time() - ud_start if numpy.isnan(cost) or numpy.isinf(cost): print 'NaN detected' return 1., 1., 1. if numpy.mod(uidx, model_options['dispFreq']) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud log.update({'Error': float(cost)}, n_samples) if numpy.mod(uidx, model_options['validFreq']) == 0: print 'Computing results...' 
# encode sentences efficiently dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size']) dev_i = encode_images(curr_model, dev_ims) # compute errors dev_errs = compute_errors(curr_model, dev_s, dev_i) # compute ranking error (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True) (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs) print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr) log.update({'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr}, n_samples) print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri) log.update({'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2CaptionR@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri}, n_samples) tot = r1 + r5 + r10 if tot > curr: curr = tot # Save parameters print 'Saving...', numpy.savez('%s/%s'%(save_dir, name), **unzip(tparams)) print 'Done' vis_details['hyperparams'] = model_options # Save visualization details with open('vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f: json.dump(vis_details, f) # Add the new model to the index index = json.load(open('vis/roc/index.json', 'r')) models = index[model_options['data']] if timestampedName not in models: models.append(timestampedName) with open('vis/roc/index.json', 'w') as f: json.dump(index, f) print 'Seen %d samples'%n_samples
def train(margin=0.2, dim=300, dim_word=300, max_epochs=100, dispFreq=50, validFreq=200, grad_clip=2.0, maxlen_w=150, batch_size=300, early_stop=20, lrate=0.001, reload_=False, load_dict=False): # Model options model_options = {} model_options['UM_Corpus'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_word'] = dim_word model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ print(model_options) # reload options if reload_ and os.path.exists(saveto): print('reloading...' + saveto) with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) # Load training and development sets print('loading dataset') train, dev = load_dataset() print 'training samples: ', len(train[0]) print 'development samples: ', len(dev[0]) if load_dict: with open('%s.dictionary.pkl' % saveto, 'rb') as f: worddict = pkl.load(f) n_words = len(worddict) model_options['n_words'] = n_words print('Dictionary size: ', n_words) else: # Create and save dictionary print('Create dictionary') worddict = build_dictionary(train[0] + train[1] + dev[0] + dev[1]) n_words = len(worddict) model_options['n_words'] = n_words print('Dictionary size: ', n_words) with open('%s.dictionary_%s.pkl' % (saveto, run), 'wb') as f: pkl.dump(worddict, f) # # Inverse dictionary # word_idict = dict() # for kk, vv in worddict.iteritems(): # word_idict[vv] = kk # word_idict[0] = '<eos>' # word_idict[1] = 'UNK' model_options['worddict'] = worddict # model_options['word_idict'] = word_idict # # Each sentence in the minibatch have same length (for encoder) # train_iter = HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) share_model = LIUMCVC_Encoder(model_options) share_model = share_model.cuda() loss_fn = PairwiseRankingLoss(margin=margin) loss_fn = loss_fn.cuda() params = filter(lambda p: p.requires_grad, share_model.parameters()) optimizer = torch.optim.Adam(params, lrate) # decrease learning rate scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=10) uidx = 0 curr = 1e10 n_samples = 0 # For Early-stopping best_step = 0 for eidx in xrange(1, max_epochs + 1): print('Epoch ', eidx) train_data_index = prepare_data(train, worddict) for en, cn, en_lengths, cn_lengths, en_index, cn_index in data_generator( train_data_index, batch_size): uidx += 1 n_samples += len(en) en, cn = share_model(en, en_lengths, en_index, cn, cn_lengths, cn_index) loss = loss_fn(en, cn) optimizer.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm(params, grad_clip) optimizer.step() if np.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, '\tUpdate ', uidx, '\tCost ', loss.data.cpu( ).numpy()[0] if np.mod(uidx, validFreq) == 0: print 'Computing results...' 
share_model.eval() curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = worddict # curr_model['word_idict'] = word_idict curr_model['en_cn_model'] = share_model r_time = time.time() fen, fcn = encode_sentences(curr_model, dev, test=True) score = devloss(fen, fcn, margin=margin) print "Cal Recall@K using %ss" % (time.time() - r_time) share_model.train() curr_step = uidx / validFreq # scheduler.step(score) currscore = score print 'loss on dev', score if currscore < curr: curr = currscore best_step = curr_step # Save model print 'Saving model...', pkl.dump(model_options, open('%s_params_%s.pkl' % (saveto, run), 'wb')) torch.save(share_model.state_dict(), '%s_model_%s.pkl' % (saveto, run)) print 'Done' if curr_step - best_step > early_stop: print 'Early stopping ...' print return print 'Seen %d samples' % n_samples
def trainer(data='coco', margin=0.2, dim=1024, dim_image=4096, dim_word=300, encoder='gru', max_epochs=15, dispFreq=10, decay_c=0.0, grad_clip=2.0, maxlen_w=150, batch_size=128, saveto='vse/coco', validFreq=100, lrate=0.0002, concat=True, reload_=False): hyper_params = { 'data': data, 'encoder': encoder, 'batch_size': batch_size, 'time': cur_time, 'lrate': lrate, 'concat': concat, } i2t_r1 = dict([('i2t_recall', 'r1')] + hyper_params.items()) i2t_r5 = dict([('i2t_recall', 'r5')] + hyper_params.items()) i2t_r10 = dict([('i2t_recall', 'r10')] + hyper_params.items()) t2i_r1 = dict([('t2i_recall', 'r1')] + hyper_params.items()) t2i_r5 = dict([('t2i_recall', 'r5')] + hyper_params.items()) t2i_r10 = dict([('t2i_recall', 'r10')] + hyper_params.items()) i2t_med = dict([('i2t_med', 'i2t_med')] + hyper_params.items()) t2i_med = dict([('t2i_med', 't2i_med')] + hyper_params.items()) agent = Agent(port=5020) i2t_r1_agent = agent.register(i2t_r1, 'recall', overwrite=True) i2t_r5_agent = agent.register(i2t_r5, 'recall', overwrite=True) i2t_r10_agent = agent.register(i2t_r10, 'recall', overwrite=True) t2i_r1_agent = agent.register(t2i_r1, 'recall', overwrite=True) t2i_r5_agent = agent.register(t2i_r5, 'recall', overwrite=True) t2i_r10_agent = agent.register(t2i_r10, 'recall', overwrite=True) i2t_med_agent = agent.register(i2t_med, 'median', overwrite=True) t2i_med_agent = agent.register(t2i_med, 'median', overwrite=True) # Model options model_options = {} model_options['data'] = data model_options['margin'] = margin model_options['dim'] = dim model_options['dim_image'] = dim_image model_options['dim_word'] = dim_word model_options['encoder'] = encoder model_options['max_epochs'] = max_epochs model_options['dispFreq'] = dispFreq model_options['decay_c'] = decay_c model_options['grad_clip'] = grad_clip model_options['maxlen_w'] = maxlen_w model_options['batch_size'] = batch_size model_options['saveto'] = saveto model_options['validFreq'] = validFreq model_options['lrate'] = lrate model_options['reload_'] = reload_ model_options['concat'] = concat print model_options # reload options if reload_ and os.path.exists(saveto): print 'reloading...' 
+ saveto with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) # Load training and development sets print 'loading dataset' train, dev = load_dataset(data)[:2] # Create and save dictionary print 'Create dictionary' worddict = build_dictionary(train[0] + dev[0])[0] n_words = len(worddict) model_options['n_words'] = n_words print 'Dictionary size: ' + str(n_words) with open('%s.dictionary.pkl' % saveto, 'wb') as f: pkl.dump(worddict, f) # Inverse dictionary word_idict = dict() for kk, vv in worddict.iteritems(): word_idict[vv] = kk word_idict[0] = '<eos>' word_idict[1] = 'UNK' model_options['worddict'] = worddict model_options['word_idict'] = word_idict # Each sentence in the minibatch have same length (for encoder) train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w) img_sen_model = ImgSenRanking(model_options) img_sen_model = img_sen_model.cuda() loss_fn = PairwiseRankingLoss(margin=margin) loss_fn = loss_fn.cuda() params = filter(lambda p: p.requires_grad, img_sen_model.parameters()) optimizer = torch.optim.Adam(params, lrate) uidx = 0 curr = 0.0 n_samples = 0 for eidx in xrange(max_epochs): print 'Epoch ', eidx for x, im in train_iter: n_samples += len(x) uidx += 1 x_id, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words) if x_id is None: print 'Minibatch with zero sample under length ', maxlen_w uidx -= 1 continue x_id = Variable(torch.from_numpy(x_id).cuda()) im = Variable(torch.from_numpy(im).cuda()) # Update ud_start = time.time() x, im = img_sen_model(x_id, im, x) cost = loss_fn(im, x) optimizer.zero_grad() cost.backward() torch.nn.utils.clip_grad_norm(params, grad_clip) optimizer.step() ud = time.time() - ud_start if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost.data.cpu( ).numpy()[0], 'UD ', ud if numpy.mod(uidx, validFreq) == 0: print 'Computing results...' curr_model = {} curr_model['options'] = model_options curr_model['worddict'] = worddict curr_model['word_idict'] = word_idict curr_model['img_sen_model'] = img_sen_model ls, lim = encode_sentences(curr_model, dev[0]), encode_images( curr_model, dev[1]) r1, r5, r10, medr = 0.0, 0.0, 0.0, 0 r1i, r5i, r10i, medri = 0.0, 0.0, 0.0, 0 r_time = time.time() if data == 'arch' or data == 'arch_small': (r1, r5, r10, medr) = i2t_arch(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i_arch(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % ( r1i, r5i, r10i, medri) else: (r1, r5, r10, medr) = i2t(lim, ls) print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr) (r1i, r5i, r10i, medri) = t2i(lim, ls) print "Text to image: %.1f, %.1f, %.1f, %.1f" % ( r1i, r5i, r10i, medri) print "Cal Recall@K using %ss" % (time.time() - r_time) record_num = uidx / validFreq agent.append(i2t_r1_agent, record_num, r1) agent.append(i2t_r5_agent, record_num, r5) agent.append(i2t_r10_agent, record_num, r10) agent.append(t2i_r1_agent, record_num, r1i) agent.append(t2i_r5_agent, record_num, r5i) agent.append(t2i_r10_agent, record_num, r10i) agent.append(i2t_med_agent, record_num, medr) agent.append(t2i_med_agent, record_num, medri) currscore = r1 + r5 + r10 + r1i + r5i + r10i if currscore > curr: curr = currscore # Save model print 'Saving model...', pkl.dump( model_options, open('%s_params_%s.pkl' % (saveto, encoder), 'wb')) torch.save(img_sen_model.state_dict(), '%s_model_%s.pkl' % (saveto, encoder)) print 'Done' print 'Seen %d samples' % n_samples
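# Hedged sketch of invoking the trainer defined above; argument names come from its
# signature, the values are illustrative, and 'arch' is one of the dataset names handled
# in the validation branch. The experiment-monitoring Agent used above (port 5020) is
# assumed to be running.
if __name__ == '__main__':
    trainer(data='arch', encoder='gru', batch_size=128, lrate=0.0002,
            saveto='vse/arch', validFreq=100, max_epochs=15, concat=True)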
def trainer(**kwargs):
    """
    Train the model according to input params
    Info about input params is available in parameters.py
    """
    # Timing
    print('Starting time:', datetime.now())
    sys.stdout.flush()
    t_start_train = time.time()

    # Model options
    # load old model, including parameters, but overwrite with new options

    # Extract model options from arguments
    model_options = {}
    for k, v in kwargs.iteritems():
        model_options[k] = v

    # Print input options
    print('PARAMETERS BEFORE LOADING:')
    for k, v in model_options.items():
        print('{:>26}: {}'.format(k, v))
    sys.stdout.flush()

    # Reload options if required
    curr_model = dict()
    if model_options['reload_']:
        # Reload model parameters
        opt_filename_reload = get_opt_filename(model_options, previous=True)
        print('reloading...', opt_filename_reload)
        sys.stdout.flush()
        try:
            with open(opt_filename_reload, 'rb') as f:
                curr_model = pkl.load(f)
        except:
            print('Failed to reload parameters, using only the supplied parameters')
            curr_model['options'] = {}

        # Check if we reload from best model or last model
        if model_options['load_from'] in ['Best', 'best', 'B', 'b']:
            load_from_best = True
            print('Loading from Best saved model in validation results')
        elif model_options['load_from'] in ['Last', 'last', 'L', 'l']:
            load_from_best = False
            print('Loading from Last saved model')
        else:
            print('Unknown choice for "load_from" parameter',
                  model_options['load_from'])
            print('Please choose one of:', ['Best', 'best', 'B', 'b'],
                  ['Last', 'last', 'L', 'l'])
            print('Using Last as default')
            load_from_best = False

        # Reload end-point parameters
        state_filename = get_sol_filename(model_options,
                                          best=load_from_best,
                                          previous=True)
        print('reloading...', state_filename)
        sys.stdout.flush()
        try:
            with open(state_filename, 'rb') as f:
                state_params = pkl.load(f)
            if load_from_best:
                init_epoch = state_params['epoch']
                solution = state_params
            else:
                init_epoch = state_params['epoch_done'] + 1
                solution = state_params['solution']
            best_val_score = solution['best_val_score']
            n_samples = solution['samples_seen']
        except:
            print('Failed to reload state parameters, starting from 0')
            init_epoch = 0
            best_val_score = 0
            n_samples = 0
    else:
        curr_model['options'] = {}
        init_epoch = 0
        best_val_score = 0
        n_samples = 0

    # Overwrite loaded options with input options
    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v
    model_options = curr_model['options']

    # Print final options loaded
    if model_options['reload_']:
        print('PARAMETERS AFTER LOADING:')
        for k, v in model_options.items():
            print('{:>26}: {}'.format(k, v))
        sys.stdout.flush()

    # Load training and development sets
    print('Loading dataset')
    sys.stdout.flush()
    dataset = load_dataset(dataset_name=model_options['data'],
                           embedding=model_options['embedding'],
                           path_to_data=model_options['data_path'],
                           test_subset=model_options['test_subset'],
                           load_train=True,
                           fold=0)
    train = dataset['train']
    dev = dataset['val']

    # Create word dictionary
    print('Creating dictionary')
    sys.stdout.flush()
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print('Dictionary size: ' + str(len(worddict)))
    sys.stdout.flush()
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # Save model options
    opt_filename_save = get_opt_filename(model_options, previous=False)
    print('Saving model parameters in', opt_filename_save)
    sys.stdout.flush()
    try:
        os.makedirs(os.path.dirname(opt_filename_save))
    except:
        pass
    pkl.dump(curr_model, open(opt_filename_save, 'wb'))

    # Load data from dataset
    print('Loading data')
    sys.stdout.flush()
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print('Building model')
    sys.stdout.flush()
    params = init_params(model_options)

    # Reload network parameters, i.e. weights
    if model_options['reload_']:
        params_filename = get_npz_filename(model_options,
                                           best=load_from_best,
                                           previous=True)
        params = load_params(params_filename, params)

    tparams = init_tparams(params)
    inps, cost = build_model(tparams, model_options)

    print('Building sentence encoder')
    sys.stdout.flush()
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    sys.stdout.flush()
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...')
    sys.stdout.flush()
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print('Building errors...')
    sys.stdout.flush()
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print('Building optimizers...')
    sys.stdout.flush()
    # f_grad_shared computes the cost and gradients; f_update applies the parameter update
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps, cost)

    # Get names for the files to save model and solution
    sol_filename_best = get_sol_filename(model_options, best=True, previous=False)
    sol_filename_last = get_sol_filename(model_options, best=False, previous=False)
    params_filename_best = get_npz_filename(model_options, best=True, previous=False)
    params_filename_last = get_npz_filename(model_options, best=False, previous=False)

    print('PATHS TO MODELS:')
    for filename in [sol_filename_best, sol_filename_last,
                     params_filename_best, params_filename_last]:
        print(filename)
        sys.stdout.flush()
        try:
            os.makedirs(os.path.dirname(filename))
        except:
            pass

    # Start optimization
    print('Optimization')
    sys.stdout.flush()
    uidx = 0

    # Timing
    t_start = time.time()
    print('Starting time:', datetime.now())

    for eidx in range(init_epoch, model_options['max_epochs']):
        t_start_epoch = time.time()
        print('Epoch ', eidx)
        sys.stdout.flush()

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                sys.stdout.flush()
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud)
                sys.stdout.flush()

            if numpy.mod(uidx, model_options['validFreq']) == 0:
                print('Computing results...')
                sys.stdout.flush()

                # Encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps,
                                         batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # Compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # Compute ranking metrics
                (r1, r5, r10, medr, meanr) = i2t(dev_errs)
                (r1i, r5i, r10i, medri, meanri) = t2i(dev_errs)
                print("Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri, meanri))
                sys.stdout.flush()
                print("Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr, meanr))
                sys.stdout.flush()

                # Score
                val_score = r1 + r5 + r10 + r1i + r5i + r10i
                if val_score > best_val_score:
                    print('BEST MODEL FOUND')
                    print('Score:', val_score)
                    print('Previous best score:', best_val_score)
                    best_val_score = val_score

                    # Join in a results dict
                    results_dict = build_results_dict(r1, r5, r10, medr,
                                                      r1i, r5i, r10i, medri)

                    # Save parameters
                    print('Saving...', end=' ')
                    sys.stdout.flush()
                    numpy.savez(params_filename_best, **unzip(tparams))
                    print('Done')
                    sys.stdout.flush()

                    # Update solution
                    solution = OrderedDict([
                        ('epoch', eidx),
                        ('update', uidx),
                        ('samples_seen', n_samples),
                        ('best_val_score', best_val_score),
                        ('best_val_res', results_dict),
                        ('time_until_results',
                         str(timedelta(seconds=(time.time() - t_start_train))))
                    ])
                    pkl.dump(solution, open(sol_filename_best, 'wb'))

        print('Seen %d samples' % n_samples)
        sys.stdout.flush()

        # Timing
        t_epoch = time.time() - t_start_epoch
        t_epoch_avg = (time.time() - t_start) / (eidx + 1 - init_epoch)
        print('Time for this epoch:', str(timedelta(seconds=t_epoch)),
              'Average:', str(timedelta(seconds=t_epoch_avg)))
        t_2_complete = t_epoch_avg * (model_options['max_epochs'] - (eidx + 1))
        print('Time since start session:',
              str(timedelta(seconds=time.time() - t_start)),
              'Estimated time to complete training:',
              str(timedelta(seconds=t_2_complete)))
        print('Current time:', datetime.now())
        sys.stdout.flush()

        # Save current model state; if no validation has improved yet,
        # 'solution' is undefined and the fallback below builds one
        try:
            state_params = OrderedDict([('epoch_done', eidx),
                                        ('solution', solution)])
        except:
            solution = OrderedDict([
                ('epoch', eidx),
                ('update', uidx),
                ('samples_seen', n_samples),
                ('best_val_score', best_val_score),
                ('time_until_results',
                 str(timedelta(seconds=(time.time() - t_start_train))))
            ])
            state_params = OrderedDict([('epoch_done', eidx),
                                        ('solution', solution)])
        pkl.dump(state_params, open(sol_filename_last, 'wb'))

        # Save parameters
        print('Saving LAST npz...', end=' ')
        sys.stdout.flush()
        numpy.savez(params_filename_last, **unzip(tparams))
        print('Done')
        sys.stdout.flush()

    return solution
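# Hedged usage sketch only: trainer() is configured entirely through keyword
# arguments. The keyword names below are the model_options keys the function
# reads; every value is an illustrative placeholder, and parameters.py remains
# the authoritative list of accepted parameters and their defaults.
if __name__ == '__main__':
    solution = trainer(
        data='coco',                # dataset name (placeholder)
        data_path='/path/to/data',  # dataset location (placeholder)
        embedding='vse',            # embedding variant (placeholder)
        test_subset=-1,             # placeholder
        batch_size=128,             # placeholder
        optimizer='adam',           # must name an optimizer defined in the repo
        lrate=0.0002,               # placeholder learning rate
        grad_clip=2.0,              # values <= 0 skip gradient clipping
        max_epochs=30,              # placeholder
        dispFreq=10,                # print cost every N updates (placeholder)
        validFreq=300,              # run dev-set ranking every N updates (placeholder)
        reload_=False,              # True resumes from a previous run
        load_from='Best')           # only consulted when reload_ is True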