コード例 #1
0
def regularities(model, net, captions, imvecs, file_name, negword, posword, k=5, rerank=False):
    """
    Example of the 'Multimodal Linguistic Regularities' experiment.
    Finds the nearest neighbours of 'image - negword + posword'.

    model: the embedding model, with encoder='bow'
    net: VGG ConvNet
    captions: a list of sentences
    imvecs: the image embeddings corresponding to each sentence in 'captions'
    file_name: location of the query image
    negword: the word to subtract
    posword: the word to add
    k: number of results to return
    rerank: whether to rerank results based on their mean (to push down outliers)

    'captions' is used only as a reference, to avoid loading/displaying images.

    Returns:
    The top k closest sentences in captions
    The indices of the top k captions (a numpy slice, or a list when reranked)

    Note that in our paper we used the SBU dataset (not COCO)
    """
    # Image -> unit-length ConvNet feature vector
    image = load_image(file_name)
    cnn_feats = compute_features(net, image).flatten()
    cnn_feats /= norm(cnn_feats)

    # Word embeddings for the two arithmetic operands
    pos_vec = tools.encode_sentences(model, [posword], verbose=False)
    neg_vec = tools.encode_sentences(model, [negword], verbose=False)

    # Project the image features into the joint space
    image_vec = tools.encode_images(model, cnn_feats[None, :])

    # image - negword + posword, renormalised
    target = image_vec - neg_vec + pos_vec
    target /= norm(target)

    # Rank every candidate embedding by its dot product with the target
    similarity = numpy.dot(target, imvecs.T).flatten()
    ranking = numpy.argsort(similarity)[::-1]
    top = ranking[:k]
    sentences = [captions[idx] for idx in top]

    if not rerank:
        return sentences, ranking[:k]

    # Re-order the k hits by similarity to their own mean vector
    neighbours = imvecs[top]
    centroid = numpy.mean(neighbours, 0)[None, :]
    centroid_scores = numpy.dot(neighbours, centroid.T).flatten()
    order = numpy.argsort(centroid_scores)[::-1][:k]
    reranked_sentences = [sentences[j] for j in order]
    reranked_indices = [ranking[j] for j in order]
    return reranked_sentences, reranked_indices[:k]
コード例 #2
0
def regularities(model, net, captions, imvecs, file_name, negword, posword, k=5, rerank=False):
    """
    Illustrates how the 'Multimodal Linguistic Regularities' were produced:
    retrieves the nearest neighbours of 'image - negword + posword'.

    model: the embedding model, with encoder='bow'
    net: VGG ConvNet
    captions: a list of sentences
    imvecs: the image embeddings corresponding to each sentence in 'captions'
    file_name: location of the query image
    negword: the word to subtract
    posword: the word to add
    k: number of results to return
    rerank: whether to rerank results based on their mean (to push down outliers)

    'captions' is used only as a reference, to avoid loading/displaying images.

    Returns:
    The top k closest sentences in captions
    The indices of the top k captions

    Note that in our paper we used the SBU dataset (not COCO)
    """
    # ConvNet features of the query image, scaled to unit norm
    raw_feats = compute_features(net, load_image(file_name)).flatten()
    raw_feats /= norm(raw_feats)

    # Embed the word to add and the word to subtract
    added = tools.encode_sentences(model, [posword], verbose=False)
    removed = tools.encode_sentences(model, [negword], verbose=False)

    # Embed the image, apply the arithmetic, renormalise
    embedded = tools.encode_images(model, raw_feats[None, :])
    probe = embedded - removed + added
    probe /= norm(probe)

    # Nearest neighbours by dot product, best first
    sims = numpy.dot(probe, imvecs.T).flatten()
    best_first = numpy.argsort(sims)[::-1]
    hits = [captions[pos] for pos in best_first[:k]]

    if rerank:
        # Score each hit against the mean of all hits and re-sort
        hit_vecs = imvecs[best_first[:k]]
        mean_hit = numpy.mean(hit_vecs, 0)[None, :]
        mean_sims = numpy.dot(hit_vecs, mean_hit.T).flatten()
        new_order = numpy.argsort(mean_sims)[::-1]
        hits = [hits[pos] for pos in new_order[:k]]
        best_first = [best_first[pos] for pos in new_order[:k]]

    return hits, best_first[:k]
コード例 #3
0
def ranking_eval_5fold(model, split='dev'):
    """
    Evaluate a trained model on the dev or test split of its training dataset.

    Each of the 5 folds (1000-image splits) is scored separately with
    t2i and i2t; the per-fold metrics are printed, then averaged and
    printed again. Nothing is returned.
    """
    data = model['options']['data']
    cnn = model['options']['cnn']

    per_fold = []

    for fold in range(5):
        print('Loading fold ' + str(fold))
        dataset = datasets.load_dataset(data, cnn, load_train=False, fold=fold)
        caps, ims = Datasource(dataset[split], model['worddict']).all()

        print('Computing results...')
        caption_emb = tools.encode_sentences(model, caps)
        image_emb = tools.encode_images(model, ims)
        errs = tools.compute_errors(model, caption_emb, image_emb)

        text_to_image = t2i(errs)
        print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(text_to_image))

        image_to_text = i2t(errs)
        print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(image_to_text))

        # Both metric sequences are concatenated into one row per fold
        per_fold.append(text_to_image + image_to_text)

    print("-----------------------------------")
    print("Mean metrics: ")
    averaged = numpy.array(per_fold).mean(axis=0).flatten()
    print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(averaged[:5]))
    print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(averaged[5:]))
コード例 #4
0
 def embed(self, text1, text2):
     """Tokenize two collections of texts and encode them with self.model.

     Every item of text1 and text2 is tokenized with cn=False, and both
     token lists are passed together to encode_sentences in test mode.

     Returns the two feature sets as a (feats1, feats2) tuple.
     """
     tok = tokenizer()
     tokens1 = [tok.tokenize(item, cn=False) for item in text1]
     tokens2 = [tok.tokenize(item, cn=False) for item in text2]
     first, second = encode_sentences(self.model, (tokens1, tokens2), test=True)
     return first, second
コード例 #5
0
def evalrank(model, data, split='dev'):
    """
    Evaluate a trained model on either dev or test.

    Prints the image-to-text and text-to-image ranking metrics. When
    data == 'arch' the *_arch_case variants are used, which also receive
    the raw sentences (X[0]).
    """
    print('Loading dataset')
    X = load_dataset(data)[1] if split == 'dev' else load_dataset(data, load_test=True)

    print('Computing results...')
    sentence_emb = encode_sentences(model, X[0])
    image_emb = encode_images(model, X[1])

    arch_mode = (data == 'arch')

    # Image -> text
    if arch_mode:
        # Find the good case in test dataset
        i2t_scores = i2t_arch_case(image_emb, sentence_emb, X[0])
    else:
        i2t_scores = i2t(image_emb, sentence_emb)
    r1, r5, r10, medr = i2t_scores
    print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))

    # Text -> image
    if arch_mode:
        t2i_scores = t2i_arch_case(image_emb, sentence_emb, X[0])
    else:
        t2i_scores = t2i(image_emb, sentence_emb)
    r1i, r5i, r10i, medri = t2i_scores
    print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))
コード例 #6
0
def ranking_eval_5fold(model, split='dev'):
    """
    Score a trained model on dev or test of the dataset it was trained on.

    The evaluation runs separately on 5 1000-image splits; the metrics of
    each split are printed, followed by their mean over the splits.
    """
    data = model['options']['data']
    cnn = model['options']['cnn']

    fold_rows = []

    for fold_id in range(5):
        print('Loading fold ' + str(fold_id))
        dataset = datasets.load_dataset(data, cnn, load_train=False, fold=fold_id)
        source = Datasource(dataset[split], model['worddict'])
        caps, ims = source.all()

        print('Computing results...')
        c_emb = tools.encode_sentences(model, caps)
        i_emb = tools.encode_images(model, ims)
        errs = tools.compute_errors(model, c_emb, i_emb)

        t2i_metrics = t2i(errs)
        print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(t2i_metrics))

        i2t_metrics = i2t(errs)
        print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(i2t_metrics))

        # One row per fold: text-to-image metrics followed by image-to-text
        fold_rows.append(t2i_metrics + i2t_metrics)

    print("-----------------------------------")
    print("Mean metrics: ")
    mean_row = numpy.array(fold_rows).mean(axis=0).flatten()
    t2i_mean, i2t_mean = mean_row[:5], mean_row[5:]
    print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(t2i_mean))
    print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(i2t_mean))
コード例 #7
0
def evalrank(model, data, split='dev'):
    """
    Evaluate a trained model on either dev or test
    data options: f8k, f30k, coco

    Captions are ranked against the sentences of the hard-coded 'CAD'
    training set; only image-to-text metrics are printed.
    """
    print('Loading dataset')
    split_index = 1 if split == 'dev' else 2
    X = load_dataset(data, load_train=False)[split_index]

    print('Computing results...')
    train_set = load_dataset('CAD', load_train=True)[0]
    train_sentences = train_set[0]
    train_vectors = encode_sentences(model, train_sentences, verbose=False)

    # The sentence encoding of the evaluation split is computed but its
    # result is not used by the ranking call below.
    encode_sentences(model, X[0])
    image_emb = encode_images(model, X[1])

    r1, r5, r10 = i2t(image_emb, X[0], train_sentences, train_vectors)
    print("Image to text: %.1f, %.1f, %.1f" % (r1, r5, r10))
コード例 #8
0
ファイル: embedding.py プロジェクト: shashankpr/cs4065
    def get_sentence_embedding(self,sentences):
        """
        Encode one sentence or an iterable of sentences with self.model.

        sentences: a single string, or an iterable of strings. A single
            string is wrapped into a one-element list before encoding.

        Returns whatever tools.encode_sentences returns for the input.

        Raises ValueError if `sentences` is neither a string nor iterable.
        """
        # Resolve the string base type for both Python 2 (str and unicode)
        # and Python 3 (str only).
        try:
            string_types = (basestring,)
        except NameError:
            string_types = (str,)

        # Strings must be recognised BEFORE the iterability check: Python 3
        # strings define __iter__, so checking iterability first would let a
        # bare string through and encode it character by character.
        if isinstance(sentences, string_types):
            sentences = [sentences]
        elif not hasattr(sentences, '__iter__'):
            raise ValueError('Sentences must be a iterable of strings!')

        return tools.encode_sentences(self.model, sentences)
コード例 #9
0
def evalRank(model,
             data,
             batchsize=64,
             transforms_list=None,
             use_gpu=True,
             verbose=False):
    """
    Evaluate a trained model on a val or test dataset.

    model: dict holding the networks under 'imagecnn' and 'textcnn'
    data: object whose get_data() returns (imgs, labels_i, caps, labels_c);
        imgs are opened with Image.open, caps are copied into a
        (len(caps), 32) long tensor
    batchsize: batch size forwarded to encode_images / encode_sentences
    transforms_list: optional callable applied to every opened image
    use_gpu: forwarded to encode_images / encode_sentences
    verbose: when true, print both metric tuples

    Returns:
    (r1, r5, r10, medr) for image-to-text and
    (r1i, r5i, r10i, medri) for text-to-image

    Both networks are switched to eval mode for the evaluation and are put
    in train mode afterwards, even when the evaluation raises.
    """
    model['imagecnn'].eval()
    model['textcnn'].eval()

    try:
        imgs, labels_i, caps, labels_c = data.get_data()
        images = Variable(torch.empty((len(imgs), 3, 224, 224)))
        captions = Variable(torch.empty(len(caps), 32, dtype=torch.long))
        for i, cap in enumerate(caps):
            captions[i] = torch.Tensor(cap)
        for i, img in enumerate(imgs):
            img = Image.open(img)
            if transforms_list:
                img = transforms_list(img)
            images[i] = img
        # Release the raw inputs before encoding
        del imgs
        del caps

        with torch.no_grad():
            # Honour the caller's use_gpu choice (it used to be forced to True)
            imgs_codes = encode_images(model,
                                       images,
                                       batch_size=batchsize,
                                       use_gpu=use_gpu)
            captions_codes = encode_sentences(model,
                                              captions,
                                              batch_size=batchsize,
                                              use_gpu=use_gpu)
            (r1, r5, r10, medr) = image2txt(imgs_codes, captions_codes)
        if verbose:
            print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))
        (r1i, r5i, r10i, medri) = txt2image(captions_codes, imgs_codes)
        if verbose:
            print("Text to image: %.1f, %.1f, %.1f, %.1f" %
                  (r1i, r5i, r10i, medri))
    finally:
        # Always hand the networks back in training mode
        model['imagecnn'].train()
        model['textcnn'].train()

    return (r1, r5, r10, medr), (r1i, r5i, r10i, medri)
コード例 #10
0
def evalrank(model, data, split='test'):
    """
    Evaluate a trained model on either dev or test.

    Encodes both sides of the chosen split in test mode and prints the
    devloss computed with the margin stored in the model options.
    """
    print('Loading dataset')
    if split != 'dev':
        X = load_dataset(data, load_test=True)
    else:
        # load_dataset yields two parts here; the second one is evaluated
        _unused, X = load_dataset(data)

    print('Computing results...')
    first_emb, second_emb = encode_sentences(model, X, test=True)

    margin = model['options']['margin']
    loss_value = devloss(first_emb, second_emb, margin=margin)

    print(split + ' loss: ', loss_value)
コード例 #11
0
ファイル: server.py プロジェクト: linxd5/jianzhu_VSE_w2v
def query():
    """
    Handle a retrieval request that carries an image and/or a sentence.

    Reads 'query_sentence' and 'k_input' from the submitted form and
    'query_image' from the uploaded files.

    * If an image was uploaded, it is saved under UPLOAD_FOLDER, embedded,
      and the k_input best-scoring entries of texts_orig are returned.
    * If a sentence was given, its extracted keywords are embedded and the
      k_input best-scoring entries of images_path are returned.

    Returns a JSON response with sim_images, sim_image_degree, upload_img,
    sim_texts and sim_text_degree. The two *_degree lists are always empty:
    similarity degrees are not computed at the moment.

    Raises ValueError when 'k_input' is missing or not an integer.
    """
    query_sen = request.form.get('query_sentence', '')
    k_input = int(request.form.get('k_input', ''))
    query_img = request.files['query_image']

    # The filename is supplied by the client. Keep only its last path
    # component so a name such as '../../x' cannot escape UPLOAD_FOLDER.
    img_name = os.path.basename((query_img.filename or '').replace('\\', '/'))
    if img_name in ('.', '..'):
        img_name = ''
    upload_img = os.path.join(app.config['UPLOAD_FOLDER'], img_name)

    sim_images, sim_image_degree = [], []
    sim_texts, sim_text_degree = [], []

    if img_name:
        query_img.save(upload_img)
        img_vec = image_transform(
            Image.open(upload_img).convert('RGB')).unsqueeze(0)
        image_emb = encode_images(
            curr_model,
            resnet(Variable(img_vec.cuda())).data.cpu().numpy())
        # Score the image against every stored text embedding, best first
        d = torch.mm(image_emb, texts_dump.t())
        _, inds = torch.sort(d, descending=True)
        inds = inds.data.squeeze(0).cpu().numpy()
        sim_texts = np.array(texts_orig)[inds[:k_input]].tolist()

    if query_sen:
        # Reduce the sentence to its extracted keywords before encoding
        query_sen = ' '.join(
            jieba.analyse.extract_tags(query_sen,
                                       topK=100,
                                       withWeight=False,
                                       allowPOS=()))
        query_sen = [query_sen]
        sentence = encode_sentences(curr_model, query_sen)
        # Score the sentence against every stored image embedding, best first
        d = torch.mm(sentence, images_dump.t())
        _, inds = torch.sort(d, descending=True)
        inds = inds.data.squeeze(0).cpu().numpy()
        sim_images = np.array(images_path)[inds[:k_input]].tolist()

    upload_img = upload_img if img_name else 'no_upload_img'
    return jsonify(sim_images=sim_images,
                   sim_image_degree=sim_image_degree,
                   upload_img=upload_img,
                   sim_texts=sim_texts,
                   sim_text_degree=sim_text_degree)
コード例 #12
0
ファイル: evaluation.py プロジェクト: Deepayan137/VSE_Pytorch
def evalrank(model, data, split='dev'):
    """
    Evaluate a trained model on either dev or test.

    Prints the four image-to-text metrics followed by the four
    text-to-image metrics.
    """
    print('Loading dataset')
    X = load_dataset(data)[1] if split == 'dev' else load_dataset(data, load_test=True)

    print('Computing results...')
    sentence_emb = encode_sentences(model, X[0])
    image_emb = encode_images(model, X[1])

    # Image -> text
    r1, r5, r10, medr = i2t(image_emb, sentence_emb)
    print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))

    # Text -> image
    r1i, r5i, r10i, medri = t2i(image_emb, sentence_emb)
    print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))
コード例 #13
0
def evalrank(model, data, split='dev'):
    """
    Evaluate a trained model on either dev or test
    data options: euronews, f30k, coco

    Prints the image-to-text and the text-to-image ranking metrics.
    """
    print('Loading dataset')
    # Index 1 of the loaded dataset is used for 'dev', index 2 otherwise
    part = 1 if split == 'dev' else 2
    X = load_dataset(data, load_train=False)[part]

    print('Computing results...')
    sent_vecs = encode_sentences(model, X[0])
    img_vecs = encode_images(model, X[1])

    r1, r5, r10, medr = i2t(img_vecs, sent_vecs)
    print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))
    r1i, r5i, r10i, medri = t2i(img_vecs, sent_vecs)
    print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))
コード例 #14
0
def evalrank(model, data, split='dev'):
    """
    Evaluate a trained model on either dev or test
    data options: f8k, f30k, coco

    The ranking metrics of both retrieval directions are printed.
    """
    print('Loading dataset')
    loaded = load_dataset(data, load_train=False)
    if split == 'dev':
        X = loaded[1]
    else:
        X = loaded[2]

    print('Computing results...')
    caption_codes = encode_sentences(model, X[0])
    image_codes = encode_images(model, X[1])

    # Image -> text metrics
    (r1, r5, r10, medr) = i2t(image_codes, caption_codes)
    print("Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr))

    # Text -> image metrics
    (r1i, r5i, r10i, medri) = t2i(image_codes, caption_codes)
    print("Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri))
コード例 #15
0
def main(args):
    """
    Measure how much the sentence embeddings of generated captions vary.

    args.result_path: JSON file with the keys 'opt' and 'captions'; each
        entry of 'captions' holds the sentences of one image
    args.data_json: JSON file with 'token_to_idx' and 'idx_to_token'

    For every image the sentences are embedded with tools.encode_sentences
    and the standard deviation of that embedding matrix is recorded. The
    mean of those per-image values and the standard deviation over all
    embeddings together are printed.

    Relies on a module-level `model`; it is neither a parameter nor
    assigned in this function.

    Returns the mean per-image standard deviation.
    """
    # read in the data
    with open(args.result_path, 'r') as f:
        data = json.load(f)
    with open(args.data_json, 'r') as f:
        vocab = json.load(f)

    # NOTE: only `captions` feeds the computation below. The other lookups
    # are kept so that malformed input files still fail here with KeyError.
    options = data['opt']
    captions = data['captions']
    vocab_size = options['vocab_size']
    token_to_idx = vocab['token_to_idx']
    idx_to_token = vocab['idx_to_token']

    deviation = []
    # Collect the per-image embeddings and concatenate once after the loop,
    # which avoids re-copying the growing matrix on every iteration.
    # The empty float32 seed keeps the result shape/dtype of the original.
    chunks = [np.zeros((0, 1024), 'float32')]

    for iid, img in enumerate(captions):
        print('collecting captions (%d/%d)' % (iid, len(captions)))

        vectors = tools.encode_sentences(model, img,
                                         verbose=False)  #sentence embedding
        chunks.append(vectors)
        deviation.append(np.std(np.array(vectors)))

    all_vecs = np.concatenate(chunks, axis=0)
    mean_deviation = np.mean(deviation)

    print('mean stanard deviation=%.3f' % (mean_deviation))
    print('total stanard deviation=%.3f' % np.std(np.array(all_vecs)))
    return mean_deviation
コード例 #16
0
def evalrank(model, data, split='dev'):
    """
    Evaluate a trained model on either dev or test
    data options: f8k, f30k, coco

    Prints five metrics per retrieval direction (r1, r5, r10, medr, meanr).
    """
    print('Loading dataset')
    which = 1 if split == 'dev' else 2
    X = load_dataset(data, load_train=False)[which]

    print('Computing results...')
    sentence_emb = encode_sentences(model, X[0])
    image_emb = encode_images(model, X[1])

    # Image -> text
    r1, r5, r10, medr, meanr = i2t(image_emb, sentence_emb, return_ranks=False)
    i2t_line = "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (
        r1, r5, r10, medr, meanr)
    print(i2t_line)

    # Text -> image
    r1i, r5i, r10i, medri, meanri = t2i(image_emb, sentence_emb, return_ranks=False)
    t2i_line = "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (
        r1i, r5i, r10i, medri, meanri)
    print(t2i_line)
コード例 #17
0
def ranking_eval_Nfold(model, n_fold=1, subset='val'):
    """
    Evaluate a trained model on either val or test of the dataset it was trained on
    Evaluate separately on n_fold image splits, and average the metrics
    Parameters:
    -----------
    model: dict
        Dictionary containing the parameters of the current model
    n_fold: int
        Number of image splits to be evaluated on.
        Only n_fold=1 is supported with the provided datasets.
    subset: str
        Subset to perform the evaluation on.
        One of: 'val', 'test'

    Returns:
    --------
    results_dict: dict
        Dictionary containing the evaluation results.
        Structured as results_dict['cap_ret', 'img_ret']['r1', 'r5', 'r10', 'medr']
    score: float
        Score obtained, the sum of recalls for both problems: caption retrieval and image retrieval.
    """
    fold_rows = []

    for fold in range(n_fold):
        print('Loading fold ' + str(fold))
        opts = model['options']
        dataset = load_dataset(dataset_name=opts['data'],
                               embedding=opts['embedding'],
                               path_to_data=opts['data_path'],
                               test_subset=opts['test_subset'],
                               load_train=False,
                               fold=fold)
        caps, ims = Datasource(dataset[subset], model['worddict']).all()

        print('Computing results...')
        caption_emb = tools.encode_sentences(model, caps)
        image_emb = tools.encode_images(model, ims)
        errs = tools.compute_errors(model, caption_emb, image_emb)

        t2i_metrics = t2i(errs)
        print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(t2i_metrics))

        i2t_metrics = i2t(errs)
        print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(i2t_metrics))

        # One row per fold: text-to-image metrics, then image-to-text
        fold_rows.append(t2i_metrics + i2t_metrics)

    print("-----------------------------------")
    print("Mean metrics: ")
    mean_metrics = numpy.array(fold_rows).mean(axis=0).flatten()
    print("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(
        mean_metrics[:5]))
    print("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % tuple(
        mean_metrics[5:])
    )

    # Positions 0-3 hold the text-to-image (image retrieval) values and
    # positions 5-8 the image-to-text (caption retrieval) values.
    metric_names = ('r1', 'r5', 'r10', 'medr')
    caption_retrieval = OrderedDict(
        (name, mean_metrics[5 + pos]) for pos, name in enumerate(metric_names))
    image_retrieval = OrderedDict(
        (name, mean_metrics[pos]) for pos, name in enumerate(metric_names))
    results_dict = OrderedDict([('cap_ret', caption_retrieval),
                                ('img_ret', image_retrieval)])

    # Sum of r1/r5/r10 over both retrieval directions
    score = mean_metrics[0:3].sum() + mean_metrics[5:8].sum()
    return results_dict, score
コード例 #18
0
def trainer(data='coco',
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            max_epochs=15,
            encoder='lstm',
            dispFreq=10,
            grad_clip=2.0,
            maxlen_w=150,
            batch_size=128,
            saveto='vse/coco',
            validFreq=100,
            early_stop=20,
            lrate=0.0002,
            reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print 'loading dataset'
    train, dev = load_dataset(data)

    # Create and save dictionary
    print 'Create dictionary'
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    model_options['word_idict'] = word_idict

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    img_sen_model = ImgSenRanking(model_options)
    img_sen_model = img_sen_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin)
    loss_fn = loss_fn.cuda()

    params = filter(lambda p: p.requires_grad, img_sen_model.parameters())
    optimizer = torch.optim.Adam(params, lrate)

    uidx = 0
    curr = 0.0
    n_samples = 0

    # For Early-stopping
    best_r1, best_r5, best_r10, best_medr = 0.0, 0.0, 0.0, 0
    best_r1i, best_r5i, best_r10i, best_medri = 0.0, 0.0, 0.0, 0
    best_step = 0

    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, im = homogeneous_data.prepare_data(x,
                                                  im,
                                                  worddict,
                                                  maxlen=maxlen_w,
                                                  n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            x = Variable(torch.from_numpy(x).cuda())
            im = Variable(torch.from_numpy(im).cuda())
            # Update
            x, im = img_sen_model(x, im)
            cost = loss_fn(im, x)
            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm(params, grad_clip)
            optimizer.step()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, '\tUpdate ', uidx, '\tCost ', cost.data.cpu(
                ).numpy()[0]

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['img_sen_model'] = img_sen_model

                ls, lim = encode_sentences(curr_model, dev[0]), encode_images(
                    curr_model, dev[1])

                r_time = time.time()
                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10,
                                                                 medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i,
                                                                 r10i, medri)

                print "Cal Recall@K using %ss" % (time.time() - r_time)

                curr_step = uidx / validFreq

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore
                    best_r1, best_r5, best_r10, best_medr = r1, r5, r10, medr
                    best_r1i, best_r5i, best_r10i, best_medri = r1i, r5i, r10i, medri
                    best_step = curr_step

                    # Save model
                    print 'Saving model...',
                    pkl.dump(
                        model_options,
                        open('%s_params_%s.pkl' % (saveto, encoder), 'wb'))
                    torch.save(img_sen_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, encoder))
                    print 'Done'

                if curr_step - best_step > early_stop:
                    print 'Early stopping ...'
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (
                        best_r1, best_r5, best_r10, best_medr)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (
                        best_r1i, best_r5i, best_r10i, best_medri)
                    return 0

        print 'Seen %d samples' % n_samples
コード例 #19
0
def trainer(data='coco',  #f8k, f30k, coco
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            encoder='gru',  # gru OR bow
            max_epochs=15,
            dispFreq=10,
            decay_c=0.,
            grad_clip=2.,
            maxlen_w=100,
            optimizer='adam',
            batch_size = 128,
            saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz',
            validFreq=100,
            lrate=0.0002,
            reload_=False):

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl'%saveto, 'rb') as f:
            models_options = pkl.load(f)

    # Load training and development sets
    print 'Loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train[0]+dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl'%saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building sentence encoder'
    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads], profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k,t in tparams.iteritems()], profile=False)

    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w)

    uidx = 0
    curr = 0.
    n_samples = 0
    
    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x, im, worddict, maxlen=maxlen_w, n_words=n_words)

            if x == None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr)
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print "Text to image: %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving...',
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    pkl.dump(model_options, open('%s.pkl'%saveto, 'wb'))
                    print 'Done'

        print 'Seen %d samples'%n_samples
コード例 #20
0
def trainer(
        data='coco',  #f8k, f30k, coco
        margin=0.2,
        dim=1024,
        dim_image=4096,
        dim_word=300,
        encoder='gru',  # gru OR bow
        max_epochs=15,
        dispFreq=10,
        decay_c=0.,
        grad_clip=2.,
        maxlen_w=100,
        optimizer='adam',
        batch_size=128,
        saveto='/ais/gobi3/u/rkiros/uvsmodels/coco.npz',
        validFreq=100,
        lrate=0.0002,
        reload_=False):
    """
    Train an image-sentence ranking model and checkpoint the best dev score.

    data: dataset name handed to load_dataset (f8k, f30k or coco)
    margin, dim, dim_image, dim_word, encoder: recorded in model_options for
        the model-building helpers (encoder is 'gru' or 'bow')
    max_epochs: number of passes over the training iterator
    dispFreq: print the cost every dispFreq updates
    decay_c: when > 0, decay_c * (sum of squared parameters) is added to the cost
    grad_clip: when > 0, gradients are rescaled so that their joint L2 norm
        does not exceed grad_clip
    maxlen_w: maxlen given to HomogeneousData and prepare_data
    optimizer: name of the optimizer factory, resolved with eval()
    batch_size: minibatch size given to HomogeneousData
    saveto: path of the parameter archive; the options are pickled to
        '<saveto>.pkl' and the dictionary to '<saveto>.dictionary.pkl'
    validFreq: evaluate on the dev set every validFreq updates
    lrate: learning rate passed to f_update
    reload_: if set and saveto exists, the saved options and parameters are
        reloaded; the saved options then replace the ones built from the
        arguments above

    Returns:
    None after max_epochs, or (1., 1., 1.) as soon as the cost is NaN/inf
    """

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print(model_options)

    # reload options
    # The loaded dict must be bound to model_options (it used to be assigned
    # to a misspelled name and dropped), so that the model rebuilt below
    # matches the parameters reloaded from saveto.
    if reload_ and os.path.exists(saveto):
        print('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print('Loading dataset')
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print('Creating dictionary')
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print('Dictionary size: ' + str(n_words))
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    # indices 0 and 1 are reserved for the end-of-sentence and unknown tokens
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print('Building model')
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, inps, cost = build_model(tparams, model_options)

    # before any regularizer
    print('Building f_log_probs...', )
    f_log_probs = theano.function(inps, cost, profile=False)
    print('Done')

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print('Building f_cost...', )
    f_cost = theano.function(inps, cost, profile=False)
    print('Done')

    print('Building sentence encoder')
    trng, inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    trng, inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...', )
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum()
                                         for k, t in tparams.iteritems()],
                                    profile=False)

    if grad_clip > 0.:
        # g2 is the squared L2 norm over all gradients taken together
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        # rescale every gradient by the same factor when the joint norm
        # exceeds grad_clip, otherwise leave it untouched
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (grad_clip**2),
                              g / tensor.sqrt(g2) * grad_clip, g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print('Building optimizers...', )
    # (compute gradients), (updates parameters)
    # NOTE: eval() looks the optimizer up by name; only pass trusted values.
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print('Optimization')

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    uidx = 0  # number of updates performed so far
    curr = 0.  # best dev score seen so far
    n_samples = 0

    for eidx in xrange(max_epochs):

        print('Epoch ', eidx)

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x, mask, im = homogeneous_data.prepare_data(x,
                                                        im,
                                                        worddict,
                                                        maxlen=maxlen_w,
                                                        n_words=n_words)

            # Identity test: '== None' would compare elementwise if x is a
            # numpy array and make the 'if' ambiguous.
            if x is None:
                print('Minibatch with zero sample under length ', maxlen_w)
                uidx -= 1
                continue

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)

            if numpy.mod(uidx, validFreq) == 0:

                print('Computing results...')
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['f_senc'] = f_senc
                curr_model['f_ienc'] = f_ienc

                ls = encode_sentences(curr_model, dev[0])
                lim = encode_images(curr_model, dev[1])

                (r1, r5, r10, medr) = i2t(lim, ls)
                print("Image to text: %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr))
                (r1i, r5i, r10i, medri) = t2i(lim, ls)
                print("Text to image: %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri))

                # model selection uses the sum of the six recall values
                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print('Saving...', )
                    params = unzip(tparams)
                    numpy.savez(saveto, **params)
                    # close the options file explicitly so it is flushed
                    with open('%s.pkl' % saveto, 'wb') as f:
                        pkl.dump(model_options, f)
                    print('Done')

        print('Seen %d samples' % n_samples)
コード例 #21
0
ファイル: train.py プロジェクト: zclfly/order-embedding
def trainer(load_from=None, save_dir='snapshots', name='anon', **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print 'reloading...' + load_from
        with open('%s.pkl' % load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v

    model_options = curr_model['options']

    # initialize logger
    import datetime
    timestampedName = datetime.datetime.now().strftime(
        '%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName,
              hyperparams=model_options,
              saveDir='vis/training',
              xLabel='Examples Seen',
              saveFrequency=1)

    print curr_model['options']

    # Load training and development sets
    print 'Loading dataset'
    dataset = load_dataset(model_options['data'],
                           cnn=model_options['cnn'],
                           load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print 'Dictionary size: ' + str(len(worddict))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb'))

    print 'Loading data'
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print 'Building sentence encoder'
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print 'Building errors..'
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    print 'Optimization'

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in xrange(model_options['max_epochs']):

        print 'Epoch ', eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                log.update({'Error': float(cost)}, n_samples)

            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print 'Computing results...'

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs,
                                                              vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (
                    r1, r5, r10, medr, meanr)
                log.update(
                    {
                        'R@1': r1,
                        'R@5': r5,
                        'R@10': r10,
                        'median_rank': medr,
                        'mean_rank': meanr
                    }, n_samples)
                print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (
                    r1i, r5i, r10i, medri, meanri)
                log.update(
                    {
                        'Image2Caption_R@1': r1i,
                        'Image2Caption_R@5': r5i,
                        'Image2CaptionR@10': r10i,
                        'Image2Caption_median_rank': medri,
                        'Image2Caption_mean_rank': meanri
                    }, n_samples)

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print 'Saving...',
                    numpy.savez('%s/%s' % (save_dir, name), **unzip(tparams))
                    print 'Done'
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open(
                            'vis/roc/%s/%s.json' %
                        (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        index = json.load(open('vis/roc/index.json', 'r'))
                    except IOError:
                        index = {model_options['data']: []}

                    models = index[model_options['data']]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)

        print 'Seen %d samples' % n_samples
コード例 #22
0
ファイル: train.py プロジェクト: ivendrov/order-embedding
def trainer(load_from=None, save_dir="snapshots", name="anon", **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print "reloading..." + load_from
        with open("%s.pkl" % load_from, "rb") as f:
            curr_model = pkl.load(f)
    else:
        curr_model["options"] = {}

    for k, v in kwargs.iteritems():
        curr_model["options"][k] = v

    model_options = curr_model["options"]

    # initialize logger
    import datetime

    timestampedName = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + "_" + name

    from logger import Log

    log = Log(
        name=timestampedName, hyperparams=model_options, saveDir="vis/training", xLabel="Examples Seen", saveFrequency=1
    )

    print curr_model["options"]

    # Load training and development sets
    print "Loading dataset"
    dataset = load_dataset(model_options["data"], cnn=model_options["cnn"], load_train=True)
    train = dataset["train"]
    dev = dataset["dev"]

    # Create dictionary
    print "Creating dictionary"
    worddict = build_dictionary(train["caps"] + dev["caps"])
    print "Dictionary size: " + str(len(worddict))
    curr_model["worddict"] = worddict
    curr_model["options"]["n_words"] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open("%s/%s.pkl" % (save_dir, name), "wb"))

    print "Loading data"
    train_iter = datasource.Datasource(train, batch_size=model_options["batch_size"], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print "Building model"
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print "Building sentence encoder"
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print "Building image encoder"
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print "Building f_grad...",
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print "Building errors.."
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model["f_senc"] = f_senc
    curr_model["f_ienc"] = f_ienc
    curr_model["f_err"] = f_err

    if model_options["grad_clip"] > 0.0:
        grads = [maxnorm(g, model_options["grad_clip"]) for g in grads]

    lr = tensor.scalar(name="lr")
    print "Building optimizers...",
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options["optimizer"])(lr, tparams, grads, inps, cost)

    print "Optimization"

    uidx = 0
    curr = 0
    n_samples = 0

    for eidx in xrange(model_options["max_epochs"]):

        print "Epoch ", eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options["lrate"])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print "NaN detected"
                return 1.0, 1.0, 1.0

            if numpy.mod(uidx, model_options["dispFreq"]) == 0:
                print "Epoch ", eidx, "Update ", uidx, "Cost ", cost, "UD ", ud
                log.update({"Error": float(cost)}, n_samples)

            if numpy.mod(uidx, model_options["validFreq"]) == 0:

                print "Computing results..."

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options["batch_size"])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)
                log.update({"R@1": r1, "R@5": r5, "R@10": r10, "median_rank": medr, "mean_rank": meanr}, n_samples)
                print "Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)
                log.update(
                    {
                        "Image2Caption_R@1": r1i,
                        "Image2Caption_R@5": r5i,
                        "Image2CaptionR@10": r10i,
                        "Image2Caption_median_rank": medri,
                        "Image2Caption_mean_rank": meanri,
                    },
                    n_samples,
                )

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print "Saving...",
                    numpy.savez("%s/%s" % (save_dir, name), **unzip(tparams))
                    print "Done"
                    vis_details["hyperparams"] = model_options
                    # Save visualization details
                    with open("vis/roc/%s/%s.json" % (model_options["data"], timestampedName), "w") as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    try:
                        index = json.load(open("vis/roc/index.json", "r"))
                    except IOError:
                        index = {model_options["data"]: []}

                    models = index[model_options["data"]]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open("vis/roc/index.json", "w") as f:
                        json.dump(index, f)

        print "Seen %d samples" % n_samples
コード例 #23
0
ファイル: kip_process.py プロジェクト: truth4oll/kip-bubbles
        '$exists': True
    },
    'name': {
        '$exists': True
    }
}, limit=10000, projection=['description', 'name'])

words = map(lambda i: ((i['name'] + ' ' + i['description']).lower()), items)


#
# load the model
#
model = tools.load_model()

sentence_vectors = tools.encode_sentences(model, words, verbose=True)

print sentence_vectors.shape

from sklearn.neighbors import BallTree

print 'building ball tree'
tree = BallTree(sentence_vectors)

print 'finding nearest neighbor for ' + words[1]
dist, ind = tree.query(sentence_vectors[1], k=3)
print ind

print 'was ' + words[ind[0][0]]

コード例 #24
0
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 19 09:22:17 2017

@author: chahak
"""

import demo, tools, datasets
# Build the convnet and load the embedding model.
net = demo.build_convnet()
model = tools.load_model()

# Encode the training captions of 'f8k' once ...
train = datasets.load_dataset('f8k', load_train=True)[0]
captions = train[0]
vectors = tools.encode_sentences(model, captions, verbose=False)

# ... then retrieve the 5 captions closest to the query image.
demo.retrieve_captions(model, net, captions, vectors, 'child.jpg', k=5)
コード例 #25
0
model_params = '%s_model.pkl' % loadfrom

print 'Building model ...   ',
model_options = pkl.load(open(hyper_params, 'r'))
model = ImgSenRanking(model_options).cuda()
model.load_state_dict(torch.load(model_params))
print 'Done'

test = load_dataset(data, load_test=True)

print 'Dumping data ...   '

curr_model = {}
curr_model['options'] = model_options
curr_model['worddict'] = model_options['worddict']
curr_model['word_idict'] = model_options['word_idict']
curr_model['img_sen_model'] = model

ls, lim = encode_sentences(curr_model,
                           test[0]), encode_images(curr_model, test[1])

# save the using params and model when dumping data
torch.save(ls, '%s_ls.pkl' % saveto)
torch.save(lim, '%s_lim.pkl' % saveto)
pkl.dump(model_options, open('%s_params_dump.pkl' % saveto, 'wb'))
torch.save(model.state_dict(), '%s_model_dump.pkl' % saveto)
json.dump(test[0], open('%s_caps.json' % saveto, 'w'))

print 'ls: ', ls.data.size()
print 'lim: ', lim.data.size()
コード例 #26
0
import tools, evaluation, os

# Hey Kipster!  For this to work, use a python virtualenv
# and pip install -r requirements.txt in IF-root
# you might also need to install numpy or gfortran with your os pkg manager

# First lets make sure the model kinda works
__dirname = os.path.dirname(os.path.realpath(__file__))
model = tools.load_model(__dirname + '/data/coco.npz')
evaluation.evalrank(model, data='coco', split='test')

# Now lets compute sentence vecs for something specific
example_sentences = [
    'black tie women', 'warm winter coat',
    'long dressi gown tuxedo cocktail black_ti'
]
sentence_vectors = tools.encode_sentences(model,
                                          example_sentences,
                                          verbose=True)

print sentence_vectors.shape
print sentence_vectors[0].shape
コード例 #27
0
ファイル: train.py プロジェクト: Peratham/order-embedding
def trainer(load_from=None,
            save_dir='snapshots',
            name='anon',
            **kwargs):
    """
    :param load_from: location to load parameters + options from
    :param name: name of model, used as location to save parameters + options
    """

    curr_model = dict()

    # load old model, including parameters, but overwrite with new options
    if load_from:
        print 'reloading...' + load_from
        with open('%s.pkl'%load_from, 'rb') as f:
            curr_model = pkl.load(f)
    else:
        curr_model['options'] = {}

    for k, v in kwargs.iteritems():
        curr_model['options'][k] = v

    model_options = curr_model['options']

    # initialize logger
    import datetime
    timestampedName = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S') + '_' + name

    from logger import Log
    log = Log(name=timestampedName, hyperparams=model_options, saveDir='vis/training',
              xLabel='Examples Seen', saveFrequency=1)


    print curr_model['options']




    # Load training and development sets
    print 'Loading dataset'
    dataset = load_dataset(model_options['data'], cnn=model_options['cnn'], load_train=True)
    train = dataset['train']
    dev = dataset['dev']

    # Create dictionary
    print 'Creating dictionary'
    worddict = build_dictionary(train['caps']+dev['caps'])
    print 'Dictionary size: ' + str(len(worddict))
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    pkl.dump(curr_model, open('%s/%s.pkl' % (save_dir, name), 'wb'))


    print 'Loading data'
    train_iter = datasource.Datasource(train, batch_size=model_options['batch_size'], worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if load_from is not None and os.path.exists(load_from):
        params = load_params(load_from, params)

    tparams = init_tparams(params)

    inps, cost = build_model(tparams, model_options)

    print 'Building sentence encoder'
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print 'Building image encoder'
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print 'Building errors..'
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err



    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams, grads, inps, cost)

    print 'Optimization'

    uidx = 0
    curr = 0
    n_samples = 0


    
    for eidx in xrange(model_options['max_epochs']):

        print 'Epoch ', eidx

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud
                log.update({'Error': float(cost)}, n_samples)


            if numpy.mod(uidx, model_options['validFreq']) == 0:

                print 'Computing results...'

                # encode sentences efficiently
                dev_s = encode_sentences(curr_model, dev_caps, batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)


                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr), vis_details = t2i(dev_errs, vis_details=True)
                (r1i, r5i, r10i, medri, meanri) = i2t(dev_errs)
                print "Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1, r5, r10, medr, meanr)
                log.update({'R@1': r1, 'R@5': r5, 'R@10': r10, 'median_rank': medr, 'mean_rank': meanr}, n_samples)
                print "Image to text: %.1f, %.1f, %.1f, %.1f, %.1f" % (r1i, r5i, r10i, medri, meanri)
                log.update({'Image2Caption_R@1': r1i, 'Image2Caption_R@5': r5i, 'Image2CaptionR@10': r10i, 'Image2Caption_median_rank': medri, 'Image2Caption_mean_rank': meanri}, n_samples)

                tot = r1 + r5 + r10
                if tot > curr:
                    curr = tot
                    # Save parameters
                    print 'Saving...',
                    numpy.savez('%s/%s'%(save_dir, name), **unzip(tparams))
                    print 'Done'
                    vis_details['hyperparams'] = model_options
                    # Save visualization details
                    with open('vis/roc/%s/%s.json' % (model_options['data'], timestampedName), 'w') as f:
                        json.dump(vis_details, f)
                    # Add the new model to the index
                    index = json.load(open('vis/roc/index.json', 'r'))
                    models = index[model_options['data']]
                    if timestampedName not in models:
                        models.append(timestampedName)

                    with open('vis/roc/index.json', 'w') as f:
                        json.dump(index, f)






        print 'Seen %d samples'%n_samples
コード例 #28
0
def train(margin=0.2,
          dim=300,
          dim_word=300,
          max_epochs=100,
          dispFreq=50,
          validFreq=200,
          grad_clip=2.0,
          maxlen_w=150,
          batch_size=300,
          early_stop=20,
          lrate=0.001,
          reload_=False,
          load_dict=False):
    # Model options
    model_options = {}
    model_options['UM_Corpus'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_word'] = dim_word
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_

    print(model_options)

    # reload options
    if reload_ and os.path.exists(saveto):
        print('reloading...' + saveto)
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print('loading dataset')
    train, dev = load_dataset()

    print 'training samples: ', len(train[0])
    print 'development samples: ', len(dev[0])

    if load_dict:
        with open('%s.dictionary.pkl' % saveto, 'rb') as f:
            worddict = pkl.load(f)
            n_words = len(worddict)
            model_options['n_words'] = n_words
            print('Dictionary size: ', n_words)
    else:
        # Create and save dictionary
        print('Create dictionary')

        worddict = build_dictionary(train[0] + train[1] + dev[0] + dev[1])
        n_words = len(worddict)
        model_options['n_words'] = n_words
        print('Dictionary size: ', n_words)
        with open('%s.dictionary_%s.pkl' % (saveto, run), 'wb') as f:
            pkl.dump(worddict, f)

    # # Inverse dictionary
    # word_idict = dict()
    # for kk, vv in worddict.iteritems():
    #     word_idict[vv] = kk
    # word_idict[0] = '<eos>'
    # word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    # model_options['word_idict'] = word_idict

    # # Each sentence in the minibatch have same length (for encoder)
    # train_iter = HomogeneousData([train[0], train[1]], batch_size=batch_size, maxlen=maxlen_w)

    share_model = LIUMCVC_Encoder(model_options)
    share_model = share_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin)
    loss_fn = loss_fn.cuda()

    params = filter(lambda p: p.requires_grad, share_model.parameters())
    optimizer = torch.optim.Adam(params, lrate)

    # decrease learning rate
    scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=10)

    uidx = 0
    curr = 1e10
    n_samples = 0

    # For Early-stopping
    best_step = 0

    for eidx in xrange(1, max_epochs + 1):

        print('Epoch ', eidx)

        train_data_index = prepare_data(train, worddict)
        for en, cn, en_lengths, cn_lengths, en_index, cn_index in data_generator(
                train_data_index, batch_size):
            uidx += 1
            n_samples += len(en)
            en, cn = share_model(en, en_lengths, en_index, cn, cn_lengths,
                                 cn_index)

            loss = loss_fn(en, cn)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm(params, grad_clip)
            optimizer.step()

            if np.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, '\tUpdate ', uidx, '\tCost ', loss.data.cpu(
                ).numpy()[0]

            if np.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                share_model.eval()
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                # curr_model['word_idict'] = word_idict
                curr_model['en_cn_model'] = share_model

                r_time = time.time()
                fen, fcn = encode_sentences(curr_model, dev, test=True)
                score = devloss(fen, fcn, margin=margin)

                print "Cal Recall@K using %ss" % (time.time() - r_time)
                share_model.train()

                curr_step = uidx / validFreq

                # scheduler.step(score)

                currscore = score
                print 'loss on dev', score
                if currscore < curr:
                    curr = currscore
                    best_step = curr_step

                    # Save model
                    print 'Saving model...',
                    pkl.dump(model_options,
                             open('%s_params_%s.pkl' % (saveto, run), 'wb'))
                    torch.save(share_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, run))
                    print 'Done'

                if curr_step - best_step > early_stop:
                    print 'Early stopping ...'
                    print
                    return

        print 'Seen %d samples' % n_samples
コード例 #29
0
def trainer(data='coco',
            margin=0.2,
            dim=1024,
            dim_image=4096,
            dim_word=300,
            encoder='gru',
            max_epochs=15,
            dispFreq=10,
            decay_c=0.0,
            grad_clip=2.0,
            maxlen_w=150,
            batch_size=128,
            saveto='vse/coco',
            validFreq=100,
            lrate=0.0002,
            concat=True,
            reload_=False):

    hyper_params = {
        'data': data,
        'encoder': encoder,
        'batch_size': batch_size,
        'time': cur_time,
        'lrate': lrate,
        'concat': concat,
    }

    i2t_r1 = dict([('i2t_recall', 'r1')] + hyper_params.items())
    i2t_r5 = dict([('i2t_recall', 'r5')] + hyper_params.items())
    i2t_r10 = dict([('i2t_recall', 'r10')] + hyper_params.items())
    t2i_r1 = dict([('t2i_recall', 'r1')] + hyper_params.items())
    t2i_r5 = dict([('t2i_recall', 'r5')] + hyper_params.items())
    t2i_r10 = dict([('t2i_recall', 'r10')] + hyper_params.items())

    i2t_med = dict([('i2t_med', 'i2t_med')] + hyper_params.items())
    t2i_med = dict([('t2i_med', 't2i_med')] + hyper_params.items())

    agent = Agent(port=5020)
    i2t_r1_agent = agent.register(i2t_r1, 'recall', overwrite=True)
    i2t_r5_agent = agent.register(i2t_r5, 'recall', overwrite=True)
    i2t_r10_agent = agent.register(i2t_r10, 'recall', overwrite=True)
    t2i_r1_agent = agent.register(t2i_r1, 'recall', overwrite=True)
    t2i_r5_agent = agent.register(t2i_r5, 'recall', overwrite=True)
    t2i_r10_agent = agent.register(t2i_r10, 'recall', overwrite=True)

    i2t_med_agent = agent.register(i2t_med, 'median', overwrite=True)
    t2i_med_agent = agent.register(t2i_med, 'median', overwrite=True)

    # Model options
    model_options = {}
    model_options['data'] = data
    model_options['margin'] = margin
    model_options['dim'] = dim
    model_options['dim_image'] = dim_image
    model_options['dim_word'] = dim_word
    model_options['encoder'] = encoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['maxlen_w'] = maxlen_w
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['validFreq'] = validFreq
    model_options['lrate'] = lrate
    model_options['reload_'] = reload_
    model_options['concat'] = concat

    print model_options

    # reload options
    if reload_ and os.path.exists(saveto):
        print 'reloading...' + saveto
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    # Load training and development sets
    print 'loading dataset'
    train, dev = load_dataset(data)[:2]

    # Create and save dictionary
    print 'Create dictionary'
    worddict = build_dictionary(train[0] + dev[0])[0]
    n_words = len(worddict)
    model_options['n_words'] = n_words
    print 'Dictionary size: ' + str(n_words)
    with open('%s.dictionary.pkl' % saveto, 'wb') as f:
        pkl.dump(worddict, f)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    model_options['worddict'] = worddict
    model_options['word_idict'] = word_idict

    # Each sentence in the minibatch have same length (for encoder)
    train_iter = homogeneous_data.HomogeneousData([train[0], train[1]],
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    img_sen_model = ImgSenRanking(model_options)
    img_sen_model = img_sen_model.cuda()

    loss_fn = PairwiseRankingLoss(margin=margin)
    loss_fn = loss_fn.cuda()

    params = filter(lambda p: p.requires_grad, img_sen_model.parameters())
    optimizer = torch.optim.Adam(params, lrate)

    uidx = 0
    curr = 0.0
    n_samples = 0

    for eidx in xrange(max_epochs):

        print 'Epoch ', eidx

        for x, im in train_iter:
            n_samples += len(x)
            uidx += 1

            x_id, im = homogeneous_data.prepare_data(x,
                                                     im,
                                                     worddict,
                                                     maxlen=maxlen_w,
                                                     n_words=n_words)

            if x_id is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            x_id = Variable(torch.from_numpy(x_id).cuda())
            im = Variable(torch.from_numpy(im).cuda())
            # Update
            ud_start = time.time()
            x, im = img_sen_model(x_id, im, x)
            cost = loss_fn(im, x)
            optimizer.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm(params, grad_clip)
            optimizer.step()
            ud = time.time() - ud_start

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost.data.cpu(
                ).numpy()[0], 'UD ', ud

            if numpy.mod(uidx, validFreq) == 0:

                print 'Computing results...'
                curr_model = {}
                curr_model['options'] = model_options
                curr_model['worddict'] = worddict
                curr_model['word_idict'] = word_idict
                curr_model['img_sen_model'] = img_sen_model

                ls, lim = encode_sentences(curr_model, dev[0]), encode_images(
                    curr_model, dev[1])

                r1, r5, r10, medr = 0.0, 0.0, 0.0, 0
                r1i, r5i, r10i, medri = 0.0, 0.0, 0.0, 0
                r_time = time.time()
                if data == 'arch' or data == 'arch_small':
                    (r1, r5, r10, medr) = i2t_arch(lim, ls)
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5,
                                                                     r10, medr)
                    (r1i, r5i, r10i, medri) = t2i_arch(lim, ls)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (
                        r1i, r5i, r10i, medri)
                else:
                    (r1, r5, r10, medr) = i2t(lim, ls)
                    print "Image to text: %.1f, %.1f, %.1f, %.1f" % (r1, r5,
                                                                     r10, medr)
                    (r1i, r5i, r10i, medri) = t2i(lim, ls)
                    print "Text to image: %.1f, %.1f, %.1f, %.1f" % (
                        r1i, r5i, r10i, medri)

                print "Cal Recall@K using %ss" % (time.time() - r_time)

                record_num = uidx / validFreq
                agent.append(i2t_r1_agent, record_num, r1)
                agent.append(i2t_r5_agent, record_num, r5)
                agent.append(i2t_r10_agent, record_num, r10)
                agent.append(t2i_r1_agent, record_num, r1i)
                agent.append(t2i_r5_agent, record_num, r5i)
                agent.append(t2i_r10_agent, record_num, r10i)

                agent.append(i2t_med_agent, record_num, medr)
                agent.append(t2i_med_agent, record_num, medri)

                currscore = r1 + r5 + r10 + r1i + r5i + r10i
                if currscore > curr:
                    curr = currscore

                    # Save model
                    print 'Saving model...',
                    pkl.dump(
                        model_options,
                        open('%s_params_%s.pkl' % (saveto, encoder), 'wb'))
                    torch.save(img_sen_model.state_dict(),
                               '%s_model_%s.pkl' % (saveto, encoder))
                    print 'Done'

        print 'Seen %d samples' % n_samples
コード例 #30
0
def _ensure_parent_dir(path):
    """Create the directory that will hold *path* unless it already exists."""
    parent = os.path.dirname(path)
    if not parent or os.path.isdir(parent):
        return
    try:
        os.makedirs(parent)
    except OSError:
        # Tolerate the directory appearing concurrently; anything else
        # (permissions, a file in the way, ...) is a real error.
        if not os.path.isdir(parent):
            raise


def _dump_pickle(obj, path):
    """Pickle *obj* into *path*, closing the file handle afterwards."""
    with open(path, 'wb') as f:
        pkl.dump(obj, f)


def trainer(**kwargs):
    """
    Train the model according to input params
    Info about input params is available in parameters.py

    Keys read from the options in this function: 'reload_', 'load_from',
    'data', 'embedding', 'data_path', 'test_subset', 'batch_size',
    'grad_clip', 'optimizer', 'max_epochs', 'lrate', 'dispFreq' and
    'validFreq'; the whole option dict is also forwarded to the helper
    functions that build file names and the model.

    When 'reload_' is set, previously saved options, training state and
    weights are loaded (from the best or the last checkpoint depending on
    'load_from') and the options passed in here override the loaded ones.

    Returns:
    The 'solution' OrderedDict describing the best checkpoint known at the
    end of training, or None if no epoch was run and nothing was reloaded.
    If the cost becomes NaN or Inf, training stops and the tuple
    (1., 1., 1.) is returned instead.
    """
    # Timing
    print('Starting time:', datetime.now())
    sys.stdout.flush()
    t_start_train = time.time()

    # Model options
    # load old model, including parameters, but overwrite with new options

    # Extract model options from arguments
    model_options = dict(kwargs)

    # Print input options
    print('PARAMETERS BEFORE LOADING:')
    for k, v in model_options.items():
        print('{:>26}: {}'.format(k, v))
    sys.stdout.flush()

    # Stays None until a solution is reloaded, a best model is found, or the
    # first epoch finishes.
    solution = None

    # Reload options if required
    curr_model = dict()
    if model_options['reload_']:
        # Reload model parameters
        opt_filename_reload = get_opt_filename(model_options, previous=True)
        print('reloading...', opt_filename_reload)
        sys.stdout.flush()
        # NOTE: pickle files are assumed to be this program's own output;
        # never point these paths at untrusted data.
        try:
            with open(opt_filename_reload, 'rb') as f:
                curr_model = pkl.load(f)
        except Exception:
            # Best effort: fall back to the supplied options, but let
            # KeyboardInterrupt / SystemExit propagate.
            print(
                'Failed to reload parameters, try to use only feeded parameters'
            )
            curr_model['options'] = {}

        # Check if we reload from best model or last model
        if model_options['load_from'] in ['Best', 'best', 'B', 'b']:
            load_from_best = True
            print('Loading from Best saved model in validation results')
        elif model_options['load_from'] in ['Last', 'last', 'L', 'l']:
            load_from_best = False
            print('Loading from Last saved model')
        else:
            print('Unkown choice for "load_from" parameter',
                  model_options['load_from'])
            print('Please choose one of:', ['Best', 'best', 'B', 'b'],
                  ['Last', 'last', 'L', 'l'])
            print('Using Last as default')
            load_from_best = False

        # Reload end-point parameters
        state_filename = get_sol_filename(model_options,
                                          best=load_from_best,
                                          previous=True)
        print('reloading...', state_filename)
        sys.stdout.flush()
        try:
            with open(state_filename, 'rb') as f:
                state_params = pkl.load(f)
            if load_from_best:
                # The "best" file stores the solution itself.
                init_epoch = state_params['epoch']
                solution = state_params
            else:
                # The "last" file wraps the solution and records the last
                # finished epoch, so training resumes with the next one.
                init_epoch = state_params['epoch_done'] + 1
                solution = state_params['solution']
            best_val_score = solution['best_val_score']
            n_samples = solution['samples_seen']
        except Exception:
            print('Failed to reload state parameters, starting from 0')
            init_epoch = 0
            best_val_score = 0
            n_samples = 0

    else:
        curr_model['options'] = {}
        init_epoch = 0
        best_val_score = 0
        n_samples = 0

    # Overwrite loaded options with input options
    curr_model['options'].update(kwargs)
    model_options = curr_model['options']

    # Print final options loaded
    if model_options['reload_']:
        print('PARAMETERS AFTER LOADING:')
        for k, v in model_options.items():
            print('{:>26}: {}'.format(k, v))
        sys.stdout.flush()

    # Load training and development sets
    print('Loading dataset')
    sys.stdout.flush()

    dataset = load_dataset(dataset_name=model_options['data'],
                           embedding=model_options['embedding'],
                           path_to_data=model_options['data_path'],
                           test_subset=model_options['test_subset'],
                           load_train=True,
                           fold=0)
    train = dataset['train']
    dev = dataset['val']

    # Create word dictionary
    print('Creating dictionary')
    sys.stdout.flush()
    worddict = build_dictionary(train['caps'] + dev['caps'])
    print('Dictionary size: ' + str(len(worddict)))
    sys.stdout.flush()
    curr_model['worddict'] = worddict
    curr_model['options']['n_words'] = len(worddict) + 2

    # save model
    opt_filename_save = get_opt_filename(model_options, previous=False)
    print('Saving model parameters in', opt_filename_save)
    sys.stdout.flush()
    _ensure_parent_dir(opt_filename_save)
    _dump_pickle(curr_model, opt_filename_save)

    # Load data from dataset
    print('Loading data')
    sys.stdout.flush()
    train_iter = datasource.Datasource(train,
                                       batch_size=model_options['batch_size'],
                                       worddict=worddict)
    dev = datasource.Datasource(dev, worddict=worddict)
    dev_caps, dev_ims = dev.all()

    print('Building model')
    sys.stdout.flush()
    params = init_params(model_options)

    # reload network parameters, ie. weights
    if model_options['reload_']:
        params_filename = get_npz_filename(model_options,
                                           best=load_from_best,
                                           previous=True)
        params = load_params(params_filename, params)

    tparams = init_tparams(params)
    inps, cost = build_model(tparams, model_options)

    print('Building sentence encoder')
    sys.stdout.flush()
    inps_se, sentences = build_sentence_encoder(tparams, model_options)
    f_senc = theano.function(inps_se, sentences, profile=False)

    print('Building image encoder')
    sys.stdout.flush()
    inps_ie, images = build_image_encoder(tparams, model_options)
    f_ienc = theano.function(inps_ie, images, profile=False)

    print('Building f_grad...')
    sys.stdout.flush()
    grads = tensor.grad(cost, wrt=itemlist(tparams))

    print('Building errors...')
    sys.stdout.flush()
    inps_err, errs = build_errors(model_options)
    f_err = theano.function(inps_err, errs, profile=False)

    curr_model['f_senc'] = f_senc
    curr_model['f_ienc'] = f_ienc
    curr_model['f_err'] = f_err

    if model_options['grad_clip'] > 0.:
        grads = [maxnorm(g, model_options['grad_clip']) for g in grads]

    lr = tensor.scalar(name='lr')
    print('Building optimizers...')
    sys.stdout.flush()
    # (compute gradients), (updates parameters)
    # NOTE(security): eval() resolves the optimizer by name, so the
    # 'optimizer' option must only ever come from trusted configuration.
    f_grad_shared, f_update = eval(model_options['optimizer'])(lr, tparams,
                                                               grads, inps,
                                                               cost)

    # Get names for the files to save model and solution
    sol_filename_best = get_sol_filename(model_options,
                                         best=True,
                                         previous=False)
    sol_filename_last = get_sol_filename(model_options,
                                         best=False,
                                         previous=False)
    params_filename_best = get_npz_filename(model_options,
                                            best=True,
                                            previous=False)
    params_filename_last = get_npz_filename(model_options,
                                            best=False,
                                            previous=False)

    print('PATHS TO MODELS:')
    for filename in [
            sol_filename_best, sol_filename_last, params_filename_best,
            params_filename_last
    ]:
        print(filename)
        sys.stdout.flush()
        # Fail now rather than at the first checkpoint if the target
        # directory cannot be created.
        _ensure_parent_dir(filename)

    # Start optimization
    print('Optimization')
    sys.stdout.flush()

    uidx = 0

    # Timing
    t_start = time.time()
    print('Starting time:', datetime.now())

    for eidx in range(init_epoch, model_options['max_epochs']):
        t_start_epoch = time.time()
        print('Epoch ', eidx)
        sys.stdout.flush()

        for x, mask, im in train_iter:
            n_samples += x.shape[1]
            uidx += 1

            # Update
            ud_start = time.time()
            cost = f_grad_shared(x, mask, im)
            f_update(model_options['lrate'])
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print('NaN detected')
                sys.stdout.flush()
                # Sentinel return for a diverged run (differs in shape from
                # the normal return value).
                return 1., 1., 1.

            if numpy.mod(uidx, model_options['dispFreq']) == 0:
                print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ',
                      ud)
                sys.stdout.flush()

            if numpy.mod(uidx, model_options['validFreq']) == 0:
                print('Computing results...')
                sys.stdout.flush()

                # encode sentences efficiently
                dev_s = encode_sentences(
                    curr_model,
                    dev_caps,
                    batch_size=model_options['batch_size'])
                dev_i = encode_images(curr_model, dev_ims)

                # compute errors
                dev_errs = compute_errors(curr_model, dev_s, dev_i)

                # compute ranking error
                (r1, r5, r10, medr, meanr) = i2t(dev_errs)
                (r1i, r5i, r10i, medri, meanri) = t2i(dev_errs)
                print("Text to image (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1i, r5i, r10i, medri, meanri))
                sys.stdout.flush()
                print("Image to text (dev set): %.1f, %.1f, %.1f, %.1f, %.1f" %
                      (r1, r5, r10, medr, meanr))
                sys.stdout.flush()

                # Score
                val_score = r1 + r5 + r10 + r1i + r5i + r10i
                if val_score > best_val_score:

                    print('BEST MODEL FOUND')
                    print('Score:', val_score)
                    print('Previous best score:', best_val_score)
                    best_val_score = val_score
                    # Join in a results dict
                    results_dict = build_results_dict(r1, r5, r10, medr, r1i,
                                                      r5i, r10i, medri)

                    # Save parameters
                    print('Saving...', end=' ')
                    sys.stdout.flush()
                    numpy.savez(params_filename_best, **unzip(tparams))
                    print('Done')
                    sys.stdout.flush()

                    # Update solution
                    solution = OrderedDict([
                        ('epoch', eidx), ('update', uidx),
                        ('samples_seen', n_samples),
                        ('best_val_score', best_val_score),
                        ('best_val_res', results_dict),
                        ('time_until_results',
                         str(timedelta(seconds=(time.time() - t_start_train))))
                    ])
                    _dump_pickle(solution, sol_filename_best)

        print('Seen %d samples' % n_samples)
        sys.stdout.flush()

        # Timing
        t_epoch = time.time() - t_start_epoch
        t_epoch_avg = (time.time() - t_start) / (eidx + 1 - (init_epoch))
        print('Time for this epoch:', str(timedelta(seconds=t_epoch)),
              'Average:', str(timedelta(seconds=t_epoch_avg)))
        t_2_complete = t_epoch_avg * (model_options['max_epochs'] - (eidx + 1))
        print('Time since start session:',
              str(timedelta(seconds=time.time() - t_start)),
              'Estimated time to complete training:',
              str(timedelta(seconds=t_2_complete)))
        print('Current time:', datetime.now())
        sys.stdout.flush()

        # Save current model
        if solution is None:
            # Nothing reloaded and no best model found yet: record a
            # placeholder solution (without validation results).
            solution = OrderedDict([
                ('epoch', eidx), ('update', uidx), ('samples_seen', n_samples),
                ('best_val_score', best_val_score),
                ('time_until_results',
                 str(timedelta(seconds=(time.time() - t_start_train))))
            ])
        state_params = OrderedDict([('epoch_done', eidx),
                                    ('solution', solution)])
        _dump_pickle(state_params, sol_filename_last)

        # Save parameters
        print('Saving LAST npz...', end=' ')
        sys.stdout.flush()
        numpy.savez(params_filename_last, **unzip(tparams))
        print('Done')
        sys.stdout.flush()

    return solution