Ejemplo n.º 1
0
 def rank_coco(self=None):
     """Run the ranking evaluation on the test split, labelled "COCO test".

     ``self`` is unused; it defaults to None so the function can be called
     with or without a positional argument.
     """
     # Shuffled stream over the test split, batched at ``test_size`` examples.
     stream = DataETL.getFinalStream(teX, teY, sources=sources,
                         sources_k=sources_k, batch_size=test_size,
                         shuffle=True)
     # Call form of print: identical output on Python 2, valid on Python 3,
     # and consistent with the print(...) calls used elsewhere in this code.
     print("COCO test")
     rank_function(stream)
Ejemplo n.º 2
0
def trainencoder(
      sources = ("image_vects", "word_vects")
    , sources_k = ("image_vects_k", "word_vects_k")
    , batch_size=128
    , embedding_dim=300
    , n_captions=5
    , n_sbu=None
    , separate_emb=False
    , test_size=1000 # per dataset
    , mode='dev'
    ):
    """Train a joint image/sentence embedding with a pairwise ranking loss.

    Loads the train/test splits, builds an ``Encoder`` and a margin-based
    ranking cost over (image, sentence) pairs and their contrastive
    counterparts, then runs a Blocks ``MainLoop``. After every epoch the
    embedding function(s) are saved and a ranking evaluation is run.

    Parameters
    ----------
    sources : tuple of str
        Names given to the symbolic image matrix and word tensor3; also
        passed to ``DataETL.getFinalStream``.
    sources_k : tuple of str
        Same as ``sources`` but for the contrastive inputs.
    batch_size : int
        Batch size of the training and monitoring streams.
    embedding_dim : int
        Passed to ``Encoder`` as ``embedding_dim``.
    n_captions : int
        Passed to ``coco()``; not used when ``mode`` is
        ``"coco120k+flickr38k"``.
    n_sbu : int or None
        When truthy (and ``mode`` is not ``"coco120k+flickr38k"``), SBU
        splits are loaded with ``sbu()`` and appended to the COCO splits.
        The value itself is only used as a flag and in the saved model
        name; it is not passed to ``sbu()``.
    separate_emb : bool
        When true, separate image-only and text-only embedding functions
        are also compiled and saved (suffixes ``_Img`` and ``_Txt``).
    test_size : int
        Passed to ``coco()`` / ``sbu()`` and used as the batch size of the
        ranking streams.
    mode : str
        ``"coco120k+flickr38k"`` selects the combined COCO + Flickr file
        lists; any other value is forwarded to ``coco()`` / ``sbu()``.

    Returns
    -------
    None
        The function returns after ``main_loop.run()`` finishes.
    """
    # SBU test split. Stays None unless SBU data is actually loaded below, so
    # the per-epoch ranking callback can tell whether there is anything to
    # rank (previously this raised NameError in "coco120k+flickr38k" mode
    # when n_sbu was set).
    sbuteX = sbuteY = None

    if mode == "coco120k+flickr38k":
        # (loader, dataType, number of training examples) per dataset.
        # The loaders are called directly rather than eval()-ing strings.
        datasets = [
              (cocoXYFilenames, 'train2014', 80000)
            , (cocoXYFilenames, 'val2014', 40000)
            , (flickrXYFilenames, '8k', 8000)
            , (flickrXYFilenames, '30k', 30000)
            ]

        # Accumulated across datasets in the order (trX, teX, trY, teY).
        XYsplit_cum = ([], [], [], [])
        for loader, data_type, ntrain in datasets:
            X, Y, _ = loader(dataType=data_type)
            XYsplit = train_test_split(X, Y, train_size=ntrain)
            for accumulated, part in zip(XYsplit_cum, XYsplit):
                accumulated.extend(part)

        trX, teX, trY, teY = XYsplit_cum
    else:
        trX, teX, trY, teY = coco(mode=mode, n_captions=n_captions, test_size=test_size)
        if n_sbu:
            sbutrX, sbuteX, sbutrY, sbuteY = sbu(mode=mode, test_size=test_size)
            pairs = (
                  (trX, sbutrX)
                , (teX, sbuteX)
                , (trY, sbutrY)
                , (teY, sbuteY)
                )

            # Append the SBU splits in place. Splits that are not lists are
            # left untouched.
            # NOTE(review): this also appends the SBU test data to teX/teY,
            # which rank_coco uses -- confirm that is intended.
            for coco_data, sbu_data in pairs:
                if isinstance(coco_data, list):
                    coco_data.extend(sbu_data)

    print("n_train: %d" % len(trX))
    print("n_test: %d" % len(teX))

    # # # # # # # # # # #
    # Modeling Building #
    # # # # # # # # # # #

    s = Encoder(
          image_feature_dim=4096
        , embedding_dim=embedding_dim
        , biases_init=Constant(0.)
        , weights_init=Uniform(width=0.08)
        )
    s.initialize()

    image_vects = tensor.matrix(sources[0]) # named to match the source name
    word_vects = tensor.tensor3(sources[1]) # named to match the source name
    image_vects_k = tensor.matrix(sources_k[0]) # named to match the contrastive source name
    word_vects_k = tensor.tensor3(sources_k[1]) # named to match the contrastive source name

    # image_vects.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects.tag.test_value = np.zeros((2, 15, 50), dtype='float32')
    # image_vects_k.tag.test_value = np.zeros((2, 4096), dtype='float32')
    # word_vects_k.tag.test_value = np.zeros((2, 15, 50), dtype='float32')

    # learned image embedding, learned sentence embedding
    lim, ls = s.apply(image_vects, word_vects)

    # learned contrastive image embedding, learned contrastive sentence embedding
    lcim, lcs = s.apply(image_vects_k, word_vects_k)

    # identical cost code thanks to Ryan Kiros
    # https://github.com/youralien/skip-thoughts/blob/master/eval_rank.py
    lim = l2norm(lim)
    lcim = l2norm(lcim)
    ls = l2norm(ls)
    lcs = l2norm(lcs)

    margin = 0.2 # alpha term should not be more than 1

    # Image side: margin - score(image, sentence) + score(image, contrastive sentence)
    cost_im = margin - (lim * ls).sum(axis=1) + (lim * lcs).sum(axis=1)
    cost_im = cost_im * (cost_im > 0.) # this is like the max(0, pairwise-ranking-loss)
    cost_im = cost_im.sum(0)

    # Sentence side: margin - score(sentence, image) + score(sentence, contrastive image)
    cost_s = margin - (ls * lim).sum(axis=1) + (ls * lcim).sum(axis=1)
    cost_s = cost_s * (cost_s > 0.) # this is like max(0, pairwise-ranking-loss)
    cost_s = cost_s.sum(0)

    cost = cost_im + cost_s
    cost.name = "pairwise_ranking_loss"

    # function(s) to produce embedding
    if separate_emb:
        img_encoder = theano.function([image_vects], lim)
        txt_encoder = theano.function([word_vects], ls)
    f_emb = theano.function([image_vects, word_vects], [lim, ls])

    sbuname = "sbu%d+" % n_sbu if n_sbu else ''
    name = "%sproject1.%s.jointembedder" % (sbuname, mode)
    savename = MODEL_FILES_DIR + name

    def save_function(self):
        """Save the compiled embedding function(s) under ``savename``."""
        if separate_emb:
            ModelIO.save(
                  img_encoder
                , savename + "_Img")
            ModelIO.save(
                  txt_encoder
                , savename + "_Txt")
        ModelIO.save(f_emb, savename)
        print("Similarity Embedding function(s) saved while training")

    def rank_function(stream):
        """Embed the first batch of ``stream`` and run the ranking evaluation."""
        # next(...) works on Python 2.6+ and Python 3, unlike the .next() method.
        images, captions, _0, _1 = next(stream.get_epoch_iterator())
        image_embs, caption_embs = f_emb(images, captions)
        ModelEval.ImageSentenceRanking(image_embs, caption_embs)

    def rank_coco(self=None):
        """Rank one shuffled ``test_size`` batch drawn from teX / teY."""
        stream = DataETL.getFinalStream(teX, teY, sources=sources,
                            sources_k=sources_k, batch_size=test_size,
                            shuffle=True)
        print("COCO test")
        rank_function(stream)

    def rank_sbu(self=None):
        """Rank one shuffled ``test_size`` batch drawn from the SBU test split."""
        stream = DataETL.getFinalStream(sbuteX, sbuteY, sources=sources,
                            sources_k=sources_k, batch_size=test_size,
                            shuffle=True)
        print("SBU test")
        rank_function(stream)

    def rank_em(self=None):
        """Per-epoch callback: rank COCO, and SBU when an SBU split was loaded."""
        rank_coco()
        if n_sbu and sbuteX is not None:
            rank_sbu()

    cg = ComputationGraph(cost)

    # # # # # # # # # # #
    # Modeling Training #
    # # # # # # # # # # #

    algorithm = GradientDescent(
          cost=cost
        , parameters=cg.parameters
        , step_rule=Adam(learning_rate=0.0002)
        )
    main_loop = MainLoop(
          model=Model(cost)
        , data_stream=DataETL.getFinalStream(trX, trY, sources=sources,
              sources_k=sources_k, batch_size=batch_size)
        , algorithm=algorithm
        , extensions=[
              DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(trX, trY, sources=sources,
                      sources_k=sources_k, batch_size=batch_size, shuffle=True)
                , prefix='train')
            , DataStreamMonitoring(
                  [cost]
                , DataETL.getFinalStream(teX, teY, sources=sources,
                      sources_k=sources_k, batch_size=batch_size, shuffle=True)
                , prefix='test')
            , UserFunc(save_function, after_epoch=True)
            , UserFunc(rank_em, after_epoch=True)
            , Printing()
            , LogToFile('logs/%s.csv' % name)
            ]
        )
    main_loop.run()
Ejemplo n.º 3
0
 def rank_sbu(self=None):
     """Run the ranking evaluation on the SBU test split, labelled "SBU test".

     ``self`` is unused; it defaults to None so the function can be called
     with or without a positional argument.
     """
     # Shuffled stream over the SBU test split, batched at ``test_size`` examples.
     stream = DataETL.getFinalStream(sbuteX, sbuteY, sources=sources,
                         sources_k=sources_k, batch_size=test_size,
                         shuffle=True)
     # Call form of print: identical output on Python 2, valid on Python 3,
     # and consistent with the print(...) calls used elsewhere in this code.
     print("SBU test")
     rank_function(stream)