예제 #1
0
def validate(opt, val_loader, model):
    """Run one validation pass and return the recall sum.

    Encodes the validation data with ``encode_data``, builds the
    caption/image similarity matrix with the cross-attention variant
    selected by ``opt.cross_attn``, computes retrieval metrics in both
    directions with ``i2t`` and ``t2i``, logs them through ``logging`` and
    ``tb_logger``, and returns the sum of the R@1/R@5/R@10 values.

    Args:
        opt: Options object. This function reads ``opt.log_step``,
            ``opt.cross_attn`` (``'t2i'`` or ``'i2t'``) and
            ``opt.shard_size``; ``opt`` is also forwarded to the
            similarity helper.
        val_loader: Validation data loader, forwarded to ``encode_data``.
        model: Model forwarded to ``encode_data``; ``model.Eiters`` is used
            as the step when logging to tensorboard.

    Returns:
        ``r1 + r5 + r10 + r1i + r5i + r10i`` (used for early stopping).

    Raises:
        NotImplementedError: If ``opt.cross_attn`` is neither ``'t2i'``
            nor ``'i2t'``.
    """
    # Evaluation only: disable gradient tracking for the whole pass.
    with torch.no_grad():
        # compute the encoding for all the validation images and captions
        img_embs, cap_embs, cap_lens, freqs = encode_data(
            model, val_loader, opt.log_step, logging.info)

        # A stride of 1 keeps every image embedding.
        img_embs = numpy.array(
            [img_embs[i] for i in range(0, len(img_embs), 1)])
        start = time.time()

        # similarity between every caption and image in the validation set
        if opt.cross_attn == 't2i':
            sims, _ = shard_xattn_t2i(img_embs,
                                      cap_embs,
                                      cap_lens,
                                      freqs,
                                      opt,
                                      shard_size=opt.shard_size)
        elif opt.cross_attn == 'i2t':
            sims, _ = shard_xattn_i2t(img_embs,
                                      cap_embs,
                                      cap_lens,
                                      freqs,
                                      opt,
                                      shard_size=opt.shard_size)
        else:
            raise NotImplementedError
        end = time.time()
        print("calculate similarity time:", end - start)

        # caption retrieval (find the right text with every image)
        (r1, r5, r10, r20, r50, medr, meanr) = i2t(img_embs, cap_embs,
                                                   cap_lens, sims)
        logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f %.1f %.1f",
                     r1, r5, r10, r20, r50, medr, meanr)
        # image retrieval (find the right image for every text)
        # The mean rank gets its own name so that it does not overwrite the
        # image-to-text ``meanr`` computed above.
        (r1i, r5i, r10i, r20i, r50i, medri,
         meanri) = t2i(img_embs, cap_embs, cap_lens, sims)
        logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f %.1f %.1f",
                     r1i, r5i, r10i, r20i, r50i, medri, meanri)
        # sum of recalls to be used for early stopping
        currscore = r1 + r5 + r10 + r1i + r5i + r10i

    # record metrics in tensorboard
    step = model.Eiters
    metrics = (
        ('r1', r1),
        ('r5', r5),
        ('r10', r10),
        ('medr', medr),
        ('meanr', meanr),
        ('r1i', r1i),
        ('r5i', r5i),
        ('r10i', r10i),
        ('medri', medri),
        ('meanri', meanri),
        ('rsum', currscore),
    )
    for tag, value in metrics:
        tb_logger.log_value(tag, value, step=step)

    return currscore
예제 #2
0
def validate(opt, val_loader, model):
    """Run one validation pass and return the recall sum.

    Encodes the validation data with ``encode_data``, keeps every fifth
    image embedding, builds the similarity matrix with the cross-attention
    variant selected by ``opt.cross_attn``, computes retrieval metrics in
    both directions with ``i2t`` and ``t2i``, logs them through ``logging``
    and ``tb_logger``, and returns the sum of the R@1/R@5/R@10 values.

    Args:
        opt: Options object. This function reads ``opt.log_step`` and
            ``opt.cross_attn`` (``'t2i'`` or ``'i2t'``); ``opt`` is also
            forwarded to the similarity helper.
        val_loader: Validation data loader, forwarded to ``encode_data``.
        model: Model forwarded to ``encode_data``; ``model.Eiters`` is used
            as the step when logging to tensorboard.

    Returns:
        ``r1 + r5 + r10 + r1i + r5i + r10i`` (used for early stopping).

    Raises:
        NotImplementedError: If ``opt.cross_attn`` is neither ``'t2i'``
            nor ``'i2t'``.
    """
    # compute the encoding for all the validation images and captions
    img_embs, cap_embs, cap_lens = encode_data(model, val_loader, opt.log_step,
                                               logging.info)

    # Keep one image embedding out of every five.
    # NOTE(review): presumably each image is repeated once per caption
    # (5 captions per image) -- confirm against the data loader.
    img_embs = numpy.array([img_embs[i] for i in range(0, len(img_embs), 5)])

    print("Img shape in validate:", img_embs.shape)

    start = time.time()
    if opt.cross_attn == 't2i':
        sims = shard_xattn_t2i(img_embs,
                               cap_embs,
                               cap_lens,
                               opt,
                               shard_size=128)
    elif opt.cross_attn == 'i2t':
        sims = shard_xattn_i2t(img_embs,
                               cap_embs,
                               cap_lens,
                               opt,
                               shard_size=128)
    else:
        raise NotImplementedError
    end = time.time()
    print("calculate similarity time:", end - start)

    # caption retrieval
    (r1, r5, r10, medr, meanr) = i2t(img_embs, cap_embs, cap_lens, sims)
    logging.info("Image to text: %.1f, %.1f, %.1f, %.1f, %.1f",
                 r1, r5, r10, medr, meanr)
    # image retrieval
    # The mean rank gets its own name so that it does not overwrite the
    # image-to-text ``meanr`` computed above.
    (r1i, r5i, r10i, medri, meanri) = t2i(img_embs, cap_embs, cap_lens, sims)
    logging.info("Text to image: %.1f, %.1f, %.1f, %.1f, %.1f",
                 r1i, r5i, r10i, medri, meanri)
    # sum of recalls to be used for early stopping
    currscore = r1 + r5 + r10 + r1i + r5i + r10i

    # record metrics in tensorboard
    step = model.Eiters
    metrics = (
        ('r1', r1),
        ('r5', r5),
        ('r10', r10),
        ('medr', medr),
        ('meanr', meanr),
        ('r1i', r1i),
        ('r5i', r5i),
        ('r10i', r10i),
        ('medri', medri),
        ('meanri', meanri),
        ('rsum', currscore),
    )
    for tag, value in metrics:
        tb_logger.log_value(tag, value, step=step)

    return currscore