# Code example #1
def main(params):
    """Evaluate a pickled captioning checkpoint on the test split.

    For every test image the top beam-search caption is generated, printed
    next to the first ground-truth sentence, and collected. Afterwards the
    function writes intermediate BLEU files into ``eval/`` and invokes
    ``eval/multi-bleu.perl``, dumps the per-image result struct as JSON,
    writes the captions results file, and runs ``eval_tools.metrics``.

    Args:
        params: dict of settings. Keys read here:
            ``checkpoint_path`` - path of the pickled checkpoint,
            ``max_images`` - forwarded to the data provider's ``iterImages``,
            ``dump_folder`` - if non-empty, evaluated images are copied there,
            ``beam_size`` - forwarded to ``BatchGenerator.predict``,
            ``result_struct_filename`` - JSON output path for the result blob,
            ``out_dir`` - directory for the captions results file.

    Returns:
        Whatever ``metrics.run(dataset, alg_name, params["out_dir"])`` returns.

    Raises:
        OSError: if the dump folder cannot be created.
        IOError: if the checkpoint or any output file cannot be opened
            (including a missing ``eval/`` directory).
    """
    import shutil  # local import: only this function needs it

    checkpoint_path = params["checkpoint_path"]
    max_images = params["max_images"]
    dump_folder = params["dump_folder"]

    # load the checkpoint
    print("loading checkpoint %s" % (checkpoint_path,))
    # NOTE(security): unpickling executes arbitrary code embedded in the file;
    # only load checkpoints that come from a trusted source.
    with open(checkpoint_path, "rb") as checkpoint_file:
        checkpoint = pickle.load(checkpoint_file)
    checkpoint_params = checkpoint["params"]
    dataset = checkpoint_params["dataset"]
    model = checkpoint["model"]
    ixtoword = checkpoint["ixtoword"]

    if dump_folder:
        print("creating dump folder " + dump_folder)
        # os.makedirs instead of a shell "mkdir -p": no quoting problems with
        # spaces or shell metacharacters in the folder name.
        if not os.path.isdir(dump_folder):
            os.makedirs(dump_folder)

    # fetch the data provider
    dp = getDataProvider(dataset)

    blob = {}  # output blob which we will dump to JSON for visualizing the results
    blob["params"] = params
    blob["checkpoint_params"] = checkpoint_params
    blob["imgblobs"] = []

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {"beam_size": params["beam_size"]}  # loop-invariant
    all_references = []
    all_candidates = []
    captions_res = []
    for n, img in enumerate(dp.iterImages(split="test", max_images=max_images), 1):
        print("image %d/%d:" % (n, max_images))
        # one space-joined string per human-provided sentence
        references = [" ".join(x["tokens"]) for x in img["sentences"]]
        Ys = BatchGenerator.predict([{"image": img}], model, checkpoint_params, **kwparams)

        img_blob = {}  # we will build this up
        img_blob["img_path"] = img["local_file_path"]
        img_blob["imgid"] = img["imgid"]
        img_blob["id"] = img["id"]

        if dump_folder:
            # copy source file to some folder. This makes it easier to distribute results
            # into a webpage, because all images that were predicted on are in a single folder
            source_file = img["local_file_path"]
            target_file = os.path.join(dump_folder, os.path.basename(source_file))
            try:
                shutil.copy(source_file, target_file)
            except EnvironmentError as err:
                # The previous shell "cp" did not abort the run on failure;
                # keep that, but make the failure visible.
                print("warning: could not copy %s to %s: %s" % (source_file, target_file, err))

        # encode the human-provided references; only the first one is printed
        if references:
            print("GT: " + references[0])
        img_blob["references"] = [{"text": gtsent} for gtsent in references]

        # now evaluate and encode the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = " ".join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print("PRED: (%f) %s" % (top_prediction[0], candidate))

        # save for later eval
        all_references.append(references)
        all_candidates.append(candidate)
        captions_res.append({"image_id": img_blob["id"], "caption": candidate})
        img_blob["candidate"] = {"text": candidate, "logprob": top_prediction[0]}
        blob["imgblobs"].append(img_blob)

    # use perl script to eval BLEU score for fair comparison to other research work
    # first write intermediate files
    print("writing intermediate files into eval/")
    with open("eval/output", "w") as output_file:
        output_file.write("\n".join(all_candidates))
    # Up to 5 reference files are written; the count is capped by the image
    # with the fewest sentences so that indexing cannot run out of range.
    max_refs = 5
    num_refs = min([max_refs] + [len(refs) for refs in all_references])
    for q in range(num_refs):
        with open("eval/reference" + str(q), "w") as reference_file:
            reference_file.write("\n".join([refs[q] for refs in all_references]))
    # Drop reference files left over from earlier runs that this run did not
    # rewrite, so the script does not pick up stale data.
    for q in range(num_refs, max_refs):
        stale_file = "eval/reference" + str(q)
        if os.path.exists(stale_file):
            os.remove(stale_file)
    # invoke the perl script to get BLEU scores; it has to run inside eval/,
    # where the intermediate files were just written
    print("invoking eval/multi-bleu.perl script...")
    os.system("cd eval && ./multi-bleu.perl reference < output")

    # NOTE: test-split perplexity evaluation (eval_split) is currently disabled.

    # dump result struct to file
    print("saving result struct to %s" % (params["result_struct_filename"],))
    with open(params["result_struct_filename"], "w") as result_file:
        json.dump(blob, result_file)

    # the algorithm name is the second "_"-separated field of the checkpoint path
    alg_name = params["checkpoint_path"].split("_")[1]
    res_file_name = params["out_dir"] + "/captions_val_" + alg_name + "_results.json"
    with open(res_file_name, "w") as res_file:
        json.dump(captions_res, res_file)

    from eval_tools import metrics

    scores = metrics.run(dataset, alg_name, params["out_dir"])

    return scores
def run(checkpoint):
    """Caption the test split with an in-memory checkpoint and score the result.

    Uses a beam size of 1 and passes ``max_images=-1`` to the data provider
    (presumably "no limit" - confirm against ``iterImages``). For every image
    the first ground-truth sentence and the top prediction are printed. The
    collected captions are written to
    ``<outdir>/captions_val_<algorithm>_results.json`` and then scored with
    ``eval_tools.metrics``.

    Args:
        checkpoint: dict with the keys ``params`` (itself containing
            ``dataset``), ``model``, ``ixtoword``, ``algorithm`` and
            ``outdir``.

    Returns:
        Whatever ``metrics.run(dataset, alg_name, checkpoint["outdir"])``
        returns.
    """
    max_images = -1
    beam_size = 1

    checkpoint_params = checkpoint["params"]
    dataset = checkpoint_params["dataset"]
    model = checkpoint["model"]
    ixtoword = checkpoint["ixtoword"]

    # fetch the data provider
    dp = getDataProvider(dataset)

    # iterate over all images in test set and predict sentences
    BatchGenerator = decodeGenerator(checkpoint_params)
    kwparams = {"beam_size": beam_size}  # loop-invariant
    captions_res = []
    for n, img in enumerate(dp.iterImages(split="test", max_images=max_images), 1):
        print("image %d/%d:" % (n, max_images))
        # one space-joined string per human-provided sentence
        references = [" ".join(x["tokens"]) for x in img["sentences"]]
        Ys = BatchGenerator.predict([{"image": img}], model, checkpoint_params, **kwparams)

        # only the first human-provided reference is printed
        if references:
            print("GT: " + references[0])

        # now evaluate the top prediction
        top_predictions = Ys[0]  # take predictions for the first (and only) image we passed in
        top_prediction = top_predictions[0]  # these are sorted with highest on top
        # ix 0 is the END token, skip that
        candidate = " ".join([ixtoword[ix] for ix in top_prediction[1] if ix > 0])
        print("PRED: (%f) %s" % (top_prediction[0], candidate))

        # save for later eval
        captions_res.append({"image_id": img["id"], "caption": candidate})

    alg_name = checkpoint["algorithm"]
    res_file_name = checkpoint["outdir"] + "/captions_val_" + alg_name + "_results.json"
    with open(res_file_name, "w") as res_file:
        json.dump(captions_res, res_file)

    from eval_tools import metrics

    scores = metrics.run(dataset, alg_name, checkpoint["outdir"])

    return scores