def split_train_valid_test(visual_features=VISUAL_FEATURES,
                           text_features=TEXT_FEATURES_400,
                           n_test_classes=100):
    """Randomly split the 500 classes into zero-shot-test and train sets.

    ``n_test_classes`` classes are drawn at random as the zero-shot test
    split; the remaining classes form the training split, whose visual
    features are further divided per-class into train/validation.  The
    resulting class lists, text features and visual features are written
    to the paths configured in ``cfg_emb``.

    :param visual_features: path of the HDF5 visual-feature dataset.
    :param text_features: path of the .npy text-feature matrix — assumed
        one row per class, aligned with cfg_emb.CLASS_LIST_500 (TODO confirm).
    :param n_test_classes: number of classes reserved for the test split.
    """
    visual_features = ImageDataset().load_hdf5(visual_features)

    class_list = cfg_emb.load_class_list(cfg_emb.CLASS_LIST_500)
    class_name_list = cfg_emb.load_class_list(cfg_emb.CLASS_NAME_LIST_500)

    class_list = np.asarray(class_list, dtype=np.int32)
    class_name_list = np.asarray(class_name_list)

    class_permutation = np.random.permutation(len(class_list))

    # BUGFIX: the original sliced `[n_test_classes:-1]`, silently dropping
    # the last shuffled class from the training split (here and below for
    # the name list and the text features).
    class_list_test = class_list[class_permutation][:n_test_classes]
    class_list_train = class_list[class_permutation][n_test_classes:]
    class_name_list_test = class_name_list[class_permutation][:n_test_classes]
    class_name_list_train = class_name_list[class_permutation][n_test_classes:]

    # Restore ascending class-id order inside each split so that class
    # lists and text features stay aligned.
    test_sort = np.argsort(class_list_test)
    train_sort = np.argsort(class_list_train)

    class_list_test = class_list_test[test_sort]
    class_list_train = class_list_train[train_sort]
    class_name_list_test = class_name_list_test[test_sort]
    class_name_list_train = class_name_list_train[train_sort]

    print("Loading textual features..")
    text_features = np.load(text_features)

    text_features_test = text_features[class_permutation][:n_test_classes][
        test_sort]
    text_features_train = text_features[class_permutation][n_test_classes:][
        train_sort]

    visual_features_test = visual_features.sub_dataset_with_labels(
        class_list_test)
    visual_features_train_valid = visual_features.sub_dataset_with_labels(
        class_list_train)

    split_options = [SplitOptions("flickr", 0.25), SplitOptions("google", 0.3)]
    exclude_file_starting_with = ["seed"]
    visual_features_train, visual_features_valid = \
        visual_features_train_valid.validation_per_class_split(split_options, exclude_file_starting_with)

    cfg_emb.save_class_list(class_list_test, cfg_emb.CLASS_LIST_100)
    cfg_emb.save_class_list(class_list_train, cfg_emb.CLASS_LIST_400)

    cfg_emb.save_class_list(class_name_list_test, cfg_emb.CLASS_NAME_LIST_100)
    cfg_emb.save_class_list(class_name_list_train, cfg_emb.CLASS_NAME_LIST_400)

    np.save(cfg_emb.TEXT_FEATURES_TEST, text_features_test)
    np.save(cfg_emb.TEXT_FEATURES_TRAIN, text_features_train)

    visual_features_train.save_hdf5(cfg_emb.VISUAL_FEATURES_TRAIN)
    visual_features_valid.save_hdf5(cfg_emb.VISUAL_FEATURES_VALID)
    visual_features_test.save_hdf5(cfg_emb.VISUAL_FEATURES_TEST)
def test_embedding_zero_shot(tx_map=True,
                             im_map=True,
                             top_similar_output=None):
    """Evaluate the zero-shot embedding.

    Selects the doc2vec vectors of the classes that take part in the mAP
    test, then optionally runs the text-retrieval and image-retrieval
    mAP evaluations on the zero-shot visual features.

    :param tx_map: run the text-retrieval mAP evaluation.
    :param im_map: run the image-retrieval mAP evaluation.
    :param top_similar_output: top-similars output size (not implemented).
    """
    docs_vectors_500 = np.load(DOCS_FILE_500)
    class_list_all = cfg_emb.load_class_list(ZERO_SHOT_CLASS_LIST_all)
    class_list_for_map = cfg_emb.load_class_list(ZERO_SHOT_CLASS_LIST_for_map)

    # Keep only the vectors whose class belongs to the mAP class list.
    docs_vectors_100_zero_shot = np.asarray([
        docs_vectors_500[idx]
        for idx, cls in enumerate(class_list_all)
        if cls in class_list_for_map
    ])

    if isinstance(top_similar_output, int):
        print("Top similars on zero shot aproach: not yet implemented.")

    # Both evaluations run on the same data; only the direction differs.
    shared_kwargs = dict(visual_features=ZERO_SHOT_VISUAL_FEATURES,
                         docs_vectors_npy=docs_vectors_100_zero_shot,
                         class_list_doc2vec=ZERO_SHOT_CLASS_LIST_for_map,
                         im2doc_model=IM2DOC_MODEL,
                         im2doc_model_ext=None,
                         im2doc_weights_ext=None,
                         load_precomputed_imdocs=True)
    if tx_map:
        test_embedding_tx_mAP(**shared_kwargs)
    if im_map:
        test_embedding_im_mAP(**shared_kwargs)
def retrieve_text_map(img_features,
                      txt_features,
                      class_list_doc2vec,
                      joint_model,
                      joint_model_ext=None,
                      joint_model_weights_ext=None,
                      load_precomputed_embedded_feat=None,
                      verbose=False,
                      progressbar=True):
    """Compute the text-retrieval mAP of a joint embedding model.

    Every embedded image queries all embedded texts; the single relevant
    text is the one of the image's own class.  The per-query average
    precisions are averaged into the returned mAP.

    :param class_list_doc2vec: list of class ids (or a path loadable by
        ``load_class_list``) aligned with the rows of the text features.
    :returns: mean average precision (float).
    """
    # Hoisted out of the per-image loop of the original.
    from sklearn.metrics import average_precision_score

    def printv(msg):  # renamed: original parameter shadowed builtin `str`
        if verbose:
            print(msg)

    emb_txts, emb_imgs, img_labels = get_embedded_vectors(
        img_features, txt_features, joint_model, joint_model_ext,
        joint_model_weights_ext, load_precomputed_embedded_feat, verbose)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = load_class_list(class_list_doc2vec)
    # class id -> row index of its text vector
    class_list_inverted_doc2vec = {
        k: i
        for i, k in enumerate(class_list_doc2vec)
    }

    if progressbar:
        bar = pyprind.ProgBar(len(emb_imgs), stream=sys.stdout)

    C = compute_dist_scores(emb_imgs, emb_txts)
    av_prec = []
    for i in range(len(emb_imgs)):
        if progressbar:
            bar.update()

        lbl = int(img_labels[i])
        # One-hot relevance vector: only the text of the image's class.
        targets = np.zeros([emb_txts.shape[0]])
        targets[int(class_list_inverted_doc2vec[lbl])] = 1

        # Row i of C already holds the score of image i vs every text;
        # the original rebuilt it element-by-element in a loop.
        scores = C[i, :].tolist()

        AP = average_precision_score(targets, scores)
        av_prec.append(AP)
        printv("Img {} - AP = {}".format(lbl, AP))

    mAP = np.mean(np.asarray(av_prec))
    printv("\t\tmAP = {}".format(mAP))
    return mAP
def recall_top_k(img_features,
                 txt_features,
                 class_list_doc2vec,
                 joint_model,
                 joint_model_ext=None,
                 joint_model_weights_ext=None,
                 load_precomputed_embedded_feat=None,
                 top_k=10,
                 verbose=False,
                 progressbar=True):
    """Compute recall@k for image-to-text retrieval.

    For each image the texts are ranked by embedding distance; the image
    counts as a hit when the text of its own class appears among the
    ``top_k`` nearest texts.

    :returns: fraction of images whose own-class text is in the top k.
    """
    def printv(msg):  # renamed: original parameter shadowed builtin `str`
        if verbose:
            print(msg)

    emb_txts, emb_imgs, img_labels = get_embedded_vectors(
        img_features, txt_features, joint_model, joint_model_ext,
        joint_model_weights_ext, load_precomputed_embedded_feat, verbose)
    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = load_class_list(class_list_doc2vec)
    # class id -> row index of its text vector
    class_list_inverted_doc2vec = {
        k: i
        for i, k in enumerate(class_list_doc2vec)
    }

    if progressbar:
        bar = pyprind.ProgBar(len(emb_imgs), stream=sys.stdout)

    C = compute_dist_scores(emb_imgs, emb_txts, is_dist=True)
    recall_per_img = []
    for i in range(len(emb_imgs)):
        if progressbar:
            bar.update()
        lbl = int(img_labels[i])
        arg_lbl = class_list_inverted_doc2vec[lbl]

        dists = C[i, :]
        arg_sort_dist = np.argsort(dists)

        # BUGFIX: the original sliced [0:top_k + 1], which actually
        # measured recall@(k+1).
        if arg_lbl in arg_sort_dist[:top_k]:
            recall_per_img.append(1)
        else:
            recall_per_img.append(0)

    return np.sum(recall_per_img) / float(len(recall_per_img))
def joint_embedding_train(config_gen_function=config_gen_TEST,
                          debug_map_val=None):
    """Train joint image/text embedding models over a grid of configs.

    For every Config produced by ``config_gen_function`` this builds a
    JointEmbedder model, trains it one epoch at a time on freshly sampled
    contrastive batches, checkpoints the best-loss / best-val-loss models,
    and (when EVAL_MAP is set) scores each saved model with ModelMAP on
    the training, validation and zero-shot test sets.  Learning curves
    and per-config scores are written as CSVs under JOINT_MODEL_FOLDER.

    :param config_gen_function: callable returning (configs, config_gen_name).
    :param debug_map_val: forwarded to ModelMAP as ``debug_value``.
    """

    visual_features = cfg_emb.VISUAL_FEATURES_TRAIN
    text_features = cfg_emb.TEXT_FEATURES_400
    class_list = cfg_emb.CLASS_LIST_400
    visual_features_valid = cfg_emb.VISUAL_FEATURES_VALID
    visual_features_zs_test = cfg_emb.VISUAL_FEATURES_TEST
    text_features_zs_test = cfg_emb.TEXT_FEATURES_100
    class_list_test = cfg_emb.CLASS_LIST_100
    recall_at_k = [1, 3, 5, 10]

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test,
                      np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list, int_cast=True)

    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test,
                                                  int_cast=True)

    print("Generating dataset..")

    if class_list is not None:
        cycle_clslst_txfeat = class_list, text_features
    else:
        cycle_clslst_txfeat = enumerate(text_features)

    im_data_train = []
    # 1 text for each image (align: img_lbl_x <-> txt_lbl_x <-> lbl_x )
    tx_data_train_im_aligned = []
    tx_data_train = []  # 1 text for each class
    label_train = []
    if visual_features_valid is not None:
        im_data_val = []
        tx_data_valid_im_aligned = []
        label_val = []

    for lbl, docv in zip(cycle_clslst_txfeat[0], cycle_clslst_txfeat[1]):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization
        tx_data_train.append(norm_docv)

        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            visual_feat = visual_feat / np.linalg.norm(
                visual_feat)  # l2 normalization
            im_data_train.append(visual_feat)
            tx_data_train_im_aligned.append(norm_docv)
            label_train.append(lbl)

        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                visual_feat = visual_feat / np.linalg.norm(
                    visual_feat)  # l2 normalization
                im_data_val.append(visual_feat)
                tx_data_valid_im_aligned.append(norm_docv)
                label_val.append(lbl)

    # Image data conversion
    im_data_train = list_to_ndarray(im_data_train)
    im_data_val = list_to_ndarray(im_data_val)

    # Text data conversion
    tx_data_train = list_to_ndarray(tx_data_train)

    # Label conversion
    label_train = list_to_ndarray(label_train)
    label_val = list_to_ndarray(label_val)

    print("Generating model..")

    configs, config_gen_name = config_gen_function()

    print("Executing training over config generator: " + config_gen_name)
    folder_gen_name = "jointmodel_confgen-" + config_gen_name
    folder_gen_path = os.path.join(JOINT_MODEL_FOLDER, folder_gen_name)
    if not os.path.isdir(folder_gen_path):
        os.mkdir(folder_gen_path)

    class ModelScore:
        # mAP score dicts of one trained model on the three data splits.
        def __init__(self,
                     train_set_score=None,
                     valid_set_score=None,
                     test_set_score=None):
            self.train_set = train_set_score
            self.valid_set = valid_set_score
            self.test_set = test_set_score

    class ConfigScore:
        # Scores of the three checkpoints (best-train / best-valid /
        # init-random) produced by one config.
        def __init__(self, name=None):
            self.name = name
            self.scores_best_train = ModelScore()
            self.scores_best_valid = ModelScore()
            self.scores_init = ModelScore()

    config_scores = []  # list of ConfigScore, one for each config

    for config_counter, c in enumerate(configs):
        if not isinstance(c, Config):
            raise TypeError('c is not an instance of Config class.')

        print("")
        print("")
        print("")
        print("")
        print("Config: ")
        pprint(c)

        fname = folder_gen_name + "__" + str(config_counter)
        folder_path = os.path.join(folder_gen_path, fname)
        fpath = os.path.join(folder_path, fname)
        if not os.path.isdir(folder_path):
            os.mkdir(folder_path)

        c.saveJSON(fpath + '.config.json')

        JE = JointEmbedder(im_dim=im_data_train.shape[-1],
                           tx_dim=tx_data_train.shape[-1],
                           out_dim=c.sp_dim,
                           n_text_classes=len(class_list),
                           use_merge_distance=USE_MERGE_DISTANCE)

        optimizer = c.opt(**c.opt_kwargs)
        model = JE.model(optimizer=optimizer,
                         tx_activation=c.tx_act,
                         im_activation=c.im_act,
                         tx_hidden_layers=c.tx_hid,
                         im_hidden_layers=c.im_hid,
                         contrastive_loss_weight=c.contr_w,
                         logistic_loss_weight=c.log_w_tx,
                         contrastive_loss_weight_inverted=c.contr_inv_w,
                         init=c.w_init,
                         contrastive_loss_margin=c.contr_margin)

        model.summary()

        init_model_fname = fpath + '.model.init.random.h5'
        best_valid_fname = fpath + '.model.best.val_loss.h5'
        best_train_fname = fpath + '.model.best.loss.h5'
        model.save(init_model_fname)

        # Creating contrastive validation set (fixed for all epochs):
        val_x_im, val_x_tx, val_y_contr, val_y_log = get_contr_data_batch(
            im_data_val,
            tx_data_train,
            label_val,
            class_list,
            no_contrastive=DISABLE_CONTRASTIVE,
            shuffle=True,
            bs=c.bs)
        val_X = [val_x_im, val_x_tx]
        val_Y = [val_y_contr, val_y_contr, val_y_contr, val_y_log]

        best_loss = best_val_loss = float('inf')
        best_loss_epoch = -1
        best_val_loss_epoch = -1
        loss_hist = []
        val_loss_hist = []

        for ep in range(0, c.epochs):
            print("Epoch: {}/{}".format(ep, c.epochs - 1))

            checkpoint_path = fpath + ".weights.{:03d}.h5".format(ep)
            checkpoint = ModelCheckpoint(checkpoint_path,
                                         monitor='val_loss',
                                         save_best_only=False,
                                         save_weights_only=True)

            # A fresh contrastive batch is sampled for every epoch.
            x_im, x_tx, y_cont, y_log = get_contr_data_batch(
                im_data_train,
                tx_data_train,
                label_train,
                class_list,
                no_contrastive=DISABLE_CONTRASTIVE,
                shuffle=True,
                bs=c.bs)
            X = [x_im, x_tx]
            Y = [y_cont, y_cont, y_cont, y_log]
            # BUGFIX: the original aliased c.callbacks and appended the
            # checkpoint to it, so checkpoints accumulated across epochs
            # (and across configs sharing the same callback list).
            calls = list(c.callbacks)
            calls.append(checkpoint)
            hs = model.fit(X,
                           Y,
                           c.bs,
                           nb_epoch=1,
                           validation_data=[val_X, val_Y],
                           shuffle=False,
                           callbacks=calls)

            hist = hs.history
            val_loss = hist['val_loss'][0]
            loss = hist['loss'][0]
            val_loss_hist.append(val_loss)
            loss_hist.append(loss)

            if loss < best_loss:
                best_loss = loss
                model.save(best_train_fname)
                best_loss_epoch = ep
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                model.save(best_valid_fname)
                best_val_loss_epoch = ep

        # BUGFIX: `file()` is Python-2-only; `open()` works everywhere.
        loss_csv = open(fpath + ".loss.csv", 'w')
        loss_csv.write('Learning curves (loss),Epoch, Loss, Val Loss\n')

        if EVAL_INIT_MODEL_LOSS:
            x_im, x_tx, y_cont, y_log = get_contr_data(im_data_train,
                                                       tx_data_train,
                                                       label_train, class_list)
            X = [x_im, x_tx]
            Y = [y_cont, y_cont, y_cont, y_log]
            init_loss = model.evaluate(X, Y, batch_size=c.bs)[0]
            init_val_loss = model.evaluate(val_X, val_Y, batch_size=c.bs)[0]
            loss_csv.write(', {}, {}, {}\n'.format(-1, init_loss,
                                                   init_val_loss))

        epoch = 0
        for loss, val_loss in zip(loss_hist, val_loss_hist):
            loss_csv.write(", {}, {}, {}\n".format(epoch, loss, val_loss))
            epoch += 1
        loss_csv.write("\n\n\n")
        loss_csv.write("Best loss epoch:, {}, \n".format(best_loss_epoch))
        loss_csv.write(
            "Best val loss epoch:, {}\n".format(best_val_loss_epoch))

        if EVAL_MAP:
            map_call_tr = ModelMAP(visual_features=visual_features,
                                   docs_vectors=text_features,
                                   class_list=class_list,
                                   data_name='TrainSet',
                                   text_retrieval_map=True,
                                   image_retrieval_map=True,
                                   recall_at_k=recall_at_k,
                                   debug_value=debug_map_val)

            map_call_val = ModelMAP(visual_features=visual_features_valid,
                                    docs_vectors=text_features,
                                    class_list=class_list,
                                    data_name='ValidSet',
                                    text_retrieval_map=True,
                                    image_retrieval_map=True,
                                    recall_at_k=recall_at_k,
                                    debug_value=debug_map_val)

            map_call_zs = ModelMAP(visual_features=visual_features_zs_test,
                                   docs_vectors=text_features_zs_test,
                                   class_list=class_list_test,
                                   data_name='TestSetZS',
                                   text_retrieval_map=True,
                                   image_retrieval_map=True,
                                   recall_at_k=recall_at_k,
                                   debug_value=debug_map_val)

            # Map on best loss model
            best_train_model = JointEmbedder.load_model(best_train_fname)
            map_tr_best_train = map_call_tr.call_manual(best_train_model)
            map_val_best_train = map_call_val.call_manual(best_train_model)
            map_zs_best_train = map_call_zs.call_manual(best_train_model)

            score_best_train = ModelScore(map_tr_best_train,
                                          map_val_best_train,
                                          map_zs_best_train)

            # Map on best val_loss model
            best_valid_model = JointEmbedder.load_model(best_valid_fname)
            map_tr_best_valid = map_call_tr.call_manual(best_valid_model)
            map_val_best_valid = map_call_val.call_manual(best_valid_model)
            map_zs_best_valid = map_call_zs.call_manual(best_valid_model)

            score_best_valid = ModelScore(map_tr_best_valid,
                                          map_val_best_valid,
                                          map_zs_best_valid)

            list_map_labels = ["Best Tr Loss", "Best Val Loss"]
            list_map_dict_tr = [map_tr_best_train, map_tr_best_valid]
            list_map_dict_val = [map_val_best_train, map_val_best_valid]
            list_map_dict_zs = [map_zs_best_train, map_zs_best_valid]

            score_init = None

            if EVAL_INIT_MODEL_MAP:
                # Map on init/random model
                init_model = JointEmbedder.load_model(init_model_fname)
                map_tr_init = map_call_tr.call_manual(init_model)
                map_val_init = map_call_val.call_manual(init_model)
                map_zs_init = map_call_zs.call_manual(init_model)
                list_map_labels.append("Init/Random")
                list_map_dict_tr.append(map_tr_init)
                list_map_dict_val.append(map_val_init)
                list_map_dict_zs.append(map_zs_init)

                score_init = ModelScore(map_tr_init, map_val_init, map_zs_init)

            cs = ConfigScore(name=str(config_counter))
            cs.scores_best_train = score_best_train
            cs.scores_best_valid = score_best_valid
            cs.scores_init = score_init
            config_scores.append(cs)

            loss_csv.write("\n\n\n\n")

            loss_csv.write(", Loaded models/weights:, ")
            for l in list_map_labels:
                loss_csv.write("{}, ".format(l))
            loss_csv.write("\n")

            loss_csv.write("\nmAP over training set, ")
            for key in map_tr_best_train.keys():
                loss_csv.write("{}, ".format(key))
                for map_dict in list_map_dict_tr:
                    loss_csv.write("{}, ".format(map_dict[key]))
                loss_csv.write("\n, ")

            loss_csv.write("\nmAP over validation set, ")
            for key in map_tr_best_train.keys():
                loss_csv.write("{}, ".format(key))
                for map_dict in list_map_dict_val:
                    loss_csv.write("{}, ".format(map_dict[key]))
                loss_csv.write("\n, ")

            loss_csv.write("\nmAP over zs-test set, ")
            for key in map_tr_best_train.keys():
                loss_csv.write("{}, ".format(key))
                for map_dict in list_map_dict_zs:
                    loss_csv.write("{}, ".format(map_dict[key]))
                loss_csv.write("\n, ")

        loss_csv.close()

    if EVAL_MAP:
        # All score dicts must share the same metric keys; `cs` is the
        # ConfigScore of the last processed config.
        assert cs.scores_best_train.test_set.keys() == \
               cs.scores_best_train.train_set.keys() == \
               cs.scores_best_train.valid_set.keys() == \
               cs.scores_best_valid.test_set.keys() == \
               cs.scores_best_valid.train_set.keys() == \
               cs.scores_best_valid.valid_set.keys()

        if EVAL_INIT_MODEL_MAP:
            assert cs.scores_best_train.test_set.keys() == \
                   cs.scores_init.test_set.keys() == \
                   cs.scores_init.train_set.keys() == \
                   cs.scores_init.valid_set.keys()

        keys = cs.scores_best_train.test_set.keys()
        for key in keys:

            # BUGFIX: `file()` is Python-2-only; `open()` works everywhere.
            stats_csv = open(
                os.path.join(folder_gen_path,
                             folder_gen_name + ".{}.csv".format(key)), 'w')
            stats_csv.write('Stats for {}\n\n'.format(key))

            stats_csv.write(
                ', test over training set, , , , test over validation set, , , , test over test set, , ,, \n'
            )

            stats_csv.write('Model Weights:, '
                            'best tr loss, best val loss, init/random, , '
                            'best tr loss, best val loss, init/random, , '
                            'best tr loss, best val loss, init/random, , \n')
            stats_csv.write('Config index/name, \n')

            for cs in config_scores:
                stats_csv.write(
                    '{}, {}, {}, {}, , {}, {}, {}, , {}, {}, {},\n'.format(
                        cs.name, cs.scores_best_train.train_set[key],
                        cs.scores_best_valid.train_set[key],
                        str(cs.scores_init.train_set[key])
                        if EVAL_INIT_MODEL_MAP else '',
                        cs.scores_best_train.valid_set[key],
                        cs.scores_best_valid.valid_set[key],
                        str(cs.scores_init.valid_set[key])
                        if EVAL_INIT_MODEL_MAP else '',
                        cs.scores_best_train.test_set[key],
                        cs.scores_best_valid.test_set[key],
                        str(cs.scores_init.test_set[key])
                        if EVAL_INIT_MODEL_MAP else ''))
def joint_embedding_train(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN,
                          text_features=cfg_emb.TEXT_FEATURES_400,
                          class_list=cfg_emb.CLASS_LIST_400,
                          visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID,
                          visual_features_zs_test=cfg_emb.VISUAL_FEATURES_TEST,
                          text_features_zs_test=cfg_emb.TEXT_FEATURES_100,
                          class_list_test=cfg_emb.CLASS_LIST_100):
    """Load a saved joint embedding model and evaluate it.

    Builds (image, text, label) data for the training and validation
    splits, loads the 'jointmodel' checkpoint and reports text/image mAP
    and recall@k on the training, validation and zero-shot test sets.

    NOTE(review): despite the name this function only evaluates a
    previously trained model; it does not train.
    """
    import numpy as np

    def _squeeze_trailing(arr):
        # Drop trailing singleton dimensions down to rank 2.
        # BUGFIX: the original `while len(arr.shape) > 2:` loops never
        # terminated when the rank was > 2 but the last dim was not 1.
        while arr.ndim > 2 and arr.shape[-1] == 1:
            arr = np.squeeze(arr, axis=-1)
        return arr

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test,
                      np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list, int_cast=True)

    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test,
                                                  int_cast=True)

    print("Generating dataset..")

    if class_list is not None:
        cycle_clslst_txfeat = class_list, text_features
    else:
        cycle_clslst_txfeat = enumerate(text_features)

    im_data_train = []
    tx_data_train = []
    label_train = []
    # Initialized unconditionally so the array conversions below do not
    # raise NameError when visual_features_valid is None (original bug).
    im_data_valid = []
    tx_data_valid = []
    label_valid = []

    for lbl, docv in zip(cycle_clslst_txfeat[0], cycle_clslst_txfeat[1]):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization

        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            im_data_train.append(visual_feat)
            tx_data_train.append(norm_docv)
            label_train.append(lbl)

        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                im_data_valid.append(visual_feat)
                tx_data_valid.append(norm_docv)
                label_valid.append(lbl)

    # Image data conversion
    im_data_train = _squeeze_trailing(np.asarray(im_data_train))
    im_data_valid = _squeeze_trailing(np.asarray(im_data_valid))

    # Text data conversion
    tx_data_train = _squeeze_trailing(np.asarray(tx_data_train))
    tx_data_valid = _squeeze_trailing(np.asarray(tx_data_valid))

    # Label conversion
    label_train = _squeeze_trailing(np.asarray(label_train))
    label_valid = _squeeze_trailing(np.asarray(label_valid))

    print("Loading model..")

    name = "jointmodel"
    path = 'im2doc_embedding/{}/{}'.format(name, name)
    model_path = path + '.model.best.h5'
    weight_path = None  # optionally override with a specific epoch's weights

    model = JointEmbedder.load_model(model_path=model_path,
                                     weight_path=weight_path)
    top_k = [1, 3, 5, 10]

    print("\nTest traning: ")
    map_score = retrieve_text_map(visual_features,
                                  text_features,
                                  class_list,
                                  joint_model=model)
    mapi = retrieve_image_map(visual_features,
                              text_features,
                              class_list,
                              joint_model=model)
    print("mAP = " + str(map_score))
    print("mAPi = " + str(mapi))
    for k in top_k:
        recall = recall_top_k(visual_features,
                              text_features,
                              class_list,
                              joint_model=model,
                              top_k=k,
                              verbose=False,
                              progressbar=False)
        print("recall@{} = {}".format(k, recall))

    print("\nTest validation: ")
    map_score = retrieve_text_map(visual_features_valid,
                                  text_features,
                                  class_list,
                                  joint_model=model)
    mapi = retrieve_image_map(visual_features_valid,
                              text_features,
                              class_list,
                              joint_model=model)
    print("mAP = " + str(map_score))
    print("mAPi = " + str(mapi))
    for k in top_k:
        recall = recall_top_k(visual_features_valid,
                              text_features,
                              class_list,
                              joint_model=model,
                              top_k=k,
                              verbose=False,
                              progressbar=False)
        print("recall@{} = {}".format(k, recall))

    print("\nTest zero shot test: ")
    map_score = retrieve_text_map(visual_features_zs_test,
                                  text_features_zs_test,
                                  class_list_test,
                                  joint_model=model)
    # BUGFIX: the original printed the *validation* mAPi under the
    # zero-shot heading; compute the image-retrieval mAP on the
    # zero-shot split instead.
    mapi = retrieve_image_map(visual_features_zs_test,
                              text_features_zs_test,
                              class_list_test,
                              joint_model=model)
    print("mAP = " + str(map_score))
    print("mAPi = " + str(mapi))
    for k in top_k:
        recall = recall_top_k(visual_features_zs_test,
                              text_features_zs_test,
                              class_list_test,
                              joint_model=model,
                              top_k=k,
                              verbose=False,
                              progressbar=False)
        print("recall@{} = {}".format(k, recall))
# Example #7 (scraped listing artifact — kept as a comment so the file parses)
# 0 votes
def joint_embedding_train(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN,
                          text_features=cfg_emb.TEXT_FEATURES_400,
                          class_list=cfg_emb.CLASS_LIST_400,
                          visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID,
                          visual_features_zs_test=cfg_emb.VISUAL_FEATURES_TEST,
                          text_features_zs_test=cfg_emb.TEXT_FEATURES_100,
                          class_list_test=cfg_emb.CLASS_LIST_100):
    """Train a visual/textual joint-embedding model (JointEmbedder).

    Builds per-class (image-feature, l2-normalized doc-vector, label) triples
    for train and validation sets, one-hot encodes the labels, then trains one
    model per entry in ``configs`` with mAP/recall callbacks and checkpointing.

    Parameters (all may be file paths loaded below, or pre-loaded objects):
      visual_features / visual_features_valid / visual_features_zs_test --
          ImageDataset HDF5 paths (train / validation / zero-shot test).
      text_features / text_features_zs_test -- .npy paths or ndarrays of
          per-class doc vectors, row-aligned with class_list / class_list_test.
      class_list / class_list_test -- class-label list paths or lists.
    """
    import numpy as np

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    # Accept either ndarray or a path to a .npy file.
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test,
                      np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list, int_cast=True)

    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test,
                                                  int_cast=True)

    print("Generating dataset..")

    # NOTE(review): class_list is reassigned above, so it is never None here
    # and the enumerate() branch looks dead; if it were taken,
    # cycle_clslst_txfeat[0] below would raise TypeError (enumerate objects
    # are not subscriptable) -- confirm the branch can be removed.
    if class_list is not None:
        cycle_clslst_txfeat = class_list, text_features
    else:
        cycle_clslst_txfeat = enumerate(text_features)

    im_data_train = []
    tx_data_train = []
    label_train = []
    if visual_features_valid is not None:
        im_data_valid = []
        tx_data_valid = []
        label_valid = []

    # One (image-feature, doc-vector, label) triple per image of each class.
    for lbl, docv in zip(cycle_clslst_txfeat[0], cycle_clslst_txfeat[1]):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization

        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            im_data_train.append(visual_feat)
            tx_data_train.append(norm_docv)
            label_train.append(lbl)

        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                im_data_valid.append(visual_feat)
                tx_data_valid.append(norm_docv)
                label_valid.append(lbl)

    # Image data conversion
    # Drop trailing singleton dimensions until arrays are at most 2-D.
    im_data_train = np.asarray(im_data_train)
    im_data_valid = np.asarray(im_data_valid)
    while len(im_data_train.shape) > 2:
        if im_data_train.shape[-1] == 1:
            im_data_train = np.squeeze(im_data_train, axis=(-1, ))
    while len(im_data_valid.shape) > 2:
        if im_data_valid.shape[-1] == 1:
            im_data_valid = np.squeeze(im_data_valid, axis=(-1, ))

    # Text data conversion
    tx_data_train = np.asarray(tx_data_train)
    tx_data_valid = np.asarray(tx_data_valid)
    while len(tx_data_train.shape) > 2:
        if tx_data_train.shape[-1] == 1:
            tx_data_train = np.squeeze(tx_data_train, axis=(-1, ))
    while len(tx_data_valid.shape) > 2:
        if tx_data_valid.shape[-1] == 1:
            tx_data_valid = np.squeeze(tx_data_valid, axis=(-1, ))

    # Label conversion
    label_train = np.asarray(label_train)
    label_valid = np.asarray(label_valid)

    while len(label_train.shape) > 2:
        if label_train.shape[-1] == 1:
            label_train = np.squeeze(label_train, axis=(-1, ))
    while len(label_valid.shape) > 2:
        if label_valid.shape[-1] == 1:
            label_valid = np.squeeze(label_valid, axis=(-1, ))

    print("Generating model..")

    MONITOR = 'val_loss'

    # Hyper-parameter bundle; one Config per training run in `configs`.
    class Config:
        def __init__(self):
            self.lr = 10
            self.bs = 64
            self.epochs = 50
            self.opt = Adadelta
            self.opt_str = 'adadelta'
            self.joint_space_dim = 200
            self.tx_activation = 'softmax'
            self.im_activation = 'tanh'
            self.tx_hidden_layers = None
            self.tx_hidden_activation = None
            self.im_hidden_layers = None
            self.im_hidden_activation = None
            self.contrastive_loss_weight = 1
            self.logistic_loss_weight = 1
            self.contrastive_loss_weight_inverted = 1
            self.weight_init = 'glorot_uniform'

    # GOT GREAT RESUTLS WITH THIS PARAMS:
    # configs = []
    # c = Config()
    # c.lr = 100
    # c.bs = 64
    # c.epochs = 50
    # c.joint_space_dim = 200
    # c.emb_activation = 'softmax'
    # c.contrastive_loss_weight = 3
    # c.logistic_loss_weight = 1
    # c.weight_init = 'glorot_uniform' # 'glorot_normal'
    #
    # # train_mAP-fit-end: 0.231570111798
    # # valid_mAP-fit-end: 0.36824232778
    # # test_mAP-fit-end: 0.12500124832
    # Epoch 48 / 50
    # loss: 2.8842 - activation_1_loss: 0.7106 - activation_2_loss: 0.7106 - dense_1_loss: 0.7524 - val_loss: 3.0216 - val_activation_1_loss: 0.8354 - val_activation_2_loss: 0.8354 - val_dense_1_loss: 0.5154
    # Epoch 49 / 50
    # loss: 2.7934 - activation_1_loss: 0.6958 - activation_2_loss: 0.6958 - dense_1_loss: 0.7061 - val_loss: 2.6629 - val_activation_1_loss: 0.5755 - val_activation_2_loss: 0.5755 - val_dense_1_loss: 0.9365
    # Epoch 50 / 50
    # loss: 2.7774 - activation_1_loss: 0.6948 - activation_2_loss: 0.6948 - dense_1_loss: 0.6930 - val_loss: 2.7351 - val_activation_1_loss: 0.5661 - val_activation_2_loss: 0.5661 - val_dense_1_loss: 1.0367

    # configs = []
    # c = Config()
    # c.lr = 100
    # c.bs = 64
    # c.epochs = 50
    # c.joint_space_dim = 200
    # c.emb_activation = 'softmax'
    # c.contrastive_loss_weight = 3
    # c.logistic_loss_weight = 1
    # c.weight_init = 'glorot_uniform' # 'glorot_normal'
    # c.tx_hidden_layers = [250]
    # c.tx_hidden_activation = ['relu']
    # c.im_hidden_layers = [500]
    # c.im_hidden_activation = ['tanh']
    configs = []
    c = Config()
    c.lr = 10
    c.bs = 64
    c.epochs = 10
    c.joint_space_dim = 200
    c.tx_activation = 'sigmoid'
    c.im_activation = 'sigmoid'
    c.contrastive_loss_weight = 3
    c.contrastive_loss_weight_inverted = 3
    c.logistic_loss_weight = 1
    c.weight_init = 'glorot_uniform'  # 'glorot_normal'
    # c.tx_hidden_layers = [250]
    # c.tx_hidden_activation = ['relu']
    # c.im_hidden_layers = [500]
    # c.im_hidden_activation = ['tanh']
    # train_mAP-fit-end: 0.501253132832
    # valid_mAP-fit-end: 0.501253132832
    # test_mAP-fit-end: 0.505
    # # ... actually all the distance vectors come out IDENTICAL, which is why we get these results
    configs.append(c)

    for c in configs:

        print ""
        print("Training model..")
        print "optim:   " + str(c.opt_str)
        print "lr:      " + str(c.lr)

        # One output folder per hyper-parameter combination.
        fname = "jointmodel_opt-{}_lr-{}_bs-{}".format(c.opt_str, c.lr, c.bs)
        # for i, hu in enumerate(hid):
        #     fname += "_hl-" + str(hu)
        folder = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER, fname)
        if not os.path.isdir(folder):
            os.mkdir(folder)

        fname = os.path.join(folder, fname)

        JE = JointEmbedder(im_dim=im_data_train.shape[-1],
                           tx_dim=tx_data_train.shape[-1],
                           out_dim=c.joint_space_dim,
                           n_text_classes=len(class_list))

        model = JE.model(
            optimizer=c.opt(lr=c.lr),
            tx_activation=c.tx_activation,
            im_activation=c.im_activation,
            tx_hidden_layers=c.tx_hidden_layers,
            tx_hidden_activation=c.tx_hidden_activation,
            im_hidden_layers=c.im_hidden_layers,
            im_hidden_activation=c.im_hidden_activation,
            contrastive_loss_weight=c.contrastive_loss_weight,
            logistic_loss_weight=c.logistic_loss_weight,
            contrastive_loss_weight_inverted=c.
            contrastive_loss_weight_inverted,
            init=c.weight_init,
        )

        #earlystop = EarlyStopping(monitor=MONITOR, min_delta=0.0005, patience=9)
        #reduceLR = ReduceLROnPlateau(monitor=MONITOR, factor=0.1, patience=4, verbose=1, epsilon=0.0005)
        # Keep both the single best model and per-epoch weight snapshots.
        bestpoint = ModelCheckpoint(fname + '.model.best.h5',
                                    monitor=MONITOR,
                                    save_best_only=True)
        checkpoint = ModelCheckpoint(fname + '.weights.{epoch:02d}.h5',
                                     monitor=MONITOR,
                                     save_best_only=False,
                                     save_weights_only=True)

        # mAP / recall@10 evaluation callbacks, executed at fit end.
        mAP_tr = ModelMAP(visual_features=visual_features,
                          docs_vectors=text_features,
                          class_list=class_list,
                          data_name='train-set',
                          exe_fit_end=True,
                          recall_at_k=[10])
        mAP_val = ModelMAP(visual_features=visual_features_valid,
                           docs_vectors=text_features,
                           class_list=class_list,
                           data_name='valid-set',
                           exe_fit_end=True,
                           recall_at_k=[10])
        mAP_zs = ModelMAP(visual_features=visual_features_zs_test,
                          docs_vectors=text_features_zs_test,
                          class_list=class_list_test,
                          data_name='test-set',
                          exe_fit_end=True,
                          recall_at_k=[10])

        callbacks = [mAP_tr, mAP_val, mAP_zs, checkpoint,
                     bestpoint]  #, earlystop, ]

        model.summary()

        # One-hot encode labels: class label -> index into class_list.
        label_map = {}
        for index, label in enumerate(class_list):
            label_map[label] = index
        size = len(class_list)

        label_train_converted = []
        for l in label_train:
            new_l = np.zeros([size])
            new_l[label_map[l]] = 1
            label_train_converted.append(new_l)
        label_train_converted = np.asarray(label_train_converted)
        label_valid_converted = []
        for l in label_valid:
            new_l = np.zeros([size])
            new_l[label_map[l]] = 1
            label_valid_converted.append(new_l)
        label_valid_converted = np.asarray(label_valid_converted)
        # label_train_converted = np.asarray([label_map[l] for l in label_train])
        # label_valid_converted = np.asarray([label_map[l] for l in label_valid])

        # Four targets: raw labels feed the two contrastive outputs, one-hot
        # labels feed the two logistic/classification outputs.
        history = model.fit([im_data_train, tx_data_train], [
            label_train, label_train, label_train_converted,
            label_train_converted
        ],
                            validation_data=[[im_data_valid, tx_data_valid],
                                             [
                                                 label_valid, label_valid,
                                                 label_valid_converted,
                                                 label_valid_converted
                                             ]],
                            batch_size=c.bs,
                            nb_epoch=c.epochs,
                            shuffle=True,
                            verbose=1,
                            callbacks=callbacks)

        # NOTE(review): loss_csv is opened (Python 2 file()) but the visible
        # code never writes/closes it -- confirm against the full file.
        loss_csv = file(fname + '.loss.csv', 'w')
        hist = history.history
def retrieve_image_map(img_features, txt_features, class_list_doc2vec, joint_model,
                      joint_model_ext=None, joint_model_weights_ext=None, load_precomputed_embedded_feat=None,
                      verbose=False, progressbar=True):
    """Compute image-retrieval mAP in the joint embedding space.

    Each text vector (one per class in ``class_list_doc2vec``) is used as a
    query: all images are scored by negative cosine distance to it, and the
    average precision over the images' class labels is computed; the mean over
    all text queries is returned.

    :param img_features: ImageDataset instance, or a path to an HDF5 file
        loadable via ImageDataset().load_hdf5.
    :param txt_features: ndarray of text features, row-aligned with
        class_list_doc2vec.
    :param class_list_doc2vec: list of class labels (or a path accepted by
        load_class_list).
    :param joint_model: a Keras Model, or the name of a saved joint model.
    :param joint_model_ext: model-file extension; defaults to
        DEFAULT_JOINT_MODEL_EXT.
    :param joint_model_weights_ext: optional weights-file extension to load
        on top of the model.
    :param load_precomputed_embedded_feat: not yet implemented; leave None.
    :param verbose: print progress messages.
    :param progressbar: show a pyprind progress bar.
    :return: mean average precision (float).
    """
    def printv(msg):
        # Print only when verbose mode is on.
        if verbose:
            print(msg)

    if joint_model_ext is None:
        joint_model_ext = DEFAULT_JOINT_MODEL_EXT
    # NOTE: loading precomputed embeddings is not implemented. The previous
    # revision constructed -- but never raised -- a ValueError here, which was
    # a no-op; the flag is simply normalized to a boolean.
    if load_precomputed_embedded_feat is None:
        load_precomputed_embedded_feat = False

    printv("Loading visual features..")
    if not isinstance(img_features, ImageDataset):
        img_features = ImageDataset().load_hdf5(img_features)

    printv("Loading joint model..")
    if not isinstance(joint_model, Model):
        joint_model_name = joint_model
        model_file = os.path.join(JOINT_MODEL_FOLDER,
                                  os.path.join(joint_model_name, joint_model_name + joint_model_ext))
        joint_model = load_model(model_file, custom_objects={'cos_distance': cos_distance})
    else:
        joint_model_name = None

    if joint_model_weights_ext is not None:
        printv("Loading joint model weights..")
        # NOTE(review): when joint_model was passed as a Model instance this
        # os.path.join on it would fail -- weights loading assumes a name.
        weight_file = os.path.join(JOINT_MODEL_FOLDER, os.path.join(joint_model, joint_model + joint_model_weights_ext))
        joint_model.load_weights(weight_file)

    # Where the predicted embeddings are cached on disk.
    if joint_model_name is not None:
        img_emb_path = os.path.join(JOINT_MODEL_PREDICTIONS_FOLDER, joint_prediction_fname(joint_model_name, 'img'))
        txt_emb_path = os.path.join(JOINT_MODEL_PREDICTIONS_FOLDER, joint_prediction_fname(joint_model_name, 'txt'))
    else:
        img_emb_path = "precomputed_im_emb.img.npy.temp"
        txt_emb_path = "precomputed_tx_emb.txt.npy.temp"

    if load_precomputed_embedded_feat and os.path.exists(img_emb_path) and os.path.exists(txt_emb_path):
        printv("Pre computed embedding from images and text found... loading...")
        imgs_embedded = np.load(img_emb_path)
        txts_embedded = np.load(txt_emb_path)
    else:
        printv("Predict embedding from images and text(joint model embedding)...")

        # Squeeze trailing singleton dims so inputs are at most 2-D.
        img_data = img_features.data
        while len(img_data.shape) > 2:
            if img_data.shape[-1] == 1:
                img_data = np.squeeze(img_data, axis=(-1,))
        img_emb_model = get_sub_model(joint_model, 'img')
        imgs_embedded = img_emb_model.predict(img_data, verbose=verbose)
        np.save(img_emb_path, imgs_embedded)

        txt_data = txt_features
        while len(txt_data.shape) > 2:
            if txt_data.shape[-1] == 1:
                txt_data = np.squeeze(txt_data, axis=(-1,))
        txt_emb_model = get_sub_model(joint_model, 'txt')
        txts_embedded = txt_emb_model.predict(txt_data, verbose=verbose)
        np.save(txt_emb_path, txts_embedded)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = load_class_list(class_list_doc2vec)

    # mAP test (optimized with cdist)
    if progressbar:
        import sys
        bar = pyprind.ProgBar(len(txts_embedded), stream=sys.stdout)
    av_prec = []
    from scipy.spatial.distance import cdist

    # BUGFIX: scipy's metric name is 'cosine', not 'cos' (which raises
    # ValueError). Negated cosine distance is used as a similarity score,
    # matching test_embedding_im_mAP's use of 1 - cosine distance.
    C = -cdist(txts_embedded, imgs_embedded, 'cosine')

    for i, dv in enumerate(txts_embedded):
        scores = []
        targets = []
        if progressbar:
            bar.update()

        lbl = int(class_list_doc2vec[i])
        for j, im_label in enumerate(img_features.labels):
            # True when image j belongs to the query class.
            target = not bool(im_label[0] - lbl)
            score = C[i, j]
            scores.append(score)
            targets.append(target)

        from sklearn.metrics import average_precision_score
        AP = average_precision_score(targets, scores)
        av_prec.append(AP)
        printv("Class {} - AP = {}".format(lbl, AP))

    mAP = np.mean(np.asarray(av_prec))
    printv("\t\tmAP = {}".format(mAP))
    return mAP
def recall_top_k(img_features, txt_features, class_list_doc2vec,
                 joint_model, joint_model_ext=None, joint_model_weights_ext=None,
                 load_precomputed_embedded_feat=None,
                 top_k=10,
                 verbose=False, progressbar=True):
    """Compute recall@k of text retrieval from image queries in joint space.

    Every image is embedded and the text embeddings are ranked by euclidean
    distance to it; the image counts as a hit when the text vector of its own
    class is among the ``top_k`` nearest texts. Returns the hit fraction.

    Fixes over the previous revision:
      * candidates are ranked nearest-first (the negated-distance matrix was
        argsorted ascending, i.e. farthest-first, inverting the ranking);
      * exactly ``top_k`` candidates are considered (was ``top_k + 1``).

    :param img_features: ImageDataset instance or HDF5 path.
    :param txt_features: ndarray of text features, row-aligned with
        class_list_doc2vec.
    :param class_list_doc2vec: list of class labels (or loadable path).
    :param joint_model: Keras Model, or the name of a saved joint model.
    :param joint_model_ext: model-file extension; defaults to
        DEFAULT_JOINT_MODEL_EXT.
    :param joint_model_weights_ext: optional weights-file extension.
    :param load_precomputed_embedded_feat: not yet implemented; leave None.
    :param top_k: number of nearest texts considered a hit.
    :param verbose: print progress messages.
    :param progressbar: show a pyprind progress bar.
    :return: recall@top_k (float in [0, 1]).
    """
    def printv(msg):
        # Print only when verbose mode is on.
        if verbose:
            print(msg)

    if joint_model_ext is None:
        joint_model_ext = DEFAULT_JOINT_MODEL_EXT
    # NOTE: loading precomputed embeddings is not implemented. The previous
    # revision constructed -- but never raised -- a ValueError here (a no-op);
    # the flag is simply normalized to a boolean.
    if load_precomputed_embedded_feat is None:
        load_precomputed_embedded_feat = False

    printv("Loading visual features..")
    if not isinstance(img_features, ImageDataset):
        img_features = ImageDataset().load_hdf5(img_features)

    printv("Loading im2doc model..")
    if not isinstance(joint_model, Model):
        joint_model_name = joint_model
        model_file = os.path.join(JOINT_MODEL_FOLDER,
                                  os.path.join(joint_model_name, joint_model_name + joint_model_ext))
        joint_model = load_model(model_file, custom_objects={'cos_distance': cos_distance})
    else:
        joint_model_name = None

    if joint_model_weights_ext is not None:
        printv("Loading im2doc weights..")
        # NOTE(review): when joint_model was passed as a Model instance this
        # os.path.join on it would fail -- weights loading assumes a name.
        weight_file = os.path.join(JOINT_MODEL_FOLDER, os.path.join(joint_model, joint_model + joint_model_weights_ext))
        joint_model.load_weights(weight_file)

    # Where the predicted embeddings are cached on disk.
    if joint_model_name is not None:
        img_emb_path = os.path.join(JOINT_MODEL_PREDICTIONS_FOLDER, joint_prediction_fname(joint_model_name, 'img'))
        txt_emb_path = os.path.join(JOINT_MODEL_PREDICTIONS_FOLDER, joint_prediction_fname(joint_model_name, 'txt'))
    else:
        img_emb_path = "precomputed_im_emb.img.npy.temp"
        txt_emb_path = "precomputed_tx_emb.txt.npy.temp"

    if load_precomputed_embedded_feat and os.path.exists(img_emb_path) and os.path.exists(txt_emb_path):
        printv("Pre computed embedding from images and text found... loading...")
        imgs_embedded = np.load(img_emb_path)
        txts_embedded = np.load(txt_emb_path)
    else:
        printv("Predict embedding from images and text(joint model embedding)...")

        # Squeeze trailing singleton dims so inputs are at most 2-D.
        img_data = img_features.data
        while len(img_data.shape) > 2:
            if img_data.shape[-1] == 1:
                img_data = np.squeeze(img_data, axis=(-1,))
        img_emb_model = get_sub_model(joint_model, 'img')
        imgs_embedded = img_emb_model.predict(img_data, verbose=verbose)
        np.save(img_emb_path, imgs_embedded)

        txt_data = txt_features
        while len(txt_data.shape) > 2:
            if txt_data.shape[-1] == 1:
                txt_data = np.squeeze(txt_data, axis=(-1,))
        txt_emb_model = get_sub_model(joint_model, 'txt')
        txts_embedded = txt_emb_model.predict(txt_data, verbose=verbose)
        np.save(txt_emb_path, txts_embedded)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = load_class_list(class_list_doc2vec)
    # Map class label -> row index of its text vector.
    class_list_inverted_doc2vec = {k: i for i, k in enumerate(class_list_doc2vec)}

    if progressbar:
        import sys
        bar = pyprind.ProgBar(len(imgs_embedded), stream=sys.stdout)

    from scipy.spatial.distance import cdist

    # Negated euclidean distance: larger value = more similar.
    C = -cdist(imgs_embedded, txts_embedded, 'euclidean')

    recall_per_img = []
    for i, iv in enumerate(imgs_embedded):
        if progressbar:
            bar.update()
        lbl = int(img_features.labels[i])
        arg_lbl = class_list_inverted_doc2vec[lbl]

        sims = C[i, :]
        # BUGFIX: rank nearest-first. sims holds negated distances, so a plain
        # ascending argsort put the FARTHEST texts first; sort descending.
        ranked = np.argsort(sims)[::-1]

        # BUGFIX: consider exactly top_k candidates (was top_k + 1).
        if arg_lbl in ranked[:top_k]:
            recall_per_img.append(1)
        else:
            recall_per_img.append(0)

    # Guard against an empty image set to avoid ZeroDivisionError.
    if not recall_per_img:
        return 0.0
    return np.sum(recall_per_img) / float(len(recall_per_img))
def load_class_list(class_list_doc2vec):
    """Thin wrapper delegating to cfg_emb.load_class_list."""
    # TODO: drop this indirection once callers import cfg_emb directly.
    from E5_embedding.cfg_emb import load_class_list as _cfg_load_class_list
    return _cfg_load_class_list(class_list_doc2vec)
# Example #11
# 0
def im2docvec_wvalid_map(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN,
                         text_features=cfg_emb.TEXT_FEATURES_400,
                         class_list=cfg_emb.CLASS_LIST_400,
                         visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID,
                         visual_features_zs_test=cfg_emb.VISUAL_FEATURES_TEST,
                         text_features_zs_test=cfg_emb.GET_TEXT_FEATURES_100(),
                         class_list_test=cfg_emb.CLASS_LIST_100):
    """Train image->docvec regression models over a hyper-parameter grid.

    For each class, pairs every image feature with the class's l2-normalized
    doc vector, then trains one model per (hidden-layers, optimizer, lr,
    batch-size) combination with cos_distance loss, mAP callbacks and
    checkpointing. Writes a per-run loss CSV (currently unreachable, see the
    early ``return`` below).

    Parameters mirror joint_embedding_train: paths (or pre-loaded objects) for
    train/valid/zero-shot-test visual features, per-class text features and
    class lists.
    """
    import numpy as np

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    # Accept either ndarray or a path to a .npy file.
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test,
                      np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list)

    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test)

    print("Generating dataset..")

    # NOTE(review): class_list is reassigned above, so it is never None here
    # and the enumerate() branch looks dead; if taken, cycle_on[0] below would
    # raise TypeError (enumerate objects are not subscriptable).
    if class_list is not None:
        cycle_on = class_list, text_features
    else:
        cycle_on = enumerate(text_features)

    data_train = []
    target_train = []
    if visual_features_valid is not None:
        data_valid = []
        target_valid = []

    # One (image-feature, doc-vector) pair per image of each class.
    for lbl, docv in zip(cycle_on[0], cycle_on[1]):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization
        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            data_train.append(visual_feat)
            target_train.append(norm_docv)

        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                data_valid.append(visual_feat)
                target_valid.append(norm_docv)

    data_train = np.asarray(data_train)
    data_valid = np.asarray(data_valid)

    # Drop trailing singleton dimensions until arrays are at most 2-D.
    while len(data_train.shape) > 2:
        if data_train.shape[-1] == 1:
            data_train = np.squeeze(data_train, axis=(-1, ))

    while len(data_valid.shape) > 2:
        if data_valid.shape[-1] == 1:
            data_valid = np.squeeze(data_valid, axis=(-1, ))

    target_train = np.asarray(target_train)
    target_valid = np.asarray(target_valid)

    validation_data = [data_valid, target_valid]

    print("Generating model..")

    # Hyper-parameter grid (the commented alternatives are wider sweeps).
    EPOCHS = 20
    hiddens = [[1000], [500], [200]]
    #hiddens = [ [2000,1000], ]

    lrs = [10]
    batch_sizes = [32]
    optimizers_str = ['Adadelta']
    optimizers = [Adadelta]

    #hiddens = [ [1000], [2000], [4000], [2000,1000], [4000, 2000], [4000, 2000, 1000]]
    #lrs = [10, 1]
    #batch_sizes = [64, 32, 16]
    #optimizers_str = ['Adadelta', 'Adagrad']
    #optimizers = [Adadelta, Adagrad]

    for hid in hiddens:
        for opt, opt_str in zip(optimizers, optimizers_str):
            for lr in lrs:
                for bs in batch_sizes:

                    print ""
                    print("Training model..")
                    print "hiddens: " + str(hid)
                    print "optim:   " + str(opt_str)
                    print "lr:      " + str(lr)

                    # One output folder per hyper-parameter combination.
                    fname = "im2docvec_opt-{}_lr-{}_bs-{}".format(
                        opt_str, lr, bs)
                    for i, hu in enumerate(hid):
                        fname += "_hl-" + str(hu)
                    folder = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER, fname)
                    if not os.path.isdir(folder):
                        os.mkdir(folder)

                    fname = os.path.join(folder, fname)

                    model = get_model(data_train.shape[1],
                                      target_train.shape[-1], hid)
                    model.compile(optimizer=opt(lr=lr), loss=cos_distance)

                    earlystop = EarlyStopping(monitor=MONITOR,
                                              min_delta=0.0005,
                                              patience=9)
                    reduceLR = ReduceLROnPlateau(monitor=MONITOR,
                                                 factor=0.1,
                                                 patience=4,
                                                 verbose=1,
                                                 epsilon=0.0005)
                    # Keep the best model plus per-epoch weight snapshots.
                    bestpoint = ModelCheckpoint(fname + '.model.best.h5',
                                                monitor=MONITOR,
                                                save_best_only=True)
                    checkpoint = ModelCheckpoint(fname +
                                                 '.weights.{epoch:02d}.h5',
                                                 monitor=MONITOR,
                                                 save_best_only=False,
                                                 save_weights_only=True)

                    #mAP_tr = ModelMAP(visual_features=visual_features, docs_vectors=text_features, class_list=class_list)
                    mAP_val = ModelMAP(visual_features=visual_features_valid,
                                       docs_vectors=text_features,
                                       class_list=class_list,
                                       history_key='val_mAP',
                                       exe_on_train_begin=True,
                                       on_train_begin_key='tr_begin-val_map',
                                       exe_on_batch_end=False,
                                       on_batch_end_key='batch-val_map')
                    mAP_zs = ModelMAP(visual_features=visual_features_zs_test,
                                      docs_vectors=text_features_zs_test,
                                      class_list=class_list_test,
                                      history_key='zs_mAP',
                                      exe_on_train_begin=True,
                                      on_train_begin_key='tr_begin-zs_map',
                                      exe_on_batch_end=False,
                                      on_batch_end_key='batch-zs_map')

                    callbacks = [
                        reduceLR, bestpoint, checkpoint, mAP_val, mAP_zs
                    ]  #, earlystop, ]
                    # mAP = test_embedding_map(visual_features=visual_features_zs_test,
                    #                          class_list_doc2vec=class_list_test,
                    #                          docs_vectors_npy=text_features_zs_test,
                    #                          im2doc_model=model,
                    #                          verbose=False)
                    # print("Pre train mAP: " + str(mAP))
                    history = model.fit(data_train,
                                        target_train,
                                        batch_size=bs,
                                        nb_epoch=EPOCHS,
                                        verbose=1,
                                        shuffle=True,
                                        callbacks=callbacks)
                                        validation_data=validation_data)

                    loss_csv = file(fname + '.loss.csv', 'w')
                    hist = history.history
                    # NOTE(review): this early return exits after the first
                    # grid combination and makes all the CSV logging below
                    # unreachable -- likely a debugging left-over; confirm.
                    return

                    if 'tr_begin-val_map' in hist.keys():
                        loss_csv.write('val_mAP pre train:, {}\n'.format(
                            hist['tr_begin-val_map'][0]))
                    if 'tr_begin-zs_map' in hist.keys():
                        loss_csv.write('zs_mAP pre train:, {}\n'.format(
                            hist['tr_begin-zs_map'][0]))

                    loss_csv.write(
                        'Epoch, Loss, Val Loss, valid mAP, test mAP\n')
                    epoch = 0
                    for loss, val_loss, val_mAP, zs_mAP in zip(
                            hist['loss'], hist['val_loss'], hist['val_mAP'],
                            hist['zs_mAP']):
                        epoch += 1
                        loss_csv.write(
                            str(epoch) + ', ' + str(loss) + ', ' +
                            str(val_loss) + ', ' + str(val_mAP) + ', ' +
                            str(zs_mAP) + '\n')

                    # NOTE(review): hist['batch-val_map', 'batch-zs_map'] uses
                    # a tuple key and would raise KeyError if ever reached.
                    if 'batch-zs_map' in hist.keys(
                    ) or 'batch-val_map' in hist.keys():
                        loss_csv.write(
                            '\n\n\n\nbatch_size:, {}\n\n'.format(bs))
                        loss_csv.write('Batch, val mAP, test mAP\n')
                        batch = 0
                        for val_mAP, zs_mAP in zip(hist['batch-val_map',
                                                        'batch-zs_map']):
                            batch += 1
                            loss_csv.write('{}, {}, {}\n'.format(
                                batch, str(val_mAP), str(zs_mAP)))
def test_embedding_im_mAP(visual_features, docs_vectors_npy, class_list_doc2vec, im2doc_model,
                          im2doc_model_ext=None, im2doc_weights_ext=None, load_precomputed_imdocs=None,
                          verbose=False, progressbar=True):
    """Compute per-image mAP of the im2doc embedding against doc vectors.

    Each image is mapped to a doc vector by the im2doc model; all class doc
    vectors are scored by cosine similarity and average precision is computed
    per image against its class label. Returns the mean over all images.

    :param visual_features: ImageDataset instance or HDF5 path.
    :param docs_vectors_npy: ndarray of doc vectors (or .npy path), row-aligned
        with class_list_doc2vec.
    :param class_list_doc2vec: list of class labels (or loadable path).
    :param im2doc_model: Keras Model, or the name of a saved im2doc model.
    :param im2doc_model_ext: model-file extension; defaults to
        DEFAULT_IM2DOC_MODEL_EXT.
    :param im2doc_weights_ext: optional weights-file extension to load.
    :param load_precomputed_imdocs: reuse cached predictions when present.
    :param verbose: print progress messages.
    :param progressbar: show a pyprind progress bar.
    :return: mean average precision (float).
    """
    def printv(str):
        # Print only when verbose mode is on.
        if verbose:
            print(str)

    if im2doc_model_ext is None:
        im2doc_model_ext = DEFAULT_IM2DOC_MODEL_EXT
    if load_precomputed_imdocs is None:
        load_precomputed_imdocs = False

    printv("Loading visual features..")
    if not isinstance(visual_features, ImageDataset):
        visual_features = ImageDataset().load_hdf5(visual_features)

    printv("Loading im2doc model..")
    if not isinstance(im2doc_model, Model):
        im2doc_model_name = im2doc_model
        model_file = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER,
                                  os.path.join(im2doc_model_name, im2doc_model_name + im2doc_model_ext))
        im2doc_model = load_model(model_file, custom_objects={'cos_distance': cos_distance})
    else:
        im2doc_model_name = None

    if im2doc_weights_ext is not None:
        printv("Loading im2doc weights..")
        # NOTE(review): when im2doc_model was passed as a Model instance this
        # os.path.join on it would fail -- weights loading assumes a name.
        weight_file = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER,
                                   os.path.join(im2doc_model, im2doc_model + im2doc_weights_ext))
        im2doc_model.load_weights(weight_file)

    # Cache path for the predicted doc vectors.
    if im2doc_model_name is not None:
        imdocs_path = os.path.join(cfg_emb.IM2DOC_PREDICTION_FOLDER, im2doc_prediction_fname(im2doc_model_name))
    else:
        imdocs_path = "precomputed_imdocs.temp"

    if load_precomputed_imdocs and os.path.exists(imdocs_path):
        printv("Pre computed docs from images found (im2doc embedding)... loading...")
        output_doc_vectors = np.load(imdocs_path)
    else:
        printv("Predict docs from images (im2doc embedding)..")
        # Squeeze trailing singleton dims so the input is at most 2-D.
        im_data = visual_features.data
        while len(im_data.shape) > 2:
            if im_data.shape[-1] == 1:
                im_data = np.squeeze(im_data, axis=(-1,))
        output_doc_vectors = im2doc_model.predict(im_data, verbose=verbose)
        np.save(imdocs_path, output_doc_vectors)

    printv("Loading doc2vec vectors...")
    if not isinstance(docs_vectors_npy, np.ndarray):
        docs_vectors_npy = np.load(docs_vectors_npy)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = cfg_emb.load_class_list(class_list_doc2vec)

    # mAP test (optimized with cdist)
    if progressbar:
        import sys
        bar = pyprind.ProgBar(len(visual_features.labels), stream = sys.stdout)
    av_prec = []
    from scipy.spatial.distance import cdist
    # Cosine similarity: 1 - cosine distance; higher = more similar.
    C = cdist(output_doc_vectors, docs_vectors_npy, 'cosine')
    C = 1-C

    for i, im_label in enumerate(visual_features.labels):
        scores = []
        targets = []
        if progressbar:
            bar.update()
        for j, dv in enumerate(docs_vectors_npy):
            lbl = int(class_list_doc2vec[j])
            # NOTE(review): retrieve_image_map uses im_label[0] here; this
            # assumes im_label is scalar-like -- confirm labels shape.
            target = not bool(im_label - lbl)
            score = C[i, j]
            scores.append(score)
            targets.append(target)
        from sklearn.metrics import average_precision_score
        AP = average_precision_score(targets, scores)
        av_prec.append(AP)
        # NOTE(review): this is a per-image AP, but lbl holds the last class
        # from the inner loop, so the "Class" tag is misleading.
        printv("Class {} - AP = {}".format(lbl, AP))

    mAP = np.mean(np.asarray(av_prec))
    printv("\t\tmAP = {}".format(mAP))
    return mAP
def test_embedding_top_similars(visual_features, docs_vectors_npy, class_list_doc2vec, im2doc_model_name,
                                im2doc_model_ext=None, im2doc_weights_ext=None,
                                load_precomputed_imdocs=None, top_similars=None):
    """Embed each image into doc2vec space and print its most similar classes.

    For every image in `visual_features`, predicts a doc2vec-space vector with
    the im2doc model, ranks the stored per-class doc2vec vectors by similarity,
    and prints the `top_similars` closest classes.

    :param visual_features: ImageDataset instance, or a path to an hdf5 file to load.
    :param docs_vectors_npy: ndarray of per-class doc2vec vectors, or path to a .npy file.
    :param class_list_doc2vec: list mapping doc2vec vector index -> dataset class label,
                               or a value loadable via cfg_emb.load_class_list.
    :param im2doc_model_name: name of the im2doc model (folder and file prefix under
                              cfg_emb.IM2DOC_MODEL_FOLDER).
    :param im2doc_model_ext: model file extension; defaults to DEFAULT_IM2DOC_MODEL_EXT.
    :param im2doc_weights_ext: if not None, weights with this extension are loaded on
                               top of the model file.
    :param load_precomputed_imdocs: if True, reuse predictions previously saved under
                                    cfg_emb.IM2DOC_PREDICTION_FOLDER (default False).
    :param top_similars: how many most-similar classes to print per image (default 10).
    """
    if im2doc_model_ext is None:
        im2doc_model_ext = DEFAULT_IM2DOC_MODEL_EXT
    if load_precomputed_imdocs is None:
        load_precomputed_imdocs = False
    if top_similars is None:
        top_similars = 10

    print("Loading visual features..")
    if not isinstance(visual_features, ImageDataset):
        visual_features = ImageDataset().load_hdf5(visual_features)

    print("Loading im2doc model..")
    model_file = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER,
                              os.path.join(im2doc_model_name, im2doc_model_name + im2doc_model_ext))
    im2doc_model = load_model(model_file, custom_objects={'cos_distance': cos_distance})
    if im2doc_weights_ext is not None:
        print("Loading im2doc weights..")
        # BUG FIX: this path was built from the undefined/wrong name `im2doc_model`
        # (the Keras model object) instead of the string `im2doc_model_name`, so any
        # call with im2doc_weights_ext set crashed before loading weights.
        weight_file = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER,
                                   os.path.join(im2doc_model_name, im2doc_model_name + im2doc_weights_ext))
        im2doc_model.load_weights(weight_file)

    imdocs_path = os.path.join(cfg_emb.IM2DOC_PREDICTION_FOLDER, im2doc_prediction_fname(im2doc_model_name))
    if load_precomputed_imdocs and os.path.exists(imdocs_path):
        print("Pre computed docs from images found (im2doc embedding)... loading...")
        output_doc_vectors = np.load(imdocs_path)
    else:
        print("Predict docs from images (im2doc embedding)..")
        im_data = visual_features.data
        # Drop trailing singleton dimensions so the data is 2D (samples x features).
        # BUG FIX: the loop condition now also requires shape[-1] == 1; the original
        # `while len(im_data.shape) > 2:` spun forever when a >2D array had a
        # non-singleton last axis (nothing inside the loop changed the shape).
        while len(im_data.shape) > 2 and im_data.shape[-1] == 1:
            im_data = np.squeeze(im_data, axis=(-1,))
        output_doc_vectors = im2doc_model.predict(im_data, verbose=True)
        np.save(imdocs_path, output_doc_vectors)

    print("Loading doc2vec vectors...")
    if not isinstance(docs_vectors_npy, np.ndarray):
        docs_vectors_npy = np.load(docs_vectors_npy)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = cfg_emb.load_class_list(class_list_doc2vec)

    for index, vec in enumerate(output_doc_vectors):
        nv = np.asarray(vec)

        # Rank the stored doc2vec vectors by distance to the predicted embedding,
        # then map each doc2vec index back to the original dataset class label.
        # (A previous implementation queried the live d2v model via
        # d2v_model.docvecs.most_similar; this uses only the stored vectors.)
        similars, dists = distances(nv, docs_vectors_npy, get_first_n=top_similars)
        similars = [int(class_list_doc2vec[s]) for s in similars[:]]

        fname = visual_features.fnames[index]
        label = visual_features.labels[index]
        label_name = visual_features.labelIntToStr(label)

        print("")
        # NOTE: str(...).decode('utf-8') is a Python 2 idiom (bytes -> unicode);
        # this module targets Python 2.
        print("Class: {} - {}".format(label, str(label_name).decode('utf-8')))
        print("Image: " + str(fname).decode('utf-8'))
        print("Top {} similars classes: ".format(top_similars) + str(similars[:]))
        for i in range(0, top_similars):
            print("{} similar class: {} - {} ".format(i+1, str(similars[i]), visual_features.labelIntToStr(similars[i])))