def im2doc(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN,
           text_features=cfg_emb.TEXT_FEATURES_400,
           class_list=cfg_emb.CLASS_LIST_400):
    """Train im2doc regressors (image feature -> document vector) over a
    small hyper-parameter grid and checkpoint the best model of each run.

    :param visual_features: path of the HDF5 visual-feature dataset.
    :param text_features: path of the .npy document-vector matrix.
    :param class_list: optional path of a newline-separated class-label
        file; when None, rows of ``text_features`` are labeled by position.
    """
    import numpy as np

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)

    print("Loading textual features..")
    text_features = np.load(text_features)

    if class_list is not None:
        # open() replaces the py2-only file() builtin; blank lines are
        # skipped so int(lbl) below cannot fail on a trailing newline.
        with open(class_list, 'r') as f:
            class_list = [l for l in f.read().split('\n') if l.strip()]

    print("Generating dataset..")
    data = []
    targets = []

    # Pair each class label with its document vector.  The original stored
    # an enumerate object in the else branch and then indexed it with [0]
    # and [1], which would have raised TypeError.
    if class_list is not None:
        label_docv_pairs = zip(class_list, text_features)
    else:
        label_docv_pairs = enumerate(text_features)

    for lbl, docv in label_docv_pairs:
        lbl = int(lbl)
        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        # Every image of the class regresses to the same document vector.
        for visual_feat in visual_features_with_label.data:
            data.append(visual_feat)
            targets.append(docv)

    data = np.asarray(data)
    # Drop trailing singleton axes.  The original while-loop had no exit
    # when the last axis was not 1 and could spin forever.
    while len(data.shape) > 2 and data.shape[-1] == 1:
        data = np.squeeze(data, axis=(-1, ))

    targets = np.asarray(targets)

    print("Generating model..")

    EPOCHS = 60
    hiddens = [[2000, 1000], [1000]]
    lrs = [10]
    batch_sizes = [32]
    optimizers_str = ['Adadelta']
    optimizers = [Adadelta]

    for hid in hiddens:
        for opt, opt_str in zip(optimizers, optimizers_str):
            for lr in lrs:
                for bs in batch_sizes:

                    print("")
                    print("Training model..")
                    print("hiddens: " + str(hid))
                    print("optim:   " + str(opt_str))
                    print("lr:      " + str(lr))

                    fname = "video2doc_model_opt-{}_lr-{}_bs-{}".format(
                        opt_str, lr, bs)
                    for hu in hid:
                        fname += "_hl-" + str(hu)
                    fname = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER, fname)

                    model = get_model(data.shape[1], targets.shape[-1], hid)
                    model.compile(optimizer=opt(lr=lr), loss=cos_distance)

                    earlystop = EarlyStopping(monitor='loss',
                                              min_delta=0.0005,
                                              patience=9)
                    reduceLR = ReduceLROnPlateau(monitor='loss',
                                                 factor=0.1,
                                                 patience=4,
                                                 verbose=1,
                                                 epsilon=0.0005)
                    bestpoint = ModelCheckpoint(
                        fname + '.weights.{epoch:02d}.loss-{loss:.4f}.h5',
                        monitor='loss',
                        save_best_only=True,
                        save_weights_only=False)
                    callbacks = [earlystop, reduceLR, bestpoint]
                    # batch_size now honors the grid value `bs`; the
                    # original hard-coded 64, silently ignoring batch_sizes.
                    model.fit(data,
                              targets,
                              batch_size=bs,
                              nb_epoch=EPOCHS,
                              verbose=1,
                              shuffle=True,
                              callbacks=callbacks)
def joint_embedding_train(config_gen_function=config_gen_TEST,
                          debug_map_val=None):
    """Grid-train joint image/text embedding models.

    For each Config yielded by ``config_gen_function`` a JointEmbedder is
    built and trained one epoch at a time on freshly sampled contrastive
    batches; the best-loss and best-val-loss models are checkpointed and a
    per-config loss-curve CSV is written.  When EVAL_MAP is set, mAP and
    recall@k scores are computed on the train / validation / zero-shot-test
    splits and one summary CSV per metric key is written.

    :param config_gen_function: callable returning (configs, generator_name).
    :param debug_map_val: optional debug value forwarded to ModelMAP.
    """

    visual_features = cfg_emb.VISUAL_FEATURES_TRAIN
    text_features = cfg_emb.TEXT_FEATURES_400
    class_list = cfg_emb.CLASS_LIST_400
    visual_features_valid = cfg_emb.VISUAL_FEATURES_VALID
    visual_features_zs_test = cfg_emb.VISUAL_FEATURES_TEST
    text_features_zs_test = cfg_emb.TEXT_FEATURES_100
    class_list_test = cfg_emb.CLASS_LIST_100
    recall_at_k = [1, 3, 5, 10]

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test,
                      np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list, int_cast=True)

    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test,
                                                  int_cast=True)

    print("Generating dataset..")

    # class_list is guaranteed non-None here (it was filled from the dataset
    # labels above), so each class label is paired with its document vector.
    # The original kept a dead enumerate fallback that, had it ever run,
    # would have crashed on tuple indexing.
    cls_doc_pairs = zip(class_list, text_features)

    im_data_train = []
    # 1 text for each image (align: img_lbl_x <-> txt_lbl_x <-> lbl_x )
    tx_data_train_im_aligned = []
    tx_data_train = []  # 1 text for each class
    label_train = []
    # Validation containers are created unconditionally so the conversions
    # below never hit a NameError when visual_features_valid is None.
    im_data_val = []
    tx_data_valid_im_aligned = []
    label_val = []

    for lbl, docv in cls_doc_pairs:
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization
        tx_data_train.append(norm_docv)

        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            visual_feat = visual_feat / np.linalg.norm(
                visual_feat)  # l2 normalization
            im_data_train.append(visual_feat)
            tx_data_train_im_aligned.append(norm_docv)
            label_train.append(lbl)

        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                visual_feat = visual_feat / np.linalg.norm(
                    visual_feat)  # l2 normalization
                im_data_val.append(visual_feat)
                tx_data_valid_im_aligned.append(norm_docv)
                label_val.append(lbl)

    # Image data conversion
    im_data_train = list_to_ndarray(im_data_train)
    im_data_val = list_to_ndarray(im_data_val)

    # Text data conversion
    tx_data_train = list_to_ndarray(tx_data_train)

    # Label conversion
    label_train = list_to_ndarray(label_train)
    label_val = list_to_ndarray(label_val)

    print("Generating model..")

    configs, config_gen_name = config_gen_function()

    print("Executing training over config generator: " + config_gen_name)
    folder_gen_name = "jointmodel_confgen-" + config_gen_name
    folder_gen_path = os.path.join(JOINT_MODEL_FOLDER, folder_gen_name)
    if not os.path.isdir(folder_gen_path):
        os.mkdir(folder_gen_path)

    class ModelScore:
        # Container for one model's scores over the three data splits.
        def __init__(self,
                     train_set_score=None,
                     valid_set_score=None,
                     test_set_score=None):
            self.train_set = train_set_score
            self.valid_set = valid_set_score
            self.test_set = test_set_score

    class ConfigScore:
        # Scores of one config: best-train-loss, best-val-loss, init model.
        def __init__(self, name=None):
            self.name = name
            self.scores_best_train = ModelScore()
            self.scores_best_valid = ModelScore()
            self.scores_init = ModelScore()

    config_scores = []  # list of ConfigScore, one for each config

    for config_counter, c in enumerate(configs):
        if not isinstance(c, Config):
            raise TypeError('c is not an instance of Config class.')

        print("")
        print("")
        print("")
        print("")
        print("Config: ")
        pprint(c)

        fname = folder_gen_name + "__" + str(config_counter)
        folder_path = os.path.join(folder_gen_path, fname)
        fpath = os.path.join(folder_path, fname)
        if not os.path.isdir(folder_path):
            os.mkdir(folder_path)

        c.saveJSON(fpath + '.config.json')

        JE = JointEmbedder(im_dim=im_data_train.shape[-1],
                           tx_dim=tx_data_train.shape[-1],
                           out_dim=c.sp_dim,
                           n_text_classes=len(class_list),
                           use_merge_distance=USE_MERGE_DISTANCE)

        optimizer = c.opt(**c.opt_kwargs)
        model = JE.model(optimizer=optimizer,
                         tx_activation=c.tx_act,
                         im_activation=c.im_act,
                         tx_hidden_layers=c.tx_hid,
                         im_hidden_layers=c.im_hid,
                         contrastive_loss_weight=c.contr_w,
                         logistic_loss_weight=c.log_w_tx,
                         contrastive_loss_weight_inverted=c.contr_inv_w,
                         init=c.w_init,
                         contrastive_loss_margin=c.contr_margin)

        model.summary()

        # Map each class label to its index (presumably for the logistic
        # head; kept even though it is not read below — TODO confirm).
        label_map = {}
        for index, label in enumerate(class_list):
            label_map[label] = index
        size = len(class_list)

        init_model_fname = fpath + '.model.init.random.h5'
        best_valid_fname = fpath + '.model.best.val_loss.h5'
        best_train_fname = fpath + '.model.best.loss.h5'
        model.save(init_model_fname)

        # Creating contrastive validation set (fixed across epochs):
        val_x_im, val_x_tx, val_y_contr, val_y_log = get_contr_data_batch(
            im_data_val,
            tx_data_train,
            label_val,
            class_list,
            no_contrastive=DISABLE_CONTRASTIVE,
            shuffle=True,
            bs=c.bs)
        val_X = [val_x_im, val_x_tx]
        val_Y = [val_y_contr, val_y_contr, val_y_contr, val_y_log]

        best_loss = best_val_loss = float('inf')
        best_loss_epoch = -1
        best_val_loss_epoch = -1
        loss_hist = []
        val_loss_hist = []

        for ep in range(0, c.epochs):
            print("Epoch: {}/{}".format(ep, c.epochs - 1))

            checkpoint_path = fpath + ".weights.{:03d}.h5".format(ep)
            checkpoint = ModelCheckpoint(checkpoint_path,
                                         monitor='val_loss',
                                         save_best_only=False,
                                         save_weights_only=True)

            # Fresh contrastive batch every epoch.
            x_im, x_tx, y_cont, y_log = get_contr_data_batch(
                im_data_train,
                tx_data_train,
                label_train,
                class_list,
                no_contrastive=DISABLE_CONTRASTIVE,
                shuffle=True,
                bs=c.bs)
            X = [x_im, x_tx]
            Y = [y_cont, y_cont, y_cont, y_log]
            # Copy the config callbacks: the original aliased c.callbacks
            # and appended a new checkpoint on every epoch, so the list
            # grew by one checkpoint per epoch.
            calls = list(c.callbacks)
            calls.append(checkpoint)
            hs = model.fit(X,
                           Y,
                           c.bs,
                           nb_epoch=1,
                           validation_data=[val_X, val_Y],
                           shuffle=False,
                           callbacks=calls)

            hist = hs.history
            val_loss = hist['val_loss'][0]
            loss = hist['loss'][0]
            val_loss_hist.append(val_loss)
            loss_hist.append(loss)

            # Checkpoint the best models seen so far.
            if loss < best_loss:
                best_loss = loss
                model.save(best_train_fname)
                best_loss_epoch = ep
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                model.save(best_valid_fname)
                best_val_loss_epoch = ep

        # open() replaces the py2-only file() builtin.
        loss_csv = open(fpath + ".loss.csv", 'w')
        loss_csv.write('Learning curves (loss),Epoch, Loss, Val Loss\n')

        if EVAL_INIT_MODEL_LOSS:
            # NOTE(review): this evaluates the current (last-epoch) model,
            # not the saved init model — confirm intent.
            x_im, x_tx, y_cont, y_log = get_contr_data(im_data_train,
                                                       tx_data_train,
                                                       label_train, class_list)
            X = [x_im, x_tx]
            Y = [y_cont, y_cont, y_cont, y_log]
            init_loss = model.evaluate(X, Y, batch_size=c.bs)[0]
            init_val_loss = model.evaluate(val_X, val_Y, batch_size=c.bs)[0]
            loss_csv.write(', {}, {}, {}\n'.format(-1, init_loss,
                                                   init_val_loss))

        for epoch, (loss, val_loss) in enumerate(zip(loss_hist,
                                                     val_loss_hist)):
            loss_csv.write(", {}, {}, {}\n".format(epoch, loss, val_loss))
        loss_csv.write("\n\n\n")
        loss_csv.write("Best loss epoch:, {}, \n".format(best_loss_epoch))
        loss_csv.write(
            "Best val loss epoch:, {}\n".format(best_val_loss_epoch))

        if EVAL_MAP:
            map_call_tr = ModelMAP(visual_features=visual_features,
                                   docs_vectors=text_features,
                                   class_list=class_list,
                                   data_name='TrainSet',
                                   text_retrieval_map=True,
                                   image_retrieval_map=True,
                                   recall_at_k=recall_at_k,
                                   debug_value=debug_map_val)

            map_call_val = ModelMAP(visual_features=visual_features_valid,
                                    docs_vectors=text_features,
                                    class_list=class_list,
                                    data_name='ValidSet',
                                    text_retrieval_map=True,
                                    image_retrieval_map=True,
                                    recall_at_k=recall_at_k,
                                    debug_value=debug_map_val)

            map_call_zs = ModelMAP(visual_features=visual_features_zs_test,
                                   docs_vectors=text_features_zs_test,
                                   class_list=class_list_test,
                                   data_name='TestSetZS',
                                   text_retrieval_map=True,
                                   image_retrieval_map=True,
                                   recall_at_k=recall_at_k,
                                   debug_value=debug_map_val)

            # Map on best loss model
            best_train_model = JointEmbedder.load_model(best_train_fname)
            map_tr_best_train = map_call_tr.call_manual(best_train_model)
            map_val_best_train = map_call_val.call_manual(best_train_model)
            map_zs_best_train = map_call_zs.call_manual(best_train_model)

            score_best_train = ModelScore(map_tr_best_train,
                                          map_val_best_train,
                                          map_zs_best_train)

            # Map on best val_loss model
            best_valid_model = JointEmbedder.load_model(best_valid_fname)
            map_tr_best_valid = map_call_tr.call_manual(best_valid_model)
            map_val_best_valid = map_call_val.call_manual(best_valid_model)
            map_zs_best_valid = map_call_zs.call_manual(best_valid_model)

            score_best_valid = ModelScore(map_tr_best_valid,
                                          map_val_best_valid,
                                          map_zs_best_valid)

            list_map_labels = ["Best Tr Loss", "Best Val Loss"]
            list_map_dict_tr = [map_tr_best_train, map_tr_best_valid]
            list_map_dict_val = [map_val_best_train, map_val_best_valid]
            list_map_dict_zs = [map_zs_best_train, map_zs_best_valid]

            score_init = None

            if EVAL_INIT_MODEL_MAP:
                # Map on init/random model
                init_model = JointEmbedder.load_model(init_model_fname)
                map_tr_init = map_call_tr.call_manual(init_model)
                map_val_init = map_call_val.call_manual(init_model)
                map_zs_init = map_call_zs.call_manual(init_model)
                list_map_labels.append("Init/Random")
                list_map_dict_tr.append(map_tr_init)
                list_map_dict_val.append(map_val_init)
                list_map_dict_zs.append(map_zs_init)

                score_init = ModelScore(map_tr_init, map_val_init, map_zs_init)

            cs = ConfigScore(name=str(config_counter))
            cs.scores_best_train = score_best_train
            cs.scores_best_valid = score_best_valid
            cs.scores_init = score_init
            config_scores.append(cs)

            loss_csv.write("\n\n\n\n")

            loss_csv.write(", Loaded models/weights:, ")
            for l in list_map_labels:
                loss_csv.write("{}, ".format(l))
            loss_csv.write("\n")

            loss_csv.write("\nmAP over training set, ")
            for key in map_tr_best_train.keys():
                loss_csv.write("{}, ".format(key))
                for map_dict in list_map_dict_tr:
                    loss_csv.write("{}, ".format(map_dict[key]))
                loss_csv.write("\n, ")

            loss_csv.write("\nmAP over validation set, ")
            for key in map_tr_best_train.keys():
                loss_csv.write("{}, ".format(key))
                for map_dict in list_map_dict_val:
                    loss_csv.write("{}, ".format(map_dict[key]))
                loss_csv.write("\n, ")

            loss_csv.write("\nmAP over zs-test set, ")
            for key in map_tr_best_train.keys():
                loss_csv.write("{}, ".format(key))
                for map_dict in list_map_dict_zs:
                    loss_csv.write("{}, ".format(map_dict[key]))
                loss_csv.write("\n, ")

        loss_csv.close()

    if EVAL_MAP:
        # All score dicts of the last config must share the same metric
        # keys, otherwise the per-key summary below would KeyError.
        assert cs.scores_best_train.test_set.keys() == \
               cs.scores_best_train.train_set.keys() == \
               cs.scores_best_train.valid_set.keys() == \
               cs.scores_best_valid.test_set.keys() == \
               cs.scores_best_valid.train_set.keys() == \
               cs.scores_best_valid.valid_set.keys()

        if EVAL_INIT_MODEL_MAP:
            assert cs.scores_best_train.test_set.keys() == \
                   cs.scores_init.test_set.keys() == \
                   cs.scores_init.train_set.keys() == \
                   cs.scores_init.valid_set.keys()

        keys = cs.scores_best_train.test_set.keys()
        for key in keys:
            # One summary CSV per metric key, one row per config.
            stats_csv = open(
                os.path.join(folder_gen_path,
                             folder_gen_name + ".{}.csv".format(key)), 'w')
            stats_csv.write('Stats for {}\n\n'.format(key))

            stats_csv.write(
                ', test over training set, , , , test over validation set, , , , test over test set, , ,, \n'
            )

            stats_csv.write('Model Weights:, '
                            'best tr loss, best val loss, init/random, , '
                            'best tr loss, best val loss, init/random, , '
                            'best tr loss, best val loss, init/random, , \n')
            stats_csv.write('Config index/name, \n')

            for cs in config_scores:
                stats_csv.write(
                    '{}, {}, {}, {}, , {}, {}, {}, , {}, {}, {},\n'.format(
                        cs.name, cs.scores_best_train.train_set[key],
                        cs.scores_best_valid.train_set[key],
                        str(cs.scores_init.train_set[key])
                        if EVAL_INIT_MODEL_MAP else '',
                        cs.scores_best_train.valid_set[key],
                        cs.scores_best_valid.valid_set[key],
                        str(cs.scores_init.valid_set[key])
                        if EVAL_INIT_MODEL_MAP else '',
                        cs.scores_best_train.test_set[key],
                        cs.scores_best_valid.test_set[key],
                        str(cs.scores_init.test_set[key])
                        if EVAL_INIT_MODEL_MAP else ''))
            # The original leaked this handle.
            stats_csv.close()
def joint_embedding_train(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN,
                          text_features=cfg_emb.TEXT_FEATURES_400,
                          class_list=cfg_emb.CLASS_LIST_400,
                          visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID,
                          visual_features_zs_test=cfg_emb.VISUAL_FEATURES_TEST,
                          text_features_zs_test=cfg_emb.TEXT_FEATURES_100,
                          class_list_test=cfg_emb.CLASS_LIST_100):
    """Load a saved joint embedding model and print retrieval metrics
    (text-mAP, image-mAP and recall@k) on train, validation and
    zero-shot test splits.

    NOTE(review): this redefines ``joint_embedding_train`` and therefore
    shadows the earlier definition with the same name in this file.

    :param visual_features: path of the train HDF5 visual-feature dataset.
    :param text_features: path or ndarray of the train doc-vector matrix.
    :param class_list: path/list of train class labels (None -> from data).
    :param visual_features_valid: optional validation HDF5 dataset path.
    :param visual_features_zs_test: zero-shot test HDF5 dataset path.
    :param text_features_zs_test: path/ndarray of zero-shot doc vectors.
    :param class_list_test: path/list of zero-shot class labels.
    """
    import numpy as np

    def _squeeze_trailing(arr):
        # Drop trailing singleton axes until the array is at most 2-D.
        # The original repeated `while ndim > 2:` blocks with no exit when
        # the last axis was not 1, which could spin forever.
        while len(arr.shape) > 2 and arr.shape[-1] == 1:
            arr = np.squeeze(arr, axis=(-1, ))
        return arr

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test,
                      np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list, int_cast=True)

    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test,
                                                  int_cast=True)

    print("Generating dataset..")

    # class_list is never None at this point (filled from dataset labels
    # above), so pair labels with doc vectors directly.  The original's
    # enumerate fallback indexed a non-subscriptable enumerate object and
    # would have crashed anyway.
    cls_doc_pairs = zip(class_list, text_features)

    im_data_train = []
    tx_data_train = []
    label_train = []
    # Created unconditionally so the np.asarray conversions below cannot
    # raise NameError when visual_features_valid is None.
    im_data_valid = []
    tx_data_valid = []
    label_valid = []

    for lbl, docv in cls_doc_pairs:
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization

        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            im_data_train.append(visual_feat)
            tx_data_train.append(norm_docv)
            label_train.append(lbl)

        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                im_data_valid.append(visual_feat)
                tx_data_valid.append(norm_docv)
                label_valid.append(lbl)

    # Image data conversion
    im_data_train = _squeeze_trailing(np.asarray(im_data_train))
    im_data_valid = _squeeze_trailing(np.asarray(im_data_valid))

    # Text data conversion
    tx_data_train = _squeeze_trailing(np.asarray(tx_data_train))
    tx_data_valid = _squeeze_trailing(np.asarray(tx_data_valid))

    # Label conversion
    label_train = _squeeze_trailing(np.asarray(label_train))
    label_valid = _squeeze_trailing(np.asarray(label_valid))

    print("Loading model..")

    name = "jointmodel"
    path = 'im2doc_embedding/{}/{}'.format(name, name)
    model_path = path + '.model.best.h5'
    # Set e.g. path + '.weights.09.h5' here to load raw weights instead.
    weight_path = None

    model = JointEmbedder.load_model(model_path=model_path,
                                     weight_path=weight_path)
    top_k = [1, 3, 5, 10]

    def _evaluate_split(vis_feats, txt_feats, classes, tag):
        # Print text-mAP, image-mAP and recall@k of `model` on one split.
        print("\nTest {}: ".format(tag))
        text_map = retrieve_text_map(vis_feats,
                                     txt_feats,
                                     classes,
                                     joint_model=model)
        image_map = retrieve_image_map(vis_feats,
                                       txt_feats,
                                       classes,
                                       joint_model=model)
        print("mAP = " + str(text_map))
        print("mAPi = " + str(image_map))
        for k in top_k:
            recall = recall_top_k(vis_feats,
                                  txt_feats,
                                  classes,
                                  joint_model=model,
                                  top_k=k,
                                  verbose=False,
                                  progressbar=False)
            print("recall@{} = {}".format(k, recall))

    # Tag strings kept byte-identical to the original output
    # (including the "traning" typo).
    _evaluate_split(visual_features, text_features, class_list, "traning")
    _evaluate_split(visual_features_valid, text_features, class_list,
                    "validation")
    # Bug fix: the original printed the *validation* image-mAP again in the
    # zero-shot section instead of computing it on the zero-shot split.
    _evaluate_split(visual_features_zs_test, text_features_zs_test,
                    class_list_test, "zero shot test")
def clustering_dbscan(X):  # NOT WORKING
    """Cluster the rows of X with DBSCAN (cosine metric, brute force) and
    return the per-sample cluster labels (-1 for noise).

    NOTE(review): flagged "NOT WORKING" by the original author — confirm
    before relying on it.  The original also built a core-sample mask that
    was never used; that dead code (and the numpy import feeding it) has
    been removed.
    """
    db = DBSCAN(eps=0.35, min_samples=2, metric='cosine',
                algorithm='brute').fit(X)
    return db.labels_


# Ad-hoc experiment driver: runs at import time, loads the shallow feature
# dataset and selects one class sub-dataset for the clustering experiments
# summarized in the notes below.
from config import cfg
from imdataset import ImageDataset
cfg.init()
dataset = ImageDataset().load_hdf5(
    "shallow_extracted_features/shallow_feat_dbp3120_train_ds.h5")
# Label 3115 is one of the hand-picked example classes listed below.
dataset_sub = dataset.sub_dataset_with_label(3115)
# good examples: 1843, 123, 43, 422
# hard examples: 421, 843, 93, 927

# 3115 (monument) --> decent with kmeans 3 clusters
# 3118 (monument) --> best MS quantile=0.20
# 1843, 123, 43, 422 --> perfect with: db = DBSCAN(eps=900, min_samples=2, metric='cityblock').fit(X)

# impressive! db = DBSCAN(eps=0.35, min_samples=2, metric='cosine', algorithm='brute').fit(X)
# Perfect: 2910 (1 cluster with eps=0.40, 2 clusters with eps=0.35)
# 2908: good with 0.40
# Perfect: 1843, 423, 422, 43, 123, 2976, 290, 963
# Excellent! 3115 (monument)
# Good: 927, 3099, 1954, 1378, 1143
# Good/excellent, but create 2 good clusters: 843
# Decent: 421, 1984
# Beispiel #5  ("Example #5" — scrape artifact from the code-listing site, not code)
# 0  (scrape artifact: listing vote count, not code)
def joint_embedding_train(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN,
                          text_features=cfg_emb.TEXT_FEATURES_400,
                          class_list=cfg_emb.CLASS_LIST_400,
                          visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID,
                          visual_features_zs_test=cfg_emb.VISUAL_FEATURES_TEST,
                          text_features_zs_test=cfg_emb.TEXT_FEATURES_100,
                          class_list_test=cfg_emb.CLASS_LIST_100):
    """Train a visual/textual joint-embedding model (JointEmbedder).

    Builds (image-feature, l2-normalized doc vector, class label) triples
    for the training and (optional) validation sets, then trains one model
    per entry of the local ``configs`` list. Training checkpoints weights
    every epoch, keeps the best model by ``MONITOR``, and evaluates mAP and
    recall@10 on train / valid / zero-shot-test sets through ModelMAP
    callbacks at fit end.

    Arguments may be given either as loaded objects or as paths:
    - visual_features / visual_features_valid / visual_features_zs_test:
      HDF5 paths, loaded through ImageDataset().load_hdf5.
    - text_features / text_features_zs_test: ndarray or .npy path.
    - class_list / class_list_test: class-list file paths loaded with
      cfg_emb.load_class_list (cast to int), or None to derive the class
      list from the visual dataset's labels.
    """
    import numpy as np

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    # Accept either preloaded ndarrays or .npy file paths.
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test,
                      np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    # Fall back to the labels present in the visual dataset when no explicit
    # class list is supplied.
    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list, int_cast=True)

    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test,
                                                  int_cast=True)

    print("Generating dataset..")

    # NOTE(review): class_list cannot be None at this point (it was filled in
    # above), so the `else` branch below is effectively dead code.
    if class_list is not None:
        cycle_clslst_txfeat = class_list, text_features
    else:
        cycle_clslst_txfeat = enumerate(text_features)

    im_data_train = []
    tx_data_train = []
    label_train = []
    if visual_features_valid is not None:
        im_data_valid = []
        tx_data_valid = []
        label_valid = []

    # Pair the i-th class label with the i-th text feature row: for every
    # image of that class, emit (image feature, normalized doc vector, label).
    for lbl, docv in zip(cycle_clslst_txfeat[0], cycle_clslst_txfeat[1]):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization

        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            im_data_train.append(visual_feat)
            tx_data_train.append(norm_docv)
            label_train.append(lbl)

        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                im_data_valid.append(visual_feat)
                tx_data_valid.append(norm_docv)
                label_valid.append(lbl)

    # Image data conversion: squeeze trailing singleton axes until the arrays
    # are 2-D (n_samples, feat_dim).
    # NOTE(review): each `while` below would loop forever if a >2-D array had
    # a trailing axis != 1 -- assumed never to happen with these features.
    im_data_train = np.asarray(im_data_train)
    im_data_valid = np.asarray(im_data_valid)
    while len(im_data_train.shape) > 2:
        if im_data_train.shape[-1] == 1:
            im_data_train = np.squeeze(im_data_train, axis=(-1, ))
    while len(im_data_valid.shape) > 2:
        if im_data_valid.shape[-1] == 1:
            im_data_valid = np.squeeze(im_data_valid, axis=(-1, ))

    # Text data conversion (same squeeze treatment).
    tx_data_train = np.asarray(tx_data_train)
    tx_data_valid = np.asarray(tx_data_valid)
    while len(tx_data_train.shape) > 2:
        if tx_data_train.shape[-1] == 1:
            tx_data_train = np.squeeze(tx_data_train, axis=(-1, ))
    while len(tx_data_valid.shape) > 2:
        if tx_data_valid.shape[-1] == 1:
            tx_data_valid = np.squeeze(tx_data_valid, axis=(-1, ))

    # Label conversion
    label_train = np.asarray(label_train)
    label_valid = np.asarray(label_valid)

    while len(label_train.shape) > 2:
        if label_train.shape[-1] == 1:
            label_train = np.squeeze(label_train, axis=(-1, ))
    while len(label_valid.shape) > 2:
        if label_valid.shape[-1] == 1:
            label_valid = np.squeeze(label_valid, axis=(-1, ))

    print("Generating model..")

    # Quantity monitored by checkpointing (and the disabled early stopping).
    MONITOR = 'val_loss'

    class Config:
        # Bag of hyperparameters for one training run; fields are overwritten
        # per-experiment below.
        def __init__(self):
            self.lr = 10
            self.bs = 64
            self.epochs = 50
            self.opt = Adadelta
            self.opt_str = 'adadelta'
            self.joint_space_dim = 200
            self.tx_activation = 'softmax'
            self.im_activation = 'tanh'
            self.tx_hidden_layers = None
            self.tx_hidden_activation = None
            self.im_hidden_layers = None
            self.im_hidden_activation = None
            self.contrastive_loss_weight = 1
            self.logistic_loss_weight = 1
            self.contrastive_loss_weight_inverted = 1
            self.weight_init = 'glorot_uniform'

    # GOT GREAT RESUTLS WITH THIS PARAMS:
    # configs = []
    # c = Config()
    # c.lr = 100
    # c.bs = 64
    # c.epochs = 50
    # c.joint_space_dim = 200
    # c.emb_activation = 'softmax'
    # c.contrastive_loss_weight = 3
    # c.logistic_loss_weight = 1
    # c.weight_init = 'glorot_uniform' # 'glorot_normal'
    #
    # # train_mAP-fit-end: 0.231570111798
    # # valid_mAP-fit-end: 0.36824232778
    # # test_mAP-fit-end: 0.12500124832
    # Epoch 48 / 50
    # loss: 2.8842 - activation_1_loss: 0.7106 - activation_2_loss: 0.7106 - dense_1_loss: 0.7524 - val_loss: 3.0216 - val_activation_1_loss: 0.8354 - val_activation_2_loss: 0.8354 - val_dense_1_loss: 0.5154
    # Epoch 49 / 50
    # loss: 2.7934 - activation_1_loss: 0.6958 - activation_2_loss: 0.6958 - dense_1_loss: 0.7061 - val_loss: 2.6629 - val_activation_1_loss: 0.5755 - val_activation_2_loss: 0.5755 - val_dense_1_loss: 0.9365
    # Epoch 50 / 50
    # loss: 2.7774 - activation_1_loss: 0.6948 - activation_2_loss: 0.6948 - dense_1_loss: 0.6930 - val_loss: 2.7351 - val_activation_1_loss: 0.5661 - val_activation_2_loss: 0.5661 - val_dense_1_loss: 1.0367

    # configs = []
    # c = Config()
    # c.lr = 100
    # c.bs = 64
    # c.epochs = 50
    # c.joint_space_dim = 200
    # c.emb_activation = 'softmax'
    # c.contrastive_loss_weight = 3
    # c.logistic_loss_weight = 1
    # c.weight_init = 'glorot_uniform' # 'glorot_normal'
    # c.tx_hidden_layers = [250]
    # c.tx_hidden_activation = ['relu']
    # c.im_hidden_layers = [500]
    # c.im_hidden_activation = ['tanh']
    configs = []
    c = Config()
    c.lr = 10
    c.bs = 64
    c.epochs = 10
    c.joint_space_dim = 200
    c.tx_activation = 'sigmoid'
    c.im_activation = 'sigmoid'
    c.contrastive_loss_weight = 3
    c.contrastive_loss_weight_inverted = 3
    c.logistic_loss_weight = 1
    c.weight_init = 'glorot_uniform'  # 'glorot_normal'
    # c.tx_hidden_layers = [250]
    # c.tx_hidden_activation = ['relu']
    # c.im_hidden_layers = [500]
    # c.im_hidden_activation = ['tanh']
    # train_mAP-fit-end: 0.501253132832
    # valid_mAP-fit-end: 0.501253132832
    # test_mAP-fit-end: 0.505
    # # ... actually all the distance vectors turn out IDENTICAL, which is
    # # why we get these results
    configs.append(c)

    for c in configs:

        print ""
        print("Training model..")
        print "optim:   " + str(c.opt_str)
        print "lr:      " + str(c.lr)

        # Output folder / file prefix encodes the hyperparameters.
        fname = "jointmodel_opt-{}_lr-{}_bs-{}".format(c.opt_str, c.lr, c.bs)
        # for i, hu in enumerate(hid):
        #     fname += "_hl-" + str(hu)
        folder = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER, fname)
        if not os.path.isdir(folder):
            os.mkdir(folder)

        fname = os.path.join(folder, fname)

        JE = JointEmbedder(im_dim=im_data_train.shape[-1],
                           tx_dim=tx_data_train.shape[-1],
                           out_dim=c.joint_space_dim,
                           n_text_classes=len(class_list))

        model = JE.model(
            optimizer=c.opt(lr=c.lr),
            tx_activation=c.tx_activation,
            im_activation=c.im_activation,
            tx_hidden_layers=c.tx_hidden_layers,
            tx_hidden_activation=c.tx_hidden_activation,
            im_hidden_layers=c.im_hidden_layers,
            im_hidden_activation=c.im_hidden_activation,
            contrastive_loss_weight=c.contrastive_loss_weight,
            logistic_loss_weight=c.logistic_loss_weight,
            contrastive_loss_weight_inverted=c.
            contrastive_loss_weight_inverted,
            init=c.weight_init,
        )

        #earlystop = EarlyStopping(monitor=MONITOR, min_delta=0.0005, patience=9)
        #reduceLR = ReduceLROnPlateau(monitor=MONITOR, factor=0.1, patience=4, verbose=1, epsilon=0.0005)
        # Keep the best model (by MONITOR) and a weights snapshot per epoch.
        bestpoint = ModelCheckpoint(fname + '.model.best.h5',
                                    monitor=MONITOR,
                                    save_best_only=True)
        checkpoint = ModelCheckpoint(fname + '.weights.{epoch:02d}.h5',
                                     monitor=MONITOR,
                                     save_best_only=False,
                                     save_weights_only=True)

        # mAP / recall@10 evaluation callbacks, run once at fit end.
        mAP_tr = ModelMAP(visual_features=visual_features,
                          docs_vectors=text_features,
                          class_list=class_list,
                          data_name='train-set',
                          exe_fit_end=True,
                          recall_at_k=[10])
        mAP_val = ModelMAP(visual_features=visual_features_valid,
                           docs_vectors=text_features,
                           class_list=class_list,
                           data_name='valid-set',
                           exe_fit_end=True,
                           recall_at_k=[10])
        mAP_zs = ModelMAP(visual_features=visual_features_zs_test,
                          docs_vectors=text_features_zs_test,
                          class_list=class_list_test,
                          data_name='test-set',
                          exe_fit_end=True,
                          recall_at_k=[10])

        callbacks = [mAP_tr, mAP_val, mAP_zs, checkpoint,
                     bestpoint]  #, earlystop, ]

        model.summary()

        # One-hot encode the labels over the (index of) class_list.
        label_map = {}
        for index, label in enumerate(class_list):
            label_map[label] = index
        size = len(class_list)

        label_train_converted = []
        for l in label_train:
            new_l = np.zeros([size])
            new_l[label_map[l]] = 1
            label_train_converted.append(new_l)
        label_train_converted = np.asarray(label_train_converted)
        label_valid_converted = []
        for l in label_valid:
            new_l = np.zeros([size])
            new_l[label_map[l]] = 1
            label_valid_converted.append(new_l)
        label_valid_converted = np.asarray(label_valid_converted)
        # label_train_converted = np.asarray([label_map[l] for l in label_train])
        # label_valid_converted = np.asarray([label_map[l] for l in label_valid])

        # Four target arrays -- presumably matching the model's four outputs
        # (two contrastive heads on raw labels, two logistic heads on one-hot
        # labels); confirm against JointEmbedder.model.
        history = model.fit([im_data_train, tx_data_train], [
            label_train, label_train, label_train_converted,
            label_train_converted
        ],
                            validation_data=[[im_data_valid, tx_data_valid],
                                             [
                                                 label_valid, label_valid,
                                                 label_valid_converted,
                                                 label_valid_converted
                                             ]],
                            batch_size=c.bs,
                            nb_epoch=c.epochs,
                            shuffle=True,
                            verbose=1,
                            callbacks=callbacks)

        # NOTE(review): Python-2 `file()` builtin; the handle is never written
        # to or closed in the visible code -- confirm whether the CSV logging
        # was lost in a refactor.
        loss_csv = file(fname + '.loss.csv', 'w')
        hist = history.history
def dnn2docvec(dataset=cfg.dataset,
               feat_net='resnet50',
               shallow_net=ShallowNetBuilder().A()):
    import numpy as np

    print("Loading visual features..")
    SL = ShallowLoader(shallow_training_set, feat_net)
    shallow_net.load(SL, )
    visual_features = pruned_feat_dataset(
        dataset,
        feat_net,
    )
    visual_features = ImageDataset().load_hdf5(
        "shallow_extracted_features/shallow_feat_dbp3120_train_ds.h5")
    print("Loading textual features..")
    textual_features = np.load("doc2vec_dbpedia_vectors.npy")

    print("Generating dataset..")
    data = []
    labels = []
    for lbl, docv in enumerate(textual_features):
        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            data.append(visual_feat)
            labels.append(docv)

    data = np.asarray(data)
    labels = np.asarray(labels)

    print("Generating model..")

    EPOCHS = 40
    hiddens = [[1000], [2000], [4000], [2000, 1000], [4000, 2000],
               [4000, 2000, 1000]]
    lrs = [10, 1]
    batch_sizes = [64, 32, 16]
    optimizers_str = ['Adadelta', 'Adagrad']
    optimizers = [Adadelta, Adagrad]
    for hid in hiddens:
        for opt, opt_str in zip(optimizers, optimizers_str):
            for lr in lrs:
                for bs in batch_sizes:

                    print ""
                    print("Training model..")
                    print "hiddens: " + str(hid)
                    print "optim:   " + str(opt_str)
                    print "lr:      " + str(lr)

                    fname = "video2doc_model_opt-{}_lr-{}_bs-{}".format(
                        opt_str, lr, bs)
                    for i, hu in enumerate(hid):
                        fname += "_hl-" + str(hu)

                    model = get_model(data.shape[-1], labels.shape[-1], hid)
                    model.compile(optimizer=opt(lr=lr), loss=cos_distance)

                    earlystop = EarlyStopping(monitor='loss',
                                              min_delta=0.0005,
                                              patience=8)
                    reduceLR = ReduceLROnPlateau(monitor='loss',
                                                 factor=0.1,
                                                 patience=3,
                                                 verbose=1,
                                                 epsilon=0.001)
                    bestpoint = ModelCheckpoint(
                        fname + '.weights.{epoch:02d}.loss-{loss:.4f}.h5',
                        monitor='loss',
                        save_best_only=True,
                        save_weights_only=False)
                    bestpoint_wo = ModelCheckpoint(
                        fname + '.weights.{epoch:02d}.loss-{loss:.4f}.h5',
                        monitor='loss',
                        save_best_only=True,
                        save_weights_only=True)
                    callbacks = [earlystop, reduceLR, bestpoint, bestpoint_wo]
                    model.fit(data,
                              labels,
                              batch_size=64,
                              nb_epoch=EPOCHS,
                              verbose=1,
                              shuffle=True,
                              callbacks=callbacks)
def main():
    """End-to-end joint-embedding training script.

    Loads the 400-class train / valid visual features and the 100-class
    zero-shot test features, pairs them with l2-normalized doc2vec vectors,
    builds a JointEmbedder model and trains it, evaluating mAP on the
    train / valid / zero-shot sets at the end of training.
    """

    cfg.init()

    # Class lists: 500 = full pruned set, 400 = train split, 100 = zero-shot
    # test split (disjoint from the 400).
    class_list_500 = load_class_list('class_keep_from_pruning.txt',
                                     int_cast=True)
    class_list_400 = load_class_list('class_keep_from_pruning-train.txt',
                                     int_cast=True)
    class_list_100 = load_class_list('class_keep_from_pruning-test.txt',
                                     int_cast=True)

    visual_features_400_train = 'extracted_features/feat_dbp3120__resnet50__avg_pool_pruned-A@best_nb-classes-500_test-on-dbp3120__train.h5'
    visual_features_400_valid = 'extracted_features/feat_dbp3120__resnet50__avg_pool_pruned-A@best_nb-classes-500_test-on-dbp3120__valid.h5'
    visual_features_100_zs_test = 'extracted_features/feat_dbp3120__resnet50__avg_pool_pruned-A@best_nb-classes-500_test-on-dbp3120__test.h5'

    print("Loading textual features..")
    text_features_400 = np.load('docvec_400_train_on_wiki.npy')
    text_features_500 = np.load('docvec_500_train_on_wiki.npy')
    # Select, from the 500-class doc vectors, the rows belonging to the
    # 100 zero-shot test classes (row order follows class_list_500).
    text_features_100_zs = []
    for i, cls in enumerate(class_list_500):
        if cls in class_list_100:
            text_features_100_zs.append(text_features_500[i])
    text_features_100_zs = np.asarray(text_features_100_zs)

    print("Loading visual features..")
    visual_features_400_train = ImageDataset().load_hdf5(
        visual_features_400_train)
    visual_features_100_zs_test = ImageDataset().load_hdf5(
        visual_features_100_zs_test)
    if visual_features_400_valid is not None:
        visual_features_400_valid = ImageDataset().load_hdf5(
            visual_features_400_valid)

    im_data_train = []
    tx_data_train = []
    label_train = []
    if visual_features_400_valid is not None:
        im_data_valid = []
        tx_data_valid = []
        label_valid = []

    # Build (image feature, normalized doc vector, label) triples per class
    # for the train and (optional) validation sets.
    for lbl, docv in zip(class_list_400, text_features_400):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization

        visual_features_with_label = visual_features_400_train.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            im_data_train.append(visual_feat)
            tx_data_train.append(norm_docv)
            label_train.append(lbl)

        if visual_features_400_valid is not None:
            visual_features_valid_with_label = visual_features_400_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                im_data_valid.append(visual_feat)
                tx_data_valid.append(norm_docv)
                label_valid.append(lbl)

    # Image data conversion
    im_data_train = list_to_ndarray(im_data_train)
    im_data_valid = list_to_ndarray(im_data_valid)

    # Text data conversion
    tx_data_train = list_to_ndarray(tx_data_train)
    tx_data_valid = list_to_ndarray(tx_data_valid)

    # Label conversion
    label_train = list_to_ndarray(label_train)
    label_valid = list_to_ndarray(label_valid)

    # Hyperparameters for this run.
    joint_space_dim = 200
    batch_size = 64
    epochs = 80
    optimizer_str = 'Adadelta'  # 'sgd'
    optimizer = Adadelta  #sgd
    lr = 40

    print("Generating model..")
    print ""
    print("Training model..")
    print "optim:   " + str(optimizer_str)
    print "lr:      " + str(lr)

    fname = "jointmodel_opt-{}_lr-{}_bs-{}".format(optimizer_str, lr,
                                                   batch_size)

    JE = JointEmbedder(im_dim=im_data_train.shape[-1],
                       tx_dim=tx_data_train.shape[-1],
                       out_dim=joint_space_dim,
                       n_text_classes=len(class_list_400))

    model = JE.model(
        optimizer=optimizer(lr=lr),
        tx_activation='softmax',
        tx_hidden_layers=[256],
        tx_hidden_activation=['relu'],
        im_hidden_layers=[512],
        im_hidden_activation=['tanh'],
    )

    # earlystop = EarlyStopping(monitor=MONITOR, min_delta=0.0005, patience=9)
    # reduceLR = ReduceLROnPlateau(monitor=MONITOR, factor=0.1, patience=4, verbose=1, epsilon=0.0005)
    # bestpoint = ModelCheckpoint(fname + '.model.best.h5', monitor=MONITOR, save_best_only=True)
    # checkpoint = ModelCheckpoint(fname + '.weights.{epoch:02d}.h5', monitor=MONITOR, save_best_only=False, save_weights_only=True)

    # mAP evaluation callbacks; each runs only once, at the end of training.
    # # mAP_tr = ModelMAP(visual_features=visual_features, docs_vectors=text_features, class_list=class_list)
    mAP_tr = ModelMAP(visual_features=visual_features_400_train,
                      docs_vectors=text_features_400,
                      class_list=class_list_400,
                      data_name='tr_mAP',
                      exe_on_train_begin=False,
                      on_train_begin_key='tr_begin-tr_map',
                      exe_on_batch_end=False,
                      on_batch_end_key='batch-tr_map',
                      exe_on_epoch_end=False,
                      exe_on_train_end=True)
    mAP_val = ModelMAP(visual_features=visual_features_400_valid,
                       docs_vectors=text_features_400,
                       class_list=class_list_400,
                       data_name='val_mAP',
                       exe_on_train_begin=False,
                       on_train_begin_key='tr_begin-val_map',
                       exe_on_batch_end=False,
                       on_batch_end_key='batch-val_map',
                       exe_on_train_end=True,
                       exe_on_epoch_end=False)

    mAP_zs = ModelMAP(visual_features=visual_features_100_zs_test,
                      docs_vectors=text_features_100_zs,
                      class_list=class_list_100,
                      data_name='zs_mAP',
                      exe_on_train_begin=False,
                      on_train_begin_key='tr_begin-zs_map',
                      exe_on_batch_end=False,
                      on_batch_end_key='batch-zs_map',
                      exe_on_train_end=True,
                      exe_on_epoch_end=False)

    # callbacks = [reduceLR, bestpoint, checkpoint, mAP_val, mAP_zs] #, earlystop, ]
    callbacks = [mAP_tr, mAP_val,
                 mAP_zs]  # , mAP_val,  mAP_zs] #, earlystop, ]
    # TODO: use validation-set for early stopping and LR-decay
    # label_train = np.zeros([len(im_data_train), 1])
    # label_valid = np.zeros([len(im_data_valid), 1])

    # One-hot encode the labels over the index of class_list_400.
    label_map = {}
    for index, label in enumerate(class_list_400):
        label_map[label] = index
    size = len(class_list_400)

    label_train_converted = []
    for l in label_train:
        new_l = np.zeros([size])
        new_l[label_map[l]] = 1
        label_train_converted.append(new_l)
    label_train_converted = np.asarray(label_train_converted)
    label_valid_converted = []
    for l in label_valid:
        new_l = np.zeros([size])
        new_l[label_map[l]] = 1
        label_valid_converted.append(new_l)
    label_valid_converted = np.asarray(label_valid_converted)
    #label_train_converted = np.asarray([label_map[l] for l in label_train])
    #label_valid_converted = np.asarray([label_map[l] for l in label_valid])

    model.summary()
    # Three target arrays -- presumably matching the model's three outputs
    # (two contrastive heads plus one classification head); confirm against
    # JointEmbedder.model.
    history = model.fit(
        [im_data_train, tx_data_train],
        [label_train, label_train, label_train_converted],
        validation_data=[[im_data_valid, tx_data_valid],
                         [label_valid, label_valid, label_valid_converted]],
        batch_size=batch_size,
        nb_epoch=epochs,
        shuffle=True,
        verbose=1,
        callbacks=callbacks)

    # NOTE(review): Python-2 `file()` builtin; handle is never written to or
    # closed in the visible code -- the CSV logging appears to be missing.
    loss_csv = file(fname + '.loss.csv', 'w')
    hist = history.history
# Beispiel #8
# 0
def im2docvec_wvalid_map(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN,
                         text_features=cfg_emb.TEXT_FEATURES_400,
                         class_list=cfg_emb.CLASS_LIST_400,
                         visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID,
                         visual_features_zs_test=cfg_emb.VISUAL_FEATURES_TEST,
                         text_features_zs_test=cfg_emb.GET_TEXT_FEATURES_100(),
                         class_list_test=cfg_emb.CLASS_LIST_100):
    """Grid-search training of image -> docvec regression with mAP tracking.

    Builds (image feature, l2-normalized doc vector) pairs for train and
    validation sets, then for each hyperparameter combination trains a
    regression model with cosine-distance loss, checkpointing and mAP
    evaluation (validation and zero-shot sets) via ModelMAP callbacks.

    NOTE(review): the `return` right after the first `model.fit` exits the
    function after the FIRST configuration, so the grid search effectively
    trains one model and all the CSV-logging code below is unreachable.
    """
    import numpy as np

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    # Accept either preloaded ndarrays or .npy file paths.
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test,
                      np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list)

    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test)

    print("Generating dataset..")

    # NOTE(review): class_list cannot be None here (filled in above), so the
    # `else` branch below is dead code.
    if class_list is not None:
        cycle_on = class_list, text_features
    else:
        cycle_on = enumerate(text_features)

    data_train = []
    target_train = []
    if visual_features_valid is not None:
        data_valid = []
        target_valid = []

    # For every class, pair each of its image features with the class's
    # l2-normalized doc vector (regression target).
    for lbl, docv in zip(cycle_on[0], cycle_on[1]):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization
        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            data_train.append(visual_feat)
            target_train.append(norm_docv)

        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                data_valid.append(visual_feat)
                target_valid.append(norm_docv)

    data_train = np.asarray(data_train)
    data_valid = np.asarray(data_valid)

    # Squeeze trailing singleton axes until arrays are 2-D.
    # NOTE(review): loops forever if a >2-D array has a trailing axis != 1 --
    # assumed never to happen with these features.
    while len(data_train.shape) > 2:
        if data_train.shape[-1] == 1:
            data_train = np.squeeze(data_train, axis=(-1, ))

    while len(data_valid.shape) > 2:
        if data_valid.shape[-1] == 1:
            data_valid = np.squeeze(data_valid, axis=(-1, ))

    target_train = np.asarray(target_train)
    target_valid = np.asarray(target_valid)

    validation_data = [data_valid, target_valid]

    print("Generating model..")

    EPOCHS = 20
    hiddens = [[1000], [500], [200]]
    #hiddens = [ [2000,1000], ]

    lrs = [10]
    batch_sizes = [32]
    optimizers_str = ['Adadelta']
    optimizers = [Adadelta]

    #hiddens = [ [1000], [2000], [4000], [2000,1000], [4000, 2000], [4000, 2000, 1000]]
    #lrs = [10, 1]
    #batch_sizes = [64, 32, 16]
    #optimizers_str = ['Adadelta', 'Adagrad']
    #optimizers = [Adadelta, Adagrad]

    for hid in hiddens:
        for opt, opt_str in zip(optimizers, optimizers_str):
            for lr in lrs:
                for bs in batch_sizes:

                    print ""
                    print("Training model..")
                    print "hiddens: " + str(hid)
                    print "optim:   " + str(opt_str)
                    print "lr:      " + str(lr)

                    # Output folder / file prefix encodes the hyperparameters.
                    fname = "im2docvec_opt-{}_lr-{}_bs-{}".format(
                        opt_str, lr, bs)
                    for i, hu in enumerate(hid):
                        fname += "_hl-" + str(hu)
                    folder = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER, fname)
                    if not os.path.isdir(folder):
                        os.mkdir(folder)

                    fname = os.path.join(folder, fname)

                    model = get_model(data_train.shape[1],
                                      target_train.shape[-1], hid)
                    model.compile(optimizer=opt(lr=lr), loss=cos_distance)

                    # NOTE(review): MONITOR is a module-level global defined
                    # elsewhere in this file -- confirm its value.
                    earlystop = EarlyStopping(monitor=MONITOR,
                                              min_delta=0.0005,
                                              patience=9)
                    reduceLR = ReduceLROnPlateau(monitor=MONITOR,
                                                 factor=0.1,
                                                 patience=4,
                                                 verbose=1,
                                                 epsilon=0.0005)
                    bestpoint = ModelCheckpoint(fname + '.model.best.h5',
                                                monitor=MONITOR,
                                                save_best_only=True)
                    checkpoint = ModelCheckpoint(fname +
                                                 '.weights.{epoch:02d}.h5',
                                                 monitor=MONITOR,
                                                 save_best_only=False,
                                                 save_weights_only=True)

                    #mAP_tr = ModelMAP(visual_features=visual_features, docs_vectors=text_features, class_list=class_list)
                    mAP_val = ModelMAP(visual_features=visual_features_valid,
                                       docs_vectors=text_features,
                                       class_list=class_list,
                                       history_key='val_mAP',
                                       exe_on_train_begin=True,
                                       on_train_begin_key='tr_begin-val_map',
                                       exe_on_batch_end=False,
                                       on_batch_end_key='batch-val_map')
                    mAP_zs = ModelMAP(visual_features=visual_features_zs_test,
                                      docs_vectors=text_features_zs_test,
                                      class_list=class_list_test,
                                      history_key='zs_mAP',
                                      exe_on_train_begin=True,
                                      on_train_begin_key='tr_begin-zs_map',
                                      exe_on_batch_end=False,
                                      on_batch_end_key='batch-zs_map')

                    callbacks = [
                        reduceLR, bestpoint, checkpoint, mAP_val, mAP_zs
                    ]  #, earlystop, ]
                    # mAP = test_embedding_map(visual_features=visual_features_zs_test,
                    #                          class_list_doc2vec=class_list_test,
                    #                          docs_vectors_npy=text_features_zs_test,
                    #                          im2doc_model=model,
                    #                          verbose=False)
                    # print("Pre train mAP: " + str(mAP))
                    history = model.fit(data_train,
                                        target_train,
                                        batch_size=bs,
                                        nb_epoch=EPOCHS,
                                        verbose=1,
                                        shuffle=True,
                                        callbacks=callbacks)

                    # NOTE(review): Python-2 `file()` builtin; the handle is
                    # opened but, because of the `return` below, never written
                    # to or closed.
                    loss_csv = file(fname + '.loss.csv', 'w')
                    hist = history.history
                    # NOTE(review): early return -- everything below is
                    # unreachable (grid search stops after one config).
                    return

                    if 'tr_begin-val_map' in hist.keys():
                        loss_csv.write('val_mAP pre train:, {}\n'.format(
                            hist['tr_begin-val_map'][0]))
                    if 'tr_begin-zs_map' in hist.keys():
                        loss_csv.write('zs_mAP pre train:, {}\n'.format(
                            hist['tr_begin-zs_map'][0]))

                    loss_csv.write(
                        'Epoch, Loss, Val Loss, valid mAP, test mAP\n')
                    epoch = 0
                    for loss, val_loss, val_mAP, zs_mAP in zip(
                            hist['loss'], hist['val_loss'], hist['val_mAP'],
                            hist['zs_mAP']):
                        epoch += 1
                        loss_csv.write(
                            str(epoch) + ', ' + str(loss) + ', ' +
                            str(val_loss) + ', ' + str(val_mAP) + ', ' +
                            str(zs_mAP) + '\n')

                    if 'batch-zs_map' in hist.keys(
                    ) or 'batch-val_map' in hist.keys():
                        loss_csv.write(
                            '\n\n\n\nbatch_size:, {}\n\n'.format(bs))
                        loss_csv.write('Batch, val mAP, test mAP\n')
                        batch = 0
                        # NOTE(review): bug (unreachable) -- this indexes the
                        # dict with a tuple key; it was presumably meant to be
                        # zip(hist['batch-val_map'], hist['batch-zs_map']).
                        for val_mAP, zs_mAP in zip(hist['batch-val_map',
                                                        'batch-zs_map']):
                            batch += 1
                            loss_csv.write('{}, {}, {}\n'.format(
                                batch, str(val_mAP), str(zs_mAP)))
# Beispiel #9
# 0
def im2docvec_wvalid(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN,
                     text_features=cfg_emb.TEXT_FEATURES_400,
                     visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID,
                     class_list=cfg_emb.CLASS_LIST_400):
    """Train an image -> doc-vector regression MLP with a validation split.

    For every class label, pairs each visual feature vector of that class
    with the class's textual (doc2vec) embedding, then grid-searches over
    hidden layouts / optimizers / learning rates / batch sizes, training a
    Keras model per configuration and logging per-epoch losses to CSV.

    Args:
        visual_features: path to an HDF5 ImageDataset of training features.
        text_features: path to a .npy array of per-class doc embeddings
            (one row per class, aligned with class_list).
        visual_features_valid: optional path to a validation ImageDataset;
            None disables validation.
        class_list: optional path to a newline-separated list of class ids;
            when None, labels are taken from the visual dataset itself.
    """
    import numpy as np

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    text_features = np.load(text_features)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        # `open` instead of the Python-2-only `file` builtin.
        class_list = open(class_list, 'r').read().split('\n')
        #class_list.sort()

    print("Generating dataset..")

    data_train = []
    target_train = []
    data_valid = []
    target_valid = []

    # class_list is guaranteed non-None at this point (it is filled from the
    # dataset labels above when no file is given), so pair each label with
    # its doc vector directly.  The old `enumerate(text_features)` fallback
    # was unreachable -- and would have crashed, since an enumerate object
    # is not subscriptable.
    for lbl, docv in zip(class_list, text_features):
        lbl = int(lbl)
        visual_features_with_label = visual_features.sub_dataset_with_label(
            lbl)
        for visual_feat in visual_features_with_label.data:
            data_train.append(visual_feat)
            # TODO: normalize vectors in data_train
            target_train.append(docv)

        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(
                lbl)
            for visual_feat in visual_features_valid_with_label.data:
                data_valid.append(visual_feat)
                # TODO: normalize vectors in data_valid
                target_valid.append(docv)

    data_train = np.asarray(data_train)

    # Drop trailing singleton dims, e.g. (N, D, 1, 1) -> (N, D).
    # BUGFIX: the extra `shape[-1] == 1` in the loop condition prevents an
    # infinite loop when a >2-D array has a non-singleton last axis.
    while len(data_train.shape) > 2 and data_train.shape[-1] == 1:
        data_train = np.squeeze(data_train, axis=(-1, ))

    target_train = np.asarray(target_train)

    # BUGFIX: the original touched data_valid/target_valid unconditionally,
    # raising NameError when visual_features_valid is None.
    if visual_features_valid is not None:
        data_valid = np.asarray(data_valid)
        while len(data_valid.shape) > 2 and data_valid.shape[-1] == 1:
            data_valid = np.squeeze(data_valid, axis=(-1, ))
        target_valid = np.asarray(target_valid)
        validation_data = [data_valid, target_valid]
    else:
        validation_data = None

    print("Generating model..")

    EPOCHS = 60
    hiddens = [[2000, 1000], [1000]]
    #hiddens = [ [1000] ]

    lrs = [10]
    batch_sizes = [32]
    optimizers_str = ['Adadelta']
    optimizers = [Adadelta]

    #hiddens = [ [1000], [2000], [4000], [2000,1000], [4000, 2000], [4000, 2000, 1000]]
    #lrs = [10, 1]
    #batch_sizes = [64, 32, 16]
    #optimizers_str = ['Adadelta', 'Adagrad']
    #optimizers = [Adadelta, Adagrad]

    # Grid search: one model per (hidden layout, optimizer, lr, batch size).
    for hid in hiddens:
        for opt, opt_str in zip(optimizers, optimizers_str):
            for lr in lrs:
                for bs in batch_sizes:

                    print("")
                    print("Training model..")
                    print("hiddens: " + str(hid))
                    print("optim:   " + str(opt_str))
                    print("lr:      " + str(lr))

                    fname = "im2docvec_opt-{}_lr-{}_bs-{}".format(
                        opt_str, lr, bs)
                    for i, hu in enumerate(hid):
                        fname += "_hl-" + str(hu)
                    folder = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER, fname)
                    if not os.path.isdir(folder):
                        os.mkdir(folder)

                    fname = os.path.join(folder, fname)

                    model = get_model(data_train.shape[1],
                                      target_train.shape[-1], hid)
                    model.compile(optimizer=opt(lr=lr), loss=cos_distance)

                    earlystop = EarlyStopping(monitor=MONITOR,
                                              min_delta=0.0005,
                                              patience=9)
                    reduceLR = ReduceLROnPlateau(monitor=MONITOR,
                                                 factor=0.1,
                                                 patience=4,
                                                 verbose=1,
                                                 epsilon=0.0005)
                    bestpoint = ModelCheckpoint(fname + '.model.best.h5',
                                                monitor=MONITOR,
                                                save_best_only=True)
                    checkpoint = ModelCheckpoint(fname +
                                                 '.weights.{epoch:02d}.h5',
                                                 monitor=MONITOR,
                                                 save_best_only=False,
                                                 save_weights_only=True)
                    callbacks = [earlystop, reduceLR, bestpoint, checkpoint]
                    # BUGFIX: batch_size was hard-coded to 64, silently
                    # ignoring the `bs` value the grid search iterates over.
                    history = model.fit(data_train,
                                        target_train,
                                        batch_size=bs,
                                        nb_epoch=EPOCHS,
                                        verbose=1,
                                        shuffle=True,
                                        callbacks=callbacks,
                                        validation_data=validation_data)

                    # `open` instead of the Python-2-only `file` builtin.
                    loss_csv = open(fname + '.loss.csv', 'w')
                    loss_csv.write('Epoch, Loss, Val Loss\n')
                    epoch = 0
                    for loss, val_loss in zip(history.history['loss'],
                                              history.history['val_loss']):
                        epoch += 1
                        loss_csv.write(
                            str(epoch) + ', ' + str(loss) + ', ' +
                            str(val_loss) + '\n')