def split_train_valid_test(visual_features=VISUAL_FEATURES, text_features=TEXT_FEATURES_400, n_test_classes=100):
    """Randomly split the 500 classes into a zero-shot test set and a train/valid set.

    Shuffles the class list, takes the first ``n_test_classes`` as the zero-shot
    test classes and the rest as train classes, sorts each partition by class id,
    splits the train visual features into train/valid per class, and saves every
    artifact (class lists, text features, hdf5 visual features) via cfg_emb paths.

    :param visual_features: path to the hdf5 visual-features dataset.
    :param text_features: path to the .npy text-features file (one row per class).
    :param n_test_classes: number of classes reserved for the zero-shot test set.
    """
    visual_features = ImageDataset().load_hdf5(visual_features)

    class_list = cfg_emb.load_class_list(cfg_emb.CLASS_LIST_500)
    class_name_list = cfg_emb.load_class_list(cfg_emb.CLASS_NAME_LIST_500)
    class_list = np.asarray(class_list, dtype=np.int32)
    class_name_list = np.asarray(class_name_list)

    class_permutation = np.random.permutation(range(0, len(class_list)))
    # BUGFIX: the original sliced [n_test_classes:-1], silently dropping the
    # last shuffled class from the train split (399 instead of 400 classes).
    class_list_test = class_list[class_permutation][0:n_test_classes]
    class_list_train = class_list[class_permutation][n_test_classes:]
    class_name_list_test = class_name_list[class_permutation][0:n_test_classes]
    class_name_list_train = class_name_list[class_permutation][n_test_classes:]

    # Sort both partitions by class id, keeping names/texts aligned.
    test_sort = np.argsort(class_list_test)
    train_sort = np.argsort(class_list_train)
    class_list_test = class_list_test[test_sort]
    class_list_train = class_list_train[train_sort]
    class_name_list_test = class_name_list_test[test_sort]
    class_name_list_train = class_name_list_train[train_sort]

    print("Loading textual features..")
    text_features = np.load(text_features)
    text_features_test = text_features[class_permutation][0:n_test_classes][test_sort]
    text_features_train = text_features[class_permutation][n_test_classes:][train_sort]

    visual_features_test = visual_features.sub_dataset_with_labels(class_list_test)
    visual_features_train_valid = visual_features.sub_dataset_with_labels(class_list_train)

    # Per-class train/valid split of the training visual features.
    split_options = [SplitOptions("flickr", 0.25), SplitOptions("google", 0.3)]
    exclude_file_starting_with = ["seed"]
    visual_features_train, visual_features_valid = \
        visual_features_train_valid.validation_per_class_split(split_options, exclude_file_starting_with)

    cfg_emb.save_class_list(class_list_test, cfg_emb.CLASS_LIST_100)
    cfg_emb.save_class_list(class_list_train, cfg_emb.CLASS_LIST_400)
    cfg_emb.save_class_list(class_name_list_test, cfg_emb.CLASS_NAME_LIST_100)
    cfg_emb.save_class_list(class_name_list_train, cfg_emb.CLASS_NAME_LIST_400)
    np.save(cfg_emb.TEXT_FEATURES_TEST, text_features_test)
    np.save(cfg_emb.TEXT_FEATURES_TRAIN, text_features_train)
    visual_features_train.save_hdf5(cfg_emb.VISUAL_FEATURES_TRAIN)
    visual_features_valid.save_hdf5(cfg_emb.VISUAL_FEATURES_VALID)
    visual_features_test.save_hdf5(cfg_emb.VISUAL_FEATURES_TEST)
def test_embedding_zero_shot(tx_map=True, im_map=True, top_similar_output=None):
    """Evaluate the zero-shot embedding: text-retrieval and/or image-retrieval mAP.

    Selects from the 500 doc vectors only those whose class appears in the
    zero-shot mAP class list, then runs the requested evaluations.

    :param tx_map: run the text-retrieval mAP evaluation.
    :param im_map: run the image-retrieval mAP evaluation.
    :param top_similar_output: int to request top-similars output (not implemented).
    """
    all_doc_vectors = np.load(DOCS_FILE_500)
    classes_all = cfg_emb.load_class_list(ZERO_SHOT_CLASS_LIST_all)
    classes_for_map = cfg_emb.load_class_list(ZERO_SHOT_CLASS_LIST_for_map)

    # Keep only the doc vectors whose class takes part in the mAP evaluation.
    zero_shot_doc_vectors = np.asarray(
        [all_doc_vectors[idx] for idx, cls in enumerate(classes_all) if cls in classes_for_map])

    if isinstance(top_similar_output, int):
        print("Top similars on zero shot aproach: not yet implemented.")

    if tx_map:
        test_embedding_tx_mAP(visual_features=ZERO_SHOT_VISUAL_FEATURES,
                              docs_vectors_npy=zero_shot_doc_vectors,
                              class_list_doc2vec=ZERO_SHOT_CLASS_LIST_for_map,
                              im2doc_model=IM2DOC_MODEL,
                              im2doc_model_ext=None,
                              im2doc_weights_ext=None,
                              load_precomputed_imdocs=True)
    if im_map:
        test_embedding_im_mAP(visual_features=ZERO_SHOT_VISUAL_FEATURES,
                              docs_vectors_npy=zero_shot_doc_vectors,
                              class_list_doc2vec=ZERO_SHOT_CLASS_LIST_for_map,
                              im2doc_model=IM2DOC_MODEL,
                              im2doc_model_ext=None,
                              im2doc_weights_ext=None,
                              load_precomputed_imdocs=True)
def retrieve_text_map(img_features, txt_features, class_list_doc2vec, joint_model, joint_model_ext=None, joint_model_weights_ext=None, load_precomputed_embedded_feat=None, verbose=False, progressbar=True):
    """Text-retrieval mAP: for each embedded image, rank all embedded texts.

    For every image, builds a one-hot target vector over the text classes and
    scores each text with the precomputed image-vs-text score matrix, then
    averages the per-image average precision.

    :return: mean average precision (float).
    """
    # Hoisted out of the per-image loop (the original imported sklearn inside it).
    from sklearn.metrics import average_precision_score

    def printv(msg):  # renamed: original parameter shadowed builtin `str`
        if verbose:
            print(msg)

    emb_txts, emb_imgs, img_labels = get_embedded_vectors(
        img_features, txt_features, joint_model, joint_model_ext,
        joint_model_weights_ext, load_precomputed_embedded_feat, verbose)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = load_class_list(class_list_doc2vec)
    # class id -> row index of its text embedding
    class_list_inverted_doc2vec = {k: i for i, k in enumerate(class_list_doc2vec)}

    if progressbar:
        bar = pyprind.ProgBar(len(emb_imgs), stream=sys.stdout)

    C = compute_dist_scores(emb_imgs, emb_txts)
    av_prec = []
    for i, iv in enumerate(emb_imgs):
        if progressbar:
            bar.update()
        lbl = int(img_labels[i])
        targets = np.zeros([emb_txts.shape[0]])
        targets[int(class_list_inverted_doc2vec[lbl])] = 1
        # Row i already holds the score of image i against every text; no need
        # to copy it element-by-element as the original did.
        scores = C[i, :]
        AP = average_precision_score(targets, scores)
        av_prec.append(AP)
        printv("Img {} - AP = {}".format(lbl, AP))

    mAP = np.mean(np.asarray(av_prec))
    printv("\t\tmAP = {}".format(mAP))
    return mAP
def recall_top_k(img_features, txt_features, class_list_doc2vec, joint_model, joint_model_ext=None, joint_model_weights_ext=None, load_precomputed_embedded_feat=None, top_k=10, verbose=False, progressbar=True):
    """Recall@k: fraction of images whose true class text is among the k nearest texts.

    :param top_k: number of top-ranked texts considered a hit.
    :return: recall@top_k over all images (float in [0, 1]).
    """
    def printv(msg):  # renamed: original parameter shadowed builtin `str`
        if verbose:
            print(msg)

    emb_txts, emb_imgs, img_labels = get_embedded_vectors(
        img_features, txt_features, joint_model, joint_model_ext,
        joint_model_weights_ext, load_precomputed_embedded_feat, verbose)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = load_class_list(class_list_doc2vec)
    # class id -> row index of its text embedding
    class_list_inverted_doc2vec = {k: i for i, k in enumerate(class_list_doc2vec)}

    if progressbar:
        bar = pyprind.ProgBar(len(emb_imgs), stream=sys.stdout)

    C = compute_dist_scores(emb_imgs, emb_txts, is_dist=True)
    recall_per_img = []
    for i, iv in enumerate(emb_imgs):
        if progressbar:
            bar.update()
        lbl = int(img_labels[i])
        arg_lbl = class_list_inverted_doc2vec[lbl]
        dists = C[i, :]
        arg_sort_dist = np.argsort(dists)
        # BUGFIX: the original checked arg_sort_dist[0:top_k + 1], i.e. it
        # actually computed recall@(k+1); recall@k inspects exactly k items.
        if arg_lbl in arg_sort_dist[0:top_k]:
            recall_per_img.append(1)
        else:
            recall_per_img.append(0)

    return np.sum(recall_per_img) / float(len(recall_per_img))
def joint_embedding_train(config_gen_function=config_gen_TEST, debug_map_val=None):
    """Train a joint image/text embedding for every Config yielded by the generator.

    For each config: builds the aligned dataset, trains epoch-by-epoch with
    freshly re-sampled contrastive batches, tracks the best-train-loss and
    best-val-loss models, writes a per-config ``.loss.csv`` and (when EVAL_MAP)
    per-metric stats CSVs aggregated over all configs trained so far.

    :param config_gen_function: callable returning (list of Config, generator name).
    :param debug_map_val: forwarded to ModelMAP as its debug value.
    """
    visual_features = cfg_emb.VISUAL_FEATURES_TRAIN
    text_features = cfg_emb.TEXT_FEATURES_400
    class_list = cfg_emb.CLASS_LIST_400
    visual_features_valid = cfg_emb.VISUAL_FEATURES_VALID
    visual_features_zs_test = cfg_emb.VISUAL_FEATURES_TEST
    text_features_zs_test = cfg_emb.TEXT_FEATURES_100
    class_list_test = cfg_emb.CLASS_LIST_100
    recall_at_k = [1, 3, 5, 10]

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test, np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list, int_cast=True)
    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test, int_cast=True)

    print("Generating dataset..")
    if class_list is not None:
        cycle_clslst_txfeat = class_list, text_features
    else:
        cycle_clslst_txfeat = enumerate(text_features)

    im_data_train = []
    tx_data_train_im_aligned = []  # 1 text for each image (align: img_lbl_x <-> txt_lbl_x <-> lbl_x)
    tx_data_train = []  # 1 text for each class
    label_train = []
    if visual_features_valid is not None:
        im_data_val = []
        tx_data_valid_im_aligned = []
        label_val = []

    for lbl, docv in zip(cycle_clslst_txfeat[0], cycle_clslst_txfeat[1]):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization
        tx_data_train.append(norm_docv)
        visual_features_with_label = visual_features.sub_dataset_with_label(lbl)
        for visual_feat in visual_features_with_label.data:
            visual_feat = visual_feat / np.linalg.norm(visual_feat)  # l2 normalization
            im_data_train.append(visual_feat)
            tx_data_train_im_aligned.append(norm_docv)
            label_train.append(lbl)
        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(lbl)
            for visual_feat in visual_features_valid_with_label.data:
                visual_feat = visual_feat / np.linalg.norm(visual_feat)  # l2 normalization
                im_data_val.append(visual_feat)
                tx_data_valid_im_aligned.append(norm_docv)
                label_val.append(lbl)

    # Array conversion (the *_im_aligned lists are built but unused downstream).
    im_data_train = list_to_ndarray(im_data_train)
    im_data_val = list_to_ndarray(im_data_val)
    tx_data_train = list_to_ndarray(tx_data_train)
    label_train = list_to_ndarray(label_train)
    label_val = list_to_ndarray(label_val)

    print("Generating model..")
    configs, config_gen_name = config_gen_function()
    print("Executing training over config generator: " + config_gen_name)
    folder_gen_name = "jointmodel_confgen-" + config_gen_name
    folder_gen_path = os.path.join(JOINT_MODEL_FOLDER, folder_gen_name)
    if not os.path.isdir(folder_gen_path):
        os.mkdir(folder_gen_path)

    class ModelScore:
        # mAP/recall dicts for one model evaluated on the three splits.
        def __init__(self, train_set_score=None, valid_set_score=None, test_set_score=None):
            self.train_set = train_set_score
            self.valid_set = valid_set_score
            self.test_set = test_set_score

    class ConfigScore:
        # Scores of the saved models (best-train, best-valid, init) of one config.
        def __init__(self, name=None):
            self.name = name
            self.scores_best_train = ModelScore()
            self.scores_best_valid = ModelScore()
            self.scores_init = ModelScore()

    config_scores = []  # one ConfigScore per trained config

    for config_counter, c in enumerate(configs):
        if not isinstance(c, Config):
            raise TypeError('c is not an instance of Config class.')
        print("")
        print("")
        print("")
        print("")
        print("Config: ")
        pprint(c)

        fname = folder_gen_name + "__" + str(config_counter)
        folder_path = os.path.join(folder_gen_path, fname)
        fpath = os.path.join(folder_path, fname)
        if not os.path.isdir(folder_path):
            os.mkdir(folder_path)
        c.saveJSON(fpath + '.config.json')

        JE = JointEmbedder(im_dim=im_data_train.shape[-1],
                           tx_dim=tx_data_train.shape[-1],
                           out_dim=c.sp_dim,
                           n_text_classes=len(class_list),
                           use_merge_distance=USE_MERGE_DISTANCE)
        optimizer = c.opt(**c.opt_kwargs)
        model = JE.model(optimizer=optimizer,
                         tx_activation=c.tx_act,
                         im_activation=c.im_act,
                         tx_hidden_layers=c.tx_hid,
                         im_hidden_layers=c.im_hid,
                         contrastive_loss_weight=c.contr_w,
                         logistic_loss_weight=c.log_w_tx,
                         contrastive_loss_weight_inverted=c.contr_inv_w,
                         init=c.w_init,
                         contrastive_loss_margin=c.contr_margin)
        model.summary()

        init_model_fname = fpath + '.model.init.random.h5'
        best_valid_fname = fpath + '.model.best.val_loss.h5'
        best_train_fname = fpath + '.model.best.loss.h5'
        model.save(init_model_fname)

        # Validation contrastive batch is sampled once; the training batch is
        # re-sampled at every epoch inside the loop below.
        val_x_im, val_x_tx, val_y_contr, val_y_log = get_contr_data_batch(
            im_data_val, tx_data_train, label_val, class_list,
            no_contrastive=DISABLE_CONTRASTIVE, shuffle=True, bs=c.bs)
        val_X = [val_x_im, val_x_tx]
        val_Y = [val_y_contr, val_y_contr, val_y_contr, val_y_log]

        best_loss = best_val_loss = float('inf')
        best_loss_epoch = -1
        best_val_loss_epoch = -1
        loss_hist = []
        val_loss_hist = []
        for ep in range(0, c.epochs):
            print("Epoch: {}/{}".format(ep, c.epochs - 1))
            checpoint_path = fpath + ".weights.{:03d}.h5".format(ep)
            checkpoint = ModelCheckpoint(checpoint_path, monitor='val_loss',
                                         save_best_only=False, save_weights_only=True)
            x_im, x_tx, y_cont, y_log = get_contr_data_batch(
                im_data_train, tx_data_train, label_train, class_list,
                no_contrastive=DISABLE_CONTRASTIVE, shuffle=True, bs=c.bs)
            X = [x_im, x_tx]
            Y = [y_cont, y_cont, y_cont, y_log]
            # BUGFIX: the original did `calls = c.callbacks; calls.append(checkpoint)`,
            # aliasing the config's own list and accumulating one extra
            # ModelCheckpoint per epoch. Build a fresh list instead.
            calls = list(c.callbacks) + [checkpoint]
            hs = model.fit(X, Y, c.bs, nb_epoch=1,
                           validation_data=[val_X, val_Y],
                           shuffle=False, callbacks=calls)  # FIT !!! TRAINING
            hist = hs.history
            val_loss = hist['val_loss'][0]
            loss = hist['loss'][0]
            val_loss_hist.append(val_loss)
            loss_hist.append(loss)
            if loss < best_loss:
                best_loss = loss
                model.save(best_train_fname)
                best_loss_epoch = ep
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                model.save(best_valid_fname)
                best_val_loss_epoch = ep

        loss_csv = open(fpath + ".loss.csv", 'w')  # was py2-only file()
        loss_csv.write('Learning curves (loss),Epoch, Loss, Val Loss\n')
        if EVAL_INIT_MODEL_LOSS:
            # NOTE(review): this evaluates the weights left after the final
            # epoch, not the saved init/random model -- confirm intended.
            x_im, x_tx, y_cont, y_log = get_contr_data(im_data_train, tx_data_train, label_train, class_list)
            X = [x_im, x_tx]
            Y = [y_cont, y_cont, y_cont, y_log]
            init_loss = model.evaluate(X, Y, batch_size=c.bs)[0]
            init_val_loss = model.evaluate(val_X, val_Y, batch_size=c.bs)[0]
            loss_csv.write(', {}, {}, {}\n'.format(-1, init_loss, init_val_loss))
        for epoch, (loss, val_loss) in enumerate(zip(loss_hist, val_loss_hist)):
            loss_csv.write(", {}, {}, {}\n".format(epoch, loss, val_loss))
        loss_csv.write("\n\n\n")
        loss_csv.write("Best loss epoch:, {}, \n".format(best_loss_epoch))
        loss_csv.write("Best val loss epoch:, {}\n".format(best_val_loss_epoch))

        if EVAL_MAP:
            map_call_tr = ModelMAP(visual_features=visual_features, docs_vectors=text_features,
                                   class_list=class_list, data_name='TrainSet',
                                   text_retrieval_map=True, image_retrieval_map=True,
                                   recall_at_k=recall_at_k, debug_value=debug_map_val)
            map_call_val = ModelMAP(visual_features=visual_features_valid, docs_vectors=text_features,
                                    class_list=class_list, data_name='ValidSet',
                                    text_retrieval_map=True, image_retrieval_map=True,
                                    recall_at_k=recall_at_k, debug_value=debug_map_val)
            map_call_zs = ModelMAP(visual_features=visual_features_zs_test, docs_vectors=text_features_zs_test,
                                   class_list=class_list_test, data_name='TestSetZS',
                                   text_retrieval_map=True, image_retrieval_map=True,
                                   recall_at_k=recall_at_k, debug_value=debug_map_val)

            # mAP of the best-training-loss model
            best_train_model = JointEmbedder.load_model(best_train_fname)
            map_tr_best_train = map_call_tr.call_manual(best_train_model)
            map_val_best_train = map_call_val.call_manual(best_train_model)
            map_zs_best_train = map_call_zs.call_manual(best_train_model)
            score_best_train = ModelScore(map_tr_best_train, map_val_best_train, map_zs_best_train)

            # mAP of the best-validation-loss model
            best_valid_model = JointEmbedder.load_model(best_valid_fname)
            map_tr_best_valid = map_call_tr.call_manual(best_valid_model)
            map_val_best_valid = map_call_val.call_manual(best_valid_model)
            map_zs_best_valid = map_call_zs.call_manual(best_valid_model)
            score_best_valid = ModelScore(map_tr_best_valid, map_val_best_valid, map_zs_best_valid)

            list_map_labels = ["Best Tr Loss", "Best Val Loss"]
            list_map_dict_tr = [map_tr_best_train, map_tr_best_valid]
            list_map_dict_val = [map_val_best_train, map_val_best_valid]
            list_map_dict_zs = [map_zs_best_train, map_zs_best_valid]

            score_init = None
            if EVAL_INIT_MODEL_MAP:
                # mAP of the untrained (init/random) model
                init_model = JointEmbedder.load_model(init_model_fname)
                map_tr_init = map_call_tr.call_manual(init_model)
                map_val_init = map_call_val.call_manual(init_model)
                map_zs_init = map_call_zs.call_manual(init_model)
                list_map_labels.append("Init/Random")
                list_map_dict_tr.append(map_tr_init)
                list_map_dict_val.append(map_val_init)
                list_map_dict_zs.append(map_zs_init)
                score_init = ModelScore(map_tr_init, map_val_init, map_zs_init)

            cs = ConfigScore(name=str(config_counter))
            cs.scores_best_train = score_best_train
            cs.scores_best_valid = score_best_valid
            cs.scores_init = score_init
            config_scores.append(cs)

            loss_csv.write("\n\n\n\n")
            loss_csv.write(", Loaded models/weights:, ")
            for l in list_map_labels:
                loss_csv.write("{}, ".format(l))
            loss_csv.write("\n")
            loss_csv.write("\nmAP over training set, ")
            for key in map_tr_best_train.keys():
                loss_csv.write("{}, ".format(key))
                for map_dict in list_map_dict_tr:
                    loss_csv.write("{}, ".format(map_dict[key]))
                loss_csv.write("\n, ")
            loss_csv.write("\nmAP over validation set, ")
            for key in map_tr_best_train.keys():
                loss_csv.write("{}, ".format(key))
                for map_dict in list_map_dict_val:
                    loss_csv.write("{}, ".format(map_dict[key]))
                loss_csv.write("\n, ")
            loss_csv.write("\nmAP over zs-test set, ")
            for key in map_tr_best_train.keys():
                loss_csv.write("{}, ".format(key))
                for map_dict in list_map_dict_zs:
                    loss_csv.write("{}, ".format(map_dict[key]))
                loss_csv.write("\n, ")
        loss_csv.close()

        if EVAL_MAP:
            # All score dicts must expose the same metric keys.
            assert cs.scores_best_train.test_set.keys() == \
                cs.scores_best_train.train_set.keys() == \
                cs.scores_best_train.valid_set.keys() == \
                cs.scores_best_valid.test_set.keys() == \
                cs.scores_best_valid.train_set.keys() == \
                cs.scores_best_valid.valid_set.keys()
            if EVAL_INIT_MODEL_MAP:
                assert cs.scores_best_train.test_set.keys() == \
                    cs.scores_init.test_set.keys() == \
                    cs.scores_init.train_set.keys() == \
                    cs.scores_init.valid_set.keys()
            # One stats CSV per metric key, rewritten after each config so that
            # partial results are available while the sweep is still running.
            keys = cs.scores_best_train.test_set.keys()
            for key in keys:
                stats_csv = open(os.path.join(folder_gen_path, folder_gen_name + ".{}.csv".format(key)), 'w')
                stats_csv.write('Stats for {}\n\n'.format(key))
                stats_csv.write(', test over training set, , , , test over validation set, , , , test over test set, , ,, \n')
                stats_csv.write('Model Weights:, '
                                'best tr loss, best val loss, init/random, , '
                                'best tr loss, best val loss, init/random, , '
                                'best tr loss, best val loss, init/random, , \n')
                stats_csv.write('Config index/name, \n')
                # Renamed from `cs` -- the original shadowed the outer loop variable.
                for score in config_scores:
                    stats_csv.write('{}, {}, {}, {}, , {}, {}, {}, , {}, {}, {},\n'.format(
                        score.name,
                        score.scores_best_train.train_set[key],
                        score.scores_best_valid.train_set[key],
                        str(score.scores_init.train_set[key]) if EVAL_INIT_MODEL_MAP else '',
                        score.scores_best_train.valid_set[key],
                        score.scores_best_valid.valid_set[key],
                        str(score.scores_init.valid_set[key]) if EVAL_INIT_MODEL_MAP else '',
                        score.scores_best_train.test_set[key],
                        score.scores_best_valid.test_set[key],
                        str(score.scores_init.test_set[key]) if EVAL_INIT_MODEL_MAP else ''))
                stats_csv.close()  # BUGFIX: the original never closed this handle
def joint_embedding_train(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN, text_features=cfg_emb.TEXT_FEATURES_400, class_list=cfg_emb.CLASS_LIST_400, visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID, visual_features_zs_test=cfg_emb.VISUAL_FEATURES_TEST, text_features_zs_test=cfg_emb.TEXT_FEATURES_100, class_list_test=cfg_emb.CLASS_LIST_100):
    """Load the best saved joint model and report mAP / recall@k on all splits.

    Despite its name this overload only evaluates a saved model on the train,
    validation and zero-shot-test splits.

    NOTE(review): the module binds `joint_embedding_train` multiple times; only
    the last definition survives at import time -- confirm which is intended.
    """
    import numpy as np

    def _squeeze_trailing(arr):
        # Collapse trailing singleton dims until rank <= 2. BUGFIX: the
        # original `while ...: if shape[-1] == 1: squeeze` spun forever when a
        # rank>2 array's last dim was not 1; the `and` condition terminates.
        while len(arr.shape) > 2 and arr.shape[-1] == 1:
            arr = np.squeeze(arr, axis=(-1,))
        return arr

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test, np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list, int_cast=True)
    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test, int_cast=True)

    print("Generating dataset..")
    if class_list is not None:
        cycle_clslst_txfeat = class_list, text_features
    else:
        cycle_clslst_txfeat = enumerate(text_features)

    im_data_train = []
    tx_data_train = []
    label_train = []
    if visual_features_valid is not None:
        im_data_valid = []
        tx_data_valid = []
        label_valid = []

    for lbl, docv in zip(cycle_clslst_txfeat[0], cycle_clslst_txfeat[1]):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization
        visual_features_with_label = visual_features.sub_dataset_with_label(lbl)
        for visual_feat in visual_features_with_label.data:
            im_data_train.append(visual_feat)
            tx_data_train.append(norm_docv)
            label_train.append(lbl)
        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(lbl)
            for visual_feat in visual_features_valid_with_label.data:
                im_data_valid.append(visual_feat)
                tx_data_valid.append(norm_docv)
                label_valid.append(lbl)

    # Array conversion + trailing-singleton cleanup
    im_data_train = _squeeze_trailing(np.asarray(im_data_train))
    im_data_valid = _squeeze_trailing(np.asarray(im_data_valid))
    tx_data_train = _squeeze_trailing(np.asarray(tx_data_train))
    tx_data_valid = _squeeze_trailing(np.asarray(tx_data_valid))
    label_train = _squeeze_trailing(np.asarray(label_train))
    label_valid = _squeeze_trailing(np.asarray(label_valid))

    print("Loading model..")
    name = "jointmodel"
    path = 'im2doc_embedding/{}/{}'.format(name, name)
    model_path = path + '.model.best.h5'
    weight_path = None  # e.g. path + '.weights.09.h5' to load specific weights
    model = JointEmbedder.load_model(model_path=model_path, weight_path=weight_path)

    top_k = [1, 3, 5, 10]

    print("\nTest traning: ")
    # Renamed from `map`/`mapi`: the originals shadowed the builtin `map`.
    txt_map = retrieve_text_map(visual_features, text_features, class_list, joint_model=model)
    img_map = retrieve_image_map(visual_features, text_features, class_list, joint_model=model)
    print("mAP = " + str(txt_map))
    print("mAPi = " + str(img_map))
    for k in top_k:
        recall = recall_top_k(visual_features, text_features, class_list,
                              joint_model=model, top_k=k, verbose=False, progressbar=False)
        print("recall@{} = {}".format(k, recall))

    print("\nTest validation: ")
    txt_map = retrieve_text_map(visual_features_valid, text_features, class_list, joint_model=model)
    img_map = retrieve_image_map(visual_features_valid, text_features, class_list, joint_model=model)
    print("mAP = " + str(txt_map))
    print("mAPi = " + str(img_map))
    for k in top_k:
        recall = recall_top_k(visual_features_valid, text_features, class_list,
                              joint_model=model, top_k=k, verbose=False, progressbar=False)
        print("recall@{} = {}".format(k, recall))

    print("\nTest zero shot test: ")
    txt_map = retrieve_text_map(visual_features_zs_test, text_features_zs_test, class_list_test, joint_model=model)
    # BUGFIX: the original printed the stale validation mAPi here; compute the
    # image-retrieval mAP on the zero-shot split instead.
    img_map = retrieve_image_map(visual_features_zs_test, text_features_zs_test, class_list_test, joint_model=model)
    print("mAP = " + str(txt_map))
    print("mAPi = " + str(img_map))
    for k in top_k:
        recall = recall_top_k(visual_features_zs_test, text_features_zs_test, class_list_test,
                              joint_model=model, top_k=k, verbose=False, progressbar=False)
        print("recall@{} = {}".format(k, recall))
def joint_embedding_train(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN, text_features=cfg_emb.TEXT_FEATURES_400, class_list=cfg_emb.CLASS_LIST_400, visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID, visual_features_zs_test=cfg_emb.VISUAL_FEATURES_TEST, text_features_zs_test=cfg_emb.TEXT_FEATURES_100, class_list_test=cfg_emb.CLASS_LIST_100):
    """Train the joint image/text embedding over a hard-coded list of Configs.

    Builds per-class aligned (image, text) pairs from the train/valid splits,
    then for each Config trains JointEmbedder.model with checkpointing and mAP
    callbacks on the train/valid/zero-shot-test sets.

    NOTE(review): the module binds `joint_embedding_train` multiple times; only
    the last definition survives at import time -- confirm which is intended.
    """
    import numpy as np

    def _squeeze_trailing(arr):
        # Collapse trailing singleton dims until rank <= 2. BUGFIX: the
        # original `while ...: if shape[-1] == 1: squeeze` spun forever when a
        # rank>2 array's last dim was not 1; the `and` condition terminates.
        while len(arr.shape) > 2 and arr.shape[-1] == 1:
            arr = np.squeeze(arr, axis=(-1,))
        return arr

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test, np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list, int_cast=True)
    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test, int_cast=True)

    print("Generating dataset..")
    if class_list is not None:
        cycle_clslst_txfeat = class_list, text_features
    else:
        cycle_clslst_txfeat = enumerate(text_features)

    im_data_train = []
    tx_data_train = []
    label_train = []
    if visual_features_valid is not None:
        im_data_valid = []
        tx_data_valid = []
        label_valid = []

    for lbl, docv in zip(cycle_clslst_txfeat[0], cycle_clslst_txfeat[1]):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization
        visual_features_with_label = visual_features.sub_dataset_with_label(lbl)
        for visual_feat in visual_features_with_label.data:
            im_data_train.append(visual_feat)
            tx_data_train.append(norm_docv)
            label_train.append(lbl)
        if visual_features_valid is not None:
            visual_features_valid_with_label = visual_features_valid.sub_dataset_with_label(lbl)
            for visual_feat in visual_features_valid_with_label.data:
                im_data_valid.append(visual_feat)
                tx_data_valid.append(norm_docv)
                label_valid.append(lbl)

    # Array conversion + trailing-singleton cleanup
    im_data_train = _squeeze_trailing(np.asarray(im_data_train))
    im_data_valid = _squeeze_trailing(np.asarray(im_data_valid))
    tx_data_train = _squeeze_trailing(np.asarray(tx_data_train))
    tx_data_valid = _squeeze_trailing(np.asarray(tx_data_valid))
    label_train = _squeeze_trailing(np.asarray(label_train))
    label_valid = _squeeze_trailing(np.asarray(label_valid))

    print("Generating model..")
    MONITOR = 'val_loss'

    class Config:
        # Hyper-parameter bundle for one training run.
        def __init__(self):
            self.lr = 10
            self.bs = 64
            self.epochs = 50
            self.opt = Adadelta
            self.opt_str = 'adadelta'
            self.joint_space_dim = 200
            self.tx_activation = 'softmax'
            self.im_activation = 'tanh'
            self.tx_hidden_layers = None
            self.tx_hidden_activation = None
            self.im_hidden_layers = None
            self.im_hidden_activation = None
            self.contrastive_loss_weight = 1
            self.logistic_loss_weight = 1
            self.contrastive_loss_weight_inverted = 1
            self.weight_init = 'glorot_uniform'

    # (Older commented-out experiment configs and their logged results removed
    #  -- see VCS history if needed.)
    configs = []
    c = Config()
    c.lr = 10
    c.bs = 64
    c.epochs = 10
    c.joint_space_dim = 200
    c.tx_activation = 'sigmoid'
    c.im_activation = 'sigmoid'
    c.contrastive_loss_weight = 3
    c.contrastive_loss_weight_inverted = 3
    c.logistic_loss_weight = 1
    c.weight_init = 'glorot_uniform'  # 'glorot_normal'
    configs.append(c)

    for c in configs:
        print("")
        print("Training model..")
        print("optim: " + str(c.opt_str))
        print("lr: " + str(c.lr))

        fname = "jointmodel_opt-{}_lr-{}_bs-{}".format(c.opt_str, c.lr, c.bs)
        folder = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER, fname)
        if not os.path.isdir(folder):
            os.mkdir(folder)
        fname = os.path.join(folder, fname)

        JE = JointEmbedder(im_dim=im_data_train.shape[-1],
                           tx_dim=tx_data_train.shape[-1],
                           out_dim=c.joint_space_dim,
                           n_text_classes=len(class_list))
        model = JE.model(optimizer=c.opt(lr=c.lr),
                         tx_activation=c.tx_activation,
                         im_activation=c.im_activation,
                         tx_hidden_layers=c.tx_hidden_layers,
                         tx_hidden_activation=c.tx_hidden_activation,
                         im_hidden_layers=c.im_hidden_layers,
                         im_hidden_activation=c.im_hidden_activation,
                         contrastive_loss_weight=c.contrastive_loss_weight,
                         logistic_loss_weight=c.logistic_loss_weight,
                         contrastive_loss_weight_inverted=c.contrastive_loss_weight_inverted,
                         init=c.weight_init)

        # earlystop / reduceLR callbacks intentionally disabled.
        bestpoint = ModelCheckpoint(fname + '.model.best.h5', monitor=MONITOR, save_best_only=True)
        checkpoint = ModelCheckpoint(fname + '.weights.{epoch:02d}.h5', monitor=MONITOR,
                                     save_best_only=False, save_weights_only=True)
        mAP_tr = ModelMAP(visual_features=visual_features, docs_vectors=text_features,
                          class_list=class_list, data_name='train-set',
                          exe_fit_end=True, recall_at_k=[10])
        mAP_val = ModelMAP(visual_features=visual_features_valid, docs_vectors=text_features,
                           class_list=class_list, data_name='valid-set',
                           exe_fit_end=True, recall_at_k=[10])
        mAP_zs = ModelMAP(visual_features=visual_features_zs_test, docs_vectors=text_features_zs_test,
                          class_list=class_list_test, data_name='test-set',
                          exe_fit_end=True, recall_at_k=[10])
        callbacks = [mAP_tr, mAP_val, mAP_zs, checkpoint, bestpoint]
        model.summary()

        # One-hot conversion of the integer labels.
        label_map = {}
        for index, label in enumerate(class_list):
            label_map[label] = index
        size = len(class_list)
        label_train_converted = []
        for l in label_train:
            new_l = np.zeros([size])
            new_l[label_map[l]] = 1
            label_train_converted.append(new_l)
        label_train_converted = np.asarray(label_train_converted)
        label_valid_converted = []
        for l in label_valid:
            new_l = np.zeros([size])
            new_l[label_map[l]] = 1
            label_valid_converted.append(new_l)
        label_valid_converted = np.asarray(label_valid_converted)

        history = model.fit([im_data_train, tx_data_train],
                            [label_train, label_train, label_train_converted, label_train_converted],
                            validation_data=[[im_data_valid, tx_data_valid],
                                             [label_valid, label_valid, label_valid_converted, label_valid_converted]],
                            batch_size=c.bs, nb_epoch=c.epochs, shuffle=True,
                            verbose=1, callbacks=callbacks)

        # NOTE(review): the visible code opens the loss CSV and grabs the
        # history but never writes/closes it here -- confirm downstream use.
        loss_csv = open(fname + '.loss.csv', 'w')  # was py2-only file()
        hist = history.history
def retrieve_image_map(img_features, txt_features, class_list_doc2vec, joint_model,
                       joint_model_ext=None, joint_model_weights_ext=None,
                       load_precomputed_embedded_feat=None, verbose=False,
                       progressbar=True):
    """Compute text->image retrieval mAP for a joint image/text embedding model.

    Every text embedding ranks all image embeddings by cosine similarity; the
    average precision w.r.t. the images of the matching class is computed and
    the mean over all texts is returned.

    :param img_features: ImageDataset instance, or path to an hdf5 file.
    :param txt_features: 2D array of text features, one row per class, aligned
        with class_list_doc2vec.
    :param class_list_doc2vec: list of class labels (or path loadable by
        load_class_list) aligned with the rows of txt_features.
    :param joint_model: keras Model instance, or the name of a model stored
        under JOINT_MODEL_FOLDER.
    :param joint_model_ext: model file extension; defaults to
        DEFAULT_JOINT_MODEL_EXT.
    :param joint_model_weights_ext: optional weights-file extension; only
        usable when joint_model is given by name.
    :param load_precomputed_embedded_feat: not implemented; must be None.
    :param verbose: print progress messages.
    :param progressbar: show a pyprind progress bar.
    :return: mean average precision (float).
    """
    def printv(msg):
        if verbose:
            print(msg)

    if joint_model_ext is None:
        joint_model_ext = DEFAULT_JOINT_MODEL_EXT
    if load_precomputed_embedded_feat is None:
        load_precomputed_embedded_feat = False
    else:
        # BUGFIX: the original built this ValueError but never raised it.
        raise ValueError("load_precomputed_embedded_feat: not yet implemented.")

    printv("Loading visual features..")
    if not isinstance(img_features, ImageDataset):
        img_features = ImageDataset().load_hdf5(img_features)

    printv("Loading joint model..")
    if not isinstance(joint_model, Model):
        joint_model_name = joint_model
        model_file = os.path.join(JOINT_MODEL_FOLDER,
                                  os.path.join(joint_model_name,
                                               joint_model_name + joint_model_ext))
        joint_model = load_model(model_file,
                                 custom_objects={'cos_distance': cos_distance})
    else:
        joint_model_name = None

    if joint_model_weights_ext is not None:
        # BUGFIX: the original joined the path with `joint_model`, which at
        # this point is a keras Model instance (TypeError); the model *name*
        # is what the path needs.
        if joint_model_name is None:
            raise ValueError("joint_model_weights_ext requires joint_model to "
                             "be given by name, not as a Model instance.")
        printv("Loading joint model weights..")
        weight_file = os.path.join(JOINT_MODEL_FOLDER,
                                   os.path.join(joint_model_name,
                                                joint_model_name + joint_model_weights_ext))
        joint_model.load_weights(weight_file)

    if joint_model_name is not None:
        img_emb_path = os.path.join(JOINT_MODEL_PREDICTIONS_FOLDER,
                                    joint_prediction_fname(joint_model_name, 'img'))
        txt_emb_path = os.path.join(JOINT_MODEL_PREDICTIONS_FOLDER,
                                    joint_prediction_fname(joint_model_name, 'txt'))
    else:
        img_emb_path = "precomputed_im_emb.img.npy.temp"
        txt_emb_path = "precomputed_tx_emb.txt.npy.temp"

    if load_precomputed_embedded_feat and os.path.exists(img_emb_path) \
            and os.path.exists(txt_emb_path):
        printv("Pre computed embedding from images and text found... loading...")
        imgs_embedded = np.load(img_emb_path)
        txts_embedded = np.load(txt_emb_path)
    else:
        printv("Predict embedding from images and text (joint model embedding)...")
        img_data = img_features.data
        # Drop trailing singleton dims down to rank 2.
        # BUGFIX: the original `while` had no exit when the last dim != 1 and
        # would spin forever; the condition now also guards the squeeze.
        while len(img_data.shape) > 2 and img_data.shape[-1] == 1:
            img_data = np.squeeze(img_data, axis=(-1,))
        img_emb_model = get_sub_model(joint_model, 'img')
        imgs_embedded = img_emb_model.predict(img_data, verbose=verbose)
        np.save(img_emb_path, imgs_embedded)

        txt_data = txt_features
        while len(txt_data.shape) > 2 and txt_data.shape[-1] == 1:
            txt_data = np.squeeze(txt_data, axis=(-1,))
        txt_emb_model = get_sub_model(joint_model, 'txt')
        txts_embedded = txt_emb_model.predict(txt_data, verbose=verbose)
        np.save(txt_emb_path, txts_embedded)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = load_class_list(class_list_doc2vec)

    # mAP test (optimized with cdist)
    if progressbar:
        import sys
        bar = pyprind.ProgBar(len(txts_embedded), stream=sys.stdout)

    from scipy.spatial.distance import cdist
    from sklearn.metrics import average_precision_score  # hoisted out of the loop

    # BUGFIX: scipy's metric name is 'cosine' ('cos' raises
    # "Unknown Distance Metric"); negated so higher score == more similar,
    # as average_precision_score expects.
    C = -cdist(txts_embedded, imgs_embedded, 'cosine')

    av_prec = []
    for i, dv in enumerate(txts_embedded):
        scores = []
        targets = []
        if progressbar:
            bar.update()
        lbl = int(class_list_doc2vec[i])
        for j, im_label in enumerate(img_features.labels):
            # True iff image j belongs to the class of text i.
            target = not bool(im_label[0] - lbl)
            scores.append(C[i, j])
            targets.append(target)
        AP = average_precision_score(targets, scores)
        av_prec.append(AP)
        printv("Class {} - AP = {}".format(lbl, AP))

    mAP = np.mean(np.asarray(av_prec))
    printv("\t\tmAP = {}".format(mAP))
    return mAP
def recall_top_k(img_features, txt_features, class_list_doc2vec, joint_model,
                 joint_model_ext=None, joint_model_weights_ext=None,
                 load_precomputed_embedded_feat=None, top_k=10, verbose=False,
                 progressbar=True):
    """Compute image->text recall@k for a joint image/text embedding model.

    Every image embedding ranks all text embeddings by euclidean distance; an
    image counts as a hit when the text of its own class appears among the
    top_k nearest texts. Returns the fraction of hit images.

    :param img_features: ImageDataset instance, or path to an hdf5 file.
    :param txt_features: 2D array of text features, one row per class, aligned
        with class_list_doc2vec.
    :param class_list_doc2vec: list of class labels (or path loadable by
        load_class_list) aligned with the rows of txt_features.
    :param joint_model: keras Model instance, or the name of a model stored
        under JOINT_MODEL_FOLDER.
    :param joint_model_ext: model file extension; defaults to
        DEFAULT_JOINT_MODEL_EXT.
    :param joint_model_weights_ext: optional weights-file extension; only
        usable when joint_model is given by name.
    :param load_precomputed_embedded_feat: not implemented; must be None.
    :param top_k: number of nearest texts considered for the recall.
    :param verbose: print progress messages.
    :param progressbar: show a pyprind progress bar.
    :return: recall@top_k (float in [0, 1]).
    """
    def printv(msg):
        if verbose:
            print(msg)

    if joint_model_ext is None:
        joint_model_ext = DEFAULT_JOINT_MODEL_EXT
    if load_precomputed_embedded_feat is None:
        load_precomputed_embedded_feat = False
    else:
        # BUGFIX: the original built this ValueError but never raised it.
        raise ValueError("load_precomputed_embedded_feat: not yet implemented.")

    printv("Loading visual features..")
    if not isinstance(img_features, ImageDataset):
        img_features = ImageDataset().load_hdf5(img_features)

    printv("Loading im2doc model..")
    if not isinstance(joint_model, Model):
        joint_model_name = joint_model
        model_file = os.path.join(JOINT_MODEL_FOLDER,
                                  os.path.join(joint_model_name,
                                               joint_model_name + joint_model_ext))
        joint_model = load_model(model_file,
                                 custom_objects={'cos_distance': cos_distance})
    else:
        joint_model_name = None

    if joint_model_weights_ext is not None:
        # BUGFIX: path was joined with `joint_model` (a Model instance here);
        # use the model name and fail clearly when it is unavailable.
        if joint_model_name is None:
            raise ValueError("joint_model_weights_ext requires joint_model to "
                             "be given by name, not as a Model instance.")
        printv("Loading im2doc weights..")
        weight_file = os.path.join(JOINT_MODEL_FOLDER,
                                   os.path.join(joint_model_name,
                                                joint_model_name + joint_model_weights_ext))
        joint_model.load_weights(weight_file)

    if joint_model_name is not None:
        img_emb_path = os.path.join(JOINT_MODEL_PREDICTIONS_FOLDER,
                                    joint_prediction_fname(joint_model_name, 'img'))
        txt_emb_path = os.path.join(JOINT_MODEL_PREDICTIONS_FOLDER,
                                    joint_prediction_fname(joint_model_name, 'txt'))
    else:
        img_emb_path = "precomputed_im_emb.img.npy.temp"
        txt_emb_path = "precomputed_tx_emb.txt.npy.temp"

    if load_precomputed_embedded_feat and os.path.exists(img_emb_path) \
            and os.path.exists(txt_emb_path):
        printv("Pre computed embedding from images and text found... loading...")
        imgs_embedded = np.load(img_emb_path)
        txts_embedded = np.load(txt_emb_path)
    else:
        printv("Predict embedding from images and text (joint model embedding)...")
        img_data = img_features.data
        # BUGFIX: squeeze loop would spin forever when the last dim != 1.
        while len(img_data.shape) > 2 and img_data.shape[-1] == 1:
            img_data = np.squeeze(img_data, axis=(-1,))
        img_emb_model = get_sub_model(joint_model, 'img')
        imgs_embedded = img_emb_model.predict(img_data, verbose=verbose)
        np.save(img_emb_path, imgs_embedded)

        txt_data = txt_features
        while len(txt_data.shape) > 2 and txt_data.shape[-1] == 1:
            txt_data = np.squeeze(txt_data, axis=(-1,))
        txt_emb_model = get_sub_model(joint_model, 'txt')
        txts_embedded = txt_emb_model.predict(txt_data, verbose=verbose)
        np.save(txt_emb_path, txts_embedded)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = load_class_list(class_list_doc2vec)
    # Map class label -> row index of its text vector.
    class_list_inverted_doc2vec = {k: i for i, k in enumerate(class_list_doc2vec)}

    if progressbar:
        import sys
        bar = pyprind.ProgBar(len(imgs_embedded), stream=sys.stdout)

    from scipy.spatial.distance import cdist
    # Negated distances: higher value == more similar.
    C = -cdist(imgs_embedded, txts_embedded, 'euclidean')

    recall_per_img = []
    for i, iv in enumerate(imgs_embedded):
        if progressbar:
            bar.update()
        lbl = int(img_features.labels[i])
        arg_lbl = class_list_inverted_doc2vec[lbl]
        dists = C[i, :]
        # BUGFIX: C holds *negated* distances, so the original ascending
        # argsort ranked the FARTHEST texts first; sort descending so nearest
        # come first. Also take exactly top_k entries (the original sliced
        # [0:top_k+1], an off-by-one).
        ranked = np.argsort(-dists)
        recall_per_img.append(1 if arg_lbl in ranked[:top_k] else 0)

    return np.sum(recall_per_img) / float(len(recall_per_img))
def load_class_list(class_list_doc2vec): from E5_embedding.cfg_emb import load_class_list return load_class_list(class_list_doc2vec) # TODO
def im2docvec_wvalid_map(visual_features=cfg_emb.VISUAL_FEATURES_TRAIN,
                         text_features=cfg_emb.TEXT_FEATURES_400,
                         class_list=cfg_emb.CLASS_LIST_400,
                         visual_features_valid=cfg_emb.VISUAL_FEATURES_VALID,
                         visual_features_zs_test=cfg_emb.VISUAL_FEATURES_TEST,
                         text_features_zs_test=cfg_emb.GET_TEXT_FEATURES_100(),
                         class_list_test=cfg_emb.CLASS_LIST_100):
    """Train im2docvec regressors over a small hyperparameter grid and log losses/mAP.

    Builds (visual feature -> l2-normalized doc vector) training pairs, trains
    one model per (hidden-layers, optimizer, lr, batch-size) combination and
    writes, for each, a `<fname>.loss.csv` with per-epoch loss and mAP values.

    NOTE(review): `text_features_zs_test`'s default is evaluated once at import
    time (GET_TEXT_FEATURES_100() call in the signature) — kept for interface
    compatibility.

    :param visual_features: hdf5 path of the training visual features.
    :param text_features: npy path or ndarray of the class text features.
    :param class_list: class-list path, or None to derive labels from the data.
    :param visual_features_valid: hdf5 path of validation features, or None.
    :param visual_features_zs_test: hdf5 path of the zero-shot test features.
    :param text_features_zs_test: npy path or ndarray of zero-shot text features.
    :param class_list_test: class list (or path) for the zero-shot test classes.
    """
    import numpy as np

    print("Loading visual features..")
    visual_features = ImageDataset().load_hdf5(visual_features)
    if visual_features_valid is not None:
        visual_features_valid = ImageDataset().load_hdf5(visual_features_valid)

    print("Loading textual features..")
    if not isinstance(text_features, np.ndarray):
        text_features = np.load(text_features)
    if not isinstance(text_features_zs_test, np.ndarray) and text_features_zs_test is not None:
        text_features_zs_test = np.load(text_features_zs_test)

    if class_list is None:
        class_list = np.unique(visual_features.labels).tolist()
    else:
        class_list = cfg_emb.load_class_list(class_list)
    if not isinstance(class_list_test, list):
        class_list_test = cfg_emb.load_class_list(class_list_test)

    print("Generating dataset..")
    # BUGFIX: the original kept a dead `enumerate(text_features)` fallback for
    # `class_list is None`, but class_list is always non-None here (both
    # branches above assign it) and `cycle_on[0]` on an enumerate object would
    # have crashed anyway. Pair labels with their text vectors directly.
    data_train = []
    target_train = []
    data_valid = []
    target_valid = []
    for lbl, docv in zip(class_list, text_features):
        lbl = int(lbl)
        norm_docv = docv / np.linalg.norm(docv)  # l2 normalization
        for visual_feat in visual_features.sub_dataset_with_label(lbl).data:
            data_train.append(visual_feat)
            target_train.append(norm_docv)
        if visual_features_valid is not None:
            for visual_feat in visual_features_valid.sub_dataset_with_label(lbl).data:
                data_valid.append(visual_feat)
                target_valid.append(norm_docv)

    def _squeeze_to_2d(arr):
        # Drop trailing singleton dims; BUGFIX: the original `while` looped
        # forever when the last dim was not 1.
        while len(arr.shape) > 2 and arr.shape[-1] == 1:
            arr = np.squeeze(arr, axis=(-1,))
        return arr

    data_train = _squeeze_to_2d(np.asarray(data_train))
    target_train = np.asarray(target_train)
    # BUGFIX: the original unconditionally converted data_valid/target_valid,
    # raising NameError when visual_features_valid is None.
    if visual_features_valid is not None:
        data_valid = _squeeze_to_2d(np.asarray(data_valid))
        target_valid = np.asarray(target_valid)
        validation_data = [data_valid, target_valid]
    else:
        validation_data = None

    print("Generating model..")
    EPOCHS = 20
    hiddens = [[1000], [500], [200]]
    lrs = [10]
    batch_sizes = [32]
    optimizers_str = ['Adadelta']
    optimizers = [Adadelta]

    for hid in hiddens:
        for opt, opt_str in zip(optimizers, optimizers_str):
            for lr in lrs:
                for bs in batch_sizes:
                    print("")
                    print("Training model..")
                    print("hiddens: " + str(hid))
                    print("optim: " + str(opt_str))
                    print("lr: " + str(lr))

                    fname = "im2docvec_opt-{}_lr-{}_bs-{}".format(opt_str, lr, bs)
                    for hu in hid:
                        fname += "_hl-" + str(hu)
                    folder = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER, fname)
                    if not os.path.isdir(folder):
                        os.mkdir(folder)
                    fname = os.path.join(folder, fname)

                    model = get_model(data_train.shape[1], target_train.shape[-1], hid)
                    model.compile(optimizer=opt(lr=lr), loss=cos_distance)

                    # NOTE: EarlyStopping was instantiated but never added to
                    # the callbacks in the original; removed as unused.
                    reduceLR = ReduceLROnPlateau(monitor=MONITOR, factor=0.1,
                                                 patience=4, verbose=1,
                                                 epsilon=0.0005)
                    bestpoint = ModelCheckpoint(fname + '.model.best.h5',
                                                monitor=MONITOR,
                                                save_best_only=True)
                    checkpoint = ModelCheckpoint(fname + '.weights.{epoch:02d}.h5',
                                                 monitor=MONITOR,
                                                 save_best_only=False,
                                                 save_weights_only=True)
                    mAP_val = ModelMAP(visual_features=visual_features_valid,
                                       docs_vectors=text_features,
                                       class_list=class_list,
                                       history_key='val_mAP',
                                       exe_on_train_begin=True,
                                       on_train_begin_key='tr_begin-val_map',
                                       exe_on_batch_end=False,
                                       on_batch_end_key='batch-val_map')
                    mAP_zs = ModelMAP(visual_features=visual_features_zs_test,
                                      docs_vectors=text_features_zs_test,
                                      class_list=class_list_test,
                                      history_key='zs_mAP',
                                      exe_on_train_begin=True,
                                      on_train_begin_key='tr_begin-zs_map',
                                      exe_on_batch_end=False,
                                      on_batch_end_key='batch-zs_map')
                    callbacks = [reduceLR, bestpoint, checkpoint, mAP_val, mAP_zs]

                    history = model.fit(data_train, target_train, batch_size=bs,
                                        nb_epoch=EPOCHS, verbose=1, shuffle=True,
                                        callbacks=callbacks,
                                        validation_data=validation_data)
                    hist = history.history

                    # BUGFIX: a stray `return` here made all the CSV logging
                    # below unreachable and aborted the grid search after the
                    # first configuration; removed so every configuration is
                    # trained and logged. Py2-only `file()` replaced by
                    # `open()`, and the handle is now closed.
                    loss_csv = open(fname + '.loss.csv', 'w')
                    try:
                        if 'tr_begin-val_map' in hist.keys():
                            loss_csv.write('val_mAP pre train:, {}\n'.format(
                                hist['tr_begin-val_map'][0]))
                        if 'tr_begin-zs_map' in hist.keys():
                            loss_csv.write('zs_mAP pre train:, {}\n'.format(
                                hist['tr_begin-zs_map'][0]))

                        loss_csv.write('Epoch, Loss, Val Loss, valid mAP, test mAP\n')
                        epoch = 0
                        for loss, val_loss, val_mAP, zs_mAP in zip(
                                hist['loss'], hist['val_loss'],
                                hist['val_mAP'], hist['zs_mAP']):
                            epoch += 1
                            loss_csv.write(str(epoch) + ', ' + str(loss) + ', ' +
                                           str(val_loss) + ', ' + str(val_mAP) +
                                           ', ' + str(zs_mAP) + '\n')

                        if 'batch-zs_map' in hist.keys() or 'batch-val_map' in hist.keys():
                            loss_csv.write('\n\n\n\nbatch_size:, {}\n\n'.format(bs))
                            loss_csv.write('Batch, val mAP, test mAP\n')
                            batch = 0
                            # BUGFIX: the original indexed hist with a tuple
                            # key (hist['batch-val_map', 'batch-zs_map']),
                            # a guaranteed KeyError; zip the two lists.
                            for val_mAP, zs_mAP in zip(hist['batch-val_map'],
                                                       hist['batch-zs_map']):
                                batch += 1
                                loss_csv.write('{}, {}, {}\n'.format(
                                    batch, str(val_mAP), str(zs_mAP)))
                    finally:
                        loss_csv.close()
def test_embedding_im_mAP(visual_features, docs_vectors_npy, class_list_doc2vec,
                          im2doc_model, im2doc_model_ext=None,
                          im2doc_weights_ext=None, load_precomputed_imdocs=None,
                          verbose=False, progressbar=True):
    """Compute image->doc retrieval mAP for an im2doc embedding model.

    Each image is mapped into doc-vector space; the doc vectors of all classes
    are ranked by cosine similarity and the average precision w.r.t. the
    image's own class is computed. Returns the mean over all images.

    :param visual_features: ImageDataset instance, or path to an hdf5 file.
    :param docs_vectors_npy: npy path or ndarray of per-class doc vectors.
    :param class_list_doc2vec: class labels (or path) aligned with the doc vectors.
    :param im2doc_model: keras Model instance, or the name of a model stored
        under cfg_emb.IM2DOC_MODEL_FOLDER.
    :param im2doc_model_ext: model file extension; defaults to
        DEFAULT_IM2DOC_MODEL_EXT.
    :param im2doc_weights_ext: optional weights-file extension; only usable
        when im2doc_model is given by name.
    :param load_precomputed_imdocs: reuse cached predictions when available.
    :param verbose: print progress messages.
    :param progressbar: show a pyprind progress bar.
    :return: mean average precision (float).
    """
    def printv(msg):
        if verbose:
            print(msg)

    if im2doc_model_ext is None:
        im2doc_model_ext = DEFAULT_IM2DOC_MODEL_EXT
    if load_precomputed_imdocs is None:
        load_precomputed_imdocs = False

    printv("Loading visual features..")
    if not isinstance(visual_features, ImageDataset):
        visual_features = ImageDataset().load_hdf5(visual_features)

    printv("Loading im2doc model..")
    if not isinstance(im2doc_model, Model):
        im2doc_model_name = im2doc_model
        model_file = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER,
                                  os.path.join(im2doc_model_name,
                                               im2doc_model_name + im2doc_model_ext))
        im2doc_model = load_model(model_file,
                                  custom_objects={'cos_distance': cos_distance})
    else:
        im2doc_model_name = None

    if im2doc_weights_ext is not None:
        # BUGFIX: the original joined the path with `im2doc_model`, which is a
        # keras Model instance at this point; the model *name* is required.
        if im2doc_model_name is None:
            raise ValueError("im2doc_weights_ext requires im2doc_model to be "
                             "given by name, not as a Model instance.")
        printv("Loading im2doc weights..")
        weight_file = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER,
                                   os.path.join(im2doc_model_name,
                                                im2doc_model_name + im2doc_weights_ext))
        im2doc_model.load_weights(weight_file)

    if im2doc_model_name is not None:
        imdocs_path = os.path.join(cfg_emb.IM2DOC_PREDICTION_FOLDER,
                                   im2doc_prediction_fname(im2doc_model_name))
    else:
        imdocs_path = "precomputed_imdocs.temp"

    if load_precomputed_imdocs and os.path.exists(imdocs_path):
        printv("Pre computed docs from images found (im2doc embedding)... loading...")
        output_doc_vectors = np.load(imdocs_path)
    else:
        printv("Predict docs from images (im2doc embedding)..")
        im_data = visual_features.data
        # BUGFIX: squeeze loop would spin forever when the last dim != 1.
        while len(im_data.shape) > 2 and im_data.shape[-1] == 1:
            im_data = np.squeeze(im_data, axis=(-1,))
        output_doc_vectors = im2doc_model.predict(im_data, verbose=verbose)
        np.save(imdocs_path, output_doc_vectors)

    printv("Loading doc2vec vectors...")
    if not isinstance(docs_vectors_npy, np.ndarray):
        docs_vectors_npy = np.load(docs_vectors_npy)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = cfg_emb.load_class_list(class_list_doc2vec)

    # mAP test (optimized with cdist)
    if progressbar:
        import sys
        bar = pyprind.ProgBar(len(visual_features.labels), stream=sys.stdout)

    from scipy.spatial.distance import cdist
    from sklearn.metrics import average_precision_score  # hoisted out of the loop

    C = 1 - cdist(output_doc_vectors, docs_vectors_npy, 'cosine')  # cosine similarity

    av_prec = []
    for i, im_label in enumerate(visual_features.labels):
        scores = []
        targets = []
        if progressbar:
            bar.update()
        for j, dv in enumerate(docs_vectors_npy):
            lbl = int(class_list_doc2vec[j])
            # True iff doc vector j belongs to the image's class.
            target = not bool(im_label - lbl)
            scores.append(C[i, j])
            targets.append(target)
        AP = average_precision_score(targets, scores)
        av_prec.append(AP)
        # BUGFIX: the original logged `lbl` (the *last* doc class of the inner
        # loop) for every image; log the image's own class instead.
        printv("Class {} - AP = {}".format(int(im_label), AP))

    mAP = np.mean(np.asarray(av_prec))
    printv("\t\tmAP = {}".format(mAP))
    return mAP
def test_embedding_top_similars(visual_features, docs_vectors_npy,
                                class_list_doc2vec, im2doc_model_name,
                                im2doc_model_ext=None, im2doc_weights_ext=None,
                                load_precomputed_imdocs=None, top_similars=None):
    """Print, for every image, the top-N most similar classes in doc-vector space.

    Maps each image into doc-vector space with the named im2doc model, ranks
    the per-class doc vectors by distance and prints the image's true class
    together with its `top_similars` nearest classes.

    :param visual_features: ImageDataset instance, or path to an hdf5 file.
    :param docs_vectors_npy: npy path or ndarray of per-class doc vectors.
    :param class_list_doc2vec: class labels (or path) aligned with the doc vectors.
    :param im2doc_model_name: name of a model stored under
        cfg_emb.IM2DOC_MODEL_FOLDER.
    :param im2doc_model_ext: model file extension; defaults to
        DEFAULT_IM2DOC_MODEL_EXT.
    :param im2doc_weights_ext: optional weights-file extension.
    :param load_precomputed_imdocs: reuse cached predictions when available.
    :param top_similars: how many similar classes to print (default 10).
    """
    if im2doc_model_ext is None:
        im2doc_model_ext = DEFAULT_IM2DOC_MODEL_EXT
    if load_precomputed_imdocs is None:
        load_precomputed_imdocs = False
    if top_similars is None:
        top_similars = 10

    print("Loading visual features..")
    if not isinstance(visual_features, ImageDataset):
        visual_features = ImageDataset().load_hdf5(visual_features)

    print("Loading im2doc model..")
    model_file = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER,
                              os.path.join(im2doc_model_name,
                                           im2doc_model_name + im2doc_model_ext))
    im2doc_model = load_model(model_file,
                              custom_objects={'cos_distance': cos_distance})

    if im2doc_weights_ext is not None:
        print("Loading im2doc weights..")
        # BUGFIX: the original referenced the undefined name `im2doc_model`
        # (the parameter is `im2doc_model_name`), a guaranteed NameError.
        weight_file = os.path.join(cfg_emb.IM2DOC_MODEL_FOLDER,
                                   os.path.join(im2doc_model_name,
                                                im2doc_model_name + im2doc_weights_ext))
        im2doc_model.load_weights(weight_file)

    imdocs_path = os.path.join(cfg_emb.IM2DOC_PREDICTION_FOLDER,
                               im2doc_prediction_fname(im2doc_model_name))
    if load_precomputed_imdocs and os.path.exists(imdocs_path):
        print("Pre computed docs from images found (im2doc embedding)... loading...")
        output_doc_vectors = np.load(imdocs_path)
    else:
        print("Predict docs from images (im2doc embedding)..")
        im_data = visual_features.data
        # BUGFIX: squeeze loop would spin forever when the last dim != 1.
        while len(im_data.shape) > 2 and im_data.shape[-1] == 1:
            im_data = np.squeeze(im_data, axis=(-1,))
        output_doc_vectors = im2doc_model.predict(im_data, verbose=True)
        np.save(imdocs_path, output_doc_vectors)

    print("Loading doc2vec vectors...")
    if not isinstance(docs_vectors_npy, np.ndarray):
        docs_vectors_npy = np.load(docs_vectors_npy)

    if not isinstance(class_list_doc2vec, list):
        class_list_doc2vec = cfg_emb.load_class_list(class_list_doc2vec)

    for index, vec in enumerate(output_doc_vectors):
        nv = np.asarray(vec)
        # NOTE: an older code path queried a loaded Doc2Vec model
        # (d2v_model.docvecs.most_similar) instead of the stored vectors;
        # only the stored-vector path is kept.
        similars, dists = distances(nv, docs_vectors_npy,
                                    get_first_n=top_similars)
        # Translate doc2vec row indices into dataset class labels.
        similars = [int(class_list_doc2vec[s]) for s in similars[:]]

        fname = visual_features.fnames[index]
        label = visual_features.labels[index]
        label_name = visual_features.labelIntToStr(label)

        print("")
        # NOTE(review): .decode('utf-8') on a str is Python-2-only, consistent
        # with the rest of this file.
        print("Class: {} - {}".format(label, str(label_name).decode('utf-8')))
        print("Image: " + str(fname).decode('utf-8'))
        print("Top {} similars classes: ".format(top_similars) + str(similars[:]))
        for i in range(0, top_similars):
            print("{} similar class: {} - {} ".format(
                i + 1, str(similars[i]),
                visual_features.labelIntToStr(similars[i])))