import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler

# get_full_matrix and reducer are project helpers assumed to be in scope.
def prepare_data(k, frt, feature, master):
    min_max_scaler = MinMaxScaler()
    if master:
        images, meta, matrix = get_full_matrix(feature, master=True)
        matrix = min_max_scaler.fit_transform(matrix)
        matrix, _, _ = reducer(matrix, k, frt)
        # Image-Image similarity
        img_img = 1 / (euclidean_distances(matrix) + 1)
        np.fill_diagonal(img_img, 0)
        return images, meta, img_img

    # Combine labelled and unlabelled vectors so both are reduced in the
    # same latent space before computing similarities.
    l_images, l_meta, l_matrix = get_full_matrix(feature)
    u_images, u_meta, u_matrix = get_full_matrix(feature, unlabelled=True)
    meta = l_meta
    meta.update(u_meta)
    matrix = min_max_scaler.fit_transform(np.vstack((
        l_matrix,
        u_matrix,
    )))
    matrix, _, _ = reducer(matrix, k, frt)
    # Image-Image similarity
    img_img = 1 / (euclidean_distances(matrix) + 1)
    np.fill_diagonal(img_img, 0)
    return l_images + u_images, meta, img_img
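# Hypothetical usage sketch (the argument values 30/'pca'/'lbp' are
# assumptions, not from the original code): build the image-image similarity
# graph over the combined labelled + unlabelled set, then look up the five
# images most similar to the first one.
if __name__ == '__main__':
    images, meta, img_img = prepare_data(k=30, frt='pca', feature='lbp',
                                         master=False)
    neighbours = np.argsort(img_img[0])[::-1][:5]
    print([images[i] for i in neighbours])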
def svm_feedback(relevant_images, irrelevant_images, images_to_display, query,
                 results):
    if not irrelevant_images:
        print("Please provide an irrelevant images set for SVM to work properly.")
        return relevant_images

    model = settings.SVM.CLASSIFIER.MODEL
    k = settings.SVM.CLASSIFIER.K
    frt = settings.SVM.CLASSIFIER.FRT

    images_rel, data_matrix_rel = get_all_vectors(
        model, f={'path': {'$in': relevant_images}}, master_db=True)
    images_irel, data_matrix_irel = get_all_vectors(
        model, f={'path': {'$in': irrelevant_images}}, master_db=True)
    images_test, test_vector = get_all_vectors(
        model, f={'path': {'$in': results + [query]}}, master_db=True)

    # Reduce the labelled (relevant + irrelevant) and test vectors together
    labelled_vectors, _, _, unlabelled_vectors = reducer(
        np.vstack((data_matrix_rel, data_matrix_irel)),
        k, frt, query_vector=test_vector)

    rel_class = np.array([1] * len(data_matrix_rel))
    irel_class = np.array([-1] * len(data_matrix_irel))

    x_train = np.array(labelled_vectors) * 2
    y_train = np.concatenate((rel_class, irel_class))

    svclassifier = SVM()
    svclassifier.fit(np.array(x_train), np.array(y_train))

    unlabelled_vectors = np.array(unlabelled_vectors) * 2
    y_pred = svclassifier.predict(unlabelled_vectors)

    # Keep only the images the classifier predicts as relevant
    dic = {}
    for c, y in enumerate(y_pred):
        if y == 1:
            dic[images_test[c]] = unlabelled_vectors[c]

    # Rank the relevant predictions by their distance from the hyperplane
    length_dict = {key: np.dot(vec, svclassifier.w) for key, vec in dic.items()}
    sorted_dict = sorted(length_dict.items(), key=lambda x: x[1], reverse=True)

    limit = max(images_to_display - len(irrelevant_images), 0)
    list_img = [key for key, _ in sorted_dict[:limit]]
    return list_img + irrelevant_images
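# SVM above is a project-specific classifier exposing fit(), predict() and a
# weight vector w. A minimal compatible sketch (the hyper-parameters and the
# omitted bias term are assumptions), trained by sub-gradient descent on the
# regularised hinge loss:
import numpy as np

class SVM:
    def __init__(self, lr=0.001, reg=0.01, epochs=1000):
        self.lr = lr
        self.reg = reg
        self.epochs = epochs
        self.w = None

    def fit(self, X, y):
        # y is expected in {-1, +1}; no bias term, so w keeps the feature
        # dimensionality used by the np.dot() ranking in svm_feedback.
        self.w = np.zeros(X.shape[1])
        for _ in range(self.epochs):
            for xi, yi in zip(X, y):
                if yi * np.dot(xi, self.w) < 1:
                    self.w += self.lr * (yi * xi - 2 * self.reg * self.w)
                else:
                    self.w -= self.lr * 2 * self.reg * self.w

    def predict(self, X):
        return np.sign(np.dot(X, self.w))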
def prepare_data(k, frt, feature, paths=None):
    min_max_scaler = MinMaxScaler()
    if paths:
        images, meta, matrix = get_data_matrix(feature,
                                               f={'path': {'$in': paths}})
    else:
        images, meta, matrix = get_data_matrix(feature)
    matrix = min_max_scaler.fit_transform(matrix)
    matrix, _, _ = reducer(matrix, k, frt)
    return images, meta, matrix
from sklearn.metrics import classification_report

def run_svm(evaluate, model='lbp', k=30, frt='pca'):
    train_data, train_labels = build_labelled(model)
    if evaluate:
        test_data, test_labels, test_paths = build_unlabelled(model)
    else:
        test_paths, test_data = get_all_vectors(model, f={}, unlabelled_db=True)

    # Reduce the train and test vectors together, then scale both
    labelled_vectors, _, _, unlabelled_vectors = reducer(
        train_data, k, frt, query_vector=test_data)
    labelled_vectors *= 2
    unlabelled_vectors *= 2

    svclassifier = SVM()
    svclassifier.fit(labelled_vectors, train_labels)
    y_pred = svclassifier.predict(unlabelled_vectors)

    if evaluate:
        print(classification_report(test_labels, y_pred))

    return test_paths, y_pred
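# Hypothetical invocation (the argument values and the 1 -> dorsal label
# mapping are assumptions): classify the unlabelled images, or pass
# evaluate=True to score the predictions with sklearn's classification_report.
paths, predictions = run_svm(evaluate=False, model='lbp', k=30, frt='pca')
for path, label in zip(paths, predictions):
    print(path, 'dorsal' if label == 1 else 'palmar')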
def prepare_data(cls, feature, k_latent_semantics, frt_technique,
                 ignore_metadata):
    # Get the images from the folders specified in the config. We expect the
    # feature vectors to have been built for both the labelled and the
    # unlabelled data.
    u_images, u_meta, u_matrix = cls.get_data_matrix(
        feature, unlabelled=True, ignore_metadata=ignore_metadata)
    l_images, l_meta, l_matrix = cls.get_data_matrix(
        feature, ignore_metadata=ignore_metadata)

    # Reduce the labelled and unlabelled matrices together, then split the
    # reduced matrix back into its labelled and unlabelled halves.
    old_matrix = np.vstack((
        l_matrix,
        u_matrix,
    ))
    matrix, _, _ = reducer(old_matrix, k_latent_semantics, frt_technique)
    r_l_matrix = matrix[:len(l_images)]
    r_u_matrix = matrix[len(l_images):]

    return l_images, u_images, l_meta, u_meta, r_l_matrix, r_u_matrix
import math

import numpy as np

# model, k and frt are assumed to be module-level settings in the original file.
def feedback_probab(relevant, irrelevant, t, query, prev_results):
    if not relevant:
        print("Probabilistic model requires relevant images for re-ordering.")
        return prev_results

    img_all, img_all_vec = get_all_vectors(
        model, f={'path': {'$in': prev_results + [query]}},
        master_db=True)  # alternative filter: f={'path': {'$nin': relevant}}

    img_all_vec_red, _, _ = reducer(img_all_vec, k, frt)
    img_all_vec_red = scale(img_all_vec_red, 0, 1)

    # Map image name -> reduced vector
    dict_all_red = {}
    for i in range(len(img_all)):
        dict_all_red[img_all[i]] = img_all_vec_red[i]

    img_rel_vec_red = np.array([dict_all_red[name] for name in relevant])

    # Binarise both matrices for the binary independence model
    img_all_vec_red = makeArrayBinary(img_all_vec_red,
                                      img_all_vec_red.shape[0],
                                      img_all_vec_red.shape[1])
    img_rel_vec_red = makeArrayBinary(img_rel_vec_red,
                                      img_rel_vec_red.shape[0],
                                      img_rel_vec_red.shape[1])

    R = img_rel_vec_red.shape[0]  # number of relevant images
    N = len(img_all)              # number of images overall

    # p_j: smoothed probability that feature j is set in a relevant image
    p_list = []
    for j in range(k):
        r = sum(1 for i in range(R) if img_rel_vec_red[i][j] == 1)
        p_list.append((r + 0.5) / (R + 1))

    # n_j: smoothed probability that feature j is set in a non-relevant image
    n_list = []
    for j in range(k):
        n = sum(1 for i in range(N) if img_all_vec_red[i][j] == 1)
        n_list.append(n)
    for i in range(k):
        n_list[i] = (n_list[i] - p_list[i] + 0.5) / (N - R + 1)

    # Per-feature log-odds term weights; fall back to 0 when the ratio is not
    # positive, so the weight vector keeps all k dimensions.
    log_list = []
    for i in range(k):
        num = (p_list[i] * (1 - n_list[i])) / (n_list[i] * (1 - p_list[i]))
        log_list.append(math.log(num, 2) if num > 0 else 0.0)
    log_list = np.array(log_list)

    # Re-rank every image by its similarity to the term weights
    new_result = [(name, np.dot(vec, log_list))
                  for name, vec in dict_all_red.items()]
    new_result = sorted(new_result, key=lambda x: x[1], reverse=True)

    return [new_result[i][0] for i in range(t)]
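# scale() and makeArrayBinary() are project helpers not shown in this section.
# Minimal sketches consistent with how they are called above (the mean-based
# threshold in makeArrayBinary is an assumption):
def scale(matrix, lo, hi):
    # Column-wise min-max scaling into [lo, hi]
    matrix = np.asarray(matrix, dtype=float)
    mins = matrix.min(axis=0)
    spread = matrix.max(axis=0) - mins
    spread[spread == 0] = 1  # avoid division by zero on constant columns
    return lo + (matrix - mins) * (hi - lo) / spread

def makeArrayBinary(matrix, rows, cols):
    # Threshold each column at its mean to build a binary incidence matrix
    matrix = np.asarray(matrix, dtype=float)
    means = matrix.mean(axis=0)
    binary = np.zeros((rows, cols))
    for i in range(rows):
        for j in range(cols):
            binary[i][j] = 1.0 if matrix[i][j] >= means[j] else 0.0
    return binary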
# Only one input is taken: k, the number of latent semantics (features) to extract
k_each = args.Features

# On extensive testing, the best feature extraction model was found to be SIFT
model = settings.TASK1_CONFIG.MODEL
# On extensive testing, the best feature reduction technique was found to be PCA
feature = settings.TASK1_CONFIG.FRT

# Generate the vectors for the dorsal labelled, palmar labelled and test images.
# Also fetch the labels of the unlabelled images so accuracy can be checked later.
dorsal_vectors, palmar_vectors, test_data, test_data_paths = generate_vec()

# Apply PCA to the dorsal images and fetch the 'k' latent semantics
reduced_dorsal_vectors, _, _, _, dorsal_pca = reducer(
    dorsal_vectors, k_each, feature, get_scaler_model=True)
dorsal_variance_ratio = dorsal_pca.explained_variance_ratio_
print("Computed", k_each, "latent semantics for dorsal")

# Apply PCA to the palmar images and fetch the 'k' latent semantics
reduced_palmar_vectors, _, _, _, palmar_pca = reducer(
    palmar_vectors, k_each, feature, get_scaler_model=True)
palmar_variance_ratio = palmar_pca.explained_variance_ratio_
print("Computed", k_each, "latent semantics for palmar")

# Apply PCA to the test images and fetch the 'k' latent semantics
reduced_test_data, _, _, _, test_pca = reducer(
    test_data, k_each, feature, get_scaler_model=True)
test_variance_ratio = test_pca.explained_variance_ratio_

# List that will store the total dorsal dot-product score for each test image
dorsal = []
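# A hedged sketch of the scoring step the list above feeds (the decision rule
# and the variance-ratio weighting are assumptions about the omitted code, not
# the project's implementation): project each test vector onto the dorsal and
# palmar latent semantics and label it with whichever side scores higher.
import numpy as np

palmar = []
for vec in test_data:
    dorsal.append(np.sum(np.abs(np.dot(dorsal_pca.components_, vec))
                         * dorsal_variance_ratio))
    palmar.append(np.sum(np.abs(np.dot(palmar_pca.components_, vec))
                         * palmar_variance_ratio))
predictions = ['dorsal' if d >= p else 'palmar'
               for d, p in zip(dorsal, palmar)]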
images, feature_space = utils.get_all_vectors(
    settings.PPR.CLASSIFIER.FEATURE)
feature_space = min_max_scaler.fit_transform(feature_space)

meta = utils.get_metadata()
meta = {m['path']: m for m in meta}

# Disabled variant that also folded the unlabelled vectors into the reduction:
"""
u_images, u_feature_space = utils.get_all_vectors(
    settings.PPR.CLASSIFIER.FEATURE, unlabelled_db=True)
u_feature_space = min_max_scaler.fit_transform(u_feature_space)
matrix = np.vstack((
    feature_space,
    u_feature_space,
))
matrix, eigen_values, latent_vs_old = reducer(
    matrix,
"""
matrix, eigen_values, latent_vs_old = reducer(feature_space,
                                              settings.PPR.CLASSIFIER.K,
                                              settings.PPR.CLASSIFIER.FRT)

dm = helper.build_matrix_with_labels(matrix, images, meta)
"""
dm = helper.build_labelled_matrix(matrix, images + u_images, 'aspectOfHand')
"""

evaluate(dm)
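# settings.PPR suggests the downstream classifier runs Personalised PageRank
# on an image-image graph. A minimal power-iteration sketch of that scoring
# (the function name, signature and alpha are assumptions, not the project's
# evaluate()):
import numpy as np

def personalized_pagerank(adj, seed_idx, alpha=0.85, iters=100):
    # Column-normalise the similarity matrix into a transition matrix
    trans = adj / adj.sum(axis=0, keepdims=True)
    # Restart (teleport) vector concentrated on the seed images
    restart = np.zeros(adj.shape[0])
    restart[seed_idx] = 1.0 / len(seed_idx)
    rank = restart.copy()
    for _ in range(iters):
        rank = alpha * trans.dot(rank) + (1 - alpha) * restart
    return rank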
    sub_meta[meta[img]['id']] = meta[img]

# subject id to its order in the matrix
sub_to_idx = {sub: idx for idx, sub in enumerate(subs)}
# index back to subject id
idx_to_sub = [0] * len(sub_to_idx)
for sub in sub_to_idx:
    idx_to_sub[sub_to_idx[sub]] = sub

# A subject-subject similarity index: the mean image-image similarity
# between the two subjects' image sets
sub_sub = np.zeros((len(subs), len(subs),))
for sub1 in sub_to_idx:
    for sub2 in sub_to_idx:
        sub_sub[sub_to_idx[sub1], sub_to_idx[sub2]] = \
            img_img[subs[sub1], :].take(subs[sub2], axis=1).mean()

w, _, h = reducer(sub_sub, args.k_latent_semantics, "nmf")

# Print term-weight pairs
get_term_weight_pairs(w, "task7_{}.csv".format(args.k_latent_semantics))

sub_weight = [
    sorted([("z{}".format(idx), weight,) for idx, weight in enumerate(row)],
           key=lambda x: x[1])
    for row in w
]

output.write_to_file("visualize_task7.html",
                     "task7-{}.html".format(args.k_latent_semantics),
                     vectors=sub_weight,
                     subs=subs,
                     idx_to_sub=idx_to_sub,
                     images=images,
                     sub_meta=sub_meta,
img_meta = []
try:
    for m in meta:
        images.append(m['path'])
        img_meta.append([
            m["age"], mapping[m["gender"]], mapping[m["skinColor"]],
            mapping[m["accessories"]], m["nailPolish"],
            mapping[m["aspectOfHand"].split()[0]],
            mapping[m["aspectOfHand"].split()[1]], m["irregularities"]
        ])
except KeyError:
    raise Exception("Invalid metadata detected")

vectors, eigen_values, latent_vs_old = reducer(img_meta,
                                               args.k_latent_semantics, "nmf")

get_term_weight_pairs(vectors, "task8_{}.csv".format(args.k_latent_semantics))
# Note: this second call reuses the same CSV name and overwrites the first file
get_term_weight_pairs(latent_vs_old,
                      "task8_{}.csv".format(args.k_latent_semantics))

# Extra Credit
# image path with a vector in the latent semantic space
data_z = zip(images, vectors)
# image path for each latent semantic in h
feature_z = [(idx, images[np.argmax(np.dot(img_meta, i))])
             for idx, i in enumerate(latent_vs_old)]

output.write_to_file("visualize_data_z.html",