def main():
    parser = setup_arg_parse()
    args = parser.parse_args()
    imglsh = ImageLSH(args.L, args.k)
    reduced_data, image_ids = imglsh.load_data()
    idx_structure, points_dict = imglsh.create_index_structure()

    # Convert the index buckets and hash points to JSON/BSON-serializable types.
    for key, val in idx_structure.items():
        idx_structure[key] = list(val)
    lsh_points = {}
    for key, val in points_dict.items():
        lsh_points[str(key)] = val
    idx_structure['_id'] = "index"
    lsh_points['_id'] = "points"

    mongo_client = connect_to_db()
    # Persist the reduced HOG vector alongside each image document.
    for img, data in zip(image_ids, reduced_data):
        mongo_client.mwdb_project.image_features.update_one(
            {'imageName': img},
            {"$set": {"HOG_reduced": data.tolist()}})

    # Store the LSH hash points, bucket width and configuration in MongoDB.
    mongo_client.mwdb_project.images_index.insert_one(lsh_points)
    w_length = {'_id': "w_length", "w_length": imglsh.w_length}
    mongo_client.mwdb_project.images_index.insert_one(w_length)
    config = {'k': args.k, 'L': args.L, '_id': "config"}
    mongo_client.mwdb_project.images_index.insert_one(config)

    # The index structure itself is kept on disk as JSON.
    with open('index.json', 'w') as fp:
        json.dump(idx_structure, fp)
    mongo_client.close()
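# The ImageLSH class is not shown in this excerpt. As a point of reference, below is a
# minimal sketch of the Euclidean (p-stable) LSH hash family it is assumed to implement:
# L layers of k hash functions each, with bucket width w (stored above as w_length).
# Names and signatures here are illustrative, not the project's actual API.
import numpy as np


def make_hash_family(num_layers, hashes_per_layer, dim, w, rng=None):
    """Return per-layer random projections a and offsets b for h(v) = floor((a.v + b) / w)."""
    rng = rng or np.random.default_rng(0)
    layers = []
    for _ in range(num_layers):
        a = rng.standard_normal((hashes_per_layer, dim))  # one Gaussian vector per hash
        b = rng.uniform(0, w, size=hashes_per_layer)      # random offset in [0, w)
        layers.append((a, b))
    return layers


def hash_vector(layers, v, w):
    """Hash v once per layer; each layer key is the tuple of its k bucket indices."""
    return [tuple(np.floor((a @ v + b) / w).astype(int)) for a, b in layers]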
def classify_with_probablistic_feedback(query_vector, similar_images, relevant_images):
    mongo_client = connect_to_db()
    value = 0
    N = len(similar_images) * len(similar_images[0])
    R = len(relevant_images) * len(relevant_images[0])
    similarity_dict = convert_to_dict(similar_images)
    reordered_images = []
    for img in relevant_images:
        di = similarity_dict[img]
        image_vector = list(
            mongo_client.mwdb_project.image_features.find(
                {'imageName': img}))[0]["HOG_reduced"]
        ni = count_occurances(image_vector, query_vector)
        ri = count_relevant_occurances(relevant_images, image_vector)
        pi = (ri + (ni / N)) / (R + 1)
        ui = (ni - ri + (ni / N)) / (N - R + 1)
        # Probabilistic relevance weight: log[p_i(1 - u_i) / (u_i(1 - p_i))].
        value -= di * np.log((pi * (1 - ui)) / (ui * (1 - pi)))
        reordered_images.append((img, value))
        similarity_dict.pop(img)
    mongo_client.close()
    # Images that received no feedback keep their original similarity scores.
    for k, v in similarity_dict.items():
        reordered_images.append((k, v))
    return sorted(reordered_images, key=lambda tup: tup[1])
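# convert_to_dict and count_occurances are referenced above but not defined in this
# excerpt. Below is a minimal sketch of plausible implementations, assuming
# similar_images is a list of (imageName, score) pairs and that an "occurrence" means a
# dimension active (non-zero) in both vectors; the project's actual helpers may differ.
import numpy as np


def convert_to_dict(similar_images):
    """Map each image name to its similarity/distance score."""
    return {name: score for name, score in similar_images}


def count_occurances(vector_a, vector_b, threshold=0.0):
    """Count dimensions where both feature vectors are active (above the threshold)."""
    a = np.asarray(vector_a) > threshold
    b = np.asarray(vector_b) > threshold
    return int(np.sum(a & b))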
def count_relevant_occurances(relevant_images, image_vector):
    mongo_client = connect_to_db()
    count = 0
    for img in relevant_images:
        vector = list(
            mongo_client.mwdb_project.image_features.find(
                {'imageName': img}))[0]["HOG_reduced"]
        count += count_occurances(vector, image_vector)
    mongo_client.close()
    return count
def get_vectors_for_images(images):
    mongo_client = connect_to_db()
    result = []
    for image in images:
        result.append(
            np.array(
                list(
                    mongo_client.mwdb_project.image_features.find(
                        {'imageName': image}))[0]["HOG_reduced"]))
    mongo_client.close()
    return np.array(result)
def main():
    parser = setup_arg_parse()
    args = parser.parse_args()
    mongo_client = connect_to_db()

    # Load the LSH configuration, hash points and bucket width stored by the indexing task.
    config = list(mongo_client.mwdb_project.images_index.find({'_id': "config"}))[0]
    with open('index.json', 'r') as fp:
        idx_structure = json.load(fp)
    points_dict = list(mongo_client.mwdb_project.images_index.find(
        {'_id': "points"}, {"_id": 0}))[0]
    w_length = list(mongo_client.mwdb_project.images_index.find(
        {"_id": "w_length"}))[0]["w_length"]
    lsh_points = {}
    for k, v in points_dict.items():
        lsh_points[int(k)] = v
    print(config)

    query_image_details = list(mongo_client.mwdb_project.image_features.find(
        {"imageName": args.query_image_id}))[0]
    query_vector = query_image_details["HOG_reduced"]

    imglsh = ImageLSH(config["k"], config["L"])
    imglsh.load_index_structure(idx_structure, lsh_points, w_length)
    similarity_scores, total_images_considered, unique_images_considered = (
        imglsh.find_similar_images(query_vector, args.t, mongo_client))

    similar_images = []
    for score in similarity_scores:
        image = {}
        image["distance_score"] = score[1]
        image["imageName"] = score[0]
        image["image_path"] = list(mongo_client.mwdb_project.image_features.find(
            {"imageName": score[0]}))[0]["image_path"]
        similar_images.append(image)
    mongo_client.close()

    print("Number of unique images considered = {0}".format(unique_images_considered))
    print("Number of overall images considered = {0}".format(total_images_considered))
    plot_results(similar_images, query_image_details["image_path"])
def get_seed_matrix(label):
    mongo_client = connect_to_db()
    images = mongo_client.mwdb_project.image_features.find(
        {}, {"imageName": 1, "_id": 0, "aspectOfHand": 1})
    seed_list = []
    count = 0
    # Mark images whose aspectOfHand matches the given label ("dorsal"/"palmar") as seeds.
    for img in images:
        if "aspectOfHand" in img and label in img["aspectOfHand"].lower():
            count += 1
            seed_list.append(1)
        else:
            seed_list.append(0)
    # Normalize so the seed vector forms a probability distribution.
    seed_matrix = np.array(seed_list)
    seed_matrix = seed_matrix / np.sum(seed_matrix)
    return seed_matrix
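# compute_pagerank, used in the PPR branch below, is not shown in this excerpt. Below is
# a minimal sketch of the personalized PageRank power iteration that would consume the
# seed vector returned above; the damping factor beta and convergence threshold are
# assumptions, not values from the project.
import numpy as np


def compute_pagerank_sketch(transition_matrix, seed_vector, beta=0.85, tol=1e-9, max_iter=1000):
    """Iterate pi = beta * M @ pi + (1 - beta) * s until the L1 change falls below tol."""
    pi = np.array(seed_vector, dtype=float)
    for _ in range(max_iter):
        new_pi = beta * transition_matrix @ pi + (1 - beta) * seed_vector
        if np.linalg.norm(new_pi - pi, 1) < tol:
            return new_pi
        pi = new_pi
    return pi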
def main():
    parser = setup_arg_parse()
    args = parser.parse_args()
    populate_database(args)

    model = "CM"
    dorsal_data_matrix, _ = get_data_matrix(
        model, convert_label_to_filterstring("dorsal"))
    palmar_data_matrix, _ = get_data_matrix(
        model, convert_label_to_filterstring("palmar"))
    dorsal_labels = np.zeros((dorsal_data_matrix.shape[0], 1))
    palmar_labels = np.ones((palmar_data_matrix.shape[0], 1))
    labels = np.append(dorsal_labels, palmar_labels, axis=0)
    combined_data = np.append(dorsal_data_matrix, palmar_data_matrix, axis=0)

    # Reduce the training data with SVD, keeping the right-singular vectors so the
    # test data can be projected into the same latent space.
    #reduced_data = reduce_dimensions_svd(combined_data, 20)
    reduced_data, v_matrix = reduce_dimensions_svd(combined_data, 20, get_v=True)
    dx, ddx, labels, d_labels = train_test_split(
        reduced_data, labels, test_size=0.1, random_state=42)
    reduced_data = np.append(dx, ddx, axis=0)
    labels = np.append(labels, d_labels, axis=0)
    labeled_data = np.append(reduced_data, labels, axis=1)

    # Extract color moments for the unlabeled test images and project them with the
    # training V matrix.
    testing_images, test_image_ids = enumerate_files_in_dir(args.test_folder)
    test_dataset = []
    for test_image, image_id in zip(testing_images, test_image_ids):
        #test_dataset.append(np.array(extract_hog_features(test_image)))
        test_dataset.append(np.array(extract_color_moments(test_image)))
    test_dataset = np.array(test_dataset)
    #reduced_test_dataset = reduce_dimensions_svd(test_dataset, 20)
    reduced_test_dataset = np.matmul(test_dataset, v_matrix)

    mongo_client = connect_to_db()
    actual_labels = get_actual_labels_from_csv(args.labels_csv, test_image_ids)
    predicted = []

    if args.classifier == "DT":
        model = DecisionTreeClassifier()
        model.fit(labeled_data)
        results = model.transform(reduced_test_dataset)
        for test_image_id, result in zip(test_image_ids, results):
            if result == 0:
                label = "dorsal"
            elif result == 1:
                label = "palmar"
            predicted.append((test_image_id, label))
            print("{0} - {1}".format(test_image_id, label))
    elif args.classifier == "SVM":
        clf = SupportVectorMachine(kernel=rbf_kernel, power=4, coef=1)
        # SVM expects labels to be +1 and -1; copy so the original labels stay intact.
        training_labels = labels.copy()
        training_labels[training_labels == 0] = -1
        clf.fit(reduced_data, training_labels)
        values = clf.predict(reduced_test_dataset)
        print(values)
        for test_image_id, result in zip(test_image_ids, values):
            if result == 1:
                label = "palmar"
            else:
                label = "dorsal"
            predicted.append((test_image_id, label))
            print("{0} - {1}".format(test_image_id, label))
    elif args.classifier == "PPR":
        args.k = 15
        function_val = "manhattan"
        # Build an image-image similarity graph and run personalized PageRank, seeded
        # separately with the labeled dorsal and palmar images.
        #process_all_images(args.train_folder, "CM")
        #process_all_images(args.test_folder, "CM")
        outgoing_img_graph, image_ids = create_similarity_graph(
            args.k, function_val, "CM")
        transition_matrix = get_transition_matrix(outgoing_img_graph, args.k)
        seed_matrix_dorsal = get_seed_matrix("dorsal")
        seed_matrix_palmar = get_seed_matrix("palmar")
        dorsal_pagerank = compute_pagerank(transition_matrix, seed_matrix_dorsal)
        palmar_pagerank = compute_pagerank(transition_matrix, seed_matrix_palmar)
        dorsal_pagerank_dict = {x: y for x, y in zip(image_ids, dorsal_pagerank)}
        palmar_pagerank_dict = {x: y for x, y in zip(image_ids, palmar_pagerank)}
        predicted = label_images(dorsal_pagerank_dict, palmar_pagerank_dict,
                                 test_image_ids)

    print(get_accuracy(actual_labels, predicted))
    mongo_client.close()
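# reduce_dimensions_svd is not shown in this excerpt. Below is a minimal sketch of what
# the get_v=True variant used above is assumed to return: the training data projected
# onto the top-k latent dimensions plus the right-singular vectors V_k, so that unseen
# test data can be mapped into the same space with test_data @ v_matrix.
import numpy as np


def reduce_dimensions_svd_sketch(data_matrix, k, get_v=False):
    u, s, vt = np.linalg.svd(data_matrix, full_matrices=False)
    reduced = u[:, :k] * s[:k]   # object-to-latent representation (U_k * S_k)
    v_matrix = vt[:k].T          # feature-to-latent projection (V_k)
    return (reduced, v_matrix) if get_v else reduced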
def main():
    parser = setup_arg_parse()
    args = parser.parse_args()
    mongo_client = connect_to_db()

    # Load the LSH configuration, hash points and bucket width stored by the indexing task.
    config = list(
        mongo_client.mwdb_project.images_index.find({'_id': "config"}))[0]
    with open('index.json', 'r') as fp:
        idx_structure = json.load(fp)
    points_dict = list(
        mongo_client.mwdb_project.images_index.find({'_id': "points"},
                                                    {"_id": 0}))[0]
    w_length = list(
        mongo_client.mwdb_project.images_index.find(
            {"_id": "w_length"}))[0]["w_length"]
    lsh_points = {}
    for k, v in points_dict.items():
        lsh_points[int(k)] = v
    print(config)

    # The database stores only the file name, not the full path, so strip the directory part.
    query_image_details = list(
        mongo_client.mwdb_project.image_features.find(
            {"imageName": args.query_image_id.split("/")[-1]}))[0]
    query_vector = query_image_details["HOG_reduced"]

    imglsh = ImageLSH(config["k"], config["L"])
    imglsh.load_index_structure(idx_structure, lsh_points, w_length)
    #full_data_matrix, image_ids = get_data_matrix("HOG")
    #full_data_matrix = reduce_dimensions_lda(full_data_matrix, 256)
    # print(full_data_matrix.shape)
    similarity_scores, total_images_considered, unique_images_considered = (
        imglsh.find_similar_images(query_vector, args.t, mongo_client))

    # Relevance-feedback loop: show the current results, collect feedback, and rerank
    # with the chosen classifier.
    jmp_matrix = None
    while True:
        pprint(similarity_scores)
        similar_images = []
        for score in similarity_scores:
            image = {}
            image["imageName"] = score[0]
            image["image_path"] = list(
                mongo_client.mwdb_project.image_features.find(
                    {"imageName": score[0]}))[0]["image_path"]
            similar_images.append(image)
        plot_results(similar_images, query_image_details["image_path"])

        # R holds the image ids the user marked relevant, IR those marked irrelevant.
        R, IR = get_feedback_ids()
        if args.clf == "SVM":
            similarity_scores = classify_with_svm(query_vector,
                                                  similarity_scores, R, IR)
        elif args.clf == "PPR":
            similarity_scores, jmp_matrix = classify_with_ppr(
                query_vector, similarity_scores, R, IR)
        elif args.clf == "DT":
            similarity_scores = classify_with_dt(query_vector,
                                                 similarity_scores, R, IR)
        else:
            similarity_scores = classify_with_probablistic_feedback(
                query_vector, similarity_scores, R)
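# get_feedback_ids is not shown in this excerpt. Below is a minimal sketch of an
# interactive console version that collects comma-separated image ids; the actual
# project helper may gather feedback differently (e.g., through the plot UI).
def get_feedback_ids_sketch():
    relevant = input("Enter relevant image ids (comma separated): ")
    irrelevant = input("Enter irrelevant image ids (comma separated): ")
    R = [img.strip() for img in relevant.split(",") if img.strip()]
    IR = [img.strip() for img in irrelevant.split(",") if img.strip()]
    return R, IR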