def compute_threshold(self):
    """Set ``self.threshold`` to the 85th percentile of reconstruction error.

    The latent semantics are mapped back through the inverse transform and
    then de-normalized with the fitted scaler; the error is the distance
    between the round-tripped descriptors and the original ones.
    """
    # Undo the dimensionality reduction, then undo the normalization.
    normalized_reconstruction = self.inv_transform(self.objectLatentSemantics)
    reconstruction = self.scaler.inverse_transform(normalized_reconstruction)
    # Per-object distance between reconstructed and original descriptors.
    errors = find_distance_2_vectors(reconstruction, self.featureDescriptor)
    self.threshold = np.percentile(errors, 85)
def startTask3():
    """Task 3: personalized PageRank over an image-similarity graph.

    Builds a column-stochastic k-nearest-neighbour transition matrix from
    PCA-reduced HOG features of the chosen labelled set, runs personalized
    PageRank with three user-supplied seed images, and renders the K most
    dominant images (plus the seeds) as an HTML page opened in Chrome.
    All parameters are read interactively from stdin.
    """
    print("start task3")
    k = input("Please enter the k value for outgoing edges ")
    K = input("Please enter the K value for visualizing dominant images ")
    k = int(k)
    K = int(K)
    folder = input(
        "Please Select the folder to apply Page Rank \n 1. Labelled Set 1 \n 2. Labelled Set 2 \n"
    )
    if folder == "1":
        folderPath = config.IMAGE_FOLDER_SET_1
    else:
        folderPath = config.IMAGE_FOLDER_SET_2

    # Collect HOG descriptors for every image, preferring the pre-computed
    # per-image JSON cache in FEATURES_FOLDER.
    data = {}
    for file in os.listdir(str(folderPath)):
        filename = os.fsdecode(file)
        fileExists = os.path.exists(
            join(config.FEATURES_FOLDER, file + ".json"))
        if fileExists:
            with open(join(config.FEATURES_FOLDER, filename + ".json"),
                      "r") as f:
                eachData = json.load(f)
            data.update(eachData)
        else:
            # BUGFIX: this branch previously did `data = HOG()...` which
            # discarded every descriptor gathered so far, and then called
            # data.update(eachData) with a stale (or unbound) eachData.
            eachData = HOG().HOGForSingleImage(folderPath, file)
            data.update(eachData)

    reducerObject = list(data.values())
    pca = PCA_Reducer(reducerObject)
    latentFeatureDict = {}
    data = pca.reduceDimension(pca.featureDescriptor)

    # Row i of the PCA output corresponds to the i-th file in listing order.
    i = 0
    imageNames = []
    for file in os.listdir(str(folderPath)):
        filename = os.fsdecode(file)
        latent = data.iloc[i][:]
        imageNames.append(filename)
        latentFeatureDict[filename] = latent
        i = i + 1

    # Each image i distributes probability mass to its k nearest neighbours,
    # weighted by inverse distance and normalized so column i sums to 1.
    adjacency_matrix = [[0 for _ in range(len(latentFeatureDict))]
                        for _ in range(len(latentFeatureDict))]
    for i in range(len(latentFeatureDict)):
        distances = []
        for j in range(len(latentFeatureDict)):
            distances.append(
                find_distance_2_vectors(latentFeatureDict[imageNames[i]],
                                        latentFeatureDict[imageNames[j]]))
        distances = np.asarray(distances)
        ind = np.argpartition(distances, k)[:k]
        total = 0
        for distance_index in ind:
            if distances[distance_index] != 0:
                total += 1 / distances[distance_index]
        for distance_index in ind:
            # Only the k nearest neighbours enter the matrix; dividing by
            # `total` turns the column into a probability distribution.
            if distances[distance_index] != 0:
                adjacency_matrix[distance_index][
                    i] = 1 / distances[distance_index] / total

    rowDict = {}
    i = 0
    for image in imageNames:
        rowDict[i] = image
        i = i + 1
    df = pd.DataFrame(adjacency_matrix, columns=imageNames)
    df.rename(index=rowDict, inplace=True)
    df.to_csv(join(config.DATABASE_FOLDER, "adjacency_matrix.csv"))

    I = np.identity(df.shape[1])
    print("Enter the three imageIDs to be used as seed")
    imageID_1 = input()
    imageID_2 = input()
    imageID_3 = input()
    seed = pd.Series(0, index=df.index)
    seed.loc[imageID_1] = 0.33
    seed.loc[imageID_2] = 0.33
    seed.loc[imageID_3] = 0.34
    # Closed-form personalized PageRank: pi = (1-alpha) * (I - alpha*M)^-1 * s
    # with damping alpha = 0.75.
    page_rank = np.matmul(np.linalg.inv(I - .75 * df), 0.25 * seed)
    steady_state = pd.Series(page_rank, index=df.index)
    steady_state.to_csv(join(config.DATABASE_FOLDER,
                             "steady_state_matrix.csv"))

    col_Names = ["imageNames", "values"]
    my_CSV_File = pd.read_csv(join(config.DATABASE_FOLDER,
                                   "steady_state_matrix.csv"),
                              names=col_Names)
    kDominant = my_CSV_File.nlargest(K, ["values"], keep="all")

    # BUGFIX: the img rule was missing its closing brace, so the size style
    # was never applied by the browser.
    s = "<style>" \
        "img { width:160px;height:120px }" \
        "</style>"
    s = s + "<h2> 3 Seed Images</h2>"
    s = s + "<img src='"
    s = s + join(folderPath, imageID_1)
    s = s + "'>"
    s = s + "<img src='"
    s = s + join(folderPath, imageID_2)
    s = s + "'>"
    s = s + "<img src='"
    s = s + join(folderPath, imageID_3)
    s = s + "'>"
    s = s + "</br></br>"
    s = s + "<h2>" + str(K) + " Dominant Images</h2>"
    for index, row in kDominant.iterrows():
        news = ""
        news = news + "<img src='"
        news = news + join(folderPath, row["imageNames"])
        news = news + "'>"
        s = s + news

    # Context manager guarantees the HTML file is closed even on error.
    with open(join(config.DATABASE_FOLDER, "task3.html"), "w") as f:
        f.write(s)

    import webbrowser
    url = join(config.DATABASE_FOLDER, "task3.html")
    # MacOS
    # chrome_path = 'open -a /Applications/Google\ Chrome.app %s'
    # Windows
    chrome_path = 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe %s'
    # Linux
    # chrome_path = '/usr/bin/google-chrome %s'
    webbrowser.get(chrome_path).open(url)
def startTask5():
    """Task 5: LSH-based nearest-neighbour search for a query image.

    Reads the layer count `l`, hashes-per-layer `k`, a query image name and
    the result count `t` from stdin, builds the LSH index, then probes the
    query's bucket in every layer.  If fewer than `t` candidates are found,
    the search is widened by ignoring one more trailing hash bit per pass
    (probing every bucket whose key shares the remaining prefix) until
    enough candidates exist or all bits are ignored.  Candidates are ranked
    by Euclidean distance to the query, visualized, and persisted.
    """
    global hash_rep
    global database
    l = int(input('Enter layer: '))
    k = int(input('Enter hashes per layer: '))
    database = lsh_index_structure(l, k)
    query_img = input('Enter the Query Image: ')
    t = int(input('Most similar images (t): '))
    # Compute the per-layer hash (bucket key) for the query image from its
    # cached feature descriptor.
    with open(path.join(config.FEATURES_FOLDER, query_img + '.json'),
              'r',
              encoding='utf-8') as f:
        query_feat_desc = json.load(f)
    query_rep = []  # one bit-string bucket key per layer
    for i in range(len(hash_rep)):
        img_bucket = img_index(query_feat_desc[query_img], i)
        query_rep.append(img_bucket)
    # Get list of similar imgs
    similar_imgs = []
    bits_to_ignore = 0
    while (1):
        bins_to_consider = set()
        if (bits_to_ignore > 0):
            # Enumerate all 2^bits_to_ignore suffixes that can replace the
            # ignored trailing bits of the bucket key.
            arr = [None] * bits_to_ignore
            ret_n_size_bin_strings(bits_to_ignore, arr, 0, bins_to_consider)
        for layer_ptr in range(len(database)):
            curr_layer_bit_rep = query_rep[layer_ptr]
            if bits_to_ignore > 0:
                for bin in bins_to_consider:
                    # Same prefix, every possible suffix.
                    key = curr_layer_bit_rep[:-1 * bits_to_ignore] + bin
                    if key in database[layer_ptr]:
                        similar_imgs.extend(database[layer_ptr][key])
            else:
                # Exact-bucket probe on the first pass.
                if curr_layer_bit_rep in database[layer_ptr]:
                    similar_imgs.extend(
                        database[layer_ptr][curr_layer_bit_rep])
        if (len(similar_imgs) >= t or bits_to_ignore == len(query_rep[0])):
            break  # Also look to break when all the buckets have been scanned.
        else:
            # Widen the search: ignore one more bit and rescan from scratch.
            bits_to_ignore += 1
            del similar_imgs[:]
    total_imgs = len(similar_imgs)
    unique_similar_imgs = set(similar_imgs)
    unique_imgs = len(unique_similar_imgs)
    print('Number of unique images: ', unique_imgs)
    print('Overall number of images considered: ', total_imgs)
    # Rank all unique images by Euclidean distance to the query descriptor.
    euclid_dist = dict()
    query_fd = query_feat_desc[query_img]
    for img in unique_similar_imgs:
        with open(path.join(config.FEATURES_FOLDER, img + '.json'),
                  'r',
                  encoding='utf-8') as f:
            img_fd = json.load(f)
        euclid_dist[img] = find_distance_2_vectors(np.asarray(query_fd),
                                                   np.asarray(img_fd[img]))
    sorted_imgs = sorted(euclid_dist.items(), key=lambda kv: kv[1])
    visualize_for_lsh(sorted_imgs, l, k, t, query_img)
    store_results_as_json(sorted_imgs, l, k, t, query_img)
    # Persist the index so task 6 can reload it (see task_6_inp).
    store_pickles(l, k)
def task_6_inp(query_img_fd, l, k, t):
    """Task 6 entry point: LSH search for a query given by its descriptor.

    Reloads the hash representation and the indexed image database pickled
    by task 5 (for the same `l`/`k`), then runs the same widening bucket
    search as ``startTask5``: probe the query's bucket per layer, and if
    fewer than `t` candidates are found, ignore one more trailing hash bit
    per pass.  Unlike ``startTask5``, `similar_imgs` is reset at the top of
    every pass instead of being cleared on retry.  Candidates are ranked by
    Euclidean distance and handed to the task-6 visualizer/serializer.

    query_img_fd -- feature descriptor of the query image
    l, k         -- LSH layers and hashes per layer (select the pickle files)
    t            -- number of most-similar images requested
    """
    global hash_rep
    global database
    # NOTE(review): pickle.load on these files assumes they were produced by
    # our own store_pickles; never point this at untrusted data.
    with open(
            path.join(config.DATABASE_FOLDER,
                      "task5_hashRep_l" + str(l) + "_k" + str(k)),
            "rb") as f:
        hash_rep = pickle.load(f)
    with open(
            path.join(config.DATABASE_FOLDER,
                      "task5_indexedImgs_l" + str(l) + "_k" + str(k)),
            "rb") as f:
        database = pickle.load(f)
    query_rep = []  # one bit-string bucket key per layer
    for i in range(len(hash_rep)):
        img_bucket = img_index(query_img_fd, i)
        query_rep.append(img_bucket)
    # Get list of similar imgs
    similar_imgs = []
    bits_to_ignore = 0
    while (1):
        bins_to_consider = set()
        similar_imgs = []  # fresh candidate list each pass
        if (bits_to_ignore > 0):
            # Enumerate all 2^bits_to_ignore suffixes for the ignored bits.
            arr = [None] * bits_to_ignore
            ret_n_size_bin_strings(bits_to_ignore, arr, 0, bins_to_consider)
        for layer_ptr in range(len(database)):
            curr_layer_bit_rep = query_rep[layer_ptr]
            if bits_to_ignore > 0:
                for bin in bins_to_consider:
                    # Same prefix, every possible suffix.
                    key = curr_layer_bit_rep[:-1 * bits_to_ignore] + bin
                    if key in database[layer_ptr]:
                        similar_imgs.extend(database[layer_ptr][key])
            else:
                # Exact-bucket probe on the first pass.
                if curr_layer_bit_rep in database[layer_ptr]:
                    similar_imgs.extend(
                        database[layer_ptr][curr_layer_bit_rep])
        if (len(similar_imgs) >= t or bits_to_ignore == len(query_rep[0])):
            break  # Also look to break when all the buckets have been scanned.
        else:
            # Widen the search by one more ignored bit.
            bits_to_ignore += 1
    total_imgs = len(similar_imgs)
    unique_similar_imgs = set(similar_imgs)
    unique_imgs = len(unique_similar_imgs)
    print('Number of unique images: ', unique_imgs)
    print('Overall number of images considered: ', total_imgs)
    # Rank all unique images by Euclidean distance to the query descriptor.
    euclid_dist = dict()
    query_fd = query_img_fd
    for img in unique_similar_imgs:
        with open(path.join(config.FEATURES_FOLDER, img + '.json'),
                  'r',
                  encoding='utf-8') as f:
            img_fd = json.load(f)
        euclid_dist[img] = find_distance_2_vectors(np.asarray(query_fd),
                                                   np.asarray(img_fd[img]))
    sorted_imgs = sorted(euclid_dist.items(), key=lambda kv: kv[1])
    visualize_for_task6(sorted_imgs, l, k, t)
    store_results_for_task6(sorted_imgs, l, k, t)
def startTask3():
    """Task 3 (classification variant): PageRank seeded by dorsal images.

    Combines HOG features of the main image set with a user-chosen
    classification folder, builds a k-NN transition matrix over the
    PCA-reduced features, and runs personalized PageRank with the seed
    mass spread uniformly over all images labelled "dorsal" in the
    metadata CSV.  The sorted steady state is saved and plotted.

    NOTE(review): this is the second definition of ``startTask3`` in this
    file; at import time it shadows the earlier definition.  Rename one of
    them (and its callers) to keep both reachable.
    """
    print("start task3")
    k = input("Please enter the k value for outgoing edges ")
    # K = input("Please enter the K value for visualizing dominant images ")
    k = int(k)
    classify_folder = input("Enter the folder to classify images ")
    # NOTE(review): fileHOGFullExists is computed but never used.
    fileHOGFullExists = os.path.exists(
        join(config.DATABASE_FOLDER, "HOG_FULL.json"))
    fileExists = os.path.exists(
        join(config.DATABASE_FOLDER, "HOG_classify.json"))
    if not fileExists:
        # First run: extract HOG for the base set plus the classify folder
        # and cache the merged map as HOG_classify.json.
        hog = HOG()
        featureVector = hog.HOGFeatureDescriptor()
        featureVector_classify = hog.HOGFeatureDescriptorForFolder(
            join(config.CLASSIFICATION_FOLDER, classify_folder))
        featureVector.update(featureVector_classify)
        with open(join(config.DATABASE_FOLDER, "HOG_classify.json"),
                  'w+',
                  encoding='utf-8') as f:
            json.dump(featureVector, f, ensure_ascii=True, indent=4)
    with open(join(config.DATABASE_FOLDER, "HOG_classify.json"), "r") as f:
        data = json.load(f)
    reducerObject = list(data.values())
    pca = PCA_Reducer(reducerObject)
    latentFeatureDict = {}
    data = pca.reduceDimension(pca.featureDescriptor)
    print(data.shape)
    # Map PCA rows back to file names: base image folder first, then the
    # classification folder — this must match the order features were merged.
    i = 0
    imageNames = []
    for file in os.listdir(str(config.IMAGE_FOLDER)):
        filename = os.fsdecode(file)
        latent = data.iloc[i][:]
        imageNames.append(filename)
        latentFeatureDict[filename] = latent
        i = i + 1
    for file in os.listdir(join(config.CLASSIFICATION_FOLDER,
                                classify_folder)):
        filename = os.fsdecode(file)
        latent = data.iloc[i][:]
        imageNames.append(filename)
        latentFeatureDict[filename] = latent
        i = i + 1
    # k-NN transition matrix: column i holds inverse-distance weights of
    # image i's k nearest neighbours, normalized to sum to 1.
    adjacency_matrix = [[0 for _ in range(len(latentFeatureDict))]
                        for _ in range(len(latentFeatureDict))]
    for i in range(len(latentFeatureDict)):
        distances = []
        for j in range(len(latentFeatureDict)):
            distances.append(
                find_distance_2_vectors(latentFeatureDict[imageNames[i]],
                                        latentFeatureDict[imageNames[j]]))
        distances = np.asarray(distances)
        ind = np.argpartition(distances, k)[:k]
        total = 0
        for distance_index in ind:
            if distances[distance_index] != 0:
                total += 1 / distances[distance_index]
        for distance_index in ind:
            # This is adding only k nearest neighbours into the matrix and
            # doing ratio to get probablistic matrix
            if distances[distance_index] != 0:
                adjacency_matrix[distance_index][
                    i] = 1 / distances[distance_index] / total
    rowDict = {}
    i = 0
    for image in imageNames:
        rowDict[i] = image
        i = i + 1
    df = pd.DataFrame(adjacency_matrix, columns=imageNames)
    df.rename(index=rowDict, inplace=True)
    df.to_csv(join(config.DATABASE_FOLDER, "adjacency_matrix.csv"))
    I = np.identity(df.shape[1])
    print("Enter the file where the meta-data of the images is present")
    fileName = input()
    metaData = pd.read_csv(join(config.METADATA_FOLDER, fileName))
    # NOTE(review): set_index is not inplace here, so its return value is
    # discarded — this statement is a no-op (later code selects by boolean
    # mask, not by index, so behavior is unaffected).
    metaData.set_index('imageName')
    count = metaData.loc[metaData['aspectOfHand'].str.contains(
        "dorsal")].shape[0]
    print(count)
    # Seed: uniform 1/count over every dorsal-labelled image name.
    seed = pd.Series(0, index=df.index)
    seed[metaData.loc[metaData['aspectOfHand'].str.contains(
        "dorsal")].imageName] = 1 / count
    # Personalized PageRank, closed form, damping alpha = 0.50.
    page_rank = np.matmul(np.linalg.inv(I - .50 * df), 0.50 * seed)
    steady_state = pd.Series(page_rank, index=df.index)
    steady_state = steady_state.sort_values(ascending=True)
    steady_state.to_csv(join(config.DATABASE_FOLDER,
                             "steady_state_matrix.csv"))
    steady_state.plot()
    plt.show()
def startTask6():
    """Task 6: interactive relevance-feedback loop over task-5b results.

    Loads the feature map written by task 5b, then repeatedly asks the user
    to mark relevant/irrelevant images and re-ranks the collection with the
    chosen feedback system:
      1. SVM          -- trains on the labelled images, ranks by signed
                         distance to the separating hyperplane
      2. Decision tree -- ranks by prediction confidence
      3. PPR          -- difference of two personalized PageRanks (relevant
                         seeds minus irrelevant seeds)
      4. Probabilistic -- binary-independence-style term weighting
    Loops until the user answers anything other than n/N.
    """
    print("Task 6")
    feedbackSystem = int(
        input(
            "Please select the relevance feedback system \n1.SVM Based \n2.Decision Tree Based \n3.PPR Based \n4.Probabilistic based\n"
        ))
    filename = input("Please enter the name of the file (output of 5b)")
    with open(join(config.DATABASE_FOLDER, filename), "r") as f:
        data = json.load(f)
    imagesNames = list(data.keys())
    reducerObjectpp = list(data.values())
    pca_pp = PCA_Reducer(reducerObjectpp, k=len(imagesNames))
    latentFeatureDict = {}
    data_pp = pca_pp.reduceDimension(pca_pp.featureDescriptor)
    relavantImages = set()
    irrelavantImages = set()
    iteration = 0
    # Cached PPR state so the adjacency matrix is built only once across
    # feedback iterations (it does not depend on the user's labels).
    rowDict = {}
    calculated = False
    ch = "n"
    while ch == "n" or ch == "N":
        iteration = iteration + 1
        # Labels accumulate across iterations (the sets are never cleared).
        numberOfRelavant = int(input("Number of relevant images "))
        numberOfIrRelavant = int(input("Number of irrelevant images "))
        for i in range(numberOfRelavant):
            relavantImages.add(
                input("Please " + str(i + 1) + " relevant image "))
        for i in range(numberOfIrRelavant):
            irrelavantImages.add(
                input("Please " + str(i + 1) + " irrelevant image "))
        if feedbackSystem == 1:
            print("SVM Based Feedback system")
            # Train on the labelled images only: -1 = relevant, 1 = irrelevant.
            image_labels = []
            reducerObject = []
            for i in relavantImages:
                reducerObject.append(data.get(i))
                image_labels.append(-1)
            for i in irrelavantImages:
                reducerObject.append(data.get(i))
                image_labels.append(1)
            pca = PCA_Reducer(reducerObject,
                              k=len(relavantImages) + len(irrelavantImages))
            pca_result = pca.reduceDimension(pca.featureDescriptor)
            svm_object = SVM()
            print("Training SVM")
            svm_object.svm_fit(pca_result, image_labels)
            print("Done Training SVM")
            tempList = list(set(imagesNames) - set(relavantImages))
            unlabelledImages = list(set(tempList) - set(irrelavantImages))
            predicted_values = []
            # Keyed by hyperplane distance, valued by image name.
            # NOTE(review): two images with identical distance would collide.
            relevantDistances = {}
            irrelavantDistances = {}
            for i in unlabelledImages:
                pca_output = pca.reduceDimension([data.get(i)])
                output_label = np.asarray(svm_object.predict(pca_output))[0]
                pca_output = pca_output.values.tolist()
                distance = svm_object.distance(pca_output[0])
                predicted_values.append(output_label)
                if output_label == -1:
                    relevantDistances[distance] = i
                elif output_label == 1:
                    irrelavantDistances[distance] = i
            # The user-labelled images are ranked alongside the predictions.
            for i in relavantImages:
                pca_output = pca.reduceDimension([data.get(i)])
                pca_output = pca_output.values.tolist()
                distance = svm_object.distance(pca_output[0])
                relevantDistances[distance] = i
            for i in irrelavantImages:
                pca_output = pca.reduceDimension([data.get(i)])
                pca_output = pca_output.values.tolist()
                distance = svm_object.distance(pca_output[0])
                irrelavantDistances[distance] = i
            # Most-relevant first (largest margin on the relevant side),
            # then irrelevant images by increasing margin.
            relevantDistancesList = sorted(relevantDistances, reverse=True)
            irrelavantDistancesList = sorted(irrelavantDistances)
            output_images_list = []
            for i in relevantDistancesList:
                output_images_list.append(relevantDistances.get(i))
            for i in irrelavantDistancesList:
                output_images_list.append(irrelavantDistances.get(i))
            plotTheResultInChrome(relavantImages, irrelavantImages,
                                  output_images_list, iteration, "SVM")
        elif feedbackSystem == 2:
            print("Decision Tree Based Feedback system")
            reducerObject = []
            for i in relavantImages:
                reducerObject.append(data.get(i))
            for i in irrelavantImages:
                reducerObject.append(data.get(i))
            pca = PCA_Reducer(reducerObject,
                              k=len(relavantImages) + len(irrelavantImages))
            pca_result = pca.reduceDimension(pca.featureDescriptor)
            pca_result = pca_result.values.tolist()
            class_labels = [-1, 1]
            # Append the class label as the last column of each training row.
            for i in range(0, len(relavantImages)):
                pca_result[i].append(-1)
            count = len(relavantImages)
            for i in range(count, count + len(irrelavantImages)):
                pca_result[i].append(1)
            dtree_object = decisionTree()
            root = dtree_object.construct_dt(pca_result, class_labels, 2, 2)
            tempList = list(set(imagesNames) - set(relavantImages))
            unlabelledImages = list(set(tempList) - set(irrelavantImages))
            relevantConfidence = {}
            irrelevantConfidence = {}
            for i in unlabelledImages:
                pca_output = pca.reduceDimension([data.get(i)])
                pca_output = pca_output.values.tolist()[0]
                output_label = dtree_object.predict(root, pca_output)
                confidence = dtree_object.confidence(root, pca_output,
                                                     output_label)
                if output_label == -1:
                    # NOTE(review): this guard looks up `confidence` (a value)
                    # as a key in a dict keyed by image names, so it is almost
                    # always None — likely intended as a duplicate check;
                    # confirm the original intent.
                    if relevantConfidence.get(confidence) is None:
                        relevantConfidence[i] = confidence
                else:
                    irrelevantConfidence[i] = confidence
            for i in relavantImages:
                pca_output = pca.reduceDimension([data.get(i)])
                pca_output = pca_output.values.tolist()[0]
                output_label = dtree_object.predict(root, pca_output)
                confidence = dtree_object.confidence(root, pca_output,
                                                     output_label)
                relevantConfidence[i] = confidence
            for i in irrelavantImages:
                pca_output = pca.reduceDimension([data.get(i)])
                pca_output = pca_output.values.tolist()[0]
                output_label = dtree_object.predict(root, pca_output)
                confidence = dtree_object.confidence(root, pca_output,
                                                     output_label)
                irrelevantConfidence[i] = confidence
            # Relevant images by decreasing confidence, then irrelevant by
            # increasing confidence.
            relevantConfidenceList = sorted(relevantConfidence.items(),
                                            key=operator.itemgetter(1),
                                            reverse=True)
            irrelavantConfidenceList = sorted(irrelevantConfidence.items(),
                                              key=operator.itemgetter(1))
            output_images_list = []
            for key, value in relevantConfidenceList:
                output_images_list.append(key)
            for key, value in irrelavantConfidenceList:
                output_images_list.append(key)
            plotTheResultInChrome(relavantImages, irrelavantImages,
                                  output_images_list, iteration,
                                  "Decision Tree")
        elif feedbackSystem == 3:
            print("PPR Based Feedback system")
            if not calculated:
                # Build the 5-NN transition matrix once; it only depends on
                # the feature data, not on the feedback labels.
                for i in range(len(imagesNames)):
                    latent = data_pp.iloc[i][:]
                    latentFeatureDict[imagesNames[i]] = latent
                    rowDict[i] = imagesNames[i]
                adjacency_matrix = [[
                    0 for _ in range(len(latentFeatureDict))
                ] for _ in range(len(latentFeatureDict))]
                print("Generating Adjacency Matrix..")
                for i in range(len(latentFeatureDict)):
                    distances = []
                    for j in range(len(latentFeatureDict)):
                        distances.append(
                            find_distance_2_vectors(
                                latentFeatureDict[imagesNames[i]],
                                latentFeatureDict[imagesNames[j]]))
                    distances = np.asarray(distances)
                    ind = np.argpartition(distances, 5)[:5]
                    total = 0
                    for distance_index in ind:
                        if distances[distance_index] != 0:
                            total += 1 / distances[distance_index]
                    for distance_index in ind:
                        # This is adding only k nearest neighbours into the
                        # matrix and doing ratio to get probablistic matrix
                        if distances[distance_index] != 0:
                            adjacency_matrix[distance_index][
                                i] = 1 / distances[distance_index] / total
                calculated = True
            # Seed 1: uniform over relevant images; seed 2: uniform over
            # irrelevant images.
            seed = pd.Series(0, index=imagesNames)
            length = len(relavantImages)
            for img in relavantImages:
                seed.loc[img] = 1 / length
            seed2 = pd.Series(0, index=imagesNames)
            length2 = len(irrelavantImages)
            for img in irrelavantImages:
                seed2.loc[img] = 1 / length2
            df = pd.DataFrame(adjacency_matrix, columns=imagesNames)
            df.rename(index=rowDict, inplace=True)
            df.to_csv(
                join(config.DATABASE_FOLDER, "adjacency_matrix_task6_c.csv"))
            I = np.identity(df.shape[1])
            # Two personalized PageRanks (alpha = 0.75), one per seed set.
            page_rank = np.matmul(np.linalg.inv(I - .75 * df), 0.25 * seed)
            page_rank2 = np.matmul(np.linalg.inv(I - .75 * df), 0.25 * seed2)
            steady_state = pd.Series(page_rank, index=df.index)
            steady_state2 = pd.Series(page_rank2, index=df.index)
            steady_state.to_csv(
                join(config.DATABASE_FOLDER,
                     "steady_state_matrix_6_c_" + str(iteration) + ".csv"))
            # Final score: relevant-seeded rank minus irrelevant-seeded rank.
            finalResult = {}
            for i in range(len(imagesNames)):
                finalResult[imagesNames[i]] = steady_state[
                    imagesNames[i]] - steady_state2[imagesNames[i]]
            sortList = sorted(finalResult.items(),
                              key=lambda x: x[1],
                              reverse=True)
            finalResult = list(dict(sortList).keys())
            plotTheResultInChrome(relavantImages, irrelavantImages,
                                  finalResult, iteration, "PPR")
        elif feedbackSystem == 4:
            # NOTE(review): the second positional argument of read_json is
            # `orient`; passing "r" here looks like a mistaken file-mode
            # argument — confirm against pandas docs.
            images_df = pd.read_json(join(config.DATABASE_FOLDER, filename),
                                     "r")
            threshold = 0.02
            nQuery = []
            # Binary-independence-style weight per feature dimension q:
            # pq/uq estimate P(feature present | relevant / irrelevant).
            for q in range(images_df.shape[0]):
                nq = 0
                rq = 0
                irq = 0
                for column in images_df:
                    if images_df[column][q] >= threshold:
                        nq += 1
                        if column in relavantImages:
                            rq += 1
                        if column in irrelavantImages:
                            irq += 1
                pq = (rq + nq / images_df.shape[1]) / (len(relavantImages) +
                                                       1)
                uq = (irq + nq / images_df.shape[1]) / (
                    len(irrelavantImages) + 1)
                if pq * (1 - uq) / (uq * (1 - pq) + 1) <= 0:
                    nQuery.append(0)
                else:
                    # Log-odds weight, clamped to [0, 1].
                    q = math.log((pq * (1 - uq)) / (uq * (1 - pq)), 10)
                    if q < 0:
                        nQuery.append(0)
                    elif q > 1:
                        nQuery.append(1)
                    else:
                        nQuery.append(q)
            # Score each image by the dot product of its descriptor with the
            # learned weight vector.
            finalResult = {}
            for i in range(len(imagesNames)):
                product = np.dot(nQuery, reducerObjectpp[i])
                finalResult[imagesNames[i]] = product
            sortList = sorted(finalResult.items(),
                              key=lambda x: x[1],
                              reverse=True)
            finalResult = list(dict(sortList).keys())
            plotTheResultInChrome(relavantImages, irrelavantImages,
                                  finalResult, iteration, "Probabilistic")
        else:
            print("Wrong input")
            exit()
        ch = input(
            "Are you satisfied with the output? type Y for exit N for running again "
        )
def startTask4():
    """Task 4: classify test images as dorsal or palmar.

    Extracts HOG features for a labelled training folder (using the JSON
    cache when present), reduces them with PCA, then classifies the images
    in a test folder with the user-chosen classifier:
      1. SVM   2. Decision tree   3. PPR (personalized PageRank)
    Each branch prints accuracy against the metadata labels and renders
    the per-image predictions as an HTML page.
    """
    print("starting task4")
    print("Enter the folder path containing the labeled images")
    training_folder = input()
    print("Choose one of the below classifier")
    print("1. SVM classifer\n2. Decision-Tree classifier\n3. PPR based classifier")
    classifier = int(input())
    print("Enter the folder path containing the test images")
    test_folder = input()

    # ---- Feature extraction for the training set (JSON cache first) ----
    hog_feature_map = {}
    counter = 1
    training_files = os.listdir(training_folder)
    print("Extracting features for the training images!")
    for trainingFile in training_files:
        # Cached descriptor file is "<name>.<ext>.json".
        trainingFileJson = os.fsdecode(trainingFile).split('.')[0] + '.' + \
            os.fsdecode(trainingFile).split('.')[1] + '.json'
        fileExists = os.path.exists(
            join(config.FEATURES_FOLDER, trainingFileJson))
        if fileExists:
            with open(join(config.FEATURES_FOLDER, trainingFileJson),
                      "r") as f:
                data = json.load(f)
                hog_feature_map.update(data)
        else:
            data = HOG().HOGForSingleImage(training_folder, trainingFile)
            hog_feature_map.update(data)
        progress(counter, len(training_files))
        counter = counter + 1

    reducer_object = list(hog_feature_map.values())
    print("Performing PCA!")
    pca = PCA_Reducer(reducer_object)
    data = pca.reduceDimension(pca.featureDescriptor)
    print("Done performing PCA!")

    if classifier == 1:
        # Image labels: -1 for dorsal and 1 for palmar.
        metadata = pd.read_csv(config.METADATA_FOLDER)
        image_lables = get_labels(training_folder, metadata)
        svm_object = SVM()
        print("Training SVM")
        svm_object.svm_fit(data, image_lables)
        print("Done Training SVM")
        test_labels_map = {}
        predicted_values = []
        actual_values = get_labels(test_folder, metadata)
        for file in os.listdir(test_folder):
            test_file = file
            test_file_json = file + '.json'
            file_exists = os.path.exists(
                join(config.FEATURES_FOLDER, test_file_json))
            if file_exists:
                with open(join(config.FEATURES_FOLDER, test_file_json),
                          "r") as f:
                    data = json.load(f)
            else:
                data = HOG().HOGForSingleImage(test_folder, test_file)
            # Project the test descriptor into the training PCA space.
            pca_output = pca.reduceDimension(list(data.values()))
            output_label = np.asarray(svm_object.predict(pca_output))[0]
            predicted_values.append(output_label)
            if output_label == -1:
                test_labels_map[test_file] = "dorsal"
            else:
                test_labels_map[test_file] = "palmar"
        print(test_labels_map)
        accuracy = accuracy_score(actual_values, predicted_values)
        plotInChromeForTask4(test_labels_map, "Task_4_SVM", accuracy)
        print("Test Accuracy: ", accuracy)

    if classifier == 2:
        data = data.values.tolist()  # decision tree takes data as 2d array
        class_labels = [-1, 1]
        i = 0
        metadata = pd.read_csv(config.METADATA_FOLDER)
        # Append the label column (-1 dorsal / 1 palmar) to each training row.
        for file in os.listdir(training_folder):
            training_file = os.fsdecode(file)
            label = metadata.loc[metadata['imageName'] ==
                                 training_file]['aspectOfHand'].iloc[0]
            if "dorsal" in label:
                data[i].append(-1)
            else:
                data[i].append(1)
            i = i + 1
        dtree_object = decisionTree()
        root = dtree_object.construct_dt(data, class_labels, 5, 2)
        test_labels_map = {}
        predicted_values = []
        actual_values = get_labels(test_folder, metadata)
        for file in os.listdir(test_folder):
            test_file = os.fsdecode(file).split('.')[0] + '.' + \
                os.fsdecode(file).split('.')[1]
            test_file_json = os.fsdecode(file).split('.')[0] + '.' + \
                os.fsdecode(file).split('.')[1] + '.json'
            file_exists = os.path.exists(
                join(config.FEATURES_FOLDER, test_file_json))
            if file_exists:
                with open(join(config.FEATURES_FOLDER, test_file_json),
                          "r") as f:
                    data = json.load(f)
            else:
                data = HOG().HOGForSingleImage(test_folder, test_file)
            pca_output = pca.reduceDimension(list(data.values()))
            pca_output = pca_output.values.tolist()[0]
            output_label = dtree_object.predict(root, pca_output)
            predicted_values.append(output_label)
            if output_label == -1:
                test_labels_map[test_file] = "dorsal"
            else:
                test_labels_map[test_file] = "palmar"
        accuracy = accuracy_score(actual_values, predicted_values)
        plotInChromeForTask4(test_labels_map, "Task_4_DECISION", accuracy)
        print("Test Accuracy: ", accuracy)

    if classifier == 3:
        pca_for_all = data
        i = 0
        imageNames = []
        latentFeatureDict = {}
        # Preprocessing for the unlabelled (test) set.
        ppr_hog_map = {}
        for test_file in os.listdir(test_folder):
            trainingFileJson = str(test_file) + '.json'
            fileExists = os.path.exists(
                join(config.FEATURES_FOLDER, trainingFileJson))
            if fileExists:
                with open(join(config.FEATURES_FOLDER, trainingFileJson),
                          "r") as f:
                    data = json.load(f)
                    ppr_hog_map.update(data)
            else:
                data = HOG().HOGForSingleImage(test_folder, test_file)
                ppr_hog_map.update(data)
        # PCA over labelled + unlabelled descriptors together so every node
        # of the graph lives in the same latent space.
        reducer_object = list(hog_feature_map.values())
        pp_reducer_object = list(ppr_hog_map.values())
        pp_reducer_object = reducer_object + pp_reducer_object
        pca = PCA_Reducer(pp_reducer_object)
        unlabelled_ppr_data = pca.reduceDimension(pca.featureDescriptor)
        pca_for_all = unlabelled_ppr_data
        # Row order: training images first, then test images.
        for file in os.listdir(str(training_folder)):
            filename = os.fsdecode(file)
            latent = pca_for_all.iloc[i][:]
            imageNames.append(filename)
            latentFeatureDict[filename] = latent
            i = i + 1
        for file in os.listdir(join(test_folder)):
            filename = os.fsdecode(file)
            latent = pca_for_all.iloc[i][:]
            imageNames.append(filename)
            latentFeatureDict[filename] = latent
            i = i + 1
        print("Generating Adjacency Matrix..")
        # 20-NN transition matrix, inverse-distance weighted, columns
        # normalized to probability distributions.
        adjacency_matrix = [[0 for _ in range(len(latentFeatureDict))]
                            for _ in range(len(latentFeatureDict))]
        for i in range(len(latentFeatureDict)):
            distances = []
            for j in range(len(latentFeatureDict)):
                distances.append(
                    find_distance_2_vectors(latentFeatureDict[imageNames[i]],
                                            latentFeatureDict[imageNames[j]]))
            distances = np.asarray(distances)
            ind = np.argpartition(distances, 20)[:20]
            total = 0
            for distance_index in ind:
                if distances[distance_index] != 0:
                    total += 1 / distances[distance_index]
            for distance_index in ind:
                # Only the k nearest neighbours enter the matrix; dividing
                # by `total` makes the column probabilistic.
                if distances[distance_index] != 0:
                    adjacency_matrix[distance_index][
                        i] = 1 / distances[distance_index] / total
        rowDict = {}
        i = 0
        for image in imageNames:
            rowDict[i] = image
            i = i + 1
        df = pd.DataFrame(adjacency_matrix, columns=imageNames)
        df.rename(index=rowDict, inplace=True)
        df.to_csv(
            join(config.DATABASE_FOLDER, "adjacency_matrix_for_task_4.csv"))
        I = np.identity(df.shape[1])
        # Seed 1: uniform over dorsal training images; seed 2: uniform over
        # palmar training images.
        seed = pd.Series(0, index=imageNames)
        metadata = pd.read_csv(config.METADATA_FOLDER)
        image_lables = get_labels(training_folder, metadata)
        count = image_lables.count(-1)
        val = 1 / count
        for i in range(len(os.listdir(training_folder))):
            if image_lables[i] == -1:
                seed.loc[imageNames[i]] = val
        seed2 = pd.Series(0, index=imageNames)
        count2 = image_lables.count(1)
        val2 = 1 / count2
        for i in range(len(os.listdir(training_folder))):
            if image_lables[i] == 1:
                # BUGFIX: previously assigned `val` (the dorsal weight
                # 1/count) here, so the palmar seed was mis-normalized.
                seed2.loc[imageNames[i]] = val2
        # Personalized PageRank (alpha = 0.75) from each seed set.
        page_rank = np.matmul(np.linalg.inv(I - .75 * df), 0.25 * seed)
        page_rank2 = np.matmul(np.linalg.inv(I - .75 * df), 0.25 * seed2)
        steady_state = pd.Series(page_rank, index=df.index)
        steady_state2 = pd.Series(page_rank2, index=df.index)
        test_labels_map = {}
        predicted_values = []
        # A test image is dorsal if the dorsal-seeded rank dominates.
        for file in os.listdir(join(test_folder)):
            if steady_state[file] >= steady_state2[file]:
                test_labels_map[file] = "dorsal"
                predicted_values.append(-1)
            else:
                # BUGFIX: label was "palmer", inconsistent with the "palmar"
                # spelling used by every other classifier branch.
                test_labels_map[file] = "palmar"
                predicted_values.append(1)
        actual_values = get_labels(test_folder, metadata)
        accuracy = accuracy_score(actual_values, predicted_values)
        plotInChromeForTask4(test_labels_map, "Task_4_PPR", accuracy)
        print("Test Accuracy: ", accuracy)
        steady_state = steady_state.sort_values(ascending=True)
        steady_state.to_csv(
            join(config.DATABASE_FOLDER, "steady_state_matrix_for_task_4.csv"))
        steady_state.plot()
        plt.show()