def clearParseToMAP(self, index):
    if index not in self.mapSumNodePerInstance:
        return
    if self.width == 1 and self.height == 1:
        return
    mapSumNodeIndex = self.mapSumNodePerInstance[index]
    decomp_id = self.decompPerInstance[index]
    del self.mapSumNodePerInstance[index]
    del self.decompPerInstance[index]
    decomp = Decomposition.getDecomp(decomp_id)
    regionLeft = Region.getRegion(decomp.regionLeftId)
    regionRight = Region.getRegion(decomp.regionRightId)
    # record changes
    Utility.parseBuffer.append(self.id)
    Utility.parseBuffer.append(mapSumNodeIndex)
    Utility.parseBuffer.append(decomp.regionLeftId)
    Utility.parseBuffer.append(decomp.regionRightId)
    Utility.parseBuffer.append(decomp.regionLeftMax)
    Utility.parseBuffer.append(decomp.regionRightMax)
    # recursively parse the tree
    regionLeft.clearParseToMAP(index)
    regionRight.clearParseToMAP(index)
def clearUnusedDecomp(self):
    """
    Top-down scan of each region, clearing every unused decomposition
    in that region.
    """
    # coarse regions first, then fine regions
    for region_id in list(self.__coarseRegionId) + list(self.__fineRegionId):
        region = Region.getRegion(region_id)
        # first, gather all the decompositions in use
        # (a decomposition in use is a child of one of the SumNodes)
        alive = []
        for node in region.sumNodes:
            if node.getNumOfChildren() > 0:
                alive.extend(node.getChildren().keys())
        all_decomps = region.prodNodes.keys()
        # find dead decomps using the set complement operation
        dead = set(all_decomps) - set(alive)
        # clear these dead decompositions
        for ddp in dead:
            # remove from the local region
            del region.prodNodes[ddp]
            # remove from the global decomposition dict
            Decomposition.deleteDecomp(ddp)
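# The sweep above is the classic mark-and-sweep pattern expressed as a set
# complement. A self-contained illustration on plain dicts (all names here
# are illustrative, not from the codebase):
prod_nodes = {'d1': 'P1', 'd2': 'P2', 'd3': 'P3'}
alive = ['d1', 'd3']                 # decomp ids still referenced by SumNodes
dead = set(prod_nodes) - set(alive)  # -> {'d2'}
for ddp in dead:
    del prod_nodes[ddp]              # only unreferenced decomps are dropped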
def clearParseToMAPFromBuffer(self, mapSumNodeIndex,
                              regionLeftId, regionRightId,
                              regionLeftMax, regionRightMax):
    if self.width == 1 and self.height == 1:
        return
    decomp_id = Decomposition.getDecompId(regionLeftId, regionRightId,
                                          regionLeftMax, regionRightMax)
    sumNode = self.sumNodes[mapSumNodeIndex]
    sumNode.removeChild(decomp_id, 1)
def setParseToMAPFromBuffer(self, maxSumNodeIndex,
                            regionLeftId, regionRightId,
                            regionLeftMax, regionRightMax):
    if self.width == 1 and self.height == 1:
        return
    decomp_id = Decomposition.getDecompId(regionLeftId, regionRightId,
                                          regionLeftMax, regionRightMax)
    sumNode = self.sumNodes[maxSumNodeIndex]
    prodNode = self.prodNodes[decomp_id]
    sumNode.addChild(decomp_id, prodNode, 1)
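# A minimal sketch of replaying the recorded parse on another copy of the
# network. setParseToMAP/clearParseToMAP append flat records of six values
# (region id, sum-node index, left/right region ids, left/right max indices)
# to Utility.parseBuffer; replayParseBuffer is a hypothetical helper name,
# not part of the original code.
def replayParseBuffer(buffer, clear=False):
    for i in range(0, len(buffer), 6):
        regionId, sumIdx, leftId, rightId, leftMax, rightMax = buffer[i:i + 6]
        region = Region.getRegion(regionId)
        if clear:
            region.clearParseToMAPFromBuffer(sumIdx, leftId, rightId,
                                             leftMax, rightMax)
        else:
            region.setParseToMAPFromBuffer(sumIdx, leftId, rightId,
                                           leftMax, rightMax)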
def setParseToMAP(self, index):
    # skip unit regions
    if self.width == 1 and self.height == 1:
        return
    if len(self.sumNodes) == 1:
        self.mapSumNodePerInstance[index] = 0
    mapSumNodeIndex = self.mapSumNodePerInstance[index]
    decomp_id = self.bestDecompPerSumNode[mapSumNodeIndex]
    self.decompPerInstance[index] = decomp_id
    decomp = Decomposition.getDecomp(decomp_id)
    regionLeft = Region.getRegion(decomp.regionLeftId)
    regionRight = Region.getRegion(decomp.regionRightId)
    regionLeft.mapSumNodePerInstance[index] = decomp.regionLeftMax
    regionRight.mapSumNodePerInstance[index] = decomp.regionRightMax
    # we are working on a single machine, so record the updates anyway
    Utility.parseBuffer.append(self.id)
    Utility.parseBuffer.append(mapSumNodeIndex)
    Utility.parseBuffer.append(decomp.regionLeftId)
    Utility.parseBuffer.append(decomp.regionRightId)
    Utility.parseBuffer.append(decomp.regionLeftMax)
    Utility.parseBuffer.append(decomp.regionRightMax)
    # create the ProdNode for this decomposition if it does not exist yet
    if decomp_id not in self.prodNodes:
        node = ProductNode()
        self.prodNodes[decomp_id] = node
        leftChild = regionLeft.getSumNode(decomp.regionLeftMax)
        rightChild = regionRight.getSumNode(decomp.regionRightMax)
        node.addChild(leftChild)
        node.addChild(rightChild)
    # recursively parse the tree
    regionLeft.setParseToMAP(index)
    regionRight.setParseToMAP(index)
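# A minimal sketch of how these routines could fit together in a hard-EM
# style update for one training instance. This is an assumption about the
# surrounding training loop (relearnInstance is a hypothetical helper name);
# only the three method names are from this file.
def relearnInstance(rootRegion, index, instance):
    rootRegion.clearParseToMAP(index)         # retract the instance's old parse
    rootRegion.MAPinference(index, instance)  # recompute the MAP parse
    rootRegion.setParseToMAP(index)           # commit the new parse counts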
from Decomposition import Decomposition
from Metadata import Metadata
import os
from pathlib import Path
import misc
from NMF import NMFModel
from Similarity import Similarity  # assumed module path; Similarity is used in task 2

task = input("Please specify the task number: ")
test_dataset_path = input("Please specify test folder path: ")

if task == '1':
    model = input("1.CM\n2.LBP\n3.HOG\n4.SIFT\nSelect model: ")
    decomposition_model = input(
        "1.PCA\n2.SVD\n3.NMF\n4.LDA\nSelect decomposition: ")
    k = int(input("Enter the number of latent features to consider: "))
    decomposition = Decomposition(decomposition_model, k, model,
                                  test_dataset_path)
    decomposition.dimensionality_reduction()
    decomposition.decomposition_model.print_term_weight_pairs(k)
elif task == '2':
    model = input("1.CM\n2.LBP\n3.HOG\n4.SIFT\nSelect model: ")
    decomposition_model = input(
        "1.PCA\n2.SVD\n3.NMF\n4.LDA\nSelect decomposition: ")
    image_id = input("Please specify the test image file name: ")
    k = int(input("Please specify the number of components: "))
    m = int(input("Please specify the value of m: "))
    decomposition = Decomposition(decomposition_model, k, model,
                                  test_dataset_path)
    similarity = Similarity(model, image_id, m)
    similarity.get_similar_images(test_dataset_path, decomposition,
def ppr_classifier():
    model = "CM"
    decomposition_model = "NMF"
    k = 8
    phase = input("Choose from \n. 1. Train \n. 2. Test \n.")
    if phase == "train":
        decomposition = Decomposition(decomposition_model, k, model,
                                      labelled_set_path, phase)
        decomposition.dimensionality_reduction()
        reduced_dim_folder_images_dict = misc.load_from_pickle(
            r"D:\MS_1\MWDB-project\Phase2\pickle_files",
            feature=(model + '_' + decomposition_model + '_' + phase))
        columns = list(reduced_dim_folder_images_dict.keys())
        image_image_graph_keys = list(
            it.combinations(reduced_dim_folder_images_dict.keys(), 2))
        image_image_df = pd.DataFrame(0.00, columns=columns, index=columns)
        image_image_df = norm_distance(image_image_df,
                                       reduced_dim_folder_images_dict,
                                       image_image_graph_keys, 2)
        misc.save2pickle(image_image_df, reduced_pickle_file_folder,
                         feature=(model + '_' + decomposition_model +
                                  '_image_image_df'))
    if phase == "test":
        decomposition = Decomposition(decomposition_model, k, model,
                                      unlabelled_set_path, phase)
        decomposition.dimensionality_reduction()
        labelled_images_feature_dict = misc.load_from_pickle(
            r"D:\MS_1\MWDB-project\Phase2\pickle_files",
            feature=(model + '_' + decomposition_model + '_train'))
        unlabelled_images_feature_dict = misc.load_from_pickle(
            r"D:\MS_1\MWDB-project\Phase2\pickle_files",
            feature=(model + '_' + decomposition_model + '_test'))
        K = 9  # number of dominant images
        prediction = {}
        image_image_df = misc.load_from_pickle(
            reduced_pickle_file_folder,
            feature=(model + '_' + decomposition_model + '_image_image_df'))
        for unlabelled_img in unlabelled_images_list:
            # Euclidean distance from the unlabelled image to every labelled image
            new_col = []
            for labelled_img in labelled_images_feature_dict.keys():
                features1 = unlabelled_images_feature_dict.get(unlabelled_img)
                features2 = labelled_images_feature_dict.get(labelled_img)
                distance = 0.00
                for i in range(len(features1)):
                    ind_distance = abs(features1[i] - features2[i])
                    distance += ind_distance ** 2
                distance = distance ** 0.5
                new_col.append(distance)
            # append a row and a column for the unlabelled image
            # (DataFrame.append was removed in pandas 2.0; this assumes pandas < 2.0)
            image_image_df = image_image_df.append(
                pd.Series(new_col, index=image_image_df.columns,
                          name=unlabelled_img))
            new_col.append(0)  # distance to itself
            image_image_df = image_image_df.assign(unlabelled_img=new_col)
            image_image_df = image_image_df.rename(
                {'unlabelled_img': unlabelled_img}, axis=1)
            image_image_df = image_image_df.loc[:, ~image_image_df.columns.duplicated()]
            image_image_df = image_image_df[~image_image_df.index.duplicated(keep='first')]
            image_image_features_df = k_neighbour_graph(
                image_image_df, image_image_df.columns, 8)
            dominant_img_list = steady_state([unlabelled_img],
                                             image_image_features_df,
                                             image_image_features_df.columns,
                                             K)
            # vote on the label using the dominant images
            palmar = 0
            dorsal = 0
            for img in dominant_img_list:
                if img != unlabelled_img:
                    class_list = metadata_df['aspectOfHand'].where(
                        metadata_df['imageName'] == img)
                    class_list = [class_l for class_l in class_list
                                  if str(class_l) != 'nan']
                    if class_list[0].split()[0] == "palmar":
                        palmar += 1
                    if class_list[0].split()[0] == "dorsal":
                        dorsal += 1
            if dorsal >= palmar:
                prediction[unlabelled_img] = "dorsal"
            else:
                prediction[unlabelled_img] = "palmar"
            # reload the distance matrix so the next image starts clean
            image_image_df = misc.load_from_pickle(
                reduced_pickle_file_folder,
                feature=(model + '_' + decomposition_model + '_image_image_df'))
        print(prediction)
        # accuracy against the ground-truth labels
        correct = 0
        for image_name in prediction.keys():
            class_list = unlabelled_metadata_df['aspectOfHand'].where(
                unlabelled_metadata_df['imageName'] == image_name)
            class_list = [class_l for class_l in class_list
                          if str(class_l) != 'nan']
            if class_list[0].split()[0] == prediction.get(image_name):
                correct += 1
        print(correct / len(prediction.keys()))
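# A minimal sketch of the Personalized PageRank power iteration that
# steady_state() presumably performs (an assumption: the real helper is not
# shown here, and ppr_steady_state, beta, and iters are illustrative names).
# seeds are the restart nodes; adj_df is the square image-image weight
# DataFrame produced by k_neighbour_graph.
import numpy as np

def ppr_steady_state(seeds, adj_df, beta=0.85, iters=100):
    cols = list(adj_df.columns)
    teleport = np.zeros(len(cols))
    for s in seeds:
        teleport[cols.index(s)] = 1.0 / len(seeds)
    A = adj_df.values.astype(float)
    sums = A.sum(axis=0)
    sums[sums == 0] = 1.0   # avoid division by zero for isolated nodes
    M = A / sums            # column-stochastic transition matrix
    rank = teleport.copy()
    for _ in range(iters):  # power iteration with restarts
        rank = beta * M.dot(rank) + (1 - beta) * teleport
    # images ranked by steady-state probability, most dominant first
    return [img for img, _ in sorted(zip(cols, rank), key=lambda kv: -kv[1])]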
import os
from pathlib import Path

import misc
from Decomposition import Decomposition
from FeaturesImages import FeaturesImages  # assumed module path, matching the class name
from Metadata import Metadata


class LabelFeatures:
    def __init__(self, labelled_dataset_path='', unlabelled_dataset_path='',
                 feature_name='HOG', decomposition_name='SVD'):
        self.labelled_dataset_path = labelled_dataset_path
        self.unlabelled_dataset_path = unlabelled_dataset_path
        self.reduced_pickle_file_folder = os.path.join(
            Path(os.path.dirname(__file__)).parent, 'Phase2', 'pickle_files')
        self.main_pickle_file_folder = os.path.join(
            Path(os.path.dirname(__file__)).parent, 'Phase1')
        self.dorsal_features = None
        self.palmar_features = None
        self.decomposition = None
        self.unlabelled_dataset_features = None
        self.feature_name = feature_name
        self.decomposition_name = decomposition_name
        self.decomposed_feature = self.feature_name + "_" + self.decomposition_name

    def get_unlabelled_dataset_features(self):
        self.unlabelled_dataset_features = misc.load_from_pickle(
            self.reduced_pickle_file_folder,
            'unlabelled_' + self.decomposed_feature)
        return self.unlabelled_dataset_features

    def set_features(self):
        if self.decomposition_name != '':
            self.decomposition = Decomposition(self.decomposition_name, 100,
                                               self.feature_name,
                                               self.labelled_dataset_path)
            self.decomposition.dimensionality_reduction()
        else:
            test_dataset_folder_path = os.path.abspath(
                os.path.join(Path(os.getcwd()).parent,
                             self.labelled_dataset_path))
            print('Getting the Model Features from Phase1')
            features_obj = FeaturesImages(self.feature_name,
                                          test_dataset_folder_path)
            features_obj.compute_features_images_folder()
        self.unlabelled_dataset_features = \
            self.get_unlabelled_images_decomposed_features()
        misc.save2pickle(self.unlabelled_dataset_features,
                         self.reduced_pickle_file_folder,
                         feature='unlabelled_' + self.decomposed_feature)
        print("Getting features for dorsal images")
        self.dorsal_features = self.get_features('dorsal')
        print("Getting features for palmar images")
        self.palmar_features = self.get_features('palmar')

    def get_label_features(self, label):
        if label == "dorsal":
            if self.dorsal_features is None:
                self.set_features()
            return self.dorsal_features
        elif label == "palmar":
            if self.palmar_features is None:
                self.set_features()
            return self.palmar_features

    def get_features(self, label):
        test_dataset_folder_path = os.path.abspath(
            os.path.join(Path(os.getcwd()).parent, self.labelled_dataset_path))
        images_list = list(
            misc.get_images_in_directory(test_dataset_folder_path).keys())
        metadata = Metadata(images_list)
        if self.feature_name != 'SIFT':
            metadata.save_label_decomposed_features(label,
                                                    self.decomposed_feature)
            features = misc.load_from_pickle(
                self.reduced_pickle_file_folder,
                self.decomposed_feature + '_' + label)
        else:
            features = {}
            database_features = misc.load_from_pickle(
                self.main_pickle_file_folder, self.feature_name)
            label_images_list = metadata.get_specific_metadata_images_list(
                feature_dict={'aspectOfHand': label})
            for image in label_images_list:
                features[image] = database_features[image]
        return features

    def get_unlabelled_images_decomposed_features(self):
        test_dataset_folder_path = os.path.abspath(
            os.path.join(Path(os.getcwd()).parent,
                         self.unlabelled_dataset_path))
        images_list = list(
            misc.get_images_in_directory(test_dataset_folder_path).keys())
        images_decomposed_features = {}
        for image_id in images_list:
            features_images = FeaturesImages(self.feature_name,
                                             test_dataset_folder_path)
            test_image_path = os.path.join(test_dataset_folder_path, image_id)
            test_image_features = [
                features_images.compute_image_features(test_image_path)]
            if self.decomposition_name != '':
                decomposed_features = \
                    self.decomposition.decomposition_model \
                        .get_new_image_features_in_latent_space(test_image_features)
                images_decomposed_features[image_id] = decomposed_features
            else:
                images_decomposed_features[image_id] = test_image_features
        return images_decomposed_features
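# A minimal usage sketch of the class above; the dataset paths are
# illustrative placeholders, not paths from the project.
if __name__ == '__main__':
    lf = LabelFeatures(labelled_dataset_path='Labelled/Set1',
                       unlabelled_dataset_path='Unlabelled/Set1',
                       feature_name='HOG', decomposition_name='SVD')
    dorsal_features = lf.get_label_features('dorsal')  # triggers set_features() on first call
    unlabelled_features = lf.get_unlabelled_dataset_features()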
def MAPinference(self, index, instance):
    self.mapSumNodeIndex = -1
    self.mapSumNodeProb = 100
    self.mapProdNodeProb = 100

    # randomly choose an unused SumNode (if any)
    unusedNodes = []
    for i, node in enumerate(self.sumNodes):
        if node.getNumOfChildren() == 0:
            unusedNodes.append(i)
    nodeIndex = -1
    if len(unusedNodes) > 0:
        nodeIndex = np.random.randint(0, len(unusedNodes))
        nodeIndex = unusedNodes[nodeIndex]

    # try to find a better decomposition of this region
    cl = self.columnLeft
    cr = self.columnRight
    ru = self.rowUp
    rd = self.rowDown
    step = self.interval
    decompOptions = []

    # try to decompose into left and right parts; the loop variable is
    # named split so it does not shadow the index parameter
    for split in range(cl + step, cr, step):
        lr_id = Region.getRegionId(ru, rd, cl, split)
        rr_id = Region.getRegionId(ru, rd, split, cr)
        lr = Region.getRegion(lr_id)
        rr = Region.getRegion(rr_id)
        snl = lr.sumNodes[lr.mapSumNodeIndex]
        snr = rr.sumNodes[rr.mapSumNodeIndex]
        if snl.getLogValue() == Node.ZERO or snr.getLogValue() == Node.ZERO:
            max_value = Node.ZERO
        else:
            max_value = snl.getLogValue() + snr.getLogValue()
        if len(decompOptions) == 0 or max_value > self.mapProdNodeProb:
            self.mapProdNodeProb = max_value
            decompOptions = []
        if max_value == self.mapProdNodeProb:
            str_id = Decomposition.getDecompId(lr_id, rr_id,
                                               lr.mapSumNodeIndex,
                                               rr.mapSumNodeIndex)
            decompOptions.append(str_id)

    # try to decompose into up and down parts
    for split in range(ru + step, rd, step):
        ur_id = Region.getRegionId(ru, split, cl, cr)
        dr_id = Region.getRegionId(split, rd, cl, cr)
        ur = Region.getRegion(ur_id)
        dr = Region.getRegion(dr_id)
        snu = ur.sumNodes[ur.mapSumNodeIndex]
        snd = dr.sumNodes[dr.mapSumNodeIndex]
        if snu.getLogValue() == Node.ZERO or snd.getLogValue() == Node.ZERO:
            max_value = Node.ZERO
        else:
            max_value = snu.getLogValue() + snd.getLogValue()
        if len(decompOptions) == 0 or max_value > self.mapProdNodeProb:
            self.mapProdNodeProb = max_value
            decompOptions = []
        if max_value == self.mapProdNodeProb:
            str_id = Decomposition.getDecompId(ur_id, dr_id,
                                               ur.mapSumNodeIndex,
                                               dr.mapSumNodeIndex)
            decompOptions.append(str_id)

    # randomly choose one of the best decompositions
    idx = np.random.randint(0, len(decompOptions))
    mapDecomp = decompOptions[idx]

    # evaluate existing ProdNodes/Decomps on this instance
    for d in self.prodNodes:
        self.prodNodes[d].evaluate()

    # temporary list for finding mapSumNodeIndex
    mapSumNodeOptions = []
    self.bestDecompPerSumNode = [''] * len(self.sumNodes)
    for i, node in enumerate(self.sumNodes):
        if node.getNumOfChildren() == 0:
            continue
        node.evaluate()
        mapSumNodeProbOption = 0
        bestDecompOptions = []  # candidate best children for this SumNode
        for decomp_id in node.getChildren():
            child = node.getChild(decomp_id)
            # the following computes the new value of this SumNode if we
            # vote this child in during the inference process
            old_value = node.getLogValue() + np.log(node.getCounts())
            child_value = child.getLogValue()
            # np.logaddexp computes log(exp(.) + exp(.)) while avoiding
            # underflow/overflow
            value = np.logaddexp(old_value, child_value)
            if len(bestDecompOptions) == 0 or value > mapSumNodeProbOption:
                bestDecompOptions = []
                mapSumNodeProbOption = value
            if value == mapSumNodeProbOption:
                bestDecompOptions.append(decomp_id)
        # mapDecomp is a new Decomposition (child) for this SumNode
        if mapDecomp not in node.getChildren():
            value = self.mapProdNodeProb
            # the new child is not the only effective child
            if node.getLogValue() != Node.ZERO:
                value = node.getLogValue() + np.log(node.getCounts())
                # same log-exponential trick
                value = np.logaddexp(self.mapProdNodeProb, value)
            value -= Parameters.prior
            if len(bestDecompOptions) == 0 or value > mapSumNodeProbOption:
                bestDecompOptions = []
                mapSumNodeProbOption = value
                bestDecompOptions.append(mapDecomp)
        # get the final log value of this SumNode
        node.setLogValue(mapSumNodeProbOption - np.log(node.getCounts() + 1))
        # find the new best decomposition
        # (maybe one of the old ones or the new one)
        choice = np.random.randint(0, len(bestDecompOptions))
        self.bestDecompPerSumNode[i] = bestDecompOptions[choice]
        if len(mapSumNodeOptions) == 0 \
                or node.getLogValue() > self.mapSumNodeProb:
            self.mapSumNodeProb = node.getLogValue()
            mapSumNodeOptions = []
        if node.getLogValue() == self.mapSumNodeProb:
            mapSumNodeOptions.append(i)

    # consider the unused SumNode as a MAP candidate
    if nodeIndex >= 0:
        node = self.sumNodes[nodeIndex]
        node.setLogValue(self.mapProdNodeProb
                         - np.log(node.getCounts() + 1)
                         - Parameters.prior)
        self.bestDecompPerSumNode[nodeIndex] = mapDecomp
        if len(mapSumNodeOptions) == 0 \
                or node.getLogValue() > self.mapSumNodeProb:
            self.mapSumNodeProb = node.getLogValue()
            mapSumNodeOptions = []
            mapSumNodeOptions.append(nodeIndex)

    # find the MAP SumNode
    choice = np.random.randint(0, len(mapSumNodeOptions))
    self.mapSumNodeIndex = mapSumNodeOptions[choice]
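# Why np.logaddexp: computing log(exp(a) + exp(b)) directly underflows or
# overflows for large-magnitude log probabilities, while the identity
# log(exp(a) + exp(b)) = m + log(1 + exp(min(a, b) - m)), m = max(a, b),
# stays stable. A quick self-contained check:
import numpy as np

a, b = -1000.0, -1001.0
naive = np.log(np.exp(a) + np.exp(b))  # exp underflows to 0 -> -inf
stable = np.logaddexp(a, b)            # approx -999.687, as expected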
def decision_tree_input():
    model = "CM"
    decomposition_model = "NMF"
    k = 8
    phase = input("Choose from \n. 1. Train \n. 2. Test \n.")
    if phase == "train":
        decomposition = Decomposition(decomposition_model, k, model,
                                      labelled_set_path, phase)
        decomposition.dimensionality_reduction()
        labelled_images_feature_dict = misc.load_from_pickle(
            r"D:\MS_1\MWDB-project\Phase2\pickle_files",
            feature=(model + '_' + decomposition_model + '_train'))
        y_train = get_labels(labelled_images_feature_dict.keys(), metadata_df)
        X_train = np.vstack(labelled_images_feature_dict.values())
        y_train = np.asarray(y_train).reshape(len(y_train), 1)
        # last column of the dataset carries the class label
        dataset = np.concatenate((X_train, y_train), axis=1)
        tree = build_tree(dataset, 6, 1)
        misc.save2pickle(tree, reduced_pickle_file_folder,
                         feature=(model + '_' + decomposition_model + '_tree'))
    if phase == "test":
        tree = misc.load_from_pickle(
            reduced_pickle_file_folder,
            feature=(model + '_' + decomposition_model + '_tree'))
        decomposition = Decomposition(decomposition_model, k, model,
                                      unlabelled_set_path, phase)
        decomposition.dimensionality_reduction()
        unlabelled_images_feature_dict = misc.load_from_pickle(
            r"D:\MS_1\MWDB-project\Phase2\pickle_files",
            feature=(model + '_' + decomposition_model + '_test'))
        prediction = {}
        for key in unlabelled_images_feature_dict.keys():
            est_val = predict(tree, unlabelled_images_feature_dict[key])
            prediction[key] = "palmar" if est_val == 1 else "dorsal"
        print(prediction)
        # accuracy against the ground-truth labels
        correct = 0
        for image_name in prediction.keys():
            class_list = unlabelled_metadata_df['aspectOfHand'].where(
                unlabelled_metadata_df['imageName'] == image_name)
            class_list = [class_l for class_l in class_list
                          if str(class_l) != 'nan']
            if class_list[0].split()[0] == prediction.get(image_name):
                correct += 1
        print(correct / len(prediction.keys()))
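# A minimal sketch of the predict() traversal, assuming build_tree() produces
# the common dict-based CART node shape {'index', 'value', 'left', 'right'}
# with class labels at the leaves. This structure is an assumption, and
# predict_sketch is a hypothetical name; the real helpers are not shown here.
def predict_sketch(node, row):
    # descend left when the split feature falls below the split value
    branch = node['left'] if row[node['index']] < node['value'] else node['right']
    if isinstance(branch, dict):
        return predict_sketch(branch, row)
    return branch  # leaf: the predicted class label (0 or 1)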