def predict_rfmm_distance(V, W, classId, XhT, patClassIdTest, gama=1):
    """
    Classify test samples, breaking membership ties with ``rfmm_distance``.

    Prediction using the distance in the paper "A refined Fuzzy min-max
    neural network with new learning procedure for pattern classification".

    For every sample the hyperboxes with the maximum Simpson membership are
    collected.  If those hyperboxes carry more than one class label, the
    hyperbox with the smallest ``rfmm_distance`` to the sample wins;
    otherwise the first maximal hyperbox wins.

    Parameters
    ----------
    V, W : arrays indexed by hyperbox along the first axis
        Hyperbox bounds passed to ``simpsonMembership`` / ``rfmm_distance``
        (presumably lower and upper bounds respectively -- confirm against
        those helpers).
    classId : array
        Class label of each hyperbox.
    XhT : array
        Test samples (rows = objects, columns = features).  A 1-D input is
        treated as a single sample.
    patClassIdTest : array
        True class label of each test sample.
    gama : number, default 1
        Forwarded unchanged to ``simpsonMembership``.

    Returns
    -------
    Bunch with
        summis          : number of misclassified samples (np.int64)
        misclass        : float array, 1.0 where the prediction is wrong
        predicted_class : object array holding the predicted labels
    """
    if len(XhT.shape) == 1:
        XhT = XhT.reshape(1, -1)

    yX = XhT.shape[0]
    predicted_class = np.full(yX, None)
    misclass = np.zeros(yX)

    for i in range(yX):
        # Only the current row of memberships is ever needed, so no
        # (samples x hyperboxes) matrix is kept around.
        memberships = np.asarray(simpsonMembership(XhT[i, :], V, W, gama))
        bmax = memberships.max()
        # indexes of all hyperboxes sharing the maximum membership
        maxVind = np.nonzero(memberships == bmax)[0]

        if len(np.unique(classId[maxVind])) > 1:
            # Tie between different classes: repeat the sample once per
            # tied hyperbox and let rfmm_distance pick the closest one.
            XgT_mat = np.ones((len(maxVind), 1)) * XhT[i]
            dist = rfmm_distance(XgT_mat, V[maxVind], W[maxVind])
            winner = maxVind[dist.argmin()]
        else:
            winner = maxVind[0]

        predicted_class[i] = classId[winner]
        misclass[i] = classId[winner] != patClassIdTest[i]

    summis = np.sum(misclass).astype(np.int64)
    return Bunch(summis=summis,
                 misclass=misclass,
                 predicted_class=predicted_class)
def predict(V,
            W,
            classId,
            XhT,
            patClassIdTest,
            gama=1,
            is_using_manhattan=True):
    """
    FMNN classifier (test routine)

      result = predict(V,W,classId,XhT,patClassIdTest,gama)

    INPUT
      V                   Tested model hyperbox lower bounds
      W                   Tested model hyperbox upper bounds
      classId             Input data (hyperbox) class labels (crisp)
      XhT                 Test input data (rows = objects, columns = features);
                          a 1-D array is treated as a single object
      patClassIdTest      Test data class labels (crisp)
      gama                Membership function slope (default: 1)
      is_using_manhattan  How to break a tie between hyperboxes of different
                          classes that share the maximum membership:
                          truthy -> pick the hyperbox whose centre (V + W) / 2
                                    is closest according to manhattan_distance
                          falsy  -> pick one of the tied classes at random

    OUTPUT
      result        A object with Bunch datatype containing all results as follows:
                      + summis           Number of misclassified objects
                      + misclass         Error map (1.0 = misclassified)
                      + predicted_class  Predicted label of every object
    """
    if len(XhT.shape) == 1:
        XhT = XhT.reshape(1, -1)

    yX = XhT.shape[0]
    predicted_class = np.full(yX, None)
    misclass = np.zeros(yX)

    for i in range(yX):
        # Memberships of the current sample in all hyperboxes; only this row
        # is needed, so no (objects x hyperboxes) matrix is stored.
        memberships = np.asarray(simpsonMembership(XhT[i, :], V, W, gama))
        bmax = memberships.max()
        # indexes of all hyperboxes with max membership
        maxVind = np.nonzero(memberships == bmax)[0]
        winner_cls = np.unique(classId[maxVind])

        if len(winner_cls) > 1:
            if is_using_manhattan:
                # One copy of the sample per tied hyperbox, compared with
                # the centre point of that hyperbox.
                XgT_mat = np.ones((len(maxVind), 1)) * XhT[i]
                avg_point_mat = (V[maxVind] + W[maxVind]) / 2
                maht_dist = manhattan_distance(avg_point_mat, XgT_mat)
                predicted_class[i] = classId[maxVind[maht_dist.argmin()]]
            else:
                # select random class among the tied ones
                predicted_class[i] = rd.choice(winner_cls)
        else:
            predicted_class[i] = classId[maxVind[0]]

        misclass[i] = predicted_class[i] != patClassIdTest[i]

    summis = np.sum(misclass).astype(np.int64)
    return Bunch(summis=summis,
                 misclass=misclass,
                 predicted_class=predicted_class)
def predict(V, W, classId, XhT, patClassIdTest, gama = 1):
    """
    FMNN classifier (test routine) reporting per-class memberships.

      result = predict(V,W,classId,XhT,patClassIdTest,gama)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Input data (hyperbox) class labels (crisp)
      XhT               Test input data (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp)
      gama              Membership function slope (default: 1)

    OUTPUT
      result           A object with Bunch datatype containing all results as follows:
                          + summis           Number of misclassified objects
                          + misclass         Binary error map
                          + sumamb           Number of objects with maximum membership in more than one class
                          + out              Soft class memberships
                          + mem              Hyperbox memberships

    An object counts as misclassified when the hyperboxes sharing the
    maximum membership belong to more than one class, or when none of them
    carries the object's true label.
    """
    # a single sample given as a 1-D vector becomes a one-row matrix
    if len(XhT.shape) == 1:
        XhT = XhT.reshape(1, -1)

    n_samples = XhT.shape[0]
    classes = np.unique(classId)
    noClasses = classes.size

    mem = np.zeros((n_samples, V.shape[0]))
    out = np.zeros((n_samples, noClasses))
    ambiguity = np.zeros(n_samples)
    misclass = np.zeros(n_samples)

    for row in range(n_samples):
        # memberships of this object in every hyperbox
        mem[row, :] = simpsonMembership(XhT[row, :], V, W, gama)
        row_mem = mem[row, :]
        bmax = row_mem.max()
        top_boxes = np.nonzero(row_mem == bmax)[0]

        # best membership reached within each class
        for col, label in enumerate(classes):
            out[row, col] = row_mem[classId == label].max()

        # how many distinct classes reach the overall maximum
        ambiguity[row] = np.sum(out[row, :] == bmax)

        if bmax == 0:
            print('zero maximum membership value')  # this is probably bad...

        top_labels = classId[top_boxes]
        tied_across_classes = len(np.unique(top_labels)) > 1
        # stored into a float array, so the flag ends up as 0.0 / 1.0
        misclass[row] = tied_across_classes or not np.any(
            top_labels == patClassIdTest[row])

    # results
    sumamb = np.sum(ambiguity > 1)
    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis = summis, misclass = misclass, sumamb = sumamb,
                 out = out, mem = mem)
def fit(self, Xh, patClassId):
    """
    Train the classifier online, one sample at a time (Simpson's FMNN).

    For each sample the same-class hyperbox with the highest membership is
    looked up.  If the sample is not already fully contained (membership
    != 1), that hyperbox is expanded when the sum of its edge lengths after
    expansion stays within ``self.teta * n_features``; otherwise a new
    point hyperbox is created.  After an expansion, the expanded hyperbox
    is overlap-tested against every other hyperbox and contracted against
    those of a different class.

    When ``self.isDraw`` is truthy, samples and hyperboxes are plotted on
    the canvas returned by ``self.initializeCanvasGraph`` (first 2 or 3
    features only).

    Parameters
    ----------
    Xh : 2-D array
        Input data (rows = objects, columns = features).
    patClassId : array
        Input data class labels (crisp).  When drawing, labels are also
        used as indices into the marker/colour tables.
        NOTE(review): an earlier docstring said label 0 marks an unlabeled
        item; this routine treats 0 like any other label -- confirm.

    Returns
    -------
    self, with ``V``, ``W``, ``classId`` updated and
    ``elapsed_training_time`` set (seconds, ``time.perf_counter`` based).
    """
    print("--Online Learning for Simpson's FMNN--")

    if self.isNorm:
        Xh = self.dataPreprocessing(Xh)

    time_start = time.perf_counter()

    yX, xX = Xh.shape
    n_draw_dims = min(xX, 3)  # at most three features can be plotted

    mark = np.array([
        '*', 'o', 'x', '+', '.', ',', 'v', '^', '<', '>', '1', '2', '3',
        '4', '8', 's', 'p', 'P', 'h', 'H', 'X', 'D', '|', '_'
    ])
    mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])

    # listLines[k] is the drawn artist of hyperbox k (only used when drawing)
    listLines = []
    drawing_canvas = None

    def class_color(label):
        # Colour of a class; labels beyond the palette fall back to black.
        return mark_col[label] if label < len(mark_col) else 'k'

    def discard_artist(idx):
        # Best-effort removal of the old drawing of hyperbox idx: a missing
        # or already-removed artist must not abort training.
        try:
            listLines[idx].remove()
        except Exception:
            pass

    def draw_box(idx):
        # Draw hyperbox idx in its class colour and return the new artist.
        return drawbox(np.asmatrix(self.V[idx, 0:n_draw_dims]),
                       np.asmatrix(self.W[idx, 0:n_draw_dims]),
                       drawing_canvas,
                       class_color(self.classId[idx]))[0]

    def add_box(sample, label):
        # Append a new point hyperbox for the sample (and draw it).
        self.V = np.vstack((self.V, sample))
        self.W = np.vstack((self.W, sample))
        self.classId = np.append(self.classId, label)
        if self.isDraw:
            listLines.append(draw_box(-1))
            self.delay()

    if self.isDraw:
        drawing_canvas = self.initializeCanvasGraph(
            "FMNN - Simpson's fuzzy min-max neural network", xX)
        if self.V.size > 0:
            # draw the hyperboxes of a pre-existing model
            color_ = np.array([class_color(c) for c in self.classId],
                              dtype=object)
            hyperboxes = drawbox(self.V[:, 0:n_draw_dims],
                                 self.W[:, 0:n_draw_dims],
                                 drawing_canvas, color_)
            listLines.extend(hyperboxes)
            self.delay()

    # for each input sample
    for i in range(yX):
        sample = Xh[i]
        classOfX = patClassId[i]

        if self.isDraw:
            # draw the input sample
            marker_ = mark[classOfX] if classOfX < len(mark) else 'd'
            if xX == 2:
                drawing_canvas.plot(Xh[i, 0], Xh[i, 1],
                                    color=class_color(classOfX),
                                    marker=marker_)
            else:
                # NOTE(review): indexes a third feature, so anything other
                # than 2-D data needs at least three columns -- confirm.
                drawing_canvas.plot([Xh[i, 0]], [Xh[i, 1]], [Xh[i, 2]],
                                    color=class_color(classOfX),
                                    marker=marker_)
            self.delay()

        if self.V.size == 0:
            # no model provided - the first sample becomes the first box
            self.V = np.array([sample])
            self.W = np.array([sample])
            self.classId = np.array([classOfX])
            if self.isDraw:
                listLines.append(draw_box(0))
                self.delay()
            continue

        idSameClassOfX = np.nonzero(self.classId == classOfX)[0]
        if len(idSameClassOfX) == 0:
            # first sample of this class
            add_box(sample, classOfX)
            continue

        b = simpsonMembership(sample, self.V[idSameClassOfX],
                              self.W[idSameClassOfX], self.gamma)
        max_mem_id = np.argmax(b)
        if b[max_mem_id] == 1:
            # sample already lies inside a hyperbox of its own class
            continue

        # index of the winner in the list of hyperboxes of all classes
        j = idSameClassOfX[max_mem_id]

        # test violation of the maximum hyperbox size
        V_new = np.minimum(self.V[j], sample)
        W_new = np.maximum(self.W[j], sample)
        if not ((W_new - V_new).sum() <= self.teta * xX):
            add_box(sample, classOfX)
            continue

        # adjust the j-th hyperbox
        self.V[j] = V_new
        self.W[j] = W_new
        if self.isDraw:
            discard_artist(j)
            listLines[j] = draw_box(j)
            self.delay()

        if self.V.shape[0] > 1:
            for ii in range(self.V.shape[0]):
                if ii == j:
                    continue
                caseDim = hyperboxOverlapTest(self.V, self.W, j, ii)
                if caseDim.size > 0 and self.classId[ii] != self.classId[j]:
                    self.V, self.W = hyperboxContraction(
                        self.V, self.W, caseDim, ii, j)
                    if self.isDraw:
                        # remove each old drawing independently so that a
                        # failure on one does not leave the other behind
                        discard_artist(ii)
                        discard_artist(j)
                        hyperboxes = drawbox(
                            self.V[[ii, j], 0:n_draw_dims],
                            self.W[[ii, j], 0:n_draw_dims],
                            drawing_canvas,
                            [class_color(self.classId[ii]),
                             class_color(self.classId[j])])
                        listLines[ii] = hyperboxes[0]
                        listLines[j] = hyperboxes[1]
                        self.delay()

    time_end = time.perf_counter()
    self.elapsed_training_time = time_end - time_start

    return self
def pruning_val(self, XTest, patClassIdTest, accuracy_threshold=0.5):
    """
    Prune the hyperboxes stored in self (V, W, classId) using a validation set.

    Every validation sample is assigned to the hyperbox with the maximum
    membership (ties are broken at random), and per-hyperbox counts of
    correct and wrong assignments are collected.  Two candidate models are
    then built:

      * "remove": keep only hyperboxes that won at least one sample and
        reached ``accuracy_threshold``; if that removes every hyperbox of
        some class, one randomly chosen hyperbox of that class is restored;
      * "keep":   the accurate hyperboxes plus all hyperboxes that never
        won a sample.

    Both candidates are scored with ``predict`` on the validation data and
    the one with fewer misclassifications replaces the current model (the
    "remove" candidate wins ties).

    INPUT
      XTest               Validation data (rows = objects, columns = features)
      patClassIdTest      Validation data class labels (crisp)
      accuracy_threshold  The minimum accuracy for each hyperbox

    The method updates self.V, self.W and self.classId in place and
    returns None.
    """
    n_samples = XTest.shape[0]
    n_boxes = len(self.classId)

    # Per hyperbox: column 0 = samples it labelled correctly,
    #               column 1 = samples it labelled wrongly.
    no_predicted_samples_hyperboxes = np.zeros((n_boxes, 2))

    for i in range(n_samples):
        memberships = np.asarray(
            simpsonMembership(XTest[i, :], self.V, self.W, self.gamma))
        bmax = memberships.max()
        # indexes of all hyperboxes with max membership
        maxVind = np.nonzero(memberships == bmax)[0]

        if len(maxVind) == 1:
            winner = maxVind[0]
            is_hit = self.classId[winner] == patClassIdTest[i]
        else:
            # More than one hyperbox with highest membership => random choice.
            # In this branch a sample labelled 0 never counts as an error.
            winner = maxVind[np.random.randint(len(maxVind))]
            is_hit = (self.classId[winner] == patClassIdTest[i]
                      or patClassIdTest[i] == 0)

        no_predicted_samples_hyperboxes[winner, 0 if is_hit else 1] += 1

    # accuracy of every hyperbox that won at least one sample
    totals = no_predicted_samples_hyperboxes.sum(axis=1)
    was_used = totals != 0
    accurate = np.zeros(n_boxes, dtype=bool)
    accurate[was_used] = (no_predicted_samples_hyperboxes[was_used, 0]
                          / totals[was_used]) >= accuracy_threshold

    accuracy_larger_half = accurate.copy()
    accuracy_larger_half_keep_nojoin = accurate | ~was_used

    # Keep one hyperbox for each class that was pruned entirely.
    # np.nonzero returns a tuple, so take its first element to obtain the
    # index array and draw a single position from it.
    class_tmp = self.classId[accuracy_larger_half]
    for c in np.unique(self.classId):
        if c not in class_tmp:
            pos = np.nonzero(self.classId == c)[0]
            id_kept = np.random.randint(len(pos))
            accuracy_larger_half[pos[id_kept]] = True

    # candidate models
    V_prun_remove = self.V[accuracy_larger_half]
    W_prun_remove = self.W[accuracy_larger_half]
    classId_prun_remove = self.classId[accuracy_larger_half]

    V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
    W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
    classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]

    if not accuracy_larger_half_keep_nojoin.any():
        # NOTE(review): predict is expected to fail on a model without any
        # hyperbox -- confirm.  The "remove" candidate always retains at
        # least one hyperbox per class, so adopt it directly.
        self.V = V_prun_remove
        self.W = W_prun_remove
        self.classId = classId_prun_remove
        return

    result_prun_remove = predict(V_prun_remove, W_prun_remove,
                                 classId_prun_remove, XTest, patClassIdTest,
                                 self.gamma)
    result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep,
                                      classId_prun_keep, XTest,
                                      patClassIdTest, self.gamma)

    if result_prun_remove.summis <= result_prun_keep_nojoin.summis:
        self.V = V_prun_remove
        self.W = W_prun_remove
        self.classId = classId_prun_remove
    else:
        self.V = V_prun_keep
        self.W = W_prun_keep
        self.classId = classId_prun_keep
def fit(self, Xh, patClassId):
    """
    Train the classifier online, one sample at a time (refined FMNN).

    For each sample the same-class hyperbox with the highest membership is
    looked up.  If the sample is not already fully contained (membership
    != 1), that hyperbox is expanded only when every edge of the expanded
    box stays within ``self.teta`` and ``is_overlap_general_formulas``
    reports no overlap with hyperboxes of other classes; otherwise a new
    point hyperbox is created.  A newly created hyperbox that overlaps
    hyperboxes of other classes triggers ``hyperbox_contraction_rfmm`` on
    those hyperboxes.

    Parameters
    ----------
    Xh : 2-D array
        Input data (rows = objects, columns = features).
    patClassId : array
        Input data class labels (crisp).
        NOTE(review): an earlier docstring said label 0 marks an unlabeled
        item; this routine treats 0 like any other label -- confirm.

    Returns
    -------
    self, with ``V``, ``W``, ``classId`` updated and
    ``elapsed_training_time`` set (seconds).
    """
    if self.isNorm:
        Xh = self.dataPreprocessing(Xh)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    time_start = time.perf_counter()

    yX, _ = Xh.shape

    # for each input sample
    for i in range(yX):
        classOfX = patClassId[i]

        if self.V.size == 0:
            # no model provided - the first sample becomes the first box
            self.V = np.array([Xh[i]])
            self.W = np.array([Xh[i]])
            self.classId = np.array([classOfX])
            continue

        idSameClassOfX = np.nonzero(self.classId == classOfX)[0]
        idDifClassOfX = np.nonzero(self.classId != classOfX)[0]

        V_same = self.V[idSameClassOfX]
        V_dif = self.V[idDifClassOfX]
        W_dif = self.W[idDifClassOfX]

        isCreateNewBox = False
        if len(V_same) > 0:
            W_same = self.W[idSameClassOfX]
            b = simpsonMembership(Xh[i], V_same, W_same, self.gamma)
            max_mem_id = np.argmax(b)
            # index of the winner in the list of hyperboxes of all classes
            j = idSameClassOfX[max_mem_id]

            if b[max_mem_id] != 1:
                # candidate bounds of the winner after absorbing the sample
                V_cmp = np.minimum(self.V[j], Xh[i])
                W_cmp = np.maximum(self.W[j], Xh[i])

                # expand only if no edge exceeds teta and the expanded box
                # does not overlap hyperboxes of other classes
                adjust = bool(((W_cmp - V_cmp) <= self.teta).all()) and \
                    not is_overlap_general_formulas(
                        V_dif, W_dif, V_cmp, W_cmp, False)

                if adjust:
                    self.V[j] = V_cmp
                    self.W[j] = W_cmp
                else:
                    # sample did not fit into the winner: create a new box
                    self.V = np.vstack((self.V, Xh[i]))
                    self.W = np.vstack((self.W, Xh[i]))
                    self.classId = np.append(self.classId, classOfX)
                    isCreateNewBox = True
        else:
            # first sample of this class: create a new hyperbox
            self.V = np.vstack((self.V, Xh[i]))
            self.W = np.vstack((self.W, Xh[i]))
            self.classId = np.append(self.classId, classOfX)
            isCreateNewBox = True

        if isCreateNewBox and len(V_dif) > 0:
            is_ovl, hyperbox_ids_overlap, min_overlap_dimensions = \
                is_overlap_general_formulas(
                    V_dif, W_dif, self.V[-1], self.W[-1], True)
            if is_ovl:
                # convert ids within the other-class subset into ids of
                # all existing hyperboxes
                hyperbox_ids_overlap = idDifClassOfX[hyperbox_ids_overlap]
                # contract the overlapped hyperboxes against the new box,
                # which is the last one (index -1)
                self.V, self.W, self.classId = hyperbox_contraction_rfmm(
                    self.V, self.W, self.classId, hyperbox_ids_overlap,
                    -1, min_overlap_dimensions)

    time_end = time.perf_counter()
    self.elapsed_training_time = time_end - time_start

    return self