def calculateProbability(self, idHyperbox, X_l, X_u, memVal):
    """
    Compute the selection probability of one hyperbox.

    INPUT:
        idHyperbox      Index of the hyperbox being considered
        X_l, X_u        Lower and upper bounds of the input data, indexed by
                        the sample ids stored in self.cardin[idHyperbox]
        memVal          Not used by the active formula (it only appears in a
                        disabled weighted variant of the computation)

    OUTPUT:
        1 when no sample is registered for the hyperbox; otherwise the
        fraction of the registered samples whose membership value in the
        hyperbox equals 1
    """
    inside = 0
    total = 0

    for sample_id in self.cardin[idHyperbox]:
        membership = memberG(X_l[sample_id], X_u[sample_id], self.V[idHyperbox], self.W[idHyperbox], self.gamma)
        total += 1
        if membership[0] == 1:
            # the sample reaches full membership in the current hyperbox
            inside += 1

    if total == 0:
        return 1

    return inside / total
def predict(V, W, classId, XlT, XuT, patClassIdTest, gama = 1, oper = 'min'):
    """
    GFMM classifier (test routine) returning only the error statistics

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Hyperbox class labels (crisp)
      XlT               Test data lower bounds (rows = objects, columns = features);
                        a 1-D input is treated as a single object
      XuT               Test data upper bounds (rows = objects, columns = features);
                        a 1-D input is treated as a single object
      patClassIdTest    Test data class labels (crisp); objects labelled
                        UNLABELED_CLASS are never counted as errors unless
                        one of the two rules below applies
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result            A Bunch with
                          + summis      Number of misclassified objects
                          + misclass    Error map (1 = misclassified, 0 = correct)

    An object is always marked as misclassified when its maximum membership
    is 0, or when the hyperboxes sharing the maximum membership belong to
    more than one class.
    """
    if XlT.ndim == 1:
        XlT = XlT.reshape(1, -1)
    if XuT.ndim == 1:
        XuT = XuT.reshape(1, -1)

    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)

    for idx in range(n_samples):
        # memberships of the current object in all hyperboxes
        memberships = memberG(XlT[idx, :], XuT[idx, :], V, W, gama, oper)
        top = memberships.max()

        if top == 0:
            print('zero maximum membership value')
            misclass[idx] = True
            continue

        # class labels of every hyperbox that reaches the maximum membership
        winner_labels = classId[np.nonzero(memberships == top)[0]]
        if len(np.unique(winner_labels)) > 1:
            # the object lies on a boundary between classes
            misclass[idx] = True
            continue

        is_hit = np.any(winner_labels == patClassIdTest[idx]) == True or patClassIdTest[idx] == UNLABELED_CLASS
        misclass[idx] = not is_hit

    summis = np.sum(misclass).astype(np.int64)
    return Bunch(summis = summis, misclass = misclass)
def predict(V, W, classId, XlT, XuT, patClassIdTest, gama = 1, oper = 'min'):
    """
    GFMM classifier (test routine) that also reports the predicted classes

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Hyperbox class labels (crisp)
      XlT               Test data lower bounds (rows = objects, columns = features);
                        a 1-D input is treated as a single object
      XuT               Test data upper bounds (rows = objects, columns = features);
                        a 1-D input is treated as a single object
      patClassIdTest    Test data class labels (crisp); objects labelled
                        UNLABELED_CLASS are never counted as errors
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result            A Bunch with
                          + summis           Number of misclassified objects
                          + misclass         Error map (1 = misclassified, 0 = correct)
                          + predicted_class  Predicted class of every object

    When the hyperboxes sharing the maximum membership belong to several
    classes, the predicted class is drawn with random.choice among them.
    """
    if XlT.ndim == 1:
        XlT = XlT.reshape(1, -1)
    if XuT.ndim == 1:
        XuT = XuT.reshape(1, -1)

    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    predicted_class = np.full(n_samples, None)

    for idx in range(n_samples):
        # memberships of the current object in all hyperboxes
        memberships = memberG(XlT[idx, :], XuT[idx, :], V, W, gama, oper)
        top = memberships.max()
        winner_boxes = np.nonzero(memberships == top)[0]
        candidate_classes = np.unique(classId[winner_boxes])

        if len(candidate_classes) > 1:
            # tie between classes: pick one of them at random
            predicted_class[idx] = random.choice(candidate_classes)
        else:
            predicted_class[idx] = classId[winner_boxes[0]]

        is_hit = predicted_class[idx] == patClassIdTest[idx] or patClassIdTest[idx] == UNLABELED_CLASS
        misclass[idx] = not is_hit

    summis = np.sum(misclass).astype(np.int64)
    return Bunch(summis = summis, misclass = misclass, predicted_class=predicted_class)
def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
    """
    Validation statistics for pruning the hyperboxes stored in self.V, self.W, self.classId

    INPUT
      XlT                 Validation data lower bounds (rows = objects, columns = features)
      XuT                 Validation data upper bounds (rows = objects, columns = features)
      patClassIdTest      Validation data class labels (crisp); label 0 is
                          treated as unlabeled when several hyperboxes tie
      accuracy_threshold  The minimum accuracy for each hyperbox
      newVerPredict       Not used by this definition

    Every validation object is assigned to the hyperbox with the highest
    membership (ties are broken with np.random.randint) and counted as a
    correct or wrong prediction of that hyperbox. Two boolean masks are then
    derived: hyperboxes reaching accuracy_threshold, and those hyperboxes plus
    the ones that never won an object.

    NOTE(review): this definition stops after computing the masks and the
    class lists; nothing is written back to self and nothing is returned.
    It looks like a truncated copy of the full routine - confirm before use.
    """
    n_samples = XlT.shape[0]
    n_boxes = len(self.classId)
    # column 0: correct predictions, column 1: wrong predictions
    no_predicted_samples_hyperboxes = np.zeros((n_boxes, 2))

    for i in range(n_samples):
        # only the memberships of the current object are needed, so no
        # (objects x hyperboxes) matrix is kept
        mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper)
        bmax = mem.max()
        maxVind = np.nonzero(mem == bmax)[0]  # all hyperboxes with max membership

        if len(maxVind) == 1:
            # a single winner: the label must match exactly
            id_min = maxVind[0]
            is_correct = self.classId[id_min] == patClassIdTest[i]
        else:
            # several winners: choose one at random; unlabeled objects (0)
            # never count against the chosen hyperbox
            id_min = maxVind[np.random.randint(len(maxVind))]
            is_correct = self.classId[id_min] == patClassIdTest[i] or patClassIdTest[i] == 0

        no_predicted_samples_hyperboxes[id_min, 0 if is_correct else 1] += 1

    # pruning masks based on the validation results
    n_correct = no_predicted_samples_hyperboxes[:, 0]
    n_total = no_predicted_samples_hyperboxes.sum(axis=1)
    is_used = n_total != 0
    accuracy = np.zeros(n_boxes)
    accuracy[is_used] = n_correct[is_used] / n_total[is_used]

    # plain bool dtype: the np.bool alias does not exist in NumPy 1.24-1.26
    accuracy_larger_half = is_used & (accuracy >= accuracy_threshold)
    accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~is_used

    # classes present before and after applying each mask
    current_classes = np.unique(self.classId)
    class_tmp = self.classId[accuracy_larger_half]
    class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
def removeContainedHyperboxes(self):
    """
    Remove all hyperboxes contained in other hyperboxes

    A hyperbox i is dropped when at least one other hyperbox has membership 1
    for it and every hyperbox with membership 1 for it carries the same class
    label as i. self.V, self.W and self.classId are filtered in place of the
    old arrays.

    Hyperboxes that contain each other (e.g. exact duplicates of one class)
    are not all removed: containers already marked for removal are ignored
    when counting, so one of them survives.
    """
    numBoxes = len(self.classId)
    # plain bool dtype: the np.bool alias does not exist in NumPy 1.24-1.26
    indtokeep = np.ones(numBoxes, dtype=bool)

    for i in range(numBoxes):
        memValue = memberG(self.V[i], self.W[i], self.V, self.W, self.gamma, self.oper)
        # boxes with membership 1 for box i; this always includes i itself
        containers = memValue == 1
        isInclude = (self.classId[containers] == self.classId[i]).all()

        # Count only containers that are still kept. Otherwise two identical
        # boxes would each be removed because of the other one.
        if isInclude and np.sum(containers & indtokeep) > 1:
            indtokeep[i] = False

    self.V = self.V[indtokeep, :]
    self.W = self.W[indtokeep, :]
    self.classId = self.classId[indtokeep]
def fit(self, X_l, X_u, patClassId, num_pat=None):
    """
    Train the classifier online, processing the input hyperboxes one by one

    For every input the method does one of three things: expands an existing
    hyperbox, only increases the sample counter of an existing hyperbox, or
    appends a new hyperbox. It updates self.V, self.W, self.classId and
    self.counter, and optionally draws each step when self.isDraw is set.

      X_l             Input data lower bounds (rows = objects, columns = features)
      X_u             Input data upper bounds (rows = objects, columns = features)
      patClassId      Input data class labels (crisp). Labels equal to
                      UNLABELED_CLASS are handled as unlabeled items (the
                      original documentation gives this value as 0 - confirm)
      num_pat         Number of samples represented by each input hyperbox
                      [X_l, X_u]; when None every input counts as one sample
    """
    #print('--Online Learning--')
    if self.isNorm == True:
        X_l, X_u = self.dataPreprocessing(X_l, X_u)

    #X_l = X_l.astype(np.float32)
    #X_u = X_u.astype(np.float32)
    # NOTE(review): time_start is not read anywhere in the visible body
    time_start = time.perf_counter()

    yX, xX = X_l.shape
    teta = self.teta  # maximum allowed hyperbox size per dimension (see the size test below)

    # marker and colour tables used for drawing, indexed by class label
    mark = np.array(['*', 'o', 'x', '+', '.', ',', 'v', '^', '<', '>', '1', '2', '3', '4', '8', 's', 'p', 'P', 'h', 'H', 'X', 'D', '|', '_'])
    mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])

    listLines = list()  # drawn hyperboxes, kept in the same order as self.V
    listInputSamplePoints = list()  # drawn input samples

    if self.isDraw:
        drawing_canvas = self.initializeCanvasGraph("GFMM - Online learning", xX)
        if self.V.size > 0:
            # draw the hyperboxes of an already existing model ('k' when the
            # class label has no entry in mark_col)
            color_ = np.array(['k'] * len(self.classId), dtype = object)
            for c in range(len(self.classId)):
                if self.classId[c] < len(mark_col):
                    color_[c] = mark_col[self.classId[c]]

            # only the first (at most) three dimensions are drawn
            hyperboxes = drawbox(self.V[:, 0:np.minimum(xX,3)], self.W[:, 0:np.minimum(xX,3)], drawing_canvas, color_)
            listLines.extend(hyperboxes)
            self.delay()

    self.misclass = 1
    # NOTE(review): threshold is not read anywhere in the visible body
    threshold = 0 # No using lemma for branch and bound

    # for each input sample
    for i in range(yX):
        classOfX = patClassId[i]

        # draw input samples
        if self.isDraw:
            if i == 0 and len(listInputSamplePoints) > 0:
                # reset input point drawing
                for point in listInputSamplePoints:
                    point.remove()
                listInputSamplePoints.clear()

            color_ = 'k'
            if classOfX < len(mark_col):
                color_ = mark_col[classOfX]

            if (X_l[i, :] == X_u[i, :]).all():
                # the input is a point (lower bound equals upper bound)
                marker_ = 'd'
                if classOfX < len(mark):
                    marker_ = mark[classOfX]

                if xX == 2:
                    inputPoint = drawing_canvas.plot(X_l[i, 0], X_l[i, 1], color = color_, marker=marker_)
                else:
                    inputPoint = drawing_canvas.plot([X_l[i, 0]], [X_l[i, 1]], [X_l[i, 2]], color = color_, marker=marker_)

                #listInputSamplePoints.append(inputPoint)
            else:
                # the input is a hyperbox
                inputPoint = drawbox(np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]), np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]), drawing_canvas, color_)

            listInputSamplePoints.append(inputPoint[0])
            self.delay()

        if self.V.size == 0:
            # no model provided - starting from scratch: the first input
            # becomes the first hyperbox. This branch can only be reached for
            # i == 0, because the model is non-empty afterwards.
            self.V = np.array([X_l[0]])
            self.W = np.array([X_u[0]])
            self.classId = np.array([patClassId[0]])
            if num_pat is None:
                self.counter = np.array([1]) # save number of samples of each hyperbox
            else:
                self.counter = np.array([num_pat[0]])

            if self.isDraw == True:
                # draw hyperbox
                box_color = 'k'
                if patClassId[0] < len(mark_col):
                    box_color = mark_col[patClassId[0]]

                hyperbox = drawbox(np.asmatrix(self.V[0, 0:np.minimum(xX,3)]), np.asmatrix(self.W[0, 0:np.minimum(xX,3)]), drawing_canvas, box_color)
                listLines.append(hyperbox[0])
                self.delay()
        else:
            # candidate hyperboxes: same class as the input, or unlabeled
            id_lb_sameX = np.nonzero(((self.classId == classOfX) | (self.classId == UNLABELED_CLASS)))[0]
            V_sameX = self.V[id_lb_sameX]

            if len(V_sameX) > 0:
                # With few low-dimensional hyperboxes this pre-selection costs more
                # than computing the membership for all hyperboxes and ignoring the
                # ones of other classes. With high-dimensional hyperboxes the
                # membership computation dominates, so restricting it to the
                # candidate hyperboxes significantly decreases the running time.
                W_sameX = self.W[id_lb_sameX]
                lb_sameX = self.classId[id_lb_sameX]

                b = memberG(X_l[i], X_u[i], V_sameX, W_sameX, self.gamma)
                index = np.argsort(b)[::-1]  # candidates ordered by decreasing membership

                # Expansion is attempted unless the best candidate has membership 1
                # and its label is compatible with the input (same label, or the
                # input is unlabeled).
                if b[index[0]] != 1 or (classOfX != lb_sameX[index[0]] and classOfX != UNLABELED_CLASS):
                    adjust = False

                    # hyperboxes the expanded box is tested against: those with a
                    # different label, plus the unlabeled ones
                    id_lb_diff = ((self.classId != classOfX) | (self.classId == UNLABELED_CLASS))
                    V_diff = self.V[id_lb_diff]
                    W_diff = self.W[id_lb_diff]

                    indcomp = np.nonzero((W_diff >= V_diff).all(axis = 1))[0] # examine only hyperboxes w/o missing dimensions, meaning that in each dimension upper bound is larger than lowerbound
                    no_check_overlap = False
                    if len(indcomp) == 0 or len(V_diff) == 0:
                        no_check_overlap = True
                    else:
                        # the *_save copies allow restoring the full set after the
                        # candidate itself was temporarily removed (see below)
                        V_diff = V_diff_save = V_diff[indcomp]
                        W_diff = W_diff_save = W_diff[indcomp]

                    for j in id_lb_sameX[index]:
                        minV_new = np.minimum(self.V[j], X_l[i])
                        maxW_new = np.maximum(self.W[j], X_u[i])

                        # test violation of max hyperbox size and class labels
                        if ((maxW_new - minV_new) <= teta).all() == True:
                            if no_check_overlap == False and classOfX == UNLABELED_CLASS and self.classId[j] == UNLABELED_CLASS:
                                # an unlabeled candidate is itself part of the test set:
                                # drop every row whose lower bound equals that of box j
                                keep_id = (V_diff != self.V[j]).any(1)
                                V_diff = V_diff[keep_id]
                                W_diff = W_diff[keep_id]

                            # Test overlap
                            # presumably directedIsOverlap returns True when the expanded
                            # box overlaps one of the given hyperboxes - confirm
                            if no_check_overlap == True or directedIsOverlap(V_diff, W_diff, minV_new, maxW_new) == False: # overlap test
                                # adjust the j-th hyperbox
                                self.V[j] = minV_new
                                self.W[j] = maxW_new

                                if num_pat is None:
                                    self.counter[j] = self.counter[j] + 1
                                else:
                                    self.counter[j] = self.counter[j] + num_pat[i]

                                # an unlabeled hyperbox takes over the label of the input
                                if classOfX != UNLABELED_CLASS and self.classId[j] == UNLABELED_CLASS:
                                    self.classId[j] = classOfX

                                if self.isDraw:
                                    # Handle drawing graph
                                    box_color = 'k'
                                    if self.classId[j] < len(mark_col):
                                        box_color = mark_col[self.classId[j]]

                                    # NOTE(review): bare except deliberately ignores any
                                    # failure while removing the old drawing of box j
                                    try:
                                        listLines[j].remove()
                                    except:
                                        pass

                                    hyperbox = drawbox(np.asmatrix(self.V[j, 0:np.minimum(xX, 3)]), np.asmatrix(self.W[j, 0:np.minimum(xX, 3)]), drawing_canvas, box_color)
                                    listLines[j] = hyperbox[0]
                                    self.delay()

                                adjust = True
                                break
                            else:
                                # expansion rejected: restore the full test set before
                                # trying the next candidate
                                if no_check_overlap == False and classOfX == UNLABELED_CLASS and self.classId[j] == UNLABELED_CLASS:
                                    V_diff = V_diff_save
                                    W_diff = W_diff_save

                    # if i-th sample did not fit into any existing box, create a new one
                    if not adjust:
                        self.V = np.concatenate((self.V, X_l[i].reshape(1, -1)), axis = 0)
                        self.W = np.concatenate((self.W, X_u[i].reshape(1, -1)), axis = 0)
                        self.classId = np.concatenate((self.classId, [classOfX]))
                        if num_pat is None:
                            self.counter = np.concatenate((self.counter, [1]))
                        else:
                            self.counter = np.concatenate((self.counter, [num_pat[i]]))

                        if self.isDraw:
                            # handle drawing graph
                            box_color = 'k'
                            if self.classId[-1] < len(mark_col):
                                box_color = mark_col[self.classId[-1]]

                            hyperbox = drawbox(np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]), np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]), drawing_canvas, box_color)
                            listLines.append(hyperbox[0])
                            self.delay()
                else:
                    # The best candidate already has membership 1: no expansion. Walk
                    # through the candidates with membership 1 until one carries
                    # exactly the label of the input, and credit the sample(s) to it.
                    t = 0
                    while (t + 1 < len(index)) and (b[index[t]] == 1) and (self.classId[id_lb_sameX[index[t]]] != classOfX):
                        t = t + 1

                    if b[index[t]] == 1 and self.classId[id_lb_sameX[index[t]]] == classOfX:
                        if num_pat is None:
                            self.counter[id_lb_sameX[index[t]]] = self.counter[id_lb_sameX[index[t]]] + 1
                        else:
                            self.counter[id_lb_sameX[index[t]]] = self.counter[id_lb_sameX[index[t]]] + num_pat[i]
            else:
                # no hyperbox of the same class (and no unlabeled one) exists yet:
                # the input becomes a new hyperbox
                self.V = np.concatenate((self.V, X_l[i].reshape(1, -1)), axis = 0)
                self.W = np.concatenate((self.W, X_u[i].reshape(1, -1)), axis = 0)
                self.classId = np.concatenate((self.classId, [classOfX]))
                if num_pat is None:
                    self.counter = np.concatenate((self.counter, [1]))
                else:
                    self.counter = np.concatenate((self.counter, [num_pat[i]]))

                if self.isDraw:
                    # handle drawing graph
                    box_color = 'k'
                    if self.classId[-1] < len(mark_col):
                        box_color = mark_col[self.classId[-1]]

                    hyperbox = drawbox(np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]), np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]), drawing_canvas, box_color)
                    listLines.append(hyperbox[0])
                    self.delay()
def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
    """
    Pruning based on validation data, applied to the hyperboxes stored in
    self.V, self.W, self.classId and self.counter

    INPUT
      XlT                 Validation data lower bounds (rows = objects, columns = features)
      XuT                 Validation data upper bounds (rows = objects, columns = features)
      patClassIdTest      Validation data class labels (crisp); objects labelled
                          UNLABELED_CLASS never count against a hyperbox when
                          several hyperboxes tie
      accuracy_threshold  The minimum accuracy for each hyperbox
      newVerPredict       + True: use the probability formula (membership weighted by
                                  the number of samples per hyperbox) to break ties and
                                  predict_with_probability for the final comparison
                          + False: break ties at random and use predict

    Each validation object is credited to the hyperbox that wins it. Two
    candidate models are then built:
      * "remove": only hyperboxes whose accuracy reaches accuracy_threshold
      * "keep":   the same hyperboxes plus those that never won an object
    For every class that would disappear, one randomly chosen hyperbox of
    that class is retained. The candidate with fewer misclassified validation
    objects (ties favour "remove") replaces the model in self. Nothing is
    returned.
    """
    n_samples = XlT.shape[0]
    n_boxes = len(self.classId)
    use_probability = newVerPredict == True
    # column 0: correct predictions, column 1: wrong predictions
    no_predicted_samples_hyperboxes = np.zeros((n_boxes, 2))

    # classifications
    for i in range(n_samples):
        mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper)
        bmax = mem.max()
        maxVind = np.nonzero(mem == bmax)[0]  # all hyperboxes with max membership

        if len(maxVind) == 1:
            # a single winner: the label must match exactly
            id_min = maxVind[0]
            is_correct = self.classId[id_min] == patClassIdTest[i]
        else:
            if use_probability:
                cls_same_mem = np.unique(self.classId[maxVind])
                if len(cls_same_mem) > 1:
                    id_min = None
                    if bmax == 1:
                        # prefer a tied hyperbox that holds exactly one sample
                        id_box_with_one_sample = np.nonzero(self.counter[maxVind] == 1)[0]
                        if len(id_box_with_one_sample) > 0:
                            id_min = random.choice(maxVind[id_box_with_one_sample])

                    if id_min is None:
                        sum_prod_denum = (mem[maxVind] * self.counter[maxVind]).sum()
                        if sum_prod_denum > 0:
                            # class probability = share of (membership * sample count)
                            max_prob = -1
                            pre_id_cls = None
                            for c in cls_same_mem:
                                id_cls = np.nonzero(self.classId[maxVind] == c)[0]
                                sum_pro_num = (mem[maxVind[id_cls]] * self.counter[maxVind[id_cls]]).sum()
                                tmp = sum_pro_num / sum_prod_denum

                                # equal probabilities: the class with more samples wins
                                if tmp > max_prob or (tmp == max_prob and pre_id_cls is not None and self.counter[maxVind[id_cls]].sum() > self.counter[maxVind[pre_id_cls]].sum()):
                                    max_prob = tmp
                                    pre_id_cls = id_cls
                                    id_min = random.choice(maxVind[id_cls])
                        else:
                            # All tied memberships are zero, so the probability would be
                            # 0/0 and no class could win; fall back to a random winner
                            # instead of reusing a stale or undefined index.
                            id_min = random.choice(maxVind)
                else:
                    # all tied hyperboxes share one class: any of them will do
                    id_min = random.choice(maxVind)
            else:
                # More than one hyperbox with highest membership => random choosing
                id_min = maxVind[np.random.randint(len(maxVind))]

            is_correct = self.classId[id_min] == patClassIdTest[i] or patClassIdTest[i] == UNLABELED_CLASS

        no_predicted_samples_hyperboxes[id_min, 0 if is_correct else 1] += 1

    # pruning masks based on the validation results
    n_correct = no_predicted_samples_hyperboxes[:, 0]
    n_total = no_predicted_samples_hyperboxes.sum(axis=1)
    is_used = n_total != 0
    accuracy = np.zeros(n_boxes)
    accuracy[is_used] = n_correct[is_used] / n_total[is_used]

    # plain bool dtype: the np.bool alias does not exist in NumPy 1.24-1.26
    accuracy_larger_half = is_used & (accuracy >= accuracy_threshold)
    accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~is_used

    # keep one hyperbox for every class that would be pruned completely
    current_classes = np.unique(self.classId)
    class_tmp = self.classId[accuracy_larger_half]
    class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
    for c in current_classes:
        # np.nonzero returns a tuple; [0] gives the index array, so exactly
        # one hyperbox of the class is drawn (not all of them)
        pos = np.nonzero(self.classId == c)[0]
        if c not in class_tmp:
            accuracy_larger_half[pos[np.random.randint(len(pos))]] = True
        if c not in class_tmp_keep:
            accuracy_larger_half_keep_nojoin[pos[np.random.randint(len(pos))]] = True

    V_prun_remove = self.V[accuracy_larger_half]
    W_prun_remove = self.W[accuracy_larger_half]
    classId_prun_remove = self.classId[accuracy_larger_half]
    numSample_prun_remove = self.counter[accuracy_larger_half]

    W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
    V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
    classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]
    # sample counts come from self.counter (not from the class labels)
    numSample_prun_keep = self.counter[accuracy_larger_half_keep_nojoin]

    if use_probability:
        result_prun_remove = predict_with_probability(V_prun_remove, W_prun_remove, classId_prun_remove, numSample_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
        result_prun_keep_nojoin = predict_with_probability(V_prun_keep, W_prun_keep, classId_prun_keep, numSample_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)
    else:
        result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
        result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)

    # adopt the candidate model with fewer validation errors
    if (result_prun_remove.summis <= result_prun_keep_nojoin.summis):
        self.V = V_prun_remove
        self.W = W_prun_remove
        self.classId = classId_prun_remove
        self.counter = numSample_prun_remove
    else:
        self.V = V_prun_keep
        self.W = W_prun_keep
        self.classId = classId_prun_keep
        self.counter = numSample_prun_keep
def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5):
    """
    Pruning based on validation data, applied to the hyperboxes stored in
    self.V, self.W and self.classId

    INPUT
      XlT                 Validation data lower bounds (rows = objects, columns = features)
      XuT                 Validation data upper bounds (rows = objects, columns = features)
      patClassIdTest      Validation data class labels (crisp); label 0 is
                          treated as unlabeled when several hyperboxes tie
      accuracy_threshold  The minimum accuracy for each hyperbox

    Each validation object is credited to the hyperbox with the highest
    membership (ties are broken at random). Two candidate models are built:
      * "remove": only hyperboxes whose accuracy reaches accuracy_threshold;
                  for every class that would disappear, one randomly chosen
                  hyperbox of that class is retained
      * "keep":   the hyperboxes reaching the threshold plus those that
                  never won an object
    The candidate with fewer misclassified validation objects according to
    predict (ties favour "remove") replaces the model in self. Nothing is
    returned.
    """
    n_samples = XlT.shape[0]
    n_boxes = len(self.classId)
    # column 0: correct predictions, column 1: wrong predictions
    no_predicted_samples_hyperboxes = np.zeros((n_boxes, 2))

    # classifications
    for i in range(n_samples):
        # only the memberships of the current object are needed, so no
        # (objects x hyperboxes) matrix is kept
        mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper)
        bmax = mem.max()
        maxVind = np.nonzero(mem == bmax)[0]  # all hyperboxes with max membership

        if len(maxVind) == 1:
            # a single winner: the label must match exactly
            id_min = maxVind[0]
            is_correct = self.classId[id_min] == patClassIdTest[i]
        else:
            # More than one hyperbox with highest membership => random choosing
            id_min = maxVind[np.random.randint(len(maxVind))]
            is_correct = self.classId[id_min] == patClassIdTest[i] or patClassIdTest[i] == 0

        no_predicted_samples_hyperboxes[id_min, 0 if is_correct else 1] += 1

    # pruning masks based on the validation results
    n_correct = no_predicted_samples_hyperboxes[:, 0]
    n_total = no_predicted_samples_hyperboxes.sum(axis=1)
    is_used = n_total != 0
    accuracy = np.zeros(n_boxes)
    accuracy[is_used] = n_correct[is_used] / n_total[is_used]

    # plain bool dtype: the np.bool alias does not exist in NumPy 1.24-1.26
    accuracy_larger_half = is_used & (accuracy >= accuracy_threshold)
    accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~is_used

    # keep one hyperbox for every class that would be pruned completely
    current_classes = np.unique(self.classId)
    class_tmp = self.classId[accuracy_larger_half]
    for c in current_classes:
        if c not in class_tmp:
            # np.nonzero returns a tuple; [0] gives the index array, so exactly
            # one hyperbox of the class is drawn (not all of them)
            pos = np.nonzero(self.classId == c)[0]
            accuracy_larger_half[pos[np.random.randint(len(pos))]] = True

    V_prun_remove = self.V[accuracy_larger_half]
    W_prun_remove = self.W[accuracy_larger_half]
    classId_prun_remove = self.classId[accuracy_larger_half]

    W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
    V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
    classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]

    result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
    result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)

    # Pruning: adopt the candidate model with fewer validation errors
    if (result_prun_remove.summis <= result_prun_keep_nojoin.summis):
        self.V = V_prun_remove
        self.W = W_prun_remove
        self.classId = classId_prun_remove
    else:
        self.V = V_prun_keep
        self.W = W_prun_keep
        self.classId = classId_prun_keep
def predictOnlineOfflineCombination(onlClassifier, offClassifier, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM online-offline classifier (test routine)

      result = predictOnlineOfflineCombination(onlClassifier, offClassifier, XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      onlClassifier     online classifier exposing V (hyperbox lower bounds),
                        W (hyperbox upper bounds) and classId (crisp labels)
      offClassifier     offline classifier exposing the same three attributes
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp); label 0 is never
                        counted as an error
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result            A Bunch with
                          + summis      Number of misclassified objects
                          + misclass    Error map (1 = misclassified, 0 = correct)
                          + out         Soft class memberships: per class, the larger
                                        of the two classifiers' maximum memberships
                                        (0 for a classifier without that class)

    The decision for an object is taken from the online classifier only when
    its maximum membership is strictly larger than the offline one.
    """
    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    classes = np.union1d(onlClassifier.classId, offClassifier.classId)

    mem_onl = np.zeros((n_samples, onlClassifier.V.shape[0]))
    mem_off = np.zeros((n_samples, offClassifier.V.shape[0]))
    out = np.zeros((n_samples, classes.size))

    def class_best(row_mem, box_labels, cls):
        # highest membership among the hyperboxes of one class, 0 if none
        selected = row_mem[box_labels == cls]
        return selected.max() if len(selected) > 0 else 0

    for i in range(n_samples):
        # memberships in all hyperboxes of the online classifier
        mem_onl[i, :] = memberG(XlT[i, :], XuT[i, :], onlClassifier.V, onlClassifier.W, gama, oper)
        top_onl = mem_onl[i, :].max()

        # memberships in all hyperboxes of the offline classifier
        mem_off[i, :] = memberG(XlT[i, :], XuT[i, :], offClassifier.V, offClassifier.W, gama, oper)
        top_off = mem_off[i, :].max()

        for j, cls in enumerate(classes):
            best_onl = class_best(mem_onl[i, :], onlClassifier.classId, cls)
            best_off = class_best(mem_off[i, :], offClassifier.classId, cls)
            out[i, j] = best_onl if best_onl > best_off else best_off

        # labels of the winning hyperboxes of the more confident classifier
        if top_onl > top_off:
            winner_labels = onlClassifier.classId[np.nonzero(mem_onl[i, :] == top_onl)[0]]
        else:
            winner_labels = offClassifier.classId[np.nonzero(mem_off[i, :] == top_off)[0]]

        misclass[i] = ~(np.any(winner_labels == patClassIdTest[i]) | (patClassIdTest[i] == 0))

    summis = np.sum(misclass).astype(np.int64)
    return Bunch(summis=summis, misclass=misclass, out=out)
def predict(V, W, classId, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM classifier (test routine) returning memberships and error statistics

      result = predict(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Hyperbox class labels (crisp)
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp); label 0 is never
                        counted as an error
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result            A Bunch with
                          + summis      Number of misclassified objects
                          + misclass    Error map (1 = misclassified, 0 = correct)
                          + sumamb      Number of objects with maximum membership in more than one class
                          + out         Soft class memberships (objects x classes)
                          + mem         Hyperbox memberships (objects x hyperboxes)
    """
    n_samples = XlT.shape[0]
    classes = np.unique(classId)

    misclass = np.zeros(n_samples)
    ambiguity = np.zeros((n_samples, 1))
    mem = np.zeros((n_samples, V.shape[0]))
    out = np.zeros((n_samples, classes.size))

    for row in range(n_samples):
        # memberships of the current object in all hyperboxes
        mem[row, :] = memberG(XlT[row, :], XuT[row, :], V, W, gama, oper)
        row_mem = mem[row, :]
        top = row_mem.max()
        winner_boxes = np.nonzero(row_mem == top)[0]

        # highest membership reached within each class
        for col, cls in enumerate(classes):
            out[row, col] = row_mem[classId == cls].max()

        # number of different classes that reach the maximum membership
        ambiguity[row, :] = np.sum(out[row, :] == top)

        if top == 0:
            print('zero maximum membership value')

        true_label = patClassIdTest[row]
        misclass[row] = ~(np.any(classId[winner_boxes] == true_label) | (true_label == 0))

    sumamb = np.sum(ambiguity[:, 0] > 1)
    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis, misclass=misclass, sumamb=sumamb, out=out, mem=mem)
def fit(self, X_l, X_u, patClassId):
    """
    Training the classifier with the online (incremental) learning algorithm

    Samples are presented one by one. A sample either expands an existing
    hyperbox of a compatible class (if the expanded box does not exceed teta
    in any dimension), after which the winner is tested for overlap against
    hyperboxes of other classes and contracted where needed, or it creates a
    new hyperbox. After each pass teta is multiplied by 0.9 and, while it is
    still >= self.tMin, the training set is re-classified; passes stop when
    no sample is misclassified or teta drops below self.tMin.

      X_l             Input data lower bounds (rows = objects, columns = features)
      X_u             Input data upper bounds (rows = objects, columns = features)
      patClassId      Input data class labels (crisp). patClassId[i] = UNLABELED_CLASS corresponds to an unlabeled item

    Returns self. The model is stored in self.V, self.W and self.classId,
    the last misclassification count in self.misclass and the training time
    in self.elapsed_training_time.
    """
    print('--Online Learning--')

    if self.isNorm == True:
        X_l, X_u = self.dataPreprocessing(X_l, X_u)

    time_start = time.perf_counter()

    yX, xX = X_l.shape
    teta = self.teta

    # Marker and colour palettes indexed by class id (drawing only)
    mark = np.array([
        '*', 'o', 'x', '+', '.', ',', 'v', '^', '<', '>', '1', '2', '3',
        '4', '8', 's', 'p', 'P', 'h', 'H', 'X', 'D', '|', '_'
    ])
    mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])

    listLines = list()                  # drawn artist of each hyperbox, by hyperbox index
    listInputSamplePoints = list()      # drawn artists of the input samples of the current pass

    if self.isDraw:
        drawing_canvas = self.initializeCanvasGraph("GFMM - Online learning", xX)

        if self.V.size > 0:
            # draw existed hyperboxes; classes beyond the palette are drawn in black
            color_ = np.array(['k'] * len(self.classId), dtype=object)
            for c in range(len(self.classId)):
                if self.classId[c] < len(mark_col):
                    color_[c] = mark_col[self.classId[c]]

            hyperboxes = drawbox(self.V[:, 0:np.minimum(xX, 3)],
                                 self.W[:, 0:np.minimum(xX, 3)],
                                 drawing_canvas, color_)
            listLines.extend(hyperboxes)
            self.delay()

    self.misclass = 1

    while self.misclass > 0 and teta >= self.tMin:
        # for each input sample
        for i in range(yX):
            classOfX = patClassId[i]

            # draw input samples
            if self.isDraw:
                if i == 0 and len(listInputSamplePoints) > 0:
                    # a new pass starts: reset input point drawing
                    for point in listInputSamplePoints:
                        point.remove()
                    listInputSamplePoints.clear()

                color_ = 'k'
                if classOfX < len(mark_col):
                    color_ = mark_col[classOfX]

                if (X_l[i, :] == X_u[i, :]).all():
                    # point sample: plot a marker
                    marker_ = 'd'
                    if classOfX < len(mark):
                        marker_ = mark[classOfX]

                    if xX == 2:
                        inputPoint = drawing_canvas.plot(X_l[i, 0], X_l[i, 1],
                                                         color=color_, marker=marker_)
                    else:
                        inputPoint = drawing_canvas.plot([X_l[i, 0]], [X_l[i, 1]], [X_l[i, 2]],
                                                         color=color_, marker=marker_)
                else:
                    # interval sample: draw it as a box
                    inputPoint = drawbox(
                        np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                        np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                        drawing_canvas, color_)

                listInputSamplePoints.append(inputPoint[0])
                self.delay()

            if self.V.size == 0:
                # no model provided - starting from scratch with the first sample
                self.V = np.array([X_l[0]])
                self.W = np.array([X_u[0]])
                self.classId = np.array([patClassId[0]])

                if self.isDraw == True:
                    # draw hyperbox
                    box_color = 'k'
                    if patClassId[0] < len(mark_col):
                        box_color = mark_col[patClassId[0]]

                    hyperbox = drawbox(
                        np.asmatrix(self.V[0, 0:np.minimum(xX, 3)]),
                        np.asmatrix(self.W[0, 0:np.minimum(xX, 3)]),
                        drawing_canvas, box_color)
                    listLines.append(hyperbox[0])
                    self.delay()

            else:
                # candidate hyperboxes: same class as the sample, or unlabelled
                id_lb_sameX = np.logical_or(self.classId == classOfX,
                                            self.classId == UNLABELED_CLASS)

                if id_lb_sameX.any() == True:
                    V_sameX = self.V[id_lb_sameX]
                    W_sameX = self.W[id_lb_sameX]
                    lb_sameX = self.classId[id_lb_sameX]
                    id_range = np.arange(len(self.classId))
                    id_processing = id_range[id_lb_sameX]   # candidate positions in the full model

                    b = memberG(X_l[i], X_u[i],
                                np.minimum(V_sameX, W_sameX),
                                np.maximum(V_sameX, W_sameX), self.gamma)
                    index = np.argsort(b)[::-1]             # candidates by decreasing membership
                    bSort = b[index]

                    # Nothing to do when the best candidate has membership 1 and a
                    # matching label (or the sample is unlabelled)
                    if bSort[0] != 1 or (classOfX != lb_sameX[index[0]] and classOfX != UNLABELED_CLASS):
                        adjust = False
                        for j in id_processing[index]:
                            # test violation of max hyperbox size and class labels
                            if (classOfX == self.classId[j] or self.classId[j] == UNLABELED_CLASS or classOfX == UNLABELED_CLASS) and (
                                    (np.maximum(self.W[j], X_u[i]) - np.minimum(self.V[j], X_l[i])) <= teta).all() == True:
                                # adjust the j-th hyperbox
                                self.V[j] = np.minimum(self.V[j], X_l[i])
                                self.W[j] = np.maximum(self.W[j], X_u[i])
                                indOfWinner = j
                                adjust = True
                                # an unlabelled hyperbox takes the label of a labelled sample
                                if classOfX != UNLABELED_CLASS and self.classId[j] == UNLABELED_CLASS:
                                    self.classId[j] = classOfX

                                if self.isDraw:
                                    # Handle drawing graph
                                    box_color = 'k'
                                    if self.classId[j] < len(mark_col):
                                        box_color = mark_col[self.classId[j]]

                                    # Best effort: the old artist may already be gone.
                                    # Only ordinary errors are ignored, so Ctrl-C and
                                    # SystemExit still propagate.
                                    try:
                                        listLines[j].remove()
                                    except Exception:
                                        pass

                                    hyperbox = drawbox(
                                        np.asmatrix(self.V[j, 0:np.minimum(xX, 3)]),
                                        np.asmatrix(self.W[j, 0:np.minimum(xX, 3)]),
                                        drawing_canvas, box_color)
                                    listLines[j] = hyperbox[0]
                                    self.delay()

                                break

                        # if i-th sample did not fit into any existing box, create a new one
                        if not adjust:
                            self.V = np.concatenate((self.V, X_l[i].reshape(1, -1)), axis=0)
                            self.W = np.concatenate((self.W, X_u[i].reshape(1, -1)), axis=0)
                            self.classId = np.concatenate((self.classId, [classOfX]))

                            if self.isDraw:
                                # handle drawing graph
                                box_color = 'k'
                                if self.classId[-1] < len(mark_col):
                                    box_color = mark_col[self.classId[-1]]

                                hyperbox = drawbox(
                                    np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                                    np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                                    drawing_canvas, box_color)
                                listLines.append(hyperbox[0])
                                self.delay()

                        elif self.V.shape[0] > 1:
                            # the winner was expanded: resolve overlaps with boxes of
                            # other classes (or with any box if the winner is unlabelled)
                            for ii in range(self.V.shape[0]):
                                if ii != indOfWinner and (
                                        self.classId[ii] != self.classId[indOfWinner]
                                        or self.classId[indOfWinner] == UNLABELED_CLASS):
                                    caseDim = hyperboxOverlapTest(self.V, self.W, indOfWinner, ii)  # overlap test

                                    if caseDim.size > 0:
                                        self.V, self.W = hyperboxContraction(
                                            self.V, self.W, caseDim, ii, indOfWinner)

                                        if self.isDraw:
                                            # Handle graph drawing
                                            boxii_color = boxwin_color = 'k'
                                            if self.classId[ii] < len(mark_col):
                                                boxii_color = mark_col[self.classId[ii]]

                                            if self.classId[indOfWinner] < len(mark_col):
                                                boxwin_color = mark_col[self.classId[indOfWinner]]

                                            # Best effort removal of the two old artists
                                            try:
                                                listLines[ii].remove()
                                                listLines[indOfWinner].remove()
                                            except Exception:
                                                pass

                                            hyperboxes = drawbox(
                                                self.V[[ii, indOfWinner], 0:np.minimum(xX, 3)],
                                                self.W[[ii, indOfWinner], 0:np.minimum(xX, 3)],
                                                drawing_canvas,
                                                [boxii_color, boxwin_color])
                                            listLines[ii] = hyperboxes[0]
                                            listLines[indOfWinner] = hyperboxes[1]
                                            self.delay()

                else:
                    # no hyperbox of a compatible class exists yet: create a new one
                    self.V = np.concatenate((self.V, X_l[i].reshape(1, -1)), axis=0)
                    self.W = np.concatenate((self.W, X_u[i].reshape(1, -1)), axis=0)
                    self.classId = np.concatenate((self.classId, [classOfX]))

                    if self.isDraw:
                        # handle drawing graph
                        box_color = 'k'
                        if self.classId[-1] < len(mark_col):
                            box_color = mark_col[self.classId[-1]]

                        hyperbox = drawbox(
                            np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                            np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                            drawing_canvas, box_color)
                        listLines.append(hyperbox[0])
                        self.delay()

        # shrink the maximum hyperbox size and check whether another pass is needed
        teta = teta * 0.9
        if teta >= self.tMin:
            result = predict(self.V, self.W, self.classId, X_l, X_u,
                             patClassId, self.gamma, self.oper)
            self.misclass = result.summis

    time_end = time.perf_counter()
    self.elapsed_training_time = time_end - time_start

    return self
def predict_with_manhattan(V, W, classId, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM classifier (test routine): when several hyperboxes of different classes
    share the maximum membership value, the tie is broken with the Manhattan
    distance between the sample centre and the hyperbox centres

      result = predict_with_manhattan(V,W,classId,XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Input data (hyperbox) class labels (crisp)
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp)
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A object with Bunch datatype containing all results as follows:
                          + summis               Number of misclassified objects
                          + misclass             Binary error map
                          + numSampleInBoundary  The number of samples in decision boundary
                          + predicted_class      Predicted class (None for unlabelled test samples)
                          + mem_vals             Maximum membership value of each sample
    """
    # a single 1-D sample becomes a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    mem_vals = np.zeros(n_samples)
    predicted_class = np.full(n_samples, None)
    numPointInBoundary = 0

    for i in range(n_samples):
        # unlabelled test samples are never counted as errors and get no prediction
        if patClassIdTest[i] == UNLABELED_CLASS:
            misclass[i] = False
            continue

        mem = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper)  # memberships for all hyperboxes
        bmax = mem.max()
        maxVind = np.nonzero(mem == bmax)[0]                    # every hyperbox reaching the maximum
        mem_vals[i] = bmax

        if len(np.unique(classId[maxVind])) > 1:
            # the tied hyperboxes disagree on the class: sample lies on a boundary
            numPointInBoundary = numPointInBoundary + 1
            n_tied = len(maxVind)

            # centre of the test sample, repeated once per tied hyperbox
            if (XlT[i] == XuT[i]).all():
                sample_centres = np.ones((n_tied, 1)) * XlT[i]
            else:
                lower_rep = np.ones((n_tied, 1)) * XlT[i]
                upper_rep = np.ones((n_tied, 1)) * XuT[i]
                sample_centres = (lower_rep + upper_rep) / 2

            # centres of the tied hyperboxes and their distance to the sample
            box_centres = (V[maxVind] + W[maxVind]) / 2
            distances = manhattan_distance(box_centres, sample_centres)
            winner = maxVind[distances.argmin()]
        else:
            winner = maxVind[0]

        predicted_class[i] = classId[winner]
        misclass[i] = False if classId[winner] == patClassIdTest[i] else True

    # results
    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis, misclass=misclass,
                 numSampleInBoundary=numPointInBoundary,
                 predicted_class=predicted_class, mem_vals=mem_vals)
def fit(self, X_l, X_u, patClassId):
    """
    Training the classifier with the agglomerative algorithm based on a full
    similarity matrix (slow version)

    Every input hyperbox starts as its own cluster. All pairwise similarities
    are computed, pairs at or above self.bthres are kept, and the list is
    scanned for the first same-class pair whose merged box does not overlap
    (isOverlap) and does not exceed self.teta in any dimension. After a merge
    the similarities are recomputed from scratch; training stops when a full
    scan produces no merge.

      X_l             Input data lower bounds (rows = objects, columns = features)
      X_u             Input data upper bounds (rows = objects, columns = features)
      patClassId      Input data class labels (crisp)

    Returns self. The model is stored in self.V, self.W, self.classId; the
    number of samples and the sample indices of each hyperbox are kept in
    self.cardin and self.clusters; the training time is stored in
    self.elapsed_training_time.
    """
    if self.isNorm == True:
        X_l, X_u = self.dataPreprocessing(X_l, X_u)

    time_start = time.perf_counter()

    self.V = X_l
    self.W = X_u
    self.classId = patClassId

    yX, xX = X_l.shape

    # Start with one sample per hyperbox unless bookkeeping was supplied beforehand
    if len(self.cardin) == 0 or len(self.clusters) == 0:
        self.cardin = np.ones(yX)
        self.clusters = np.empty(yX, dtype=object)
        for i in range(yX):
            self.clusters[i] = np.array([i], dtype=np.int32)

    def pairwise_memberships():
        # Similarity of every current hyperbox to all hyperboxes; the argument
        # order passed to memberG depends on the similarity type self.simil.
        n_boxes = self.V.shape[0]
        sim = np.zeros(shape=(n_boxes, n_boxes))
        if self.simil == 'short':
            for j in range(n_boxes):
                sim[j, :] = memberG(self.W[j], self.V[j], self.V, self.W, self.gamma, self.oper)
        elif self.simil == 'long':
            for j in range(n_boxes):
                sim[j, :] = memberG(self.V[j], self.W[j], self.W, self.V, self.gamma, self.oper)
        else:
            for j in range(n_boxes):
                sim[j, :] = memberG(self.V[j], self.W[j], self.V, self.W, self.gamma, self.oper)
        return sim

    def box_colors(palette):
        # One colour per hyperbox; classes beyond the palette are drawn in black
        # instead of raising IndexError.
        colors = np.empty(len(self.classId), dtype=object)
        for c in range(len(self.classId)):
            if self.classId[c] < len(palette):
                colors[c] = palette[self.classId[c]]
            else:
                colors[c] = 'k'
        return colors

    if self.isDraw:
        mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])
        drawing_canvas = self.initializeCanvasGraph("GFMM - AGGLO-SM-Slow version", xX)
        self.delay()

        # plot initial hyperbox
        Vt, Wt = self.pcatransform()
        drawbox(Vt, Wt, drawing_canvas, box_colors(mark_col))
        self.delay()

    # calculate all pairwise memberships
    b = pairwise_memberships()

    maxb = self.splitSimilarityMaxtrix(b, self.sing)    # get a sorted similarity (membership) list
    if len(maxb) > 0:
        maxb = maxb[maxb[:, 2] >= self.bthres, :]       # scrap membership values below threshold

    # training
    isTraining = True
    while isTraining:
        isTraining = False

        i = 0
        while i < maxb.shape[0]:
            keep = int(maxb[i, 0])      # hyperbox that absorbs the other one
            drop = int(maxb[i, 1])      # hyperbox that is removed after the merge

            # if both hyperboxes come from the same class, try to join them
            if self.classId[keep] == self.classId[drop]:
                # candidate model: `keep` is replaced by the union of both boxes,
                # `drop` is removed, everything else is left intact
                newV = np.concatenate((self.V[:keep],
                                       np.minimum(self.V[keep], self.V[drop]).reshape(1, -1),
                                       self.V[keep + 1:drop],
                                       self.V[drop + 1:]), axis=0)
                newW = np.concatenate((self.W[:keep],
                                       np.maximum(self.W[keep], self.W[drop]).reshape(1, -1),
                                       self.W[keep + 1:drop],
                                       self.W[drop + 1:]), axis=0)
                newClassId = np.concatenate((self.classId[:drop], self.classId[drop + 1:]))

                # adjust the hyperbox if no overlap and maximum hyperbox size is not violated
                if (not isOverlap(newV, newW, keep, newClassId)) and (
                        ((newW[keep] - newV[keep]) <= self.teta).all() == True):
                    isTraining = True
                    self.V = newV
                    self.W = newW
                    self.classId = newClassId

                    # merge the bookkeeping of both hyperboxes, then drop the removed entry
                    self.cardin[keep] = self.cardin[keep] + self.cardin[drop]
                    self.cardin = np.append(self.cardin[0:drop], self.cardin[drop + 1:])

                    self.clusters[keep] = np.append(self.clusters[keep], self.clusters[drop])
                    self.clusters = np.append(self.clusters[0:drop], self.clusters[drop + 1:])

                    # recalculate all pairwise memberships
                    yX, xX = self.V.shape
                    b = pairwise_memberships()

                    if self.V.shape[0] == 1:
                        maxb = np.array([])
                    else:
                        maxb = self.splitSimilarityMaxtrix(b, self.sing)    # get a sorted similarity (membership) list
                        if len(maxb) > 0:
                            maxb = maxb[maxb[:, 2] >= self.bthres, :]

                    if self.isDraw:
                        Vt, Wt = self.pcatransform()
                        color_ = box_colors(mark_col)
                        drawing_canvas.cla()
                        drawbox(Vt, Wt, drawing_canvas, color_)
                        self.delay()

                    # the candidate list was rebuilt: restart the scan from its top
                    break

            i = i + 1

    time_end = time.perf_counter()
    self.elapsed_training_time = time_end - time_start

    return self
def predict_with_probability_k_voting_new(V, W, classId, weights, XlT, XuT, patClassIdTest, K_threshold=5, gama=1, oper='min'):
    """
    GFMM classifier (test routine): the K hyperboxes with the highest weighted
    membership values vote for the class of each test sample

      result = predict_with_probability_k_voting_new(V,W,classId,weights,XlT,XuT,patClassIdTest,K_threshold,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Input data (hyperbox) class labels (crisp)
      weights           The weights of hyperboxes (multiplied into the membership values)
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp)
      K_threshold       Number of top-ranked hyperboxes taking part in the vote (default: 5)
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A object with Bunch datatype containing all results as follows:
                          + summis           Number of misclassified objects
                          + misclass         Binary error map
                          + predicted_class  Predicted class (None for unlabelled test samples)
    """
    # a single 1-D sample becomes a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    predicted_class = np.full(n_samples, None)

    for i in range(n_samples):
        # unlabelled test samples are never counted as errors and get no prediction
        if patClassIdTest[i] == UNLABELED_CLASS:
            misclass[i] = False
            continue

        # weighted memberships for all hyperboxes
        mem = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper)
        mem = mem * weights

        # the K best hyperboxes and the classes they represent
        ranking = np.argsort(mem)[::-1]
        top_ids = ranking[:K_threshold]
        top_labels = classId[top_ids]
        candidates = np.unique(top_labels)

        if len(candidates) == 1:
            predicted_class[i] = candidates[0]
        else:
            # Vote: the class with the largest sum of weighted memberships wins;
            # an exact tie goes to the class holding the larger single weight.
            best_score = -1
            best_weight = -1
            for c in candidates:
                members = top_ids[top_labels == c]
                score = np.sum(mem[members])
                top_weight = np.max(weights[members])
                if best_score < score or (best_score == score and best_weight < top_weight):
                    best_score = score
                    best_weight = top_weight
                    predicted_class[i] = c

        misclass[i] = False if predicted_class[i] == patClassIdTest[i] else True

    # results
    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis, misclass=misclass, predicted_class=predicted_class)
def predict_with_probability_weighted(V, W, classId, numSamples, weights, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    GFMM classifier (test routine): memberships are multiplied by the hyperbox
    weights; when hyperboxes of different classes share the maximum value, the
    class is chosen with a probability formula based on the number of samples
    in each tied hyperbox

      result = predict_with_probability_weighted(V,W,classId,numSamples,weights,XlT,XuT,patClassIdTest,gama,oper)

    INPUT
      V                 Tested model hyperbox lower bounds
      W                 Tested model hyperbox upper bounds
      classId           Input data (hyperbox) class labels (crisp)
      numSamples        Number of samples of each corresponding hyperbox contained in V and W
      weights           The weights of hyperboxes
      XlT               Test data lower bounds (rows = objects, columns = features)
      XuT               Test data upper bounds (rows = objects, columns = features)
      patClassIdTest    Test data class labels (crisp)
      gama              Membership function slope (default: 1)
      oper              Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result           A object with Bunch datatype containing all results as follows:
                          + summis               Number of misclassified objects
                          + misclass             Binary error map
                          + numSampleInBoundary  The number of samples in decision boundary
                          + predicted_class      Predicted class (None for unlabelled test samples)
                          + mem_vals             Maximum weighted membership value of each sample
    """
    # a single 1-D sample becomes a one-row matrix
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    n_samples = XlT.shape[0]
    misclass = np.zeros(n_samples)
    predicted_class = np.full(n_samples, None)
    mem_vals = np.zeros(n_samples)
    numPointInBoundary = 0

    for i in range(n_samples):
        # unlabelled test samples are never counted as errors and get no prediction
        if patClassIdTest[i] == UNLABELED_CLASS:
            misclass[i] = False
            continue

        # weighted memberships for all hyperboxes
        mem = memberG(XlT[i, :], XuT[i, :], V, W, gama, oper)
        mem = mem * weights
        bmax = mem.max()
        maxVind = np.nonzero(mem == bmax)[0]    # every hyperbox reaching the maximum
        mem_vals[i] = bmax

        tied_classes = np.unique(classId[maxVind])
        if len(tied_classes) <= 1:
            # all winners agree on the class
            chosen = classId[maxVind[0]]
            predicted_class[i] = chosen
            misclass[i] = False if predicted_class[i] == patClassIdTest[i] else True
            continue

        chosen = UNLABELED_CLASS
        use_probability = True

        if bmax == 1:
            # A winner containing exactly one sample takes precedence; one of
            # those hyperboxes is picked at random.
            single_sample_pos = np.nonzero(numSamples[maxVind] == 1)[0]
            if len(single_sample_pos) > 0:
                use_probability = False
                chosen = classId[int(random.choice(maxVind[single_sample_pos]))]

        if use_probability:
            numPointInBoundary = numPointInBoundary + 1

            # share of (membership * sample count) held by each class among the winners
            total_mass = (mem[maxVind] * numSamples[maxVind]).sum()
            best_prob = -1
            best_pos = None
            for c in tied_classes:
                pos = np.nonzero(classId[maxVind] == c)[0]
                boxes = maxVind[pos]
                prob = (mem[boxes] * numSamples[boxes]).sum() / total_mass

                # ties on probability go to the class backed by more samples
                if prob > best_prob or (
                        prob == best_prob and best_pos is not None
                        and numSamples[boxes].sum() > numSamples[maxVind[best_pos]].sum()):
                    best_prob = prob
                    chosen = c
                    best_pos = pos

        predicted_class[i] = chosen
        misclass[i] = False if chosen == patClassIdTest[i] else True

    # results
    summis = np.sum(misclass).astype(np.int64)

    return Bunch(summis=summis, misclass=misclass,
                 numSampleInBoundary=numPointInBoundary,
                 predicted_class=predicted_class, mem_vals=mem_vals)
def fit(self, X_l, X_u, patClassId):
    """
    Training the classifier with the accelerated agglomerative algorithm
    ("Faster AGGLO-2")

    Hyperboxes are visited one by one. For the k-th hyperbox, similarities to
    all hyperboxes of a compatible class (same label, or either side unlabelled)
    are computed and candidates at or above self.bthres are tried in order of
    decreasing similarity. The first candidate whose merged box does not exceed
    self.teta in any dimension and passes modifiedIsOverlap is merged. Full
    passes are repeated until one pass produces no merge.

      X_l             Input data lower bounds (rows = objects, columns = features)
      X_u             Input data upper bounds (rows = objects, columns = features)
      patClassId      Input data class labels (crisp)

    Returns self. The model is stored in self.V, self.W, self.classId and the
    training time in self.elapsed_training_time.
    """
    if self.isNorm == True:
        X_l, X_u = self.dataPreprocessing(X_l, X_u)

    time_start = time.perf_counter()

    self.V = X_l
    self.W = X_u
    self.classId = patClassId

    yX, xX = X_l.shape

    # NOTE: the cardin / clusters bookkeeping is not maintained by this variant.

    if self.isDraw:
        mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])
        drawing_canvas = self.initializeCanvasGraph("GFMM - Faster AGGLO-2", xX)

        # plot initial hyperbox; classes beyond the palette are drawn in black,
        # the same fallback used when a merged box is redrawn below
        Vt, Wt = self.pcatransform()
        color_ = np.empty(len(self.classId), dtype=object)
        for c in range(len(self.classId)):
            if self.classId[c] < len(mark_col):
                color_[c] = mark_col[self.classId[c]]
            else:
                color_[c] = 'k'
        boxes = drawbox(Vt, Wt, drawing_canvas, color_)
        self.delay()
        hyperboxes = list(boxes)    # drawn artist of each hyperbox, by hyperbox index

    # training
    isTraining = True
    while isTraining:
        isTraining = False

        k = 0   # input pattern index
        while k < len(self.classId):
            # hyperboxes that may be merged with the k-th one: same class, unlabelled,
            # or any class when the k-th hyperbox itself is unlabelled
            idx_same_classes = (self.classId == self.classId[k]) | (self.classId == UNLABELED_CLASS) | (
                (self.classId != self.classId[k]) & (self.classId[k] == UNLABELED_CLASS))
            idx_same_classes[k] = False     # remove element in the position k
            idex = np.arange(len(self.classId))
            idex = idex[idx_same_classes]   # keep the indices of elements retained
            V_same_class = self.V[idx_same_classes]
            W_same_class = self.W[idx_same_classes]

            if self.simil == 'short':
                b = memberG(np.maximum(self.W[k], self.V[k]), np.minimum(self.V[k], self.W[k]),
                            np.minimum(V_same_class, W_same_class), np.maximum(W_same_class, V_same_class),
                            self.gamma, self.oper)
            elif self.simil == 'long':
                b = memberG(self.V[k], self.W[k], W_same_class, V_same_class, self.gamma, self.oper)
            else:
                b = asym_similarity_one_many_with_missing_value(self.V[k], self.W[k], V_same_class, W_same_class,
                                                                self.gamma, self.sing, self.oper)

            # candidates by decreasing similarity
            indB = np.argsort(b)[::-1]
            idex = idex[indB]
            sortB = b[indB]

            maxB = sortB[sortB >= self.bthres]  # apply membership threshold

            if len(maxB) > 0:
                idexmax = idex[sortB >= self.bthres]

                # rows of (smaller index, larger index, similarity): the smaller
                # index is kept after a merge, the larger one is removed
                pairewise_maxb = np.concatenate((np.minimum(k, idexmax)[:, np.newaxis],
                                                 np.maximum(k, idexmax)[:, np.newaxis],
                                                 maxB[:, np.newaxis]), axis=1)

                for i in range(pairewise_maxb.shape[0]):
                    row1 = int(pairewise_maxb[i, 0])    # hyperbox that absorbs the other one
                    row2 = int(pairewise_maxb[i, 1])    # hyperbox that is removed

                    # candidate model: row1 becomes the union of both boxes,
                    # row2 is removed, the rest is left intact
                    newV = np.concatenate((self.V[:row1],
                                           np.minimum(self.V[row1], self.V[row2]).reshape(1, -1),
                                           self.V[row1 + 1:row2],
                                           self.V[row2 + 1:]), axis=0)
                    newW = np.concatenate((self.W[:row1],
                                           np.maximum(self.W[row1], self.W[row2]).reshape(1, -1),
                                           self.W[row1 + 1:row2],
                                           self.W[row2 + 1:]), axis=0)
                    newClassId = np.concatenate((self.classId[:row2], self.classId[row2 + 1:]))
                    # an unlabelled survivor inherits the label of the removed hyperbox
                    if newClassId[row1] == UNLABELED_CLASS:
                        newClassId[row1] = self.classId[row2]

                    # adjust the hyperbox if no overlap and maximum hyperbox size is not violated
                    # position of adjustment is row1 in new bounds
                    if ((newW[row1] - newV[row1]) <= self.teta).all() and (
                            not modifiedIsOverlap(newV, newW, row1, newClassId)):
                        self.V = newV
                        self.W = newW
                        self.classId = newClassId

                        isTraining = True

                        if k != pairewise_maxb[i, 0]:
                            # the k-th hyperbox itself (row2) was removed, so the next
                            # step has to start from the same position again
                            k = k - 1

                        if self.isDraw:
                            # Best effort: ordinary errors while removing the old
                            # artists are reported, Ctrl-C and SystemExit still propagate
                            try:
                                hyperboxes[row2].remove()
                                hyperboxes[row1].remove()
                            except Exception:
                                print("No remove old hyperbox")

                            Vt, Wt = self.pcatransform()

                            box_color = 'k'
                            if self.classId[row1] < len(mark_col):
                                box_color = mark_col[self.classId[row1]]

                            box = drawbox(np.asmatrix(Vt[row1]), np.asmatrix(Wt[row1]),
                                          drawing_canvas, box_color)
                            self.delay()
                            hyperboxes[row1] = box[0]
                            hyperboxes.remove(hyperboxes[row2])

                        break   # if hyperbox adjusted there's no need to look at other hyperboxes

            k = k + 1

    time_end = time.perf_counter()
    self.elapsed_training_time = time_end - time_start

    return self
def fit(self, X_l, X_u, patClassId):
    """
    Training the classifier with the agglomerative algorithm based on a
    similarity matrix computed only within each class (fast version)

    In every round, similarities are computed between hyperboxes sharing a
    class label (unlabelled hyperboxes take part in every class), pairs at or
    above self.bthres are sorted by decreasing similarity and processed from
    the top. A pair is merged when the merged box does not exceed self.teta in
    any dimension and passes modifiedIsOverlap. Rounds are repeated until one
    round produces no merge.

      X_l             Input data lower bounds (rows = objects, columns = features)
      X_u             Input data upper bounds (rows = objects, columns = features)
      patClassId      Input data class labels (crisp)

    Returns self. The model is stored in self.V, self.W, self.classId and the
    training time in self.elapsed_training_time.
    """
    if self.isNorm == True:
        X_l, X_u = self.dataPreprocessing(X_l, X_u)

    time_start = time.perf_counter()

    self.V = X_l
    self.W = X_u
    self.classId = patClassId

    yX, xX = X_l.shape

    # NOTE: the cardin / clusters bookkeeping is not maintained by this variant.

    def box_colors(palette):
        # One colour per hyperbox; classes beyond the palette are drawn in black
        # instead of raising IndexError.
        colors = np.empty(len(self.classId), dtype=object)
        for c in range(len(self.classId)):
            if self.classId[c] < len(palette):
                colors[c] = palette[self.classId[c]]
            else:
                colors[c] = 'k'
        return colors

    if self.isDraw:
        mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])
        drawing_canvas = self.initializeCanvasGraph("GFMM - AGGLO-SM-Fast version", xX)

        # plot initial hyperbox
        Vt, Wt = self.pcatransform()
        drawbox(Vt, Wt, drawing_canvas, box_colors(mark_col))
        self.delay()

    # training
    isTraining = True
    while isTraining:
        isTraining = False

        # calculate class masks: column i marks hyperboxes of class labList[i]
        # together with all unlabelled hyperboxes
        yX, xX = self.V.shape
        labList = np.unique(self.classId[self.classId != UNLABELED_CLASS])[::-1]
        # builtin bool: the np.bool alias is not available in every NumPy release
        clMask = np.zeros(shape=(yX, len(labList)), dtype=bool)
        for i in range(len(labList)):
            clMask[:, i] = (self.classId == labList[i]) | (self.classId == UNLABELED_CLASS)

        # calculate pairwise memberships *ONLY* within each class (faster!)
        b = np.zeros(shape=(yX, yX))

        for i in range(len(labList)):
            Vi = self.V[clMask[:, i]]               # bounds of patterns with class label i
            Wi = self.W[clMask[:, i]]
            clSize = np.sum(clMask[:, i])           # number of patterns of class i
            clIdxs = np.nonzero(clMask[:, i])[0]    # position of those patterns in the model

            if self.simil == 'short':
                for j in range(clSize):
                    b[clIdxs[j], clIdxs] = memberG(np.maximum(Wi[j], Vi[j]), np.minimum(Vi[j], Wi[j]),
                                                   np.minimum(Vi, Wi), np.maximum(Wi, Vi),
                                                   self.gamma, self.oper)
            elif self.simil == 'long':
                for j in range(clSize):
                    b[clIdxs[j], clIdxs] = memberG(Vi[j], Wi[j], Wi, Vi, self.gamma, self.oper)
            else:
                for j in range(clSize):
                    b[clIdxs[j], clIdxs] = memberG(Vi[j], Wi[j],
                                                   np.minimum(Vi, Wi), np.maximum(Wi, Vi),
                                                   self.gamma, self.oper)

        if yX == 1:
            maxb = np.array([])
        else:
            maxb = self.splitSimilarityMaxtrix(b, self.sing, False)
            if len(maxb) > 0:
                maxb = maxb[(maxb[:, 2] >= self.bthres), :]

        if len(maxb) > 0:
            # sort maxb in the descending order following the last column
            idx_smaxb = np.argsort(maxb[:, 2])[::-1]
            maxb = np.hstack((maxb[idx_smaxb, 0].reshape(-1, 1),
                              maxb[idx_smaxb, 1].reshape(-1, 1),
                              maxb[idx_smaxb, 2].reshape(-1, 1)))

        while len(maxb) > 0:
            curmaxb = maxb[0, :]        # current position handling
            row1 = int(curmaxb[0])      # hyperbox that absorbs the other one
            row2 = int(curmaxb[1])      # hyperbox that is removed after the merge
            cur_sim = curmaxb[2]

            # candidate model: row1 becomes the union of both boxes,
            # row2 is removed, the rest is left intact
            newV = np.concatenate((self.V[0:row1, :],
                                   np.minimum(self.V[row1, :], self.V[row2, :]).reshape(1, -1),
                                   self.V[row1 + 1:row2, :],
                                   self.V[row2 + 1:, :]), axis=0)
            newW = np.concatenate((self.W[0:row1, :],
                                   np.maximum(self.W[row1, :], self.W[row2, :]).reshape(1, -1),
                                   self.W[row1 + 1:row2, :],
                                   self.W[row2 + 1:, :]), axis=0)
            newClassId = np.concatenate((self.classId[0:row2], self.classId[row2 + 1:]))
            # an unlabelled survivor inherits the label of the removed hyperbox
            if newClassId[row1] == UNLABELED_CLASS:
                newClassId[row1] = self.classId[row2]

            # adjust the hyperbox if no overlap and maximum hyperbox size is not violated
            if ((newW[row1] - newV[row1]) <= self.teta).all() and (
                    not modifiedIsOverlap(newV, newW, row1, newClassId)):
                isTraining = True
                self.V = newV
                self.W = newW
                self.classId = newClassId

                # Keep only pairs that involve neither joined box and whose
                # similarity is at least the current one.
                # NOTE(review): the original comment describes removing pairs that
                # involve a joined box AND have lower membership; as written, every
                # pair with lower membership is dropped - confirm this is intended.
                mask = (maxb[:, 0] != row1) & (maxb[:, 1] != row1) & (maxb[:, 0] != row2) & (
                    maxb[:, 1] != row2) & (maxb[:, 2] >= cur_sim)
                maxb = maxb[mask, :]

                # update indexes to accommodate the removed hyperbox: indices
                # larger than row2 are shifted down by one
                if len(maxb) > 0:
                    maxb[maxb[:, 0] > row2, 0] = maxb[maxb[:, 0] > row2, 0] - 1
                    maxb[maxb[:, 1] > row2, 1] = maxb[maxb[:, 1] > row2, 1] - 1

                if self.isDraw:
                    Vt, Wt = self.pcatransform()
                    color_ = box_colors(mark_col)
                    drawing_canvas.cla()
                    drawbox(Vt, Wt, drawing_canvas, color_)
                    self.delay()
            else:
                maxb = maxb[1:, :]  # scrap examined pair from the list

    time_end = time.perf_counter()
    self.elapsed_training_time = time_end - time_start

    return self
def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
    """
    Prune the hyperboxes stored in self.V, self.W and self.classId using a validation set.

        pruning_val(XlT, XuT, patClassIdTest)

    Each validation sample is assigned to the hyperbox with the highest membership value
    and counted as a correct or wrong prediction of that hyperbox. Two candidate models
    are then built and evaluated on the same validation data:
        + "remove": keeps only hyperboxes whose accuracy is >= accuracy_threshold
        + "keep":   additionally keeps hyperboxes that were never selected for any sample
    In both candidates every class keeps at least one hyperbox. The candidate with the
    smaller number of misclassified samples replaces self.V, self.W and self.classId
    (ties go to the "remove" candidate).

    INPUT
      XlT                 Validation data lower bounds (rows = objects, columns = features)
      XuT                 Validation data upper bounds (rows = objects, columns = features)
      patClassIdTest      Validation data class labels (crisp)
      accuracy_threshold  The minimum accuracy for each hyperbox
      newVerPredict       + True: using Manhattan distance in addition to fuzzy membership
                          + False: ties between hyperboxes are broken randomly

    OUTPUT
      Nothing is returned; self.V, self.W and self.classId are updated in place.
    """
    yX = XlT.shape[0]
    # column 0: number of correct predictions, column 1: number of wrong predictions
    box_counts = np.zeros((len(self.classId), 2))

    # classify every validation sample and credit the winning hyperbox
    for i in range(yX):
        mem_row = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper)
        maxVind = np.nonzero(mem_row == mem_row.max())[0]  # all hyperboxes sharing the max membership

        if len(maxVind) == 1:
            winner = maxVind[0]
            # NOTE(review): unlike the tie branch below, an UNLABELED_CLASS sample is
            # counted as a wrong prediction here - confirm whether this is intended.
            is_correct = self.classId[winner] == patClassIdTest[i]
        else:
            if newVerPredict:
                # several hyperboxes tie on membership => pick the one whose centre is
                # closest (Manhattan distance) to the centre of the input sample
                if (XlT[i] == XuT[i]).all():
                    sample_centre = XlT[i]
                else:
                    sample_centre = (XlT[i] + XuT[i]) / 2
                XgT_mat = np.ones((len(maxVind), 1)) * sample_centre
                avg_point_mat = (self.V[maxVind] + self.W[maxVind]) / 2
                maht_dist = manhattan_distance(avg_point_mat, XgT_mat)
                winner = maxVind[maht_dist.argmin()]
            else:
                # select randomly among the tied hyperboxes
                winner = maxVind[np.random.randint(len(maxVind))]

            is_correct = (self.classId[winner] == patClassIdTest[i]
                          or patClassIdTest[i] == UNLABELED_CLASS)

        box_counts[winner, 0 if is_correct else 1] += 1

    # decide which hyperboxes survive, based on the validation counts
    totals = box_counts.sum(axis=1)
    evaluated = totals != 0
    accuracy_larger_half = np.zeros(len(totals), dtype=bool)
    accuracy_larger_half[evaluated] = (
        box_counts[evaluated, 0] / totals[evaluated] >= accuracy_threshold)
    # the "keep" variant also retains hyperboxes that never won a validation sample
    accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~evaluated

    # keep one randomly chosen hyperbox for every class that would otherwise vanish
    class_tmp = self.classId[accuracy_larger_half]
    class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
    for c in np.unique(self.classId):
        if c not in class_tmp:
            # np.nonzero returns a tuple; [0] yields the actual index array
            pos = np.nonzero(self.classId == c)[0]
            accuracy_larger_half[pos[np.random.randint(len(pos))]] = True
        if c not in class_tmp_keep:
            pos = np.nonzero(self.classId == c)[0]
            accuracy_larger_half_keep_nojoin[pos[np.random.randint(len(pos))]] = True

    V_prun_remove = self.V[accuracy_larger_half]
    W_prun_remove = self.W[accuracy_larger_half]
    classId_prun_remove = self.classId[accuracy_larger_half]

    V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
    W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
    classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]

    # evaluate both candidates with the same prediction rule used above
    if newVerPredict:
        evaluate = predict_with_manhattan
    else:
        evaluate = predict
    result_prun_remove = evaluate(V_prun_remove, W_prun_remove, classId_prun_remove,
                                  XlT, XuT, patClassIdTest, self.gamma, self.oper)
    result_prun_keep_nojoin = evaluate(V_prun_keep, W_prun_keep, classId_prun_keep,
                                       XlT, XuT, patClassIdTest, self.gamma, self.oper)

    # adopt whichever candidate misclassifies fewer validation samples
    if result_prun_remove.summis <= result_prun_keep_nojoin.summis:
        self.V = V_prun_remove
        self.W = W_prun_remove
        self.classId = classId_prun_remove
    else:
        self.V = V_prun_keep
        self.W = W_prun_keep
        self.classId = classId_prun_keep
def predict_based_mem(self, Xl_Test, Xu_Test, patClassIdTest):
    """
    Evaluate the ensemble stored in self.list_learners on a testing set.

    For every sample, each base learner contributes, per class, the maximum
    membership value over its hyperboxes of that class. These values are averaged
    over all base learners and the class with the highest average is predicted.
    If several classes share the highest average, one of them is chosen with
    random.choice.

    Parameters:
        + Xl_Test, Xu_Test: Lower and upper bounds of the testing set
                            (rows = objects, columns = features)
        + patClassIdTest:   classes of the testing set

    Returns a tuple (accuracy, num_wrong_samples, predicted_classes):
        + accuracy:          fraction of correctly predicted samples (NaN if the testing set is empty)
        + num_wrong_samples: number of wrongly predicted samples
        + predicted_classes: integer array holding the predicted class of each sample
    """
    numClassifier = len(self.list_learners)
    yX = Xl_Test.shape[0]

    # collect the distinct class labels over all base classifiers
    classes = self.list_learners[0].classId
    for learner in self.list_learners[1:]:
        classes = np.union1d(classes, learner.classId)
    # with a single learner no union is performed, so deduplicate explicitly
    classes = np.unique(classes)
    noClasses = len(classes)

    # The class masks do not depend on the sample, so build them once per learner.
    # Classes without any hyperbox in a learner contribute nothing for that learner.
    masks_per_learner = []
    for learner in self.list_learners:
        masks = []
        for j, label in enumerate(classes):
            mask = learner.classId == label
            if np.any(mask):
                masks.append((j, mask))
        masks_per_learner.append(masks)

    out = np.zeros((yX, noClasses), dtype=np.float64)
    predicted_classes = []

    # classification of each testing pattern i
    for i in range(yX):
        for learner, masks in zip(self.list_learners, masks_per_learner):
            # memberships of sample i in all hyperboxes of this learner
            mem_tmp = memberG(Xl_Test[i, :], Xu_Test[i, :], learner.V, learner.W, self.gamma)
            for j, mask in masks:
                out[i, j] += mem_tmp[mask].max()

        # average membership of each class over all classifiers
        out[i, :] = out[i, :] / numClassifier

        # indices of all classes reaching the max average membership
        maxMemInd = np.nonzero(out[i] == out[i].max())[0]
        if len(maxMemInd) == 1:
            predicted_classes.append(classes[maxMemInd[0]])
        else:
            # choose random class among the tied ones
            predicted_classes.append(classes[random.choice(maxMemInd)])

    # the builtin int replaces the np.int alias, which newer NumPy releases no longer provide
    predicted_classes = np.array(predicted_classes, dtype=int)

    num_correct_samples = np.sum(predicted_classes == patClassIdTest)
    num_wrong_samples = yX - num_correct_samples
    # accuracy is undefined for an empty testing set; avoid a divide-by-zero warning
    accuracy = num_correct_samples / yX if yX > 0 else np.nan

    return (accuracy, num_wrong_samples, predicted_classes)
def predictDecisionLevelEnsemble(classifiers, XlT, XuT, patClassIdTest, gama=1, oper='min'):
    """
    Perform classification for a decision level ensemble learning

        result = predictDecisionLevelEnsemble(classifiers, XlT, XuT, patClassIdTest, gama, oper)

    For every sample, each classifier contributes, per class, the maximum membership
    value over its hyperboxes of that class; the contributions are averaged over all
    classifiers. A sample counts as misclassified unless its true label is among the
    classes with the highest average membership or its label equals 0.

    INPUT
      classifiers         An array of classifiers needed to combine; each element must
                          expose the attributes V, W and classId
      XlT                 Test data lower bounds (rows = objects, columns = features)
      XuT                 Test data upper bounds (rows = objects, columns = features)
      patClassIdTest      Test data class labels (crisp)
      gama                Membership function slope (default: 1)
      oper                Membership calculation operation: 'min' or 'prod' (default: 'min')

    OUTPUT
      result              A object with Bunch datatype containing all results as follows:
                              + summis      Number of misclassified samples
                              + misclass    Binary error map for input samples
                              + out         Soft class memberships, rows are testing input patterns, columns are indices of classes
                              + classes     Store class labels corresponding column indices of out
    """
    # accept a single sample passed as a 1-D vector, as predict() does
    if len(XlT.shape) == 1:
        XlT = XlT.reshape(1, -1)
    if len(XuT.shape) == 1:
        XuT = XuT.reshape(1, -1)

    numClassifier = len(classifiers)
    yX = XlT.shape[0]
    # the builtin bool replaces the np.bool alias, which newer NumPy releases no longer provide
    misclass = np.zeros(yX, dtype=bool)

    # get all class labels of all base classifiers
    classId = classifiers[0].classId
    for clf in classifiers[1:]:
        classId = np.union1d(classId, clf.classId)
    classes = np.unique(classId)
    noClasses = len(classes)

    # The class masks do not depend on the sample, so build them once per classifier.
    # Classes without any hyperbox in a classifier contribute nothing for that classifier.
    masks_per_clf = []
    for clf in classifiers:
        masks = []
        for j, label in enumerate(classes):
            mask = clf.classId == label
            if np.any(mask):
                masks.append((j, mask))
        masks_per_clf.append(masks)

    out = np.zeros((yX, noClasses), dtype=np.float64)

    # classification of each testing pattern i
    for i in range(yX):
        for clf, masks in zip(classifiers, masks_per_clf):
            # memberships of sample i in all hyperboxes of this classifier
            mem_tmp = memberG(XlT[i, :], XuT[i, :], clf.V, clf.W, gama, oper)
            for j, mask in masks:
                out[i, j] += mem_tmp[mask].max()

        # average membership of each class over all classifiers
        out[i, :] = out[i, :] / numClassifier

        # boolean mask of all classes reaching the max average membership
        maxMemInd = out[i] == out[i].max()
        true_label = patClassIdTest[i]
        # NOTE(review): 0 is hard-coded as the "unlabeled" marker here, while other
        # routines in this file compare against UNLABELED_CLASS - confirm they agree.
        misclass[i] = not ((classes[maxMemInd] == true_label).any() or true_label == 0)

    # count number of misclassified patterns
    summis = np.sum(misclass)

    result = Bunch(summis=summis, misclass=misclass, out=out, classes=classes)
    return result