def predict(self, Xl_Test, Xu_Test, patClassIdTest, newVer = True):
    """
    Perform classification

        result = predict(Xl_Test, Xu_Test, patClassIdTest)

    INPUT:
        Xl_Test             Test data lower bounds (rows = objects, columns = features)
        Xu_Test             Test data upper bounds (rows = objects, columns = features)
        patClassIdTest      Test data class labels (crisp)
        newVer              + False: Don't use an additional criterion for predicting
                            + True : Using an additional criterion for predicting in the case of the same membership value

    OUTPUT:
        result              A object with Bunch datatype containing all results as follows:
                            + summis             Number of misclassified objects
                            + misclass           Binary error map
                            + numSampleInBoundary The number of samples in decision boundary
                            + predicted_class    Predicted class
    """
    # Normalize testing dataset if training datasets were normalized
    if len(self.mins) > 0:
        noSamples = Xl_Test.shape[0]
        Xl_Test = self.loLim + (self.hiLim - self.loLim) * (Xl_Test - np.ones((noSamples, 1)) * self.mins) / (np.ones((noSamples, 1)) * (self.maxs - self.mins))
        Xu_Test = self.loLim + (self.hiLim - self.loLim) * (Xu_Test - np.ones((noSamples, 1)) * self.mins) / (np.ones((noSamples, 1)) * (self.maxs - self.mins))

        if Xl_Test.min() < self.loLim or Xu_Test.min() < self.loLim or Xl_Test.max() > self.hiLim or Xu_Test.max() > self.hiLim:
            print('Test sample falls outside', self.loLim, '-', self.hiLim, 'interval')
            print('Number of original samples = ', noSamples)

            # only keep samples within the interval loLim-hiLim
            indXl_good = np.where((Xl_Test >= self.loLim).all(axis = 1) & (Xl_Test <= self.hiLim).all(axis = 1))[0]
            indXu_good = np.where((Xu_Test >= self.loLim).all(axis = 1) & (Xu_Test <= self.hiLim).all(axis = 1))[0]
            indKeep = np.intersect1d(indXl_good, indXu_good)

            Xl_Test = Xl_Test[indKeep, :]
            Xu_Test = Xu_Test[indKeep, :]
            # Bug fix: the class labels must be filtered with the same index set,
            # otherwise labels no longer line up with the kept samples downstream.
            # Guarded so an empty/absent label array is left untouched.
            if len(patClassIdTest) == noSamples:
                patClassIdTest = patClassIdTest[indKeep]

            print('Number of kept samples =', Xl_Test.shape[0])

    # do classification
    result = None

    if Xl_Test.shape[0] > 0:
        if newVer:
            # additional probability-based criterion to break membership ties
            result = predict_with_probability(self.V, self.W, self.classId, self.counter, Xl_Test, Xu_Test, patClassIdTest, self.gamma, self.oper)
        else:
            result = predict(self.V, self.W, self.classId, Xl_Test, Xu_Test, patClassIdTest, self.gamma, self.oper)

        # Bug fix: np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is the documented replacement (int64 dtype here)
        self.predicted_class = np.array(result.predicted_class, int)

    return result
def predict_model_combination(self, Xl_Test, Xu_Test, patClassIdTest):
    """
    Classify the test set for model combination, dispatching to the learner
    chosen in self.select_learner ('iol-gfmm' uses the probability-based
    predictor, anything else the Manhattan-distance predictor).

    INPUT:
        Xl_Test         Test data lower bounds (rows = objects, columns = features)
        Xu_Test         Test data upper bounds (rows = objects, columns = features)
        patClassIdTest  Test data class labels (crisp)

    OUTPUT:
        result          Prediction results object, or None when no test sample
                        survives the range filtering
    """
    # Apply the training normalization to the test set, if one was fitted
    if len(self.mins) > 0:
        noSamples = Xl_Test.shape[0]
        ones_col = np.ones((noSamples, 1))
        scale = self.hiLim - self.loLim
        span = ones_col * (self.maxs - self.mins)
        Xl_Test = self.loLim + scale * (Xl_Test - ones_col * self.mins) / span
        Xu_Test = self.loLim + scale * (Xu_Test - ones_col * self.mins) / span

        out_of_range = (Xl_Test.min() < self.loLim or Xu_Test.min() < self.loLim
                        or Xl_Test.max() > self.hiLim or Xu_Test.max() > self.hiLim)
        if out_of_range:
            print('Test sample falls outside', self.loLim, '-', self.hiLim, 'interval')
            print('Number of original samples = ', noSamples)

            # retain only the samples whose bounds lie fully inside [loLim, hiLim]
            lower_ok = np.where((Xl_Test >= self.loLim).all(axis=1) & (Xl_Test <= self.hiLim).all(axis=1))[0]
            upper_ok = np.where((Xu_Test >= self.loLim).all(axis=1) & (Xu_Test <= self.hiLim).all(axis=1))[0]
            kept = np.intersect1d(lower_ok, upper_ok)
            Xl_Test = Xl_Test[kept, :]
            Xu_Test = Xu_Test[kept, :]

            print('Number of kept samples =', Xl_Test.shape[0])

    result = None
    if Xl_Test.shape[0] > 0:
        if self.select_learner == 'iol-gfmm':
            result = predict_with_probability(self.V, self.W, self.classId, self.counter, Xl_Test, Xu_Test, patClassIdTest, self.gamma)
        else:
            result = predict_with_manhattan(self.V, self.W, self.classId, Xl_Test, Xu_Test, patClassIdTest, self.gamma)
    return result
def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
    """
    pruning handling based on validation (validation routine) with hyperboxes stored in self.V, W, classId

        result = pruning_val(XlT, XuT, patClassIdTest)

    INPUT
        XlT                 Test data lower bounds (rows = objects, columns = features)
        XuT                 Test data upper bounds (rows = objects, columns = features)
        patClassIdTest      Test data class labels (crisp)
        accuracy_threshold  The minimum accuracy for each hyperbox to be kept
        newVerPredict       + True: using probability formula for prediction in addition to fuzzy membership
                            + False: No using probability formula for prediction
    """
    # initialization: per-hyperbox counts of [correct, incorrect] validation predictions
    yX = XlT.shape[0]
    no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))
    # classifications
    for i in range(yX):
        mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper) # calculate memberships for all hyperboxes
        bmax = mem.max()                                                           # get max membership value
        maxVind = np.nonzero(mem == bmax)[0]                                       # get indexes of all hyperboxes with max membership

        if len(maxVind) == 1:
            # Only one hyperbox with the highest membership function
            if self.classId[maxVind[0]] == patClassIdTest[i]:
                no_predicted_samples_hyperboxes[maxVind[0], 0] = no_predicted_samples_hyperboxes[maxVind[0], 0] + 1
            else:
                no_predicted_samples_hyperboxes[maxVind[0], 1] = no_predicted_samples_hyperboxes[maxVind[0], 1] + 1
        else:
            if newVerPredict == True:
                cls_same_mem = np.unique(self.classId[maxVind])
                if len(cls_same_mem) > 1:
                    is_find_prob_val = True
                    if bmax == 1:
                        # full membership: prefer a hyperbox covering exactly one sample, if any
                        id_box_with_one_sample = np.nonzero(self.counter[maxVind] == 1)[0]
                        if len(id_box_with_one_sample) > 0:
                            is_find_prob_val = False
                            id_min = random.choice(maxVind[id_box_with_one_sample])

                    if is_find_prob_val == True:
                        # class "probability": membership-and-count weighted share per class
                        sum_prod_denum = (mem[maxVind] * self.counter[maxVind]).sum()
                        max_prob = -1
                        pre_id_cls = None
                        for c in cls_same_mem:
                            id_cls = np.nonzero(self.classId[maxVind] == c)[0]
                            sum_pro_num = (mem[maxVind[id_cls]] * self.counter[maxVind[id_cls]]).sum()
                            tmp = sum_pro_num / sum_prod_denum
                            # ties on probability are broken by the larger total sample count
                            if tmp > max_prob or (tmp == max_prob and pre_id_cls is not None and self.counter[maxVind[id_cls]].sum() > self.counter[maxVind[pre_id_cls]].sum()):
                                max_prob = tmp
                                pre_id_cls = id_cls
                                id_min = random.choice(maxVind[id_cls])
                else:
                    # all tied hyperboxes agree on the class => pick any of them
                    id_min = random.choice(maxVind)
            else:
                # More than one hyperbox with highest membership => random choosing
                id_min = maxVind[np.random.randint(len(maxVind))]

            if self.classId[id_min] != patClassIdTest[i] and patClassIdTest[i] != UNLABELED_CLASS:
                no_predicted_samples_hyperboxes[id_min, 1] = no_predicted_samples_hyperboxes[id_min, 1] + 1
            else:
                no_predicted_samples_hyperboxes[id_min, 0] = no_predicted_samples_hyperboxes[id_min, 0] + 1

    # pruning handling based on the validation results
    tmp_no_box = no_predicted_samples_hyperboxes.shape[0]
    # Bug fix: np.bool was removed in NumPy 1.24; the builtin bool is the replacement
    accuracy_larger_half = np.zeros(tmp_no_box).astype(bool)
    accuracy_larger_half_keep_nojoin = np.zeros(tmp_no_box).astype(bool)
    for i in range(tmp_no_box):
        if (no_predicted_samples_hyperboxes[i, 0] + no_predicted_samples_hyperboxes[i, 1] != 0) and no_predicted_samples_hyperboxes[i, 0] / (no_predicted_samples_hyperboxes[i, 0] + no_predicted_samples_hyperboxes[i, 1]) >= accuracy_threshold:
            accuracy_larger_half[i] = True
            accuracy_larger_half_keep_nojoin[i] = True
        if (no_predicted_samples_hyperboxes[i, 0] + no_predicted_samples_hyperboxes[i, 1] == 0):
            # the "keep" variant also retains boxes that saw no validation sample
            accuracy_larger_half_keep_nojoin[i] = True

    # keep one hyperbox for each class that would otherwise be pruned entirely
    current_classes = np.unique(self.classId)
    class_tmp = self.classId[accuracy_larger_half]
    class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
    for c in current_classes:
        if c not in class_tmp:
            # Bug fix: np.nonzero returns a tuple of arrays; take [0] so that one
            # random box is kept instead of re-enabling every box of the class
            # (len(tuple) is always 1, and tuple[0] indexes ALL matching boxes)
            pos = np.nonzero(self.classId == c)[0]
            id_kept = np.random.randint(len(pos))
            # keep pos[id_kept]
            accuracy_larger_half[pos[id_kept]] = True
        if c not in class_tmp_keep:
            pos = np.nonzero(self.classId == c)[0]
            id_kept = np.random.randint(len(pos))
            accuracy_larger_half_keep_nojoin[pos[id_kept]] = True

    # candidate 1: drop every box that failed the accuracy threshold
    V_prun_remove = self.V[accuracy_larger_half]
    W_prun_remove = self.W[accuracy_larger_half]
    classId_prun_remove = self.classId[accuracy_larger_half]
    numSample_prun_remove = self.counter[accuracy_larger_half]
    # candidate 2: additionally keep boxes with no validation coverage
    W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
    V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
    classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]
    # Bug fix: the sample counters must come from self.counter, not self.classId
    # (copy-paste error corrupted self.counter when this candidate was selected)
    numSample_prun_keep = self.counter[accuracy_larger_half_keep_nojoin]

    if newVerPredict == True:
        result_prun_remove = predict_with_probability(V_prun_remove, W_prun_remove, classId_prun_remove, numSample_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
        result_prun_keep_nojoin = predict_with_probability(V_prun_keep, W_prun_keep, classId_prun_keep, numSample_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)
    else:
        result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
        result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)

    # adopt whichever candidate misclassifies fewer validation samples
    if (result_prun_remove.summis <= result_prun_keep_nojoin.summis):
        self.V = V_prun_remove
        self.W = W_prun_remove
        self.classId = classId_prun_remove
        self.counter = numSample_prun_remove
    else:
        self.V = V_prun_keep
        self.W = W_prun_keep
        self.classId = classId_prun_keep
        self.counter = numSample_prun_keep