Example #1
    def predict(self, Xl_Test, Xu_Test, patClassIdTest, newVer = True):
        """
        Perform classification

            result = predict(Xl_Test, Xu_Test, patClassIdTest)

        INPUT:
            Xl_Test             Test data lower bounds (rows = objects, columns = features)
            Xu_Test             Test data upper bounds (rows = objects, columns = features)
            patClassIdTest      Test data class labels (crisp)
            newVer              + False: do not use an additional criterion for prediction
                                + True : use an additional criterion for prediction when several hyperboxes share the same membership value

        OUTPUT:
            result        An object of the Bunch datatype containing the results:
                          + summis                  Number of misclassified objects
                          + misclass                Binary error map
                          + numSampleInBoundary     Number of samples on the decision boundary
                          + predicted_class         Predicted class labels
        """
        #Xl_Test, Xu_Test = delete_const_dims(Xl_Test, Xu_Test)
        # Normalize testing dataset if training datasets were normalized
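        # i.e. map each feature linearly from its training range [mins, maxs]
        # onto the target interval [loLim, hiLim] (standard min-max scaling)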
        if len(self.mins) > 0:
            noSamples = Xl_Test.shape[0]
            Xl_Test = self.loLim + (self.hiLim - self.loLim) * (Xl_Test - np.ones((noSamples, 1)) * self.mins) / (np.ones((noSamples, 1)) * (self.maxs - self.mins))
            Xu_Test = self.loLim + (self.hiLim - self.loLim) * (Xu_Test - np.ones((noSamples, 1)) * self.mins) / (np.ones((noSamples, 1)) * (self.maxs - self.mins))

            if Xl_Test.min() < self.loLim or Xu_Test.min() < self.loLim or Xl_Test.max() > self.hiLim or Xu_Test.max() > self.hiLim:
                print('Test samples fall outside the', self.loLim, '-', self.hiLim, 'interval')
                print('Number of original samples = ', noSamples)

                # only keep samples within the interval loLim-hiLim
                indXl_good = np.where((Xl_Test >= self.loLim).all(axis = 1) & (Xl_Test <= self.hiLim).all(axis = 1))[0]
                indXu_good = np.where((Xu_Test >= self.loLim).all(axis = 1) & (Xu_Test <= self.hiLim).all(axis = 1))[0]
                indKeep = np.intersect1d(indXl_good, indXu_good)

                Xl_Test = Xl_Test[indKeep, :]
                Xu_Test = Xu_Test[indKeep, :]

                print('Number of kept samples =', Xl_Test.shape[0])
                #return

        # do classification
        result = None

        if Xl_Test.shape[0] > 0:
            if newVer:
                result = predict_with_probability(self.V, self.W, self.classId, self.counter, Xl_Test, Xu_Test, patClassIdTest, self.gamma, self.oper)
            else:
                result = predict(self.V, self.W, self.classId, Xl_Test, Xu_Test, patClassIdTest, self.gamma, self.oper)
                
            self.predicted_class = np.array(result.predicted_class, dtype=int)

        return result
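A minimal usage sketch for predict (the classifier class name OnlineGFMM and its construction are assumptions not shown in this snippet; only the predict signature and the Bunch result fields come from the code above):

# Hypothetical usage; OnlineGFMM and fit(...) are assumptions
import numpy as np

clf = OnlineGFMM(gamma=1)                     # assumed trained via clf.fit(Xl_train, Xu_train, y_train)
Xl = np.array([[0.1, 0.2], [0.6, 0.7]])       # test data lower bounds
Xu = np.array([[0.2, 0.3], [0.7, 0.8]])       # test data upper bounds
y = np.array([0, 1])                          # crisp test labels
result = clf.predict(Xl, Xu, y, newVer=True)  # returns None if every sample was filtered out
if result is not None:
    print(result.summis)                      # number of misclassified samples
    print(result.predicted_class)             # predicted class labels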
Example #2
    def predict_model_combination(self, Xl_Test, Xu_Test, patClassIdTest):
        """
        Perform classification using the base learner selected by self.select_learner.

        INPUT:
            Xl_Test           Test data lower bounds (rows = objects, columns = features)
            Xu_Test           Test data upper bounds (rows = objects, columns = features)
            patClassIdTest    Test data class labels (crisp)

        OUTPUT:
            result            A Bunch object with the classification results, or None
                              if no test sample lies within the loLim-hiLim interval
        """
        if len(self.mins) > 0:
            noSamples = Xl_Test.shape[0]
            Xl_Test = self.loLim + (self.hiLim - self.loLim) * (
                Xl_Test - np.ones((noSamples, 1)) * self.mins) / (np.ones(
                    (noSamples, 1)) * (self.maxs - self.mins))
            Xu_Test = self.loLim + (self.hiLim - self.loLim) * (
                Xu_Test - np.ones((noSamples, 1)) * self.mins) / (np.ones(
                    (noSamples, 1)) * (self.maxs - self.mins))

            if (Xl_Test.min() < self.loLim or Xu_Test.min() < self.loLim
                    or Xl_Test.max() > self.hiLim or Xu_Test.max() > self.hiLim):
                print('Test samples fall outside the', self.loLim, '-',
                      self.hiLim, 'interval')
                print('Number of original samples = ', noSamples)

                # only keep samples within the interval loLim-hiLim
                indXl_good = np.where((Xl_Test >= self.loLim).all(axis=1)
                                      & (Xl_Test <= self.hiLim).all(axis=1))[0]
                indXu_good = np.where((Xu_Test >= self.loLim).all(axis=1)
                                      & (Xu_Test <= self.hiLim).all(axis=1))[0]
                indKeep = np.intersect1d(indXl_good, indXu_good)

                Xl_Test = Xl_Test[indKeep, :]
                Xu_Test = Xu_Test[indKeep, :]

                print('Number of kept samples =', Xl_Test.shape[0])
                #return

        # do classification
        result = None

        if Xl_Test.shape[0] > 0:
            if self.select_learner == 'iol-gfmm':
                result = predict_with_probability(self.V, self.W, self.classId,
                                                  self.counter, Xl_Test,
                                                  Xu_Test, patClassIdTest,
                                                  self.gamma)
            else:
                result = predict_with_manhattan(self.V, self.W, self.classId,
                                                Xl_Test, Xu_Test,
                                                patClassIdTest, self.gamma)

        return result
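The same pattern applies here; this sketch assumes an ensemble object exposing select_learner (the class name ModelCombination is hypothetical; the choice between the probability-based and Manhattan-distance-based predictors follows the branch in the code above):

# Hypothetical usage; ModelCombination is an assumed class name
model = ModelCombination(select_learner='iol-gfmm')  # selects the probability-based branch
# model.V, model.W, model.classId, model.counter are assumed set by training
result = model.predict_model_combination(Xl, Xu, y)
if result is not None:
    print(result.summis)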
Example #3
 def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
     """
     Prune hyperboxes based on a validation set (validation routine), using the
     hyperboxes stored in self.V, self.W, self.classId

       result = pruning_val(XlT, XuT, patClassIdTest)

         INPUT
           XlT                 Test data lower bounds (rows = objects, columns = features)
           XuT                 Test data upper bounds (rows = objects, columns = features)
           patClassIdTest      Test data class labels (crisp)
           accuracy_threshold  The minimum accuracy required to keep a hyperbox
           newVerPredict       + True : use the probability formula for prediction in addition to fuzzy membership
                               + False: do not use the probability formula for prediction

         OUTPUT
           self.V, self.W, self.classId, and self.counter are updated in place with the pruned hyperboxes
     """
 
     # initialization
     yX = XlT.shape[0]
     no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))
     # classifications
     for i in range(yX):
         mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper) # memberships of the sample w.r.t. all hyperboxes
         bmax = mem.max() # maximum membership value
         maxVind = np.nonzero(mem == bmax)[0] # indices of all hyperboxes with the maximum membership
         
         if len(maxVind) == 1:
             # only one hyperbox has the highest membership value
             if self.classId[maxVind[0]] == patClassIdTest[i]:
                 no_predicted_samples_hyperboxes[maxVind[0], 0] += 1
             else:
                 no_predicted_samples_hyperboxes[maxVind[0], 1] += 1
         else:
             if newVerPredict:
                 cls_same_mem = np.unique(self.classId[maxVind])
                 if len(cls_same_mem) > 1:
                     # several classes share the maximum membership value, so try to
                     # break the tie with the sample-count based probability formula
                     is_find_prob_val = True
                     if bmax == 1:
                         # special case: full membership and some tied box covers
                         # exactly one sample => pick one of those boxes at random
                         id_box_with_one_sample = np.nonzero(self.counter[maxVind] == 1)[0]
                         if len(id_box_with_one_sample) > 0:
                             is_find_prob_val = False
                             id_min = random.choice(maxVind[id_box_with_one_sample])

                     if is_find_prob_val:
                         # probability of class c among the tied hyperboxes:
                         #   P(c) = sum_{boxes of class c}(mem * counter) / sum_{all tied boxes}(mem * counter)
                         sum_prod_denum = (mem[maxVind] * self.counter[maxVind]).sum()
                         max_prob = -1
                         pre_id_cls = None
                         for c in cls_same_mem:
                             id_cls = np.nonzero(self.classId[maxVind] == c)[0]
                             sum_pro_num = (mem[maxVind[id_cls]] * self.counter[maxVind[id_cls]]).sum()
                             tmp = sum_pro_num / sum_prod_denum
                             
                             if (tmp > max_prob
                                     or (tmp == max_prob and pre_id_cls is not None
                                         and self.counter[maxVind[id_cls]].sum() > self.counter[maxVind[pre_id_cls]].sum())):
                                 max_prob = tmp
                                 pre_id_cls = id_cls
                                 id_min = random.choice(maxVind[id_cls])
                 else:
                     id_min = random.choice(maxVind)
             else:
                 # More than one hyperbox with highest membership => random choosing
                 id_min = maxVind[np.random.randint(len(maxVind))]
                     
             if self.classId[id_min] != patClassIdTest[i] and patClassIdTest[i] != UNLABELED_CLASS:
                 no_predicted_samples_hyperboxes[id_min, 1] += 1
             else:
                 no_predicted_samples_hyperboxes[id_min, 0] += 1
                 
     # pruning handling based on the validation results
     tmp_no_box = no_predicted_samples_hyperboxes.shape[0]
     accuracy_larger_half = np.zeros(tmp_no_box, dtype=bool)
     accuracy_larger_half_keep_nojoin = np.zeros(tmp_no_box, dtype=bool)
     for i in range(tmp_no_box):
         no_predicted = no_predicted_samples_hyperboxes[i, 0] + no_predicted_samples_hyperboxes[i, 1]
         if no_predicted != 0 and no_predicted_samples_hyperboxes[i, 0] / no_predicted >= accuracy_threshold:
             accuracy_larger_half[i] = True
             accuracy_larger_half_keep_nojoin[i] = True
         if no_predicted == 0:
             # hyperboxes that classified no validation samples are kept only in the "no-join" variant
             accuracy_larger_half_keep_nojoin[i] = True
     
     # keep at least one hyperbox for any class whose hyperboxes were all pruned
     current_classes = np.unique(self.classId)
     class_tmp = self.classId[accuracy_larger_half]
     class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
     for c in current_classes:
         if c not in class_tmp:
             pos = np.nonzero(self.classId == c)[0] # indices of hyperboxes belonging to class c
             id_kept = np.random.randint(len(pos))
             # keep pos[id_kept]
             accuracy_larger_half[pos[id_kept]] = True
         if c not in class_tmp_keep:
             pos = np.nonzero(self.classId == c)[0]
             id_kept = np.random.randint(len(pos))
             accuracy_larger_half_keep_nojoin[pos[id_kept]] = True
     
     V_prun_remove = self.V[accuracy_larger_half]
     W_prun_remove = self.W[accuracy_larger_half]
     classId_prun_remove = self.classId[accuracy_larger_half]
     numSample_prun_remove = self.counter[accuracy_larger_half]
     
     W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
     V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
     
     classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]
     numSample_prun_keep = self.counter[accuracy_larger_half_keep_nojoin]
     
     if newVerPredict:
         result_prun_remove = predict_with_probability(V_prun_remove, W_prun_remove, classId_prun_remove, numSample_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
         result_prun_keep_nojoin = predict_with_probability(V_prun_keep, W_prun_keep, classId_prun_keep, numSample_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)
     else:
         result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
         result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)
     
     if result_prun_remove.summis <= result_prun_keep_nojoin.summis:
         self.V = V_prun_remove
         self.W = W_prun_remove
         self.classId = classId_prun_remove
         self.counter = numSample_prun_remove
     else:
         self.V = V_prun_keep
         self.W = W_prun_keep
         self.classId = classId_prun_keep
         self.counter = numSample_prun_keep
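A minimal usage sketch plus a worked instance of the accuracy_threshold rule: with the default threshold of 0.5, a hyperbox that classified 3 validation samples correctly and 2 incorrectly (accuracy 3/5 = 0.6) is kept, while one with 1 correct and 3 incorrect (accuracy 0.25) is pruned. The classifier object clf is the same assumed trained instance as in the sketch after Example #1:

# Hypothetical usage; clf is assumed trained, with V, W, classId, counter set
Xl_val = np.array([[0.15, 0.25], [0.65, 0.75]])  # validation lower bounds
Xu_val = np.array([[0.25, 0.35], [0.75, 0.85]])  # validation upper bounds
y_val = np.array([0, 1])                         # crisp validation labels

clf.pruning_val(Xl_val, Xu_val, y_val, accuracy_threshold=0.5, newVerPredict=True)
# the method updates clf.V, clf.W, clf.classId, clf.counter in place
print('hyperboxes after pruning:', clf.V.shape[0])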