def predict(self, Xl_Test, Xu_Test, patClassIdTest):
        """
        Perform classification

            result = predict(Xl_Test, Xu_Test, patClassIdTest)

        INPUT:
            Xl_Test             Test data lower bounds (rows = objects, columns = features)
            Xu_Test             Test data upper bounds (rows = objects, columns = features)
            patClassIdTest      Test data class labels (crisp)

        OUTPUT:
            result        Whatever the module-level predict() returns for the samples that
                          remain after normalization, or None when no sample remains.
                          The original documentation lists these attributes:
                          + summis           Number of misclassified objects
                          + misclass         Binary error map
                          + sumamb           Number of objects with maximum membership in more than one class
                          + out              Soft class memberships
                          + mem              Hyperbox memberships
        """
        # Normalize testing dataset if training datasets were normalized
        if len(self.mins) > 0:
            noSamples = Xl_Test.shape[0]
            scale = self.hiLim - self.loLim
            featureRange = self.maxs - self.mins
            # Broadcasting applies the per-feature statistics to every row
            Xl_Test = self.loLim + scale * (Xl_Test - self.mins) / featureRange
            Xu_Test = self.loLim + scale * (Xu_Test - self.mins) / featureRange

            # min()/max() raise on zero-size arrays, so an empty test set skips the check
            outOfRange = noSamples > 0 and (
                Xl_Test.min() < self.loLim or Xu_Test.min() < self.loLim
                or Xl_Test.max() > self.hiLim or Xu_Test.max() > self.hiLim)

            if outOfRange:
                print('Test sample falls outside', self.loLim, '-', self.hiLim,
                      'interval')
                print('Number of original samples = ', noSamples)

                # only keep samples whose both bounds lie within loLim-hiLim
                insideMask = ((Xl_Test >= self.loLim) & (Xl_Test <= self.hiLim)
                              & (Xu_Test >= self.loLim)
                              & (Xu_Test <= self.hiLim)).all(axis=1)
                indKeep = np.flatnonzero(insideMask)

                Xl_Test = Xl_Test[indKeep, :]
                Xu_Test = Xu_Test[indKeep, :]

                # Drop the labels of the discarded samples as well, otherwise the
                # remaining samples would be scored against the wrong labels.
                labels = np.asarray(patClassIdTest)
                if labels.ndim > 0 and labels.shape[0] == noSamples:
                    patClassIdTest = labels[indKeep]

                print('Number of kept samples =', Xl_Test.shape[0])

        # do classification
        result = None

        if Xl_Test.shape[0] > 0:
            # min/max guard against hyperboxes stored with swapped bounds
            result = predict(np.minimum(self.V, self.W),
                             np.maximum(self.V, self.W), self.classId, Xl_Test,
                             Xu_Test, patClassIdTest, self.gamma, self.oper)

        return result
Ejemplo n.º 2
0
    def predict(self, Xl_Test, Xu_Test, patClassIdTest, newVer=True):
        """
        Perform classification

            result = predict(Xl_Test, Xu_Test, patClassIdTest)

        INPUT:
            Xl_Test             Test data lower bounds (rows = objects, columns = features)
            Xu_Test             Test data upper bounds (rows = objects, columns = features)
            patClassIdTest      Test data class labels (crisp)
            newVer              + False: classify with the module-level predict()
                                + True : classify with predict_with_probability(), which also receives self.counter
                                         (described originally as an additional criterion for equal membership values)

        OUTPUT:
            result        Object returned by the selected prediction routine, or None when no
                          sample remains after normalization. The original documentation lists:
                          + summis           Number of misclassified objects
                          + misclass         Binary error map
                          + numSampleInBoundary     The number of samples in decision boundary
                          + predicted_class   Predicted class

        Side effect: self.predicted_class is set to result.predicted_class as an integer array
        whenever a prediction is made.
        """
        # Normalize testing dataset if training datasets were normalized
        if len(self.mins) > 0:
            noSamples = Xl_Test.shape[0]
            scale = self.hiLim - self.loLim
            featureRange = self.maxs - self.mins
            # Broadcasting applies the per-feature statistics to every row
            Xl_Test = self.loLim + scale * (Xl_Test - self.mins) / featureRange
            Xu_Test = self.loLim + scale * (Xu_Test - self.mins) / featureRange

            # min()/max() raise on zero-size arrays, so an empty test set skips the check
            outOfRange = noSamples > 0 and (
                Xl_Test.min() < self.loLim or Xu_Test.min() < self.loLim
                or Xl_Test.max() > self.hiLim or Xu_Test.max() > self.hiLim)

            if outOfRange:
                print('Test sample falls outside', self.loLim, '-', self.hiLim, 'interval')
                print('Number of original samples = ', noSamples)

                # only keep samples whose both bounds lie within loLim-hiLim
                insideMask = ((Xl_Test >= self.loLim) & (Xl_Test <= self.hiLim)
                              & (Xu_Test >= self.loLim)
                              & (Xu_Test <= self.hiLim)).all(axis=1)
                indKeep = np.flatnonzero(insideMask)

                Xl_Test = Xl_Test[indKeep, :]
                Xu_Test = Xu_Test[indKeep, :]

                # Drop the labels of the discarded samples as well, otherwise the
                # remaining samples would be scored against the wrong labels.
                labels = np.asarray(patClassIdTest)
                if labels.ndim > 0 and labels.shape[0] == noSamples:
                    patClassIdTest = labels[indKeep]

                print('Number of kept samples =', Xl_Test.shape[0])

        # do classification
        result = None

        if Xl_Test.shape[0] > 0:
            if newVer:
                result = predict_with_probability(self.V, self.W, self.classId, self.counter, Xl_Test, Xu_Test, patClassIdTest, self.gamma, self.oper)
            else:
                result = predict(self.V, self.W, self.classId, Xl_Test, Xu_Test, patClassIdTest, self.gamma, self.oper)

            # np.int was removed in NumPy 1.24; it was only an alias of the builtin int
            self.predicted_class = np.array(result.predicted_class, int)

        return result
Ejemplo n.º 3
0
    def training(self, partitionedXtr):
        """
        Select a base classifier by K-fold cross-validation on pre-partitioned data.

        For every fold k an AccelBatchGFMM model is fitted on fold k alone and
        scored on the union of all the other folds; the model with the lowest
        validation error rate is returned.

        INPUT
            partitionedXtr      Indexable collection of self.numFold parts, each exposing:
                                + lower:    lower bounds
                                + upper:    upper bounds
                                + label:    class labels
                                The parts are used as given (no normalization is applied here)

        OUTPUT
            baseClassifier      The fitted AccelBatchGFMM with the smallest validation error
                                (None when self.numFold is 0)
        """
        bestModel = None
        lowestError = 2  # sentinel above any achievable error rate

        for fold in range(self.numFold):
            trainPart = partitionedXtr[fold]
            candidate = AccelBatchGFMM(self.gamma, self.teta, self.bthres,
                                       self.simil, self.sing, False,
                                       self.oper, False)
            candidate.fit(trainPart.lower, trainPart.upper, trainPart.label)

            # The validation set is every fold except the one used for fitting
            started = False
            for other in range(self.numFold):
                if other == fold:
                    continue

                part = partitionedXtr[other]
                if not started:
                    # first held-out fold seeds the validation arrays
                    lower_valid = part.lower
                    upper_valid = part.upper
                    label_valid = part.label
                    started = True
                else:
                    lower_valid = np.concatenate((lower_valid, part.lower),
                                                 axis=0)
                    upper_valid = np.concatenate((upper_valid, part.upper),
                                                 axis=0)
                    label_valid = np.concatenate((label_valid, part.label))

            # Score the candidate on the held-out data
            outcome = predict(candidate.V, candidate.W, candidate.classId,
                              lower_valid, upper_valid, label_valid,
                              self.gamma, self.oper)
            errorRate = outcome.summis / len(label_valid)

            if errorRate < lowestError:
                lowestError = errorRate
                bestModel = candidate

        return bestModel
    def pruning(self, X_Val, classId_Val):
        """
        Prune hyperboxes of the GFMM classifier using validation data.

        Each validation pattern is attributed to the hyperbox with the highest
        membership value. A hyperbox is kept only when it wins strictly more
        correctly labelled patterns than incorrectly labelled ones; boxes with
        a tie, and boxes that win no pattern at all, are removed.

        INPUT
            X_Val           Validation data (used as both lower and upper bound)
            classId_Val     Validation data class labels (crisp)

        OUTPUT
            self, with V, W, classId and cardin restricted to the retained hyperboxes
        """
        # Evaluate the current model on the validation patterns
        validation = predict(self.V, self.W, self.classId, X_Val, X_Val,
                             classId_Val, self.gamma, self.oper)

        # Winning hyperbox (highest membership) for every validation pattern
        winners = validation.mem.argmax(axis=1)

        boxCount = self.V.shape[0]
        # column 0: correctly classified patterns, column 1: misclassified ones
        tally = np.zeros((boxCount, 2))

        for box in range(boxCount):
            wonLabels = classId_Val[winners == box]
            wonCount = len(wonLabels)
            if wonCount > 0:
                tally[box, 0] = np.sum(wonLabels == self.classId[box])
                tally[box, 1] = wonCount - tally[box, 0]

        # Strict comparison: ties and unused boxes are discarded
        keepIdx = np.nonzero(tally[:, 0] > tally[:, 1])[0]

        self.V = self.V[keepIdx, :]
        self.W = self.W[keepIdx, :]
        self.classId = self.classId[keepIdx]
        self.cardin = self.cardin[keepIdx]

        return self
    def fit(self, X_l, X_u, patClassId):
        """
        Train the classifier with the online (incremental) learning procedure:
        input hyperboxes are presented one at a time and either absorbed into an
        existing hyperbox or stored as a new one.

        INPUT:
            X_l            Input data lower bounds (rows = objects, columns = features)
            X_u            Input data upper bounds (rows = objects, columns = features)
            patClassId     Input data class labels (crisp). patClassId[i] = 0 corresponds to an unlabeled item
                           (the code compares labels against the constant UNLABELED_CLASS)

        OUTPUT:
            self           The hyperboxes are stored in self.V (lower bounds), self.W (upper bounds)
                           and self.classId; self.misclass holds the last computed number of
                           misclassified training samples and self.elapsed_training_time the
                           duration measured with time.perf_counter()
        """
        print('--Online Learning--')

        if self.isNorm == True:
            X_l, X_u = self.dataPreprocessing(X_l, X_u)
        #X_l = X_l.astype(np.float32)
        #X_u = X_u.astype(np.float32)
        time_start = time.perf_counter()

        yX, xX = X_l.shape  # yX = number of samples, xX = number of features
        # local copy: the threshold is shrunk after every pass without touching self.teta
        teta = self.teta

        # marker and colour codes used for plotting, indexed by class label
        mark = np.array([
            '*', 'o', 'x', '+', '.', ',', 'v', '^', '<', '>', '1', '2', '3',
            '4', '8', 's', 'p', 'P', 'h', 'H', 'X', 'D', '|', '_'
        ])
        mark_col = np.array(['r', 'g', 'b', 'y', 'c', 'm', 'k'])

        # drawn artists: listLines[j] belongs to hyperbox j, listInputSamplePoints to the input samples
        listLines = list()
        listInputSamplePoints = list()

        if self.isDraw:
            drawing_canvas = self.initializeCanvasGraph(
                "GFMM - Online learning", xX)

            if self.V.size > 0:
                # draw existed hyperboxes
                color_ = np.array(['k'] * len(self.classId), dtype=object)
                for c in range(len(self.classId)):
                    if self.classId[c] < len(mark_col):
                        color_[c] = mark_col[self.classId[c]]

                hyperboxes = drawbox(self.V[:, 0:np.minimum(xX, 3)],
                                     self.W[:, 0:np.minimum(xX, 3)],
                                     drawing_canvas, color_)
                listLines.extend(hyperboxes)
                self.delay()

        # non-zero so that the loop below runs at least one pass (provided teta >= self.tMin)
        self.misclass = 1

        # repeat passes over the data with a shrinking teta until the training set is
        # classified without error or teta drops below self.tMin
        while self.misclass > 0 and teta >= self.tMin:
            # for each input sample
            for i in range(yX):
                classOfX = patClassId[i]
                # draw input samples
                if self.isDraw:
                    if i == 0 and len(listInputSamplePoints) > 0:
                        # reset input point drawing
                        for point in listInputSamplePoints:
                            point.remove()
                        listInputSamplePoints.clear()

                    color_ = 'k'
                    if classOfX < len(mark_col):
                        color_ = mark_col[classOfX]

                    # a sample with identical bounds is a point, otherwise it is drawn as a box
                    if (X_l[i, :] == X_u[i, :]).all():
                        marker_ = 'd'
                        if classOfX < len(mark):
                            marker_ = mark[classOfX]

                        if xX == 2:
                            inputPoint = drawing_canvas.plot(X_l[i, 0],
                                                             X_l[i, 1],
                                                             color=color_,
                                                             marker=marker_)
                        else:
                            # NOTE(review): this branch is taken for every xX != 2 and reads
                            # column 2, so it assumes at least 3 features - confirm
                            inputPoint = drawing_canvas.plot([X_l[i, 0]],
                                                             [X_l[i, 1]],
                                                             [X_l[i, 2]],
                                                             color=color_,
                                                             marker=marker_)

                        #listInputSamplePoints.append(inputPoint)
                    else:
                        inputPoint = drawbox(
                            np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                            np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                            drawing_canvas, color_)

                    listInputSamplePoints.append(inputPoint[0])
                    self.delay()

                if self.V.size == 0:  # no model provided - starting from scratch
                    # the first sample becomes the first hyperbox
                    self.V = np.array([X_l[0]])
                    self.W = np.array([X_u[0]])
                    self.classId = np.array([patClassId[0]])

                    if self.isDraw == True:
                        # draw hyperbox
                        box_color = 'k'
                        if patClassId[0] < len(mark_col):
                            box_color = mark_col[patClassId[0]]

                        hyperbox = drawbox(
                            np.asmatrix(self.V[0, 0:np.minimum(xX, 3)]),
                            np.asmatrix(self.W[0, 0:np.minimum(xX, 3)]),
                            drawing_canvas, box_color)
                        listLines.append(hyperbox[0])
                        self.delay()

                else:
                    # candidate hyperboxes: same class as the sample, or unlabeled
                    id_lb_sameX = np.logical_or(
                        self.classId == classOfX,
                        self.classId == UNLABELED_CLASS)

                    if id_lb_sameX.any() == True:
                        V_sameX = self.V[id_lb_sameX]
                        W_sameX = self.W[id_lb_sameX]
                        lb_sameX = self.classId[id_lb_sameX]
                        id_range = np.arange(len(self.classId))
                        # positions of the candidates within the full hyperbox arrays
                        id_processing = id_range[id_lb_sameX]

                        # membership of the sample in every candidate hyperbox
                        b = memberG(X_l[i], X_u[i],
                                    np.minimum(V_sameX, W_sameX),
                                    np.maximum(V_sameX, W_sameX), self.gamma)
                        # candidates ordered by decreasing membership
                        index = np.argsort(b)[::-1]
                        bSort = b[index]

                        # nothing to do when the best candidate already has membership 1
                        # and its label does not conflict with a labeled sample
                        if bSort[0] != 1 or (classOfX != lb_sameX[index[0]]
                                             and classOfX != UNLABELED_CLASS):
                            adjust = False
                            for j in id_processing[index]:
                                # test violation of max hyperbox size and class labels
                                if (classOfX == self.classId[j]
                                        or self.classId[j] == UNLABELED_CLASS
                                        or classOfX == UNLABELED_CLASS) and (
                                            (np.maximum(self.W[j], X_u[i]) -
                                             np.minimum(self.V[j], X_l[i])) <=
                                            teta).all() == True:
                                    # adjust the j-th hyperbox
                                    self.V[j] = np.minimum(self.V[j], X_l[i])
                                    self.W[j] = np.maximum(self.W[j], X_u[i])
                                    indOfWinner = j
                                    adjust = True
                                    # an unlabeled hyperbox inherits the label of a labeled sample
                                    if classOfX != UNLABELED_CLASS and self.classId[
                                            j] == UNLABELED_CLASS:
                                        self.classId[j] = classOfX

                                    if self.isDraw:
                                        # Handle drawing graph
                                        box_color = 'k'
                                        if self.classId[j] < len(mark_col):
                                            box_color = mark_col[
                                                self.classId[j]]

                                        # NOTE(review): the bare except silently ignores any
                                        # failure while removing the previous drawing
                                        try:
                                            listLines[j].remove()
                                        except:
                                            pass

                                        hyperbox = drawbox(
                                            np.asmatrix(
                                                self.V[j,
                                                       0:np.minimum(xX, 3)]),
                                            np.asmatrix(
                                                self.W[j,
                                                       0:np.minimum(xX, 3)]),
                                            drawing_canvas, box_color)
                                        listLines[j] = hyperbox[0]
                                        self.delay()

                                    # only the first expandable candidate is adjusted
                                    break

                            # if i-th sample did not fit into any existing box, create a new one
                            if not adjust:
                                self.V = np.concatenate(
                                    (self.V, X_l[i].reshape(1, -1)), axis=0)
                                self.W = np.concatenate(
                                    (self.W, X_u[i].reshape(1, -1)), axis=0)
                                self.classId = np.concatenate(
                                    (self.classId, [classOfX]))

                                if self.isDraw:
                                    # handle drawing graph
                                    box_color = 'k'
                                    if self.classId[-1] < len(mark_col):
                                        box_color = mark_col[self.classId[-1]]

                                    hyperbox = drawbox(
                                        np.asmatrix(X_l[i,
                                                        0:np.minimum(xX, 3)]),
                                        np.asmatrix(X_u[i,
                                                        0:np.minimum(xX, 3)]),
                                        drawing_canvas, box_color)
                                    listLines.append(hyperbox[0])
                                    self.delay()

                            elif self.V.shape[0] > 1:
                                # the expanded hyperbox is checked against every hyperbox of a
                                # different class (or all of them when the winner is unlabeled)
                                for ii in range(self.V.shape[0]):
                                    if ii != indOfWinner and (
                                            self.classId[ii] !=
                                            self.classId[indOfWinner]
                                            or self.classId[indOfWinner]
                                            == UNLABELED_CLASS):
                                        caseDim = hyperboxOverlapTest(
                                            self.V, self.W, indOfWinner,
                                            ii)  # overlap test

                                        # a non-empty result triggers the contraction step
                                        if caseDim.size > 0:
                                            self.V, self.W = hyperboxContraction(
                                                self.V, self.W, caseDim, ii,
                                                indOfWinner)
                                            if self.isDraw:
                                                # Handle graph drawing
                                                boxii_color = boxwin_color = 'k'
                                                if self.classId[ii] < len(
                                                        mark_col):
                                                    boxii_color = mark_col[
                                                        self.classId[ii]]

                                                if self.classId[
                                                        indOfWinner] < len(
                                                            mark_col):
                                                    boxwin_color = mark_col[
                                                        self.
                                                        classId[indOfWinner]]

                                                # NOTE(review): bare except; if the first remove()
                                                # fails the second one is skipped as well
                                                try:
                                                    listLines[ii].remove()
                                                    listLines[
                                                        indOfWinner].remove()
                                                except:
                                                    pass

                                                hyperboxes = drawbox(
                                                    self.V[
                                                        [ii, indOfWinner],
                                                        0:np.minimum(xX, 3)],
                                                    self.W[
                                                        [ii, indOfWinner],
                                                        0:np.minimum(xX, 3)],
                                                    drawing_canvas, [
                                                        boxii_color,
                                                        boxwin_color
                                                    ])
                                                listLines[ii] = hyperboxes[0]
                                                listLines[
                                                    indOfWinner] = hyperboxes[
                                                        1]
                                                self.delay()

                    else:
                        # no hyperbox of this class (and no unlabeled one) exists yet:
                        # the sample becomes a new hyperbox
                        self.V = np.concatenate(
                            (self.V, X_l[i].reshape(1, -1)), axis=0)
                        self.W = np.concatenate(
                            (self.W, X_u[i].reshape(1, -1)), axis=0)
                        self.classId = np.concatenate(
                            (self.classId, [classOfX]))

                        if self.isDraw:
                            # handle drawing graph
                            box_color = 'k'
                            if self.classId[-1] < len(mark_col):
                                box_color = mark_col[self.classId[-1]]

                            hyperbox = drawbox(
                                np.asmatrix(X_l[i, 0:np.minimum(xX, 3)]),
                                np.asmatrix(X_u[i, 0:np.minimum(xX, 3)]),
                                drawing_canvas, box_color)
                            listLines.append(hyperbox[0])
                            self.delay()

            # shrink the maximum hyperbox size by 10% for the next pass
            teta = teta * 0.9
            if teta >= self.tMin:
                # re-evaluate on the training data to decide whether another pass is needed
                result = predict(self.V, self.W, self.classId, X_l, X_u,
                                 patClassId, self.gamma, self.oper)
                self.misclass = result.summis

        # Draw last result


#        if self.isDraw == True:
#            # Handle drawing graph
#            drawing_canvas.cla()
#            color_ = np.empty(len(self.classId), dtype = object)
#            for c in range(len(self.classId)):
#                color_[c] = mark_col[self.classId[c]]
#
#            drawbox(self.V[:, 0:np.minimum(xX, 3)], self.W[:, 0:np.minimum(xX, 3)], drawing_canvas, color_)
#            self.delay()
#
#        if self.isDraw:
#            plt.show()

        time_end = time.perf_counter()
        self.elapsed_training_time = time_end - time_start

        return self
Ejemplo n.º 6
0
    def training(self, X_tr, X_val, isDeleteContainedHyperbox=True):
        """
        Build the model by combining two chains of AccelBatchGFMM classifiers.

        In each of N = int(self.numClassifier / 2) + 1 rounds one classifier is
        fitted on the current training hyperboxes and one on the current
        validation hyperboxes, using a threshold that starts at self.bthres and
        decreases by (self.bthres - self.bthres_min) / N per round. Each
        classifier is scored on the data of the other chain, and its hyperboxes
        become the input of its own chain in the next round. The best classifier
        of each chain is kept; their hyperboxes are merged, optionally cleaned of
        contained hyperboxes, de-overlapped and finally re-trained with
        AccelBatchGFMM at threshold self.bthres_min.

        INPUT
            X_tr       An object contains training data with the Bunch datatype, its attributes:
                        + lower:    lower bounds
                        + upper:    upper bounds
                        + label:    class labels

            X_val      An object contains validation data with the Bunch datatype, its attributes:
                        + lower:    lower bounds
                        + upper:    upper bounds
                        + label:    class labels
                    X_tr, X_val are used as given (no normalization is applied here)

            isDeleteContainedHyperbox   Whether self.removeContainedHyperboxes() is called on the merged model

        OUTPUT
            self, with V, W, classId, cardin, clusters and numHyperboxes updated
        """
        V_train = X_tr.lower
        W_train = X_tr.upper
        classId_train = X_tr.label

        V_val = X_val.lower
        W_val = X_val.upper
        classId_val = X_val.label

        bthres = self.bthres
        self.numHyperboxes = 0

        N = int(self.numClassifier / 2) + 1
        delta_thres = (self.bthres - self.bthres_min) / N

        # sentinels above any achievable error rate
        minEr_Tr = 2
        minEr_Val = 2
        opt_Tr = None
        opt_Val = None

        for k in range(N):
            classifier_Tr = AccelBatchGFMM(self.gamma, self.teta, bthres,
                                           self.simil, self.sing, False,
                                           self.oper, False)
            classifier_Tr.fit(V_train, W_train, classId_train)

            classifier_Val = AccelBatchGFMM(self.gamma, self.teta, bthres,
                                            self.simil, self.sing, False,
                                            self.oper, False)
            classifier_Val.fit(V_val, W_val, classId_val)

            # cross evaluation: each model is tested on the other chain's data
            rest_Tr = predict(classifier_Tr.V, classifier_Tr.W,
                              classifier_Tr.classId, V_val, W_val, classId_val,
                              self.gamma, self.oper)
            rest_Val = predict(classifier_Val.V, classifier_Val.W,
                               classifier_Val.classId, V_train, W_train,
                               classId_train, self.gamma, self.oper)

            # Error rate = misclassified items / items actually evaluated.
            # (Previously the count was divided by the number of hyperboxes of the
            # other freshly fitted classifier, which is not the evaluated set.)
            err_Tr = rest_Tr.summis / len(classId_val)
            err_Val = rest_Val.summis / len(classId_train)

            if err_Tr < minEr_Tr:
                minEr_Tr = err_Tr
                opt_Tr = classifier_Tr

            if err_Val < minEr_Val:
                minEr_Val = err_Val
                opt_Val = classifier_Val

            # the hyperboxes produced in this round feed the next round
            V_train = classifier_Tr.V
            W_train = classifier_Tr.W
            classId_train = classifier_Tr.classId

            V_val = classifier_Val.V
            W_val = classifier_Val.W
            classId_val = classifier_Val.classId

            bthres = bthres - delta_thres

        # merge the best model of each chain
        self.V = np.concatenate((opt_Tr.V, opt_Val.V), axis=0)
        self.W = np.concatenate((opt_Tr.W, opt_Val.W), axis=0)
        self.classId = np.concatenate((opt_Tr.classId, opt_Val.classId))

        if isDeleteContainedHyperbox == True:
            self.removeContainedHyperboxes()

        self.overlapResolve()

        # training using AGGLO-2
        combClassifier = AccelBatchGFMM(self.gamma, self.teta, self.bthres_min,
                                        self.simil, self.sing, False,
                                        self.oper, False)
        combClassifier.fit(self.V, self.W, self.classId)

        self.V = combClassifier.V
        self.W = combClassifier.W
        self.classId = combClassifier.classId
        self.cardin = combClassifier.cardin
        self.clusters = combClassifier.clusters
        self.numHyperboxes = len(self.classId)

        return self
Ejemplo n.º 7
0
    def pruning_val(self,
                    XlT,
                    XuT,
                    patClassIdTest,
                    accuracy_threshold=0.5,
                    newVerPredict=True):
        """
        pruning handling based on validation (validation routine) with hyperboxes stored in self. V, W, classId
    
          result = pruning_val(XlT,XuT,patClassIdTest)
    
            INPUT
              XlT               Test data lower bounds (rows = objects, columns = features)
              XuT               Test data upper bounds (rows = objects, columns = features)
              patClassIdTest    Test data class labels (crisp)
              accuracy_threshold  The minimum accuracy for each hyperbox
              newVerPredict     + True: using Manhattan distance in addition to fuzzy membership
                                + False: No using Manhattan distance
        """

        #initialization
        yX = XlT.shape[0]
        mem = np.zeros((yX, self.V.shape[0]))
        no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))
        # classifications
        for i in range(yX):
            mem[i, :] = memberG(
                XlT[i, :], XuT[i, :], self.V, self.W, self.gamma,
                self.oper)  # calculate memberships for all hyperboxes
            bmax = mem[i, :].max()  # get max membership value
            maxVind = np.nonzero(mem[i, :] == bmax)[
                0]  # get indexes of all hyperboxes with max membership

            if len(maxVind) == 1:
                # Only one hyperbox with the highest membership function

                if self.classId[maxVind[0]] == patClassIdTest[i]:
                    no_predicted_samples_hyperboxes[
                        maxVind[0],
                        0] = no_predicted_samples_hyperboxes[maxVind[0], 0] + 1
                else:
                    no_predicted_samples_hyperboxes[
                        maxVind[0],
                        1] = no_predicted_samples_hyperboxes[maxVind[0], 1] + 1
            else:
                # More than one hyperbox with highest membership => random choosing
                id_min = maxVind[np.random.randint(len(maxVind))]

                if self.classId[id_min] != patClassIdTest[
                        i] and patClassIdTest[i] != 0:
                    no_predicted_samples_hyperboxes[
                        id_min,
                        1] = no_predicted_samples_hyperboxes[id_min, 1] + 1
                else:
                    no_predicted_samples_hyperboxes[
                        id_min,
                        0] = no_predicted_samples_hyperboxes[id_min, 0] + 1

        # pruning handling based on the validation results
        tmp_no_box = no_predicted_samples_hyperboxes.shape[0]
        accuracy_larger_half = np.zeros(tmp_no_box).astype(np.bool)
        accuracy_larger_half_keep_nojoin = np.zeros(tmp_no_box).astype(np.bool)
        for i in range(tmp_no_box):
            if (no_predicted_samples_hyperboxes[i, 0] +
                    no_predicted_samples_hyperboxes[i, 1] !=
                    0) and no_predicted_samples_hyperboxes[i, 0] / (
                        no_predicted_samples_hyperboxes[i, 0] +
                        no_predicted_samples_hyperboxes[i, 1]
                    ) >= accuracy_threshold:
                accuracy_larger_half[i] = True
                accuracy_larger_half_keep_nojoin[i] = True
            if (no_predicted_samples_hyperboxes[i, 0] +
                    no_predicted_samples_hyperboxes[i, 1] == 0):
                accuracy_larger_half_keep_nojoin[i] = True

        # keep one hyperbox for class prunned all
        current_classes = np.unique(self.classId)
        class_tmp = self.classId[accuracy_larger_half]
        for c in current_classes:
            if c not in class_tmp:
                pos = np.nonzero(self.classId == c)[0]
                id_kept = np.random.randint(len(pos))
                # keep pos[id_kept]
                accuracy_larger_half[pos[id_kept]] = True

        V_prun_remove = self.V[accuracy_larger_half]
        W_prun_remove = self.W[accuracy_larger_half]
        classId_prun_remove = self.classId[accuracy_larger_half]

        W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
        V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
        classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]

        if newVerPredict == True:
            result_prun_remove = predict_with_manhattan(
                V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT,
                patClassIdTest, self.gamma, self.oper)
            result_prun_keep_nojoin = predict_with_manhattan(
                V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT,
                patClassIdTest, self.gamma, self.oper)
        else:
            result_prun_remove = predict(V_prun_remove, W_prun_remove,
                                         classId_prun_remove, XlT, XuT,
                                         patClassIdTest, self.gamma, self.oper)
            result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep,
                                              classId_prun_keep, XlT, XuT,
                                              patClassIdTest, self.gamma,
                                              self.oper)

        if (result_prun_remove.summis <= result_prun_keep_nojoin.summis):
            self.V = V_prun_remove
            self.W = W_prun_remove
            self.classId = classId_prun_remove
        else:
            self.V = V_prun_keep
            self.W = W_prun_keep
            self.classId = classId_prun_keep
Ejemplo n.º 8
0
 def pruning_val(self, XlT, XuT, patClassIdTest, accuracy_threshold = 0.5, newVerPredict = True):
     """
     Prune the hyperboxes stored in self (V, W, classId, counter) using a validation set.

     Every validation sample is attributed to one winning hyperbox (the one with the
     highest membership value; ties are broken as described in the code below) and is
     counted as a correct or a wrong prediction of that hyperbox. Two candidate sets of
     hyperboxes are then built:
       + "remove": hyperboxes whose accuracy on the validation set is >= accuracy_threshold
       + "keep":   the hyperboxes above plus those that did not win any validation sample
     For each candidate, a class that would lose all of its hyperboxes gets one of them
     (randomly chosen) back. The candidate with the smaller number of misclassified
     validation samples (ties go to "remove") replaces self.V, self.W, self.classId
     and self.counter.

       pruning_val(XlT, XuT, patClassIdTest)

         INPUT
           XlT               Test data lower bounds (rows = objects, columns = features)
           XuT               Test data upper bounds (rows = objects, columns = features)
           patClassIdTest    Test data class labels (crisp)
           accuracy_threshold  The minimum accuracy for each hyperbox
           newVerPredict     + True: using probability formula for prediction in addition to fuzzy membership
                             + False: No using probability formula for prediction

         OUTPUT
           Nothing is returned; the hyperboxes stored in self are updated in place.
     """

     # initialization
     yX = XlT.shape[0]
     # column 0: number of correct predictions, column 1: number of wrong predictions
     no_predicted_samples_hyperboxes = np.zeros((len(self.classId), 2))

     # classifications
     for i in range(yX):
         mem = memberG(XlT[i, :], XuT[i, :], self.V, self.W, self.gamma, self.oper) # calculate memberships for all hyperboxes
         bmax = mem.max()                                             # get max membership value
         maxVind = np.nonzero(mem == bmax)[0]                         # get indexes of all hyperboxes with max membership

         if len(maxVind) == 1:
             # Only one hyperbox with the highest membership function
             # NOTE(review): unlike the tie branch below, an unlabeled sample is counted
             # as a wrong prediction here - confirm this asymmetry is intended
             id_min = maxVind[0]
             if self.classId[id_min] == patClassIdTest[i]:
                 no_predicted_samples_hyperboxes[id_min, 0] += 1
             else:
                 no_predicted_samples_hyperboxes[id_min, 1] += 1
         else:
             id_min = None
             if newVerPredict:
                 cls_same_mem = np.unique(self.classId[maxVind])
                 if len(cls_same_mem) > 1:
                     if bmax == 1:
                         # full membership: prefer a tied hyperbox whose counter is 1
                         id_box_with_one_sample = np.nonzero(self.counter[maxVind] == 1)[0]
                         if len(id_box_with_one_sample) > 0:
                             id_min = random.choice(maxVind[id_box_with_one_sample])

                     if id_min is None:
                         # choose the class with the largest counter-weighted share of membership;
                         # an exact tie goes to the class with the larger total counter
                         sum_prod_denum = (mem[maxVind] * self.counter[maxVind]).sum()
                         max_prob = -1
                         pre_id_cls = None
                         for c in cls_same_mem:
                             id_cls = np.nonzero(self.classId[maxVind] == c)[0]
                             sum_pro_num = (mem[maxVind[id_cls]] * self.counter[maxVind[id_cls]]).sum()
                             tmp = sum_pro_num / sum_prod_denum

                             if tmp > max_prob or (tmp == max_prob and pre_id_cls is not None and self.counter[maxVind[id_cls]].sum() > self.counter[maxVind[pre_id_cls]].sum()):
                                 max_prob = tmp
                                 pre_id_cls = id_cls
                                 id_min = random.choice(maxVind[id_cls])

                         if id_min is None:
                             # no class was selected (e.g. a zero denominator makes every
                             # ratio NaN) => fall back to a random tied hyperbox
                             id_min = random.choice(maxVind)
                 else:
                     # all tied hyperboxes belong to the same class
                     id_min = random.choice(maxVind)
             else:
                 # More than one hyperbox with highest membership => random choosing
                 id_min = maxVind[np.random.randint(len(maxVind))]

             if self.classId[id_min] != patClassIdTest[i] and patClassIdTest[i] != UNLABELED_CLASS:
                 no_predicted_samples_hyperboxes[id_min, 1] += 1
             else:
                 no_predicted_samples_hyperboxes[id_min, 0] += 1

     # pruning handling based on the validation results
     no_correct = no_predicted_samples_hyperboxes[:, 0]
     no_total = no_predicted_samples_hyperboxes.sum(axis=1)
     is_used = no_total != 0                      # hyperboxes that won at least one sample
     # builtin bool instead of the np.bool alias, which was removed from NumPy
     accuracy_larger_half = np.zeros(len(no_total), dtype=bool)
     accuracy_larger_half[is_used] = (no_correct[is_used] / no_total[is_used]) >= accuracy_threshold
     accuracy_larger_half_keep_nojoin = accuracy_larger_half | ~is_used

     # keep one hyperbox for class prunned all
     current_classes = np.unique(self.classId)
     class_tmp = self.classId[accuracy_larger_half]
     class_tmp_keep = self.classId[accuracy_larger_half_keep_nojoin]
     for c in current_classes:
         if c not in class_tmp:
             # [0] extracts the index array from the tuple returned by np.nonzero, so
             # exactly one randomly chosen hyperbox of the class is kept
             pos = np.nonzero(self.classId == c)[0]
             id_kept = np.random.randint(len(pos))
             accuracy_larger_half[pos[id_kept]] = True
         if c not in class_tmp_keep:
             pos = np.nonzero(self.classId == c)[0]
             id_kept = np.random.randint(len(pos))
             accuracy_larger_half_keep_nojoin[pos[id_kept]] = True

     V_prun_remove = self.V[accuracy_larger_half]
     W_prun_remove = self.W[accuracy_larger_half]
     classId_prun_remove = self.classId[accuracy_larger_half]
     numSample_prun_remove = self.counter[accuracy_larger_half]

     V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
     W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
     classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]
     # sample counters must come from self.counter (not self.classId)
     numSample_prun_keep = self.counter[accuracy_larger_half_keep_nojoin]

     if newVerPredict:
         result_prun_remove = predict_with_probability(V_prun_remove, W_prun_remove, classId_prun_remove, numSample_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
         result_prun_keep_nojoin = predict_with_probability(V_prun_keep, W_prun_keep, classId_prun_keep, numSample_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)
     else:
         result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
         result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)

     # adopt the candidate with fewer misclassified validation samples
     if (result_prun_remove.summis <= result_prun_keep_nojoin.summis):
         self.V = V_prun_remove
         self.W = W_prun_remove
         self.classId = classId_prun_remove
         self.counter = numSample_prun_remove
     else:
         self.V = V_prun_keep
         self.W = W_prun_keep
         self.classId = classId_prun_keep
         self.counter = numSample_prun_keep
Ejemplo n.º 9
0
                id_kept = np.random.randint(len(pos))
                accuracy_larger_half_keep_nojoin[pos[id_kept]] = True
        
        V_prun_remove = self.V[accuracy_larger_half]
        W_prun_remove = self.W[accuracy_larger_half]
        classId_prun_remove = self.classId[accuracy_larger_half]
        
        W_prun_keep = self.W[accuracy_larger_half_keep_nojoin]
        V_prun_keep = self.V[accuracy_larger_half_keep_nojoin]
        classId_prun_keep = self.classId[accuracy_larger_half_keep_nojoin]
        
        if newVerPredict == True:
            result_prun_remove = predict_with_manhattan(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
            result_prun_keep_nojoin = predict_with_manhattan(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)
        else:
            result_prun_remove = predict(V_prun_remove, W_prun_remove, classId_prun_remove, XlT, XuT, patClassIdTest, self.gamma, self.oper)
            result_prun_keep_nojoin = predict(V_prun_keep, W_prun_keep, classId_prun_keep, XlT, XuT, patClassIdTest, self.gamma, self.oper)
        
        if (result_prun_remove.summis <= result_prun_keep_nojoin.summis):
            self.V = V_prun_remove
            self.W = W_prun_remove
            self.classId = classId_prun_remove
        else:
            self.V = V_prun_keep
            self.W = W_prun_keep
            self.classId = classId_prun_keep


if __name__ == '__main__':
    """
    INPUT parameters from command line
    def training(self, X_tr, X_val):
        """
        Build self.numClassifier base classifiers from one training part and one validation part.

        In every round an AccelBatchGFMM model is fitted on the current training
        data and another one on the current validation data. Each model is
        evaluated on the data of the other part, and the one with the lower
        error value is stored in self.baseClassifiers. The hyperboxes of both
        fitted models become the input data of the next round, and the
        threshold passed to AccelBatchGFMM is lowered by
        (self.bthres - self.bthres_min) / self.numClassifier after each round.
        self.numHyperboxes accumulates the number of hyperboxes of the stored
        classifiers.

        INPUT
            X_tr       Training data (Bunch datatype) with the attributes:
                        + lower:    lower bounds
                        + upper:    upper bounds
                        + label:    class labels

            X_val      Validation data (Bunch datatype) with the attributes:
                        + lower:    lower bounds
                        + upper:    upper bounds
                        + label:    class labels
                    X_tr, X_val should be normalized (if needed) before calling this function

        OUTPUT
            self.baseClassifiers, holding the classifier selected in each round
        """
        tr_lower, tr_upper, tr_label = X_tr.lower, X_tr.upper, X_tr.label
        val_lower, val_upper, val_label = X_val.lower, X_val.upper, X_val.label

        step = (self.bthres - self.bthres_min) / self.numClassifier
        cur_bthres = self.bthres
        self.numHyperboxes = 0

        for k in range(self.numClassifier):
            # model learned from the training part
            model_tr = AccelBatchGFMM(
                self.gamma, self.teta, cur_bthres, self.simil, self.sing,
                False, self.oper, False)
            model_tr.fit(tr_lower, tr_upper, tr_label)

            # model learned from the validation part
            model_val = AccelBatchGFMM(
                self.gamma, self.teta, cur_bthres, self.simil, self.sing,
                False, self.oper, False)
            model_val.fit(val_lower, val_upper, val_label)

            # cross evaluation: each model predicts the data of the other part
            res_tr = predict(model_tr.V, model_tr.W, model_tr.classId,
                             val_lower, val_upper, val_label,
                             self.gamma, self.oper)
            res_val = predict(model_val.V, model_val.W, model_val.classId,
                              tr_lower, tr_upper, tr_label,
                              self.gamma, self.oper)

            # NOTE(review): the denominators are the hyperbox counts of the
            # other model, not the number of predicted samples - confirm
            err_tr = res_tr.summis / len(model_val.classId)
            err_val = res_val.summis / len(model_tr.classId)

            # the validation model wins when the errors are equal
            chosen = model_tr if err_tr < err_val else model_val
            self.baseClassifiers[k] = chosen
            self.numHyperboxes += len(self.baseClassifiers[k].classId)

            # hyperboxes of this round are the input data of the next round
            tr_lower, tr_upper, tr_label = (
                model_tr.V, model_tr.W, model_tr.classId)
            val_lower, val_upper, val_label = (
                model_val.V, model_val.W, model_val.classId)

            cur_bthres = cur_bthres - step

        return self.baseClassifiers