예제 #1
0
    def training(self, partitionedXtr):
        """
        Pick the best base classifier among self.numFold candidates, one trained per data part.

        For every index k, an AccelBatchGFMM model is fitted on part k alone and
        then validated on the concatenation of all the other parts. The model
        with the lowest validation error rate is returned.

        INPUT
            partitionedXtr      Indexable collection with self.numFold parts; every part exposes:
                                + lower:    lower bounds
                                + upper:    upper bounds
                                + label:    class labels
                                partitionedXtr should be normalized (if needed) before calling this method

        OUTPUT
            baseClassifier      The fitted AccelBatchGFMM with the smallest validation error rate
                                (None if no candidate had an error rate below the initial bound of 2)

        RAISES
            ValueError          When self.numFold < 2, because no part would be left for validation
        """
        if self.numFold < 2:
            raise ValueError(
                "training() needs at least 2 folds to build a validation set, "
                "got numFold = {}".format(self.numFold))

        baseClassifier = None
        # Initial bound; summis / number of samples is expected to stay <= 1
        minEr = 2
        for k in range(self.numFold):
            # The two positional False values are presumably isDraw and isNorm
            # -- TODO confirm against the AccelBatchGFMM constructor
            classifier_tmp = AccelBatchGFMM(self.gamma, self.teta, self.bthres,
                                            self.simil, self.sing, False,
                                            self.oper, False)
            classifier_tmp.fit(partitionedXtr[k].lower,
                               partitionedXtr[k].upper,
                               partitionedXtr[k].label)

            # The validation set is every part except the one used for fitting,
            # stacked in index order with a single concatenate per attribute
            held_out = [
                partitionedXtr[l] for l in range(self.numFold) if l != k
            ]
            lower_valid = np.concatenate([part.lower for part in held_out],
                                         axis=0)
            upper_valid = np.concatenate([part.upper for part in held_out],
                                         axis=0)
            label_valid = np.concatenate([part.label for part in held_out])

            # validate the trained model
            rest = predict(classifier_tmp.V, classifier_tmp.W,
                           classifier_tmp.classId, lower_valid, upper_valid,
                           label_valid, self.gamma, self.oper)
            er = rest.summis / len(label_valid)

            # Strict comparison: on ties the earliest candidate is kept
            if er < minEr:
                minEr = er
                baseClassifier = classifier_tmp

        return baseClassifier
    def fit(self, Xl_onl, Xu_onl, patClassId_onl, Xl_off, Xu_off,
            patClassId_off):
        """
        Train one agglomerative and one online GFMM model and store both on the instance.

        The two models are trained independently of each other: the
        agglomerative model only sees the *_off data and the online model
        only sees the *_onl data. Input data need to be normalized before
        using this function (both learners are created with isNorm=False).

        Xl_onl              Input data lower bounds (rows = objects, columns = features) for online learning
        Xu_onl              Input data upper bounds (rows = objects, columns = features) for online learning
        patClassId_onl      Input data class labels (crisp) for online learning

        Xl_off              Input data lower bounds (rows = objects, columns = features) for agglomerative learning
        Xu_off              Input data upper bounds (rows = objects, columns = features) for agglomerative learning
        patClassId_off      Input data class labels (crisp) for agglomerative learning

        Sets self.offClassifier and self.onlClassifier (Bunch objects with the
        fields V, W and classId) and self.elapsed_training_time, then returns self.
        """
        # time.clock() no longer exists since Python 3.8; perf_counter() is
        # the documented replacement for measuring elapsed time
        time_start = time.perf_counter()

        # Perform agglomerative learning
        aggloClassifier = AccelBatchGFMM(self.gamma,
                                         self.teta_agglo,
                                         bthres=self.bthres,
                                         simil=self.simil,
                                         sing=self.sing,
                                         isDraw=self.isDraw,
                                         oper=self.oper,
                                         isNorm=False)
        aggloClassifier.fit(Xl_off, Xu_off, patClassId_off)
        # Keep only the learned hyperboxes, not the whole learner object
        self.offClassifier = Bunch(V=aggloClassifier.V,
                                   W=aggloClassifier.W,
                                   classId=aggloClassifier.classId)

        # Perform online learning (self.teta_onl is deliberately passed for
        # both the second and the third positional parameter, as before)
        onlClassifier = OnlineGFMM(self.gamma,
                                   self.teta_onl,
                                   self.teta_onl,
                                   isDraw=self.isDraw,
                                   oper=self.oper,
                                   isNorm=False,
                                   norm_range=[self.loLim, self.hiLim])
        onlClassifier.fit(Xl_onl, Xu_onl, patClassId_onl)
        self.onlClassifier = Bunch(V=onlClassifier.V,
                                   W=onlClassifier.W,
                                   classId=onlClassifier.classId)

        # Elapsed time in fractional seconds, covering both learners
        self.elapsed_training_time = time.perf_counter() - time_start

        return self
예제 #3
0
 def fit(self, Xl_onl, Xu_onl, patClassId_onl, Xl_off, Xu_off, patClassId_off, typeOfAgglo = 1):
     """
     Run agglomerative learning first, then online learning seeded with its hyperboxes.

     The input data need to be normalized before using this function
     (both learners are created with isNorm = False).

     Xl_onl              Input data lower bounds (rows = objects, columns = features) for online learning
     Xu_onl              Input data upper bounds (rows = objects, columns = features) for online learning
     patClassId_onl      Input data class labels (crisp) for online learning

     Xl_off              Input data lower bounds (rows = objects, columns = features) for agglomerative learning
     Xu_off              Input data upper bounds (rows = objects, columns = features) for agglomerative learning
     patClassId_off      Input data class labels (crisp) for agglomerative learning

     typeOfAgglo         The used type of agglomerative learning algorithms
                          + 1: AccelBatchGFMM
                          + 2: BatchGFMMV2
                          + any other value: BatchGFMMV1

     Updates self.V, self.W, self.classId and self.elapsed_training_time and returns self.
     """
     # time.clock() no longer exists since Python 3.8; perf_counter() is
     # the documented replacement for measuring elapsed time
     time_start = time.perf_counter()

     # Perform agglomerative learning: all three learners take the same
     # arguments, so only the class differs
     if typeOfAgglo == 1:
         aggloLearner = AccelBatchGFMM
     elif typeOfAgglo == 2:
         aggloLearner = BatchGFMMV2
     else:
         aggloLearner = BatchGFMMV1

     aggloClassifier = aggloLearner(self.gamma, self.teta_agglo, bthres = self.bthres, simil = self.simil, sing = self.sing, isDraw = self.isDraw, oper = self.oper, isNorm = False)
     aggloClassifier.fit(Xl_off, Xu_off, patClassId_off)

     self.V = aggloClassifier.V
     self.W = aggloClassifier.W
     self.classId = aggloClassifier.classId

     # Perform online learning, handing over the hyperboxes produced above
     onlClassifier = OnlineGFMM(self.gamma, self.teta_onl, self.teta_onl, isDraw = self.isDraw, oper = self.oper, isNorm = False, norm_range = [self.loLim, self.hiLim], V = self.V, W = self.W, classId = self.classId)
     # training for online GFMM
     onlClassifier.fit(Xl_onl, Xu_onl, patClassId_onl)

     self.V = onlClassifier.V
     self.W = onlClassifier.W
     self.classId = onlClassifier.classId

     # Elapsed time in fractional seconds, covering both learning phases
     self.elapsed_training_time = time.perf_counter() - time_start

     return self
def running_agglo_2(xTr,
                    patClassIdTr,
                    xVal,
                    patClassIdVal,
                    teta=0.26,
                    sigma=0.5,
                    simil='short'):
    """
    Train an AccelBatchGFMM model on xTr and return its error rate on xVal.

    The same matrix is passed as lower and as upper bound to both fit() and
    predict(), i.e. the samples are treated as points. The classifier is
    created with isNorm=False, so the data are used as given.

    xTr             Training samples
    patClassIdTr    Class labels of the training samples
    xVal            Validation samples
    patClassIdVal   Class labels of the validation samples
    teta            Value forwarded as the classifier's teta parameter
    sigma           Value forwarded as the classifier's bthres parameter
    simil           Value forwarded as the classifier's simil parameter

    Returns result.summis / len(patClassIdVal), or 1 when predict() returns None.
    """
    accelClassifier = AccelBatchGFMM(gamma=1,
                                     teta=teta,
                                     bthres=sigma,
                                     simil=simil,
                                     sing='max',
                                     isDraw=False,
                                     oper='min',
                                     isNorm=False)
    accelClassifier.fit(xTr, xTr, patClassIdTr)

    result = accelClassifier.predict(xVal, xVal, patClassIdVal)
    # Identity check: "!= None" can be overloaded (or be elementwise) on the
    # result object, "is None" cannot
    if result is None:
        # No prediction available: report the worst possible error rate
        return 1

    return result.summis / len(patClassIdVal)
예제 #5
0
    def training(self, X_tr, X_val, isDeleteContainedHyperbox=True):
        """
        Build one combined hyperbox model from two data sets by repeated agglomerative learning.

        The method runs N = int(self.numClassifier / 2) + 1 rounds. In every
        round one AccelBatchGFMM model is fitted on the current "training"
        boxes and another one on the current "validation" boxes, both with the
        current similarity threshold. Each model is scored on the boxes of the
        opposite side, and the best-scoring model of each side over all rounds
        is remembered. After a round, the hyperboxes just produced become the
        input of the next round and the threshold is lowered by
        (self.bthres - self.bthres_min) / N.

        Finally the hyperboxes of the two best models are merged, optionally
        cleaned with self.removeContainedHyperboxes(), passed through
        self.overlapResolve() and agglomerated once more using self.bthres_min.

        INPUT
            X_tr       An object contains training data with the Bunch datatype, its attributes:
                        + lower:    lower bounds
                        + upper:    upper bounds
                        + label:    class labels

            X_val      An object contains validation data with the Bunch datatype, its attributes:
                        + lower:    lower bounds
                        + upper:    upper bounds
                        + label:    class labels
                    X_tr, X_val should be normalized (if needed) before calling this method

            isDeleteContainedHyperbox   Identify if hyperboxes contained in other hyperboxes are discarded or not?

        OUTPUT
            self, with V, W, classId, cardin, clusters and numHyperboxes updated
        """
        V_train = X_tr.lower
        W_train = X_tr.upper
        classId_train = X_tr.label

        V_val = X_val.lower
        W_val = X_val.upper
        classId_val = X_val.label

        bthres = self.bthres
        self.numHyperboxes = 0

        N = int(self.numClassifier / 2) + 1
        delta_thres = (self.bthres - self.bthres_min) / N

        # Initial bounds; an error rate (summis / number of samples) is
        # expected to stay <= 1, so the first round always sets the optimum
        minEr_Tr = 2
        minEr_Val = 2
        opt_Tr = None
        opt_Val = None

        for k in range(N):
            # The two positional False values are presumably isDraw and isNorm
            # -- TODO confirm against the AccelBatchGFMM constructor
            classifier_Tr = AccelBatchGFMM(self.gamma, self.teta, bthres,
                                           self.simil, self.sing, False,
                                           self.oper, False)
            classifier_Tr.fit(V_train, W_train, classId_train)

            classifier_Val = AccelBatchGFMM(self.gamma, self.teta, bthres,
                                            self.simil, self.sing, False,
                                            self.oper, False)
            classifier_Val.fit(V_val, W_val, classId_val)

            # Cross evaluation: each model is tested on the other side's boxes
            rest_Tr = predict(classifier_Tr.V, classifier_Tr.W,
                              classifier_Tr.classId, V_val, W_val, classId_val,
                              self.gamma, self.oper)
            rest_Val = predict(classifier_Val.V, classifier_Val.W,
                               classifier_Val.classId, V_train, W_train,
                               classId_train, self.gamma, self.oper)

            # Normalise by the number of samples that were actually predicted.
            # Dividing by the hyperbox count of the other model (as was done
            # before) can yield values above the initial bound of 2, so that
            # no optimum is ever selected.
            err_Tr = rest_Tr.summis / len(classId_val)
            err_Val = rest_Val.summis / len(classId_train)

            if err_Tr < minEr_Tr:
                minEr_Tr = err_Tr
                opt_Tr = classifier_Tr

            if err_Val < minEr_Val:
                minEr_Val = err_Val
                opt_Val = classifier_Val

            # The hyperboxes of this round are the input of the next round
            V_train = classifier_Tr.V
            W_train = classifier_Tr.W
            classId_train = classifier_Tr.classId

            V_val = classifier_Val.V
            W_val = classifier_Val.W
            classId_val = classifier_Val.classId

            bthres = bthres - delta_thres

        # Merge the hyperboxes of the best model of each side
        self.V = np.concatenate((opt_Tr.V, opt_Val.V), axis=0)
        self.W = np.concatenate((opt_Tr.W, opt_Val.W), axis=0)
        self.classId = np.concatenate((opt_Tr.classId, opt_Val.classId))

        if isDeleteContainedHyperbox:
            self.removeContainedHyperboxes()

        self.overlapResolve()

        # training using AGGLO-2
        combClassifier = AccelBatchGFMM(self.gamma, self.teta, self.bthres_min,
                                        self.simil, self.sing, False,
                                        self.oper, False)
        combClassifier.fit(self.V, self.W, self.classId)

        self.V = combClassifier.V
        self.W = combClassifier.W
        self.classId = combClassifier.classId
        self.cardin = combClassifier.cardin
        self.clusters = combClassifier.clusters
        self.numHyperboxes = len(self.classId)

        return self
예제 #6
0
    def fit(self, X_l, X_u, patClassId, typeOfAgglo=1):
        """
        Run online learning on the input data, then agglomerate the resulting hyperboxes.

        X_l             Input data lower bounds (rows = objects, columns = features)
        X_u             Input data upper bounds (rows = objects, columns = features)
        patClassId      Input data class labels (crisp)
        typeOfAgglo     Type of agglomerative learning
                         + 1: AccelBatchGFMM (accelerated agglomerative learning AGGLO-2)
                         + 2: BatchGFMMV2 (described by the original author as the slower full batch version)
                         + any other value: BatchGFMMV1 (described by the original author as the faster full batch version)

        Updates self.V, self.W, self.classId, self.num_hyperbox_after_online,
        self.num_hyperbox_after_agglo and self.elapsed_training_time, and
        returns self. Preprocessing (when self.isNorm is set) is not included
        in the measured time.
        """
        if self.isNorm:
            X_l, X_u = self.dataPreprocessing(X_l, X_u)

        # time.clock() no longer exists since Python 3.8; perf_counter() is
        # the documented replacement for measuring elapsed time
        time_start = time.perf_counter()

        # Perform online learning, seeded with the hyperboxes already stored
        # on this instance
        onlClassifier = OnlineGFMM(self.gamma,
                                   self.teta_onl,
                                   self.teta_onl,
                                   isDraw=self.isDraw,
                                   oper=self.oper,
                                   isNorm=False,
                                   norm_range=[self.loLim, self.hiLim],
                                   V=self.V,
                                   W=self.W,
                                   classId=self.classId)
        # training for online GFMM
        onlClassifier.fit(X_l, X_u, patClassId)

        self.V = onlClassifier.V
        self.W = onlClassifier.W
        self.classId = onlClassifier.classId
        self.num_hyperbox_after_online = len(self.classId)

        # Perform agglomerative learning: all three learners take the same
        # arguments, so only the class differs
        if typeOfAgglo == 1:
            aggloLearner = AccelBatchGFMM
        elif typeOfAgglo == 2:
            aggloLearner = BatchGFMMV2
        else:
            aggloLearner = BatchGFMMV1

        aggloClassifier = aggloLearner(self.gamma,
                                       self.teta_agglo,
                                       bthres=self.bthres,
                                       simil=self.simil,
                                       sing=self.sing,
                                       isDraw=self.isDraw,
                                       oper=self.oper,
                                       isNorm=False)

        # The hyperboxes from the online phase are the input "samples" here
        aggloClassifier.fit(self.V, self.W, self.classId)

        self.V = aggloClassifier.V
        self.W = aggloClassifier.W
        self.classId = aggloClassifier.classId
        self.num_hyperbox_after_agglo = len(self.classId)

        # Elapsed time in fractional seconds, covering both learning phases
        self.elapsed_training_time = time.perf_counter() - time_start

        return self
        # Empty accumulators for the "midmin" results; they are not filled in
        # the part of the script visible here
        numhyperbox_midmin_si_save = np.array([], dtype=np.int64)
        training_time_midmin_si_save = np.array([])
        testing_error_midmin_si_save = np.array([])

        # Sweep the similarity threshold from 0.02 towards 1 (exclusive) in
        # steps of 0.02 and record time, model size and test error per value.
        # The *_save arrays, teta and the data sets are defined outside this
        # fragment.
        for simil_thres in np.arange(0.02, 1, 0.02):
            simil_save = np.append(simil_save, simil_thres)
            accelClassifier = AccelBatchGFMM(gamma=1,
                                             teta=teta,
                                             bthres=simil_thres,
                                             simil='short',
                                             sing='max',
                                             isDraw=False,
                                             oper='min',
                                             isNorm=False)
            # The same matrix is passed as lower and as upper bound
            accelClassifier.fit(Xtr, Xtr, patClassIdTr)

            training_time_short_si_save = np.append(
                training_time_short_si_save,
                accelClassifier.elapsed_training_time)
            numhyperbox_short_si_save = np.append(numhyperbox_short_si_save,
                                                  len(accelClassifier.classId))

            result = accelClassifier.predict(Xtest, Xtest, patClassIdTest)
            # NOTE(review): nothing is appended when predict() returns None,
            # so the error array can end up shorter than the other arrays;
            # "is not None" would also be the idiomatic comparison
            if result != None:
                numTestSample = Xtest.shape[0]
                err = result.summis / numTestSample
                testing_error_short_si_save = np.append(
                    testing_error_short_si_save, err)
        # Sweep the similarity threshold from 0.02 towards 1 (exclusive) in
        # steps of 0.02. olnClassifier, numTestSample, teta, the *_save arrays
        # and the data sets are defined outside this fragment.
        for simil_thres in np.arange(0.02, 1, 0.02):
            # Test error of olnClassifier; this call does not depend on
            # simil_thres, yet it is repeated for every threshold value
            result = olnClassifier.predict(Xtest, Xtest, patClassIdTest)
            if result != None:
                err = result.summis / numTestSample
                testing_error_online_save = np.append(
                    testing_error_online_save, err)

            #  Do accelerated learning
            accelClassifier = AccelBatchGFMM(gamma=1,
                                             teta=teta,
                                             bthres=simil_thres,
                                             simil='short',
                                             sing='max',
                                             isDraw=False,
                                             oper='min',
                                             isNorm=False)
            # The same matrix is passed as lower and as upper bound
            accelClassifier.fit(Xtr_time_i, Xtr_time_i, pathClassIdTr_time_i)

            training_time_agglo_save = np.append(
                training_time_agglo_save,
                accelClassifier.elapsed_training_time)
            numhyperbox_agglo_save = np.append(numhyperbox_agglo_save,
                                               len(accelClassifier.classId))

            result = accelClassifier.predict(Xtest, Xtest, patClassIdTest)
            # NOTE(review): nothing is appended when predict() returns None,
            # so the error array can end up shorter than the other arrays
            if result != None:
                err = result.summis / numTestSample
                testing_error_agglo_save = np.append(testing_error_agglo_save,
                                                     err)
            # Do online training before agglo
            olnAggloClassifier = OnlineAggloGFMM(gamma=1,
     
     # Time only the pruning step of olnClassifier (defined outside this
     # fragment); it is added to the classifier's own training time below
     start_t = time.perf_counter()
     olnClassifier.pruning_val(validationData, validationData, validationLabel)
     end_t = time.perf_counter()
     
     training_time_online_gfmm_save = np.append(training_time_online_gfmm_save, olnClassifier.elapsed_training_time + (end_t - start_t))
     numhyperbox_online_gfmm_save = np.append(numhyperbox_online_gfmm_save, len(olnClassifier.classId))            
         
     result = olnClassifier.predict(testingData, testingData, testingLabel)
     # NOTE(review): nothing is appended when predict() returns None, so the
     # error array can end up shorter than the other arrays
     if result != None:
         # Test error in percent, rounded to 3 decimals
         err = np.round(result.summis / numTestSample * 100, 3)
         testing_error_online_gfmm_save = np.append(testing_error_online_gfmm_save, err)
     
     # agglo-2
     accelClassifier = AccelBatchGFMM(gamma = 1, teta = tetaAGGLO2, bthres = 0, simil = 'long', sing = 'min', isDraw = False, oper = 'min', isNorm = False)
     accelClassifier.fit(trainingData, trainingData, trainingLabel)
     
     # Model size and test error are recorded before pruning first
     numhyperbox_before_prun_accel_agglo_save = np.append(numhyperbox_before_prun_accel_agglo_save, len(accelClassifier.classId))
      
     result = accelClassifier.predict(testingData, testingData, testingLabel)
 
     if result != None:
         # Test error in percent, rounded to 3 decimals
         err = np.round(result.summis / numTestSample * 100, 3)
         testing_error_before_prun_accel_agglo_save = np.append(testing_error_before_prun_accel_agglo_save, err)                
     
     # Time the pruning step separately and add it to the training time
     start_t = time.perf_counter()
     accelClassifier.pruning_val(validationData, validationData, validationLabel)
     end_t = time.perf_counter()
     
     training_time_accel_agglo_save = np.append(training_time_accel_agglo_save, accelClassifier.elapsed_training_time + (end_t - start_t))
     numhyperbox_accel_agglo_save = np.append(numhyperbox_accel_agglo_save, len(accelClassifier.classId))
    def fit(self,
            X_l,
            X_u,
            patClassId,
            typeOfSplitting=1,
            isRemoveContainedHyperboxes=True):
        """
        Train self.numClassifier base models on random splits and merge them into one model.

        For every base model the data are split anew into self.numFold parts and
        self.training() selects one classifier from that split. The hyperboxes
        of all selected classifiers are pooled, optionally cleaned with
        self.removeContainedHyperboxes(), passed through self.overlapResolve()
        and finally agglomerated with AccelBatchGFMM. This method is used when
        the input data are not partitioned into k parts.

        INPUT
                X_l                 Input data lower bounds (rows = objects, columns = features)
                X_u                 Input data upper bounds (rows = objects, columns = features)
                patClassId          Input data class labels (crisp)
                typeOfSplitting     The way of splitting datasets
                                        + 1: random split on whole dataset - do not care the classes
                                        + otherwise: random split according to each class label
                isRemoveContainedHyperboxes:  Identify if hyperboxes contained in other hyperboxes are discarded or not?

        OUTPUT
                self, with V, W, classId, cardin, clusters, numHyperboxes and
                elapsed_training_time updated. Preprocessing is not included
                in the measured time.
        """

        X_l, X_u = self.dataPreprocessing(X_l, X_u)

        # time.clock() no longer exists since Python 3.8; perf_counter() is
        # the documented replacement for measuring elapsed time
        time_start = time.perf_counter()

        # The splitting strategy does not change between base models
        if typeOfSplitting == 1:
            splitData = splitDatasetRndToKPart
        else:
            splitData = splitDatasetRndClassBasedToKPart

        for i in range(self.numClassifier):
            partitionedXtr = splitData(X_l, X_u, patClassId, self.numFold)

            predictor = self.training(partitionedXtr)

            if i == 0:
                # The first base model replaces whatever was stored before
                self.V = predictor.V
                self.W = predictor.W
                self.classId = predictor.classId
                self.cardin = predictor.cardin
                self.clusters = predictor.clusters
            else:
                # Later base models are appended to the pool of hyperboxes
                self.V = np.concatenate((self.V, predictor.V), axis=0)
                self.W = np.concatenate((self.W, predictor.W), axis=0)
                self.classId = np.concatenate(
                    (self.classId, predictor.classId))
                self.cardin = np.concatenate((self.cardin, predictor.cardin))
                self.clusters = np.concatenate(
                    (self.clusters, predictor.clusters))

        if isRemoveContainedHyperboxes:
            self.removeContainedHyperboxes()

        self.overlapResolve()

        # training using AGGLO-2
        combClassifier = AccelBatchGFMM(self.gamma, self.teta, self.bthres,
                                        self.simil, self.sing, False,
                                        self.oper, False)
        combClassifier.fit(self.V, self.W, self.classId)

        self.V = combClassifier.V
        self.W = combClassifier.W
        self.classId = combClassifier.classId
        self.cardin = combClassifier.cardin
        self.clusters = combClassifier.clusters
        self.numHyperboxes = len(self.classId)

        # Elapsed time in fractional seconds
        self.elapsed_training_time = time.perf_counter() - time_start

        return self
    def training(self, X_tr, X_val):
        """
        Fill self.baseClassifiers with self.numClassifier models learned at decreasing thresholds.

        In every round one AccelBatchGFMM model is fitted on the current
        "training" boxes and another one on the current "validation" boxes,
        both with the current similarity threshold. Each model is scored on the
        boxes of the opposite side and the one with the lower error rate is
        stored as base classifier k (on ties the validation-side model is
        kept). After a round, the hyperboxes just produced become the input of
        the next round and the threshold is lowered by
        (self.bthres - self.bthres_min) / self.numClassifier.

        INPUT
            X_tr       An object contains training data with the Bunch datatype, its attributes:
                        + lower:    lower bounds
                        + upper:    upper bounds
                        + label:    class labels

            X_val      An object contains validation data with the Bunch datatype, its attributes:
                        + lower:    lower bounds
                        + upper:    upper bounds
                        + label:    class labels
                    X_tr, X_val should be normalized (if needed) before calling this method

        OUTPUT
            self.baseClassifiers, which must already support item assignment for
            the indices 0 .. self.numClassifier - 1. self.numHyperboxes is set
            to the total number of hyperboxes over all stored base classifiers.
        """
        V_train = X_tr.lower
        W_train = X_tr.upper
        classId_train = X_tr.label

        V_val = X_val.lower
        W_val = X_val.upper
        classId_val = X_val.label

        delta_thres = (self.bthres - self.bthres_min) / self.numClassifier
        bthres = self.bthres
        self.numHyperboxes = 0

        for k in range(self.numClassifier):
            # The two positional False values are presumably isDraw and isNorm
            # -- TODO confirm against the AccelBatchGFMM constructor
            classifier_Tr = AccelBatchGFMM(self.gamma, self.teta, bthres,
                                           self.simil, self.sing, False,
                                           self.oper, False)
            classifier_Tr.fit(V_train, W_train, classId_train)

            classifier_Val = AccelBatchGFMM(self.gamma, self.teta, bthres,
                                            self.simil, self.sing, False,
                                            self.oper, False)
            classifier_Val.fit(V_val, W_val, classId_val)

            # Cross evaluation: each model is tested on the other side's boxes
            rest_Tr = predict(classifier_Tr.V, classifier_Tr.W,
                              classifier_Tr.classId, V_val, W_val, classId_val,
                              self.gamma, self.oper)
            rest_Val = predict(classifier_Val.V, classifier_Val.W,
                               classifier_Val.classId, V_train, W_train,
                               classId_train, self.gamma, self.oper)

            # Normalise by the number of samples that were actually predicted;
            # dividing by the hyperbox count of the other model (as was done
            # before) makes the two rates incomparable
            err_Tr = rest_Tr.summis / len(classId_val)
            err_Val = rest_Val.summis / len(classId_train)

            if err_Tr < err_Val:
                self.baseClassifiers[k] = classifier_Tr
            else:
                self.baseClassifiers[k] = classifier_Val

            self.numHyperboxes = self.numHyperboxes + len(
                self.baseClassifiers[k].classId)

            # The hyperboxes of this round are the input of the next round
            V_train = classifier_Tr.V
            W_train = classifier_Tr.W
            classId_train = classifier_Tr.classId

            V_val = classifier_Val.V
            W_val = classifier_Val.W
            classId_val = classifier_Val.classId

            bthres = bthres - delta_thres

        return self.baseClassifiers