def training(self, partitionedXtr):
    """
    Train one candidate classifier per fold and return the one with the
    lowest validation error.

    For every fold k an AccelBatchGFMM model is fitted on partition k only,
    and all remaining partitions are stacked to form its validation set.

        INPUT
            partitionedXtr      An indexable collection of self.numFold partitions; each partition exposes
                                    + lower:    lower bounds
                                    + upper:    upper bounds
                                    + label:    class labels
                                The data should already be normalized (if needed) before calling this method

        OUTPUT
            baseClassifier      The fitted classifier with the lowest validation error rate, or None
                                if no fold achieved an error rate below the initial bound of 2

        RAISES
            ValueError          If self.numFold < 2 (no partition would be left for validation)
    """
    if self.numFold < 2:
        # Without this guard the validation arrays below would never be built.
        raise ValueError("K-fold training needs at least 2 folds, got numFold = {}".format(self.numFold))

    baseClassifier = None
    minEr = 2  # initial bound; presumably above any attainable error rate
    for k in range(self.numFold):
        classifier_tmp = AccelBatchGFMM(self.gamma, self.teta, self.bthres, self.simil, self.sing, False, self.oper, False)
        classifier_tmp.fit(partitionedXtr[k].lower, partitionedXtr[k].upper, partitionedXtr[k].label)

        # The validation set is every partition except the one used for fitting
        held_out = [partitionedXtr[l] for l in range(self.numFold) if l != k]
        lower_valid = np.concatenate([part.lower for part in held_out], axis=0)
        upper_valid = np.concatenate([part.upper for part in held_out], axis=0)
        label_valid = np.concatenate([part.label for part in held_out])

        # Validate the trained model on the held-out partitions
        rest = predict(classifier_tmp.V, classifier_tmp.W, classifier_tmp.classId, lower_valid, upper_valid, label_valid, self.gamma, self.oper)
        er = rest.summis / len(label_valid)

        # Strict comparison: on ties the earliest fold is kept
        if er < minEr:
            minEr = er
            baseClassifier = classifier_tmp

    return baseClassifier
def fit(self, Xl_onl, Xu_onl, patClassId_onl, Xl_off, Xu_off, patClassId_off):
    """
    Train two independent models: an agglomerative one on the offline data
    and an online one on the online data.

    Input data need to be normalized before using this function

        Xl_onl              Input data lower bounds (rows = objects, columns = features) for online learning
        Xu_onl              Input data upper bounds (rows = objects, columns = features) for online learning
        patClassId_onl      Input data class labels (crisp) for online learning

        Xl_off              Input data lower bounds (rows = objects, columns = features) for agglomerative learning
        Xu_off              Input data upper bounds (rows = objects, columns = features) for agglomerative learning
        patClassId_off      Input data class labels (crisp) for agglomerative learning

    Side effects: sets self.offClassifier and self.onlClassifier (each a Bunch
    with V, W and classId) and self.elapsed_training_time.

    Returns self.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the supported timer
    time_start = time.perf_counter()

    # Perform agglomerative learning
    aggloClassifier = AccelBatchGFMM(self.gamma, self.teta_agglo, bthres=self.bthres, simil=self.simil, sing=self.sing, isDraw=self.isDraw, oper=self.oper, isNorm=False)
    aggloClassifier.fit(Xl_off, Xu_off, patClassId_off)
    self.offClassifier = Bunch(V=aggloClassifier.V, W=aggloClassifier.W, classId=aggloClassifier.classId)

    # Perform online learning
    onlClassifier = OnlineGFMM(self.gamma, self.teta_onl, self.teta_onl, isDraw=self.isDraw, oper=self.oper, isNorm=False, norm_range=[self.loLim, self.hiLim])
    onlClassifier.fit(Xl_onl, Xu_onl, patClassId_onl)
    self.onlClassifier = Bunch(V=onlClassifier.V, W=onlClassifier.W, classId=onlClassifier.classId)

    self.elapsed_training_time = time.perf_counter() - time_start

    return self
def fit(self, Xl_onl, Xu_onl, patClassId_onl, Xl_off, Xu_off, patClassId_off, typeOfAgglo=1):
    """
    Run agglomerative learning on the offline data, then continue with
    online learning seeded by the resulting hyperboxes.

    The input data need to be normalized before using this function

        Xl_onl              Input data lower bounds (rows = objects, columns = features) for online learning
        Xu_onl              Input data upper bounds (rows = objects, columns = features) for online learning
        patClassId_onl      Input data class labels (crisp) for online learning

        Xl_off              Input data lower bounds (rows = objects, columns = features) for agglomerative learning
        Xu_off              Input data upper bounds (rows = objects, columns = features) for agglomerative learning
        patClassId_off      Input data class labels (crisp) for agglomerative learning

        typeOfAgglo         The agglomerative learner to use
                                + 1:         AccelBatchGFMM
                                + 2:         BatchGFMMV2
                                + otherwise: BatchGFMMV1

    Side effects: sets self.V, self.W, self.classId and self.elapsed_training_time.

    Returns self.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the supported timer
    time_start = time.perf_counter()

    # Perform agglomerative learning; only the class differs between the variants
    if typeOfAgglo == 1:
        agglo_cls = AccelBatchGFMM
    elif typeOfAgglo == 2:
        agglo_cls = BatchGFMMV2
    else:
        agglo_cls = BatchGFMMV1

    aggloClassifier = agglo_cls(self.gamma, self.teta_agglo, bthres=self.bthres, simil=self.simil, sing=self.sing, isDraw=self.isDraw, oper=self.oper, isNorm=False)
    aggloClassifier.fit(Xl_off, Xu_off, patClassId_off)
    self.V = aggloClassifier.V
    self.W = aggloClassifier.W
    self.classId = aggloClassifier.classId

    # Perform online learning, starting from the hyperboxes found above
    onlClassifier = OnlineGFMM(self.gamma, self.teta_onl, self.teta_onl, isDraw=self.isDraw, oper=self.oper, isNorm=False, norm_range=[self.loLim, self.hiLim], V=self.V, W=self.W, classId=self.classId)
    onlClassifier.fit(Xl_onl, Xu_onl, patClassId_onl)
    self.V = onlClassifier.V
    self.W = onlClassifier.W
    self.classId = onlClassifier.classId

    self.elapsed_training_time = time.perf_counter() - time_start

    return self
def running_agglo_2(xTr, patClassIdTr, xVal, patClassIdVal, teta=0.26, sigma=0.5, simil='short'):
    """
    Fit an AccelBatchGFMM model on the training data and return its error
    rate on the validation data.

        xTr                 Training samples (used as both lower and upper bounds)
        patClassIdTr        Training class labels
        xVal                Validation samples (used as both lower and upper bounds)
        patClassIdVal       Validation class labels
        teta                Value passed as the classifier's teta parameter
        sigma               Value passed as the classifier's bthres parameter
        simil               Value passed as the classifier's simil parameter

    Returns result.summis / len(patClassIdVal), or 1 when predict() returns None.
    """
    accelClassifier = AccelBatchGFMM(gamma=1, teta=teta, bthres=sigma, simil=simil, sing='max', isDraw=False, oper='min', isNorm=False)
    accelClassifier.fit(xTr, xTr, patClassIdTr)

    result = accelClassifier.predict(xVal, xVal, patClassIdVal)
    # Identity test: `!= None` would dispatch to the result object's __ne__
    if result is None:
        return 1

    return result.summis / len(patClassIdVal)
def training(self, X_tr, X_val, isDeleteContainedHyperbox=True):
    """
    Build a combined model from two chains of AccelBatchGFMM classifiers
    trained with a decreasing similarity threshold.

    One chain starts from X_tr and the other from X_val. At every step each
    chain is re-fitted on its own previous hyperboxes and evaluated on the
    other chain's current data. The best model of each chain is kept, the two
    are merged, and the result is re-trained once more with AccelBatchGFMM
    at self.bthres_min.

        INPUT
            X_tr        An object contains training data with the Bunch datatype, its attributes:
                            + lower:    lower bounds
                            + upper:    upper bounds
                            + label:    class labels
            X_val       An object contains validation data with the Bunch datatype, its attributes:
                            + lower:    lower bounds
                            + upper:    upper bounds
                            + label:    class labels
            X_tr, X_val should be normalized (if needed) before calling this method
            isDeleteContainedHyperbox   Whether hyperboxes contained in other hyperboxes are discarded

    Side effects: sets self.V, self.W, self.classId, self.cardin, self.clusters
    and self.numHyperboxes.

    Returns self.
    """
    V_train, W_train, classId_train = X_tr.lower, X_tr.upper, X_tr.label
    V_val, W_val, classId_val = X_val.lower, X_val.upper, X_val.label

    bthres = self.bthres
    self.numHyperboxes = 0
    N = int(self.numClassifier / 2) + 1
    delta_thres = (self.bthres - self.bthres_min) / N

    minEr_Tr = 2
    minEr_Val = 2
    opt_Tr = None
    opt_Val = None

    for _ in range(N):
        classifier_Tr = AccelBatchGFMM(self.gamma, self.teta, bthres, self.simil, self.sing, False, self.oper, False)
        classifier_Tr.fit(V_train, W_train, classId_train)

        classifier_Val = AccelBatchGFMM(self.gamma, self.teta, bthres, self.simil, self.sing, False, self.oper, False)
        classifier_Val.fit(V_val, W_val, classId_val)

        # Cross-evaluate: each model is scored on the other chain's current data
        rest_Tr = predict(classifier_Tr.V, classifier_Tr.W, classifier_Tr.classId, V_val, W_val, classId_val, self.gamma, self.oper)
        rest_Val = predict(classifier_Val.V, classifier_Val.W, classifier_Val.classId, V_train, W_train, classId_train, self.gamma, self.oper)

        # Normalise by the number of samples actually evaluated. Dividing by the
        # other model's hyperbox count could push the rate above the sentinel 2,
        # leaving opt_Tr / opt_Val unset.
        err_Tr = rest_Tr.summis / len(classId_val)
        err_Val = rest_Val.summis / len(classId_train)

        if err_Tr < minEr_Tr:
            minEr_Tr = err_Tr
            opt_Tr = classifier_Tr

        if err_Val < minEr_Val:
            minEr_Val = err_Val
            opt_Val = classifier_Val

        # The next step starts from the hyperboxes produced by this one
        V_train, W_train, classId_train = classifier_Tr.V, classifier_Tr.W, classifier_Tr.classId
        V_val, W_val, classId_val = classifier_Val.V, classifier_Val.W, classifier_Val.classId
        bthres = bthres - delta_thres

    # Merge the best model of each chain
    self.V = np.concatenate((opt_Tr.V, opt_Val.V), axis=0)
    self.W = np.concatenate((opt_Tr.W, opt_Val.W), axis=0)
    self.classId = np.concatenate((opt_Tr.classId, opt_Val.classId))

    if isDeleteContainedHyperbox:
        self.removeContainedHyperboxes()

    self.overlapResolve()

    # training using AGGLO-2
    combClassifier = AccelBatchGFMM(self.gamma, self.teta, self.bthres_min, self.simil, self.sing, False, self.oper, False)
    combClassifier.fit(self.V, self.W, self.classId)

    self.V = combClassifier.V
    self.W = combClassifier.W
    self.classId = combClassifier.classId
    self.cardin = combClassifier.cardin
    self.clusters = combClassifier.clusters
    self.numHyperboxes = len(self.classId)

    return self
def fit(self, X_l, X_u, patClassId, typeOfAgglo=1):
    """
    Run online learning on the input data, then compress the resulting
    hyperboxes with agglomerative learning.

        X_l             Input data lower bounds (rows = objects, columns = features)
        X_u             Input data upper bounds (rows = objects, columns = features)
        patClassId      Input data class labels (crisp)
        typeOfAgglo     Type of agglomerative learning
                            + 1:         Accelerated agglomerative learning AGGLO-2 (AccelBatchGFMM)
                            + 2:         Full batch learning, BatchGFMMV2
                            + otherwise: Full batch learning, BatchGFMMV1

    Side effects: sets self.V, self.W, self.classId,
    self.num_hyperbox_after_online, self.num_hyperbox_after_agglo and
    self.elapsed_training_time.

    Returns self.
    """
    if self.isNorm:
        X_l, X_u = self.dataPreprocessing(X_l, X_u)

    # time.clock() was removed in Python 3.8; perf_counter() is the supported timer
    time_start = time.perf_counter()

    # Perform online learning, continuing from any hyperboxes already stored on self
    onlClassifier = OnlineGFMM(self.gamma, self.teta_onl, self.teta_onl, isDraw=self.isDraw, oper=self.oper, isNorm=False, norm_range=[self.loLim, self.hiLim], V=self.V, W=self.W, classId=self.classId)
    onlClassifier.fit(X_l, X_u, patClassId)

    self.V = onlClassifier.V
    self.W = onlClassifier.W
    self.classId = onlClassifier.classId
    self.num_hyperbox_after_online = len(self.classId)

    # Perform agglomerative learning; only the class differs between the variants
    if typeOfAgglo == 1:
        agglo_cls = AccelBatchGFMM
    elif typeOfAgglo == 2:
        agglo_cls = BatchGFMMV2
    else:
        agglo_cls = BatchGFMMV1

    aggloClassifier = agglo_cls(self.gamma, self.teta_agglo, bthres=self.bthres, simil=self.simil, sing=self.sing, isDraw=self.isDraw, oper=self.oper, isNorm=False)
    # The hyperboxes from the online phase are the input of the agglomerative phase
    aggloClassifier.fit(self.V, self.W, self.classId)

    self.V = aggloClassifier.V
    self.W = aggloClassifier.W
    self.classId = aggloClassifier.classId
    self.num_hyperbox_after_agglo = len(self.classId)

    self.elapsed_training_time = time.perf_counter() - time_start

    return self
numhyperbox_midmin_si_save = np.array([], dtype=np.int64) training_time_midmin_si_save = np.array([]) testing_error_midmin_si_save = np.array([]) for simil_thres in np.arange(0.02, 1, 0.02): simil_save = np.append(simil_save, simil_thres) accelClassifier = AccelBatchGFMM(gamma=1, teta=teta, bthres=simil_thres, simil='short', sing='max', isDraw=False, oper='min', isNorm=False) accelClassifier.fit(Xtr, Xtr, patClassIdTr) training_time_short_si_save = np.append( training_time_short_si_save, accelClassifier.elapsed_training_time) numhyperbox_short_si_save = np.append(numhyperbox_short_si_save, len(accelClassifier.classId)) result = accelClassifier.predict(Xtest, Xtest, patClassIdTest) if result != None: numTestSample = Xtest.shape[0] err = result.summis / numTestSample testing_error_short_si_save = np.append( testing_error_short_si_save, err) for simil_thres in np.arange(0.02, 1, 0.02):
result = olnClassifier.predict(Xtest, Xtest, patClassIdTest) if result != None: err = result.summis / numTestSample testing_error_online_save = np.append( testing_error_online_save, err) # Do accelerated learning accelClassifier = AccelBatchGFMM(gamma=1, teta=teta, bthres=simil_thres, simil='short', sing='max', isDraw=False, oper='min', isNorm=False) accelClassifier.fit(Xtr_time_i, Xtr_time_i, pathClassIdTr_time_i) training_time_agglo_save = np.append( training_time_agglo_save, accelClassifier.elapsed_training_time) numhyperbox_agglo_save = np.append(numhyperbox_agglo_save, len(accelClassifier.classId)) result = accelClassifier.predict(Xtest, Xtest, patClassIdTest) if result != None: err = result.summis / numTestSample testing_error_agglo_save = np.append(testing_error_agglo_save, err) # Do online training before agglo olnAggloClassifier = OnlineAggloGFMM(gamma=1,
# Prune the online model on the validation set; the pruning time is added to
# the training time reported by the classifier itself.
start_t = time.perf_counter()
olnClassifier.pruning_val(validationData, validationData, validationLabel)
end_t = time.perf_counter()
training_time_online_gfmm_save = np.append(training_time_online_gfmm_save, olnClassifier.elapsed_training_time + (end_t - start_t))
numhyperbox_online_gfmm_save = np.append(numhyperbox_online_gfmm_save, len(olnClassifier.classId))

result = olnClassifier.predict(testingData, testingData, testingLabel)
# Identity test: `!= None` would dispatch to the result object's __ne__
if result is not None:
    # Testing error as a percentage, rounded to 3 decimals
    err = np.round(result.summis / numTestSample * 100, 3)
    testing_error_online_gfmm_save = np.append(testing_error_online_gfmm_save, err)

# agglo-2
accelClassifier = AccelBatchGFMM(gamma=1, teta=tetaAGGLO2, bthres=0, simil='long', sing='min', isDraw=False, oper='min', isNorm=False)
accelClassifier.fit(trainingData, trainingData, trainingLabel)

# Record size and testing error before pruning
numhyperbox_before_prun_accel_agglo_save = np.append(numhyperbox_before_prun_accel_agglo_save, len(accelClassifier.classId))

result = accelClassifier.predict(testingData, testingData, testingLabel)
if result is not None:
    err = np.round(result.summis / numTestSample * 100, 3)
    testing_error_before_prun_accel_agglo_save = np.append(testing_error_before_prun_accel_agglo_save, err)

# Prune on the validation set and record training time and size afterwards
start_t = time.perf_counter()
accelClassifier.pruning_val(validationData, validationData, validationLabel)
end_t = time.perf_counter()
training_time_accel_agglo_save = np.append(training_time_accel_agglo_save, accelClassifier.elapsed_training_time + (end_t - start_t))
numhyperbox_accel_agglo_save = np.append(numhyperbox_accel_agglo_save, len(accelClassifier.classId))
def fit(self, X_l, X_u, patClassId, typeOfSplitting=1, isRemoveContainedHyperboxes=True):
    """
    Train self.numClassifier base models on random K-fold splits, pool their
    hyperboxes and re-train the pooled set with AccelBatchGFMM.

    This method is used when the input data are not partitioned into k parts

        INPUT
            X_l                 Input data lower bounds (rows = objects, columns = features)
            X_u                 Input data upper bounds (rows = objects, columns = features)
            patClassId          Input data class labels (crisp)
            typeOfSplitting     The way of splitting datasets
                                    + 1:         random split on the whole dataset, ignoring the classes
                                    + otherwise: random split according to each class label
            isRemoveContainedHyperboxes     Whether hyperboxes contained in other hyperboxes are discarded

    Side effects: sets self.V, self.W, self.classId, self.cardin, self.clusters,
    self.numHyperboxes and self.elapsed_training_time.

    Returns self.
    """
    X_l, X_u = self.dataPreprocessing(X_l, X_u)

    # time.clock() was removed in Python 3.8; perf_counter() is the supported timer
    time_start = time.perf_counter()

    for i in range(self.numClassifier):
        # A fresh random split is drawn for every base model
        if typeOfSplitting == 1:
            partitionedXtr = splitDatasetRndToKPart(X_l, X_u, patClassId, self.numFold)
        else:
            partitionedXtr = splitDatasetRndClassBasedToKPart(X_l, X_u, patClassId, self.numFold)

        predictor = self.training(partitionedXtr)

        if i == 0:
            # The first base model replaces whatever was stored on self
            self.V = predictor.V
            self.W = predictor.W
            self.classId = predictor.classId
            self.cardin = predictor.cardin
            self.clusters = predictor.clusters
        else:
            # Later base models are appended to the pool
            self.V = np.concatenate((self.V, predictor.V), axis=0)
            self.W = np.concatenate((self.W, predictor.W), axis=0)
            self.classId = np.concatenate((self.classId, predictor.classId))
            self.cardin = np.concatenate((self.cardin, predictor.cardin))
            self.clusters = np.concatenate((self.clusters, predictor.clusters))

    if isRemoveContainedHyperboxes:
        self.removeContainedHyperboxes()

    self.overlapResolve()

    # training using AGGLO-2
    combClassifier = AccelBatchGFMM(self.gamma, self.teta, self.bthres, self.simil, self.sing, False, self.oper, False)
    combClassifier.fit(self.V, self.W, self.classId)

    self.V = combClassifier.V
    self.W = combClassifier.W
    self.classId = combClassifier.classId
    self.cardin = combClassifier.cardin
    self.clusters = combClassifier.clusters
    self.numHyperboxes = len(self.classId)

    self.elapsed_training_time = time.perf_counter() - time_start

    return self
def training(self, X_tr, X_val):
    """
    Fill self.baseClassifiers with self.numClassifier AccelBatchGFMM models
    trained with a decreasing similarity threshold.

    Two chains are kept, one started from X_tr and one from X_val. At every
    step each chain is re-fitted on its own previous hyperboxes and evaluated
    on the other chain's current data; the model with the lower error rate
    becomes the base classifier for that step.

        INPUT
            X_tr        An object contains training data with the Bunch datatype, its attributes:
                            + lower:    lower bounds
                            + upper:    upper bounds
                            + label:    class labels
            X_val       An object contains validation data with the Bunch datatype, its attributes:
                            + lower:    lower bounds
                            + upper:    upper bounds
                            + label:    class labels
            X_tr, X_val should be normalized (if needed) before calling this method

        OUTPUT
            self.baseClassifiers, with entries 0 .. self.numClassifier - 1 assigned

    Side effects: also sets self.numHyperboxes to the total number of
    hyperboxes over all selected base classifiers.
    """
    V_train, W_train, classId_train = X_tr.lower, X_tr.upper, X_tr.label
    V_val, W_val, classId_val = X_val.lower, X_val.upper, X_val.label

    delta_thres = (self.bthres - self.bthres_min) / self.numClassifier
    bthres = self.bthres
    self.numHyperboxes = 0

    for k in range(self.numClassifier):
        classifier_Tr = AccelBatchGFMM(self.gamma, self.teta, bthres, self.simil, self.sing, False, self.oper, False)
        classifier_Tr.fit(V_train, W_train, classId_train)

        classifier_Val = AccelBatchGFMM(self.gamma, self.teta, bthres, self.simil, self.sing, False, self.oper, False)
        classifier_Val.fit(V_val, W_val, classId_val)

        # Cross-evaluate: each model is scored on the other chain's current data
        rest_Tr = predict(classifier_Tr.V, classifier_Tr.W, classifier_Tr.classId, V_val, W_val, classId_val, self.gamma, self.oper)
        rest_Val = predict(classifier_Val.V, classifier_Val.W, classifier_Val.classId, V_train, W_train, classId_train, self.gamma, self.oper)

        # Normalise by the number of samples actually evaluated, so the two
        # rates are comparable. Dividing by the other model's hyperbox count
        # skewed the comparison below.
        err_Tr = rest_Tr.summis / len(classId_val)
        err_Val = rest_Val.summis / len(classId_train)

        # On ties the validation-chain model is chosen
        if err_Tr < err_Val:
            self.baseClassifiers[k] = classifier_Tr
        else:
            self.baseClassifiers[k] = classifier_Val

        self.numHyperboxes = self.numHyperboxes + len(self.baseClassifiers[k].classId)

        # The next step starts from the hyperboxes produced by this one
        V_train, W_train, classId_train = classifier_Tr.V, classifier_Tr.W, classifier_Tr.classId
        V_val, W_val, classId_val = classifier_Val.V, classifier_Val.W, classifier_Val.classId
        bthres = bthres - delta_thres

    return self.baseClassifiers