class Manager(object):
    """Drive multistream classification with KLIEP-based covariate-shift
    correction (the FUSION algorithm).

    Reads a source and a target stream, maintains sliding windows over both,
    incrementally maintains a Gaussian-kernel density-ratio model, and
    retrains an SVM ensemble whenever a change of distribution is detected
    (or a forced-update period elapses).
    """

    def __init__(self, sourceFile, targetFile):
        # 2D array representations of the sliding windows:
        # one column per instance, one row per feature.
        self.SDataBufferArr = None
        self.SDataLabels = None
        self.TDataBufferArr = None
        self.TDataLabels = None

        self.useKliepCVSigma = Properties.useKliepCVSigma
        self.kliep = None
        self.useSvmCVParams = Properties.useSvmCVParams

        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        self.initialWindowSize = int(Properties.INITIAL_DATA_SIZE)
        self.maxWindowSize = int(Properties.MAX_WINDOW_SIZE)

        self.enableForceUpdate = int(Properties.enableForceUpdate)
        self.forceUpdatePeriod = int(Properties.forceUpdatePeriod)

        # Simulate source and target streams from the corresponding files.
        print("Reading the Source Dataset")
        self.source = Stream(sourceFile, Properties.INITIAL_DATA_SIZE)
        print("Reading the Target Dataset")
        self.target = Stream(targetFile, Properties.INITIAL_DATA_SIZE)
        print("Finished Reading the Target Dataset")

        # Number of features (rows) in the column-per-instance layout.
        Properties.MAXVAR = self.source.initialData.shape[0]

    def __detectDrift(self, slidingWindow, flagStream):
        """Detect drift on a given data stream.

        flagStream: 0 for the source stream, 1 for the target stream.
        Returns the change-point index on the stream array.
        """
        # NOTE(review): self.changeDetector is not initialized in this
        # class's __init__ — confirm which Manager variant this belongs to.
        if flagStream == 0:
            return self.changeDetector.detectSourceChange(slidingWindow)
        if flagStream == 1:
            return self.changeDetector.detectTargetChange(slidingWindow)
        raise Exception('flagStream var has value ' + str(flagStream) +
                        ' that is not supported.')

    def __saveResult(self, acc, datasetName):
        """Append a value (accuracy or confidence) to a result file keyed
        by the dataset name."""
        # 'with' already closes the file; the original's explicit
        # f.close() inside the with-block was redundant.
        with open(datasetName + '_' + Properties.OUTFILENAME, 'a') as f:
            f.write(str(acc) + "\n")

    def convListOfDictToNDArray(self, listOfDict):
        """Convert a list of {featureIndex: value} dicts into a 2D ndarray
        with one column per instance.  Key -1 (the label) is skipped.

        Returns an empty list for empty input (kept for backward
        compatibility with the original implementation).
        """
        if not listOfDict:
            return []
        # Collect all columns first and build the array once; the original
        # called np.append per instance, copying the whole array each
        # iteration (O(n^2)).
        columns = [[float(v) for k, v in d.items() if k != -1]
                   for d in listOfDict]
        return np.array(columns).T

    def collectLabels(self, listOfDict):
        """Extract the label (key -1) of every instance as a string."""
        return [str(d[-1]) for d in listOfDict]

    def startFusion(self, datasetName, probFromSource):
        """Main loop of multistream classification using KLIEP.

        datasetName: identifier used to name the output files.
        probFromSource: probability of sampling the next instance from the
            source stream instead of the target stream.
        """
        globalStartTime = time.time()
        Properties.logger.info(
            'Global Start Time: ' +
            datetime.datetime.fromtimestamp(globalStartTime)
            .strftime('%Y-%m-%d %H:%M:%S'))

        # Files for saving accuracy and confidence over time.
        fAcc = open(datasetName + '_' + Properties.OUTFILENAME, 'w')
        fConf = open(
            datasetName + '_confidence' + '_' + Properties.OUTFILENAME, 'w')

        # gmOld holds the model estimated at the last full rebuild;
        # gmUpdated is maintained incrementally as instances arrive.
        gmOld = gm.GaussianModel()
        gmUpdated = gm.GaussianModel()

        # Tracks when the ensemble was last rebuilt (for force updates).
        idxLastUpdate = 0

        # Initial data buffers.
        self.SDataBufferArr = self.source.initialData
        self.SDataLabels = self.source.initialDataLabels
        self.TDataBufferArr = self.target.initialData

        # Choose a suitable kernel width, optionally by cross-validation.
        self.kliep = Kliep(Properties.kliepParEta, Properties.kliepParLambda,
                           Properties.kliepParB, Properties.kliepParThreshold,
                           Properties.kliepDefSigma)
        if self.useKliepCVSigma == 1:
            self.kliep.kliepDefSigma = self.kliep.chooseSigma(
                self.SDataBufferArr, self.TDataBufferArr)

        Properties.logger.info('Estimating initial DRM')
        gmOld.alphah, kernelMatSrcData, kernelMatTrgData, gmOld.refPoints = \
            self.kliep.KLIEP(self.SDataBufferArr, self.TDataBufferArr)
        gmUpdated.setAlpha(gmOld.alphah)
        gmUpdated.setRefPoints(gmOld.refPoints)

        # Trim every buffer to the maximum window size (keep the newest).
        self.SDataBufferArr = self.SDataBufferArr[:, -Properties.MAX_WINDOW_SIZE:]
        self.SDataLabels = self.SDataLabels[-Properties.MAX_WINDOW_SIZE:]
        self.TDataBufferArr = self.TDataBufferArr[:, -Properties.MAX_WINDOW_SIZE:]
        kernelMatSrcData = kernelMatSrcData[-Properties.MAX_WINDOW_SIZE:, :]
        kernelMatTrgData = kernelMatTrgData[-Properties.MAX_WINDOW_SIZE:, :]

        Properties.logger.info('Initializing Ensemble with the first model')
        # Density-ratio weights for the source instances.
        weightSrcData = self.kliep.calcInstanceWeights(kernelMatSrcData,
                                                       gmUpdated.alphah)
        # weightSrcData is a row matrix; pass its first row as a plain list.
        SDataBufferArrTransposed = self.SDataBufferArr.T
        TDataBufferArrTransposed = self.TDataBufferArr.T
        if self.useSvmCVParams == 1:
            params = {'gamma': [2**2, 2**-16], 'C': [2**-6, 2**15]}
            svr = svm.SVC()
            opt = grid_search.GridSearchCV(svr, params)
            opt.fit(SDataBufferArrTransposed.tolist(), self.SDataLabels)
            optParams = opt.best_params_
            # Pass plain lists, consistent with the non-CV branch and with
            # the retraining path below (the original passed raw ndarrays
            # only here).
            self.ensemble.generateNewModelKLIEP(
                SDataBufferArrTransposed.tolist(), self.SDataLabels,
                TDataBufferArrTransposed.tolist(), weightSrcData[0].tolist(),
                optParams['C'], optParams['gamma'])
        else:
            self.ensemble.generateNewModelKLIEP(
                SDataBufferArrTransposed.tolist(), self.SDataLabels,
                TDataBufferArrTransposed.tolist(), weightSrcData[0].tolist(),
                Properties.svmDefC, Properties.svmDefGamma,
                Properties.svmKernel)
        Properties.logger.info(self.ensemble.getEnsembleSummary())

        sDataIndex = 0
        tDataIndex = 0
        trueTargetNum = 0
        targetConfSum = 0

        Properties.logger.info('Starting MultiStream Classification with FUSION')
        while self.target.data.shape[1] > tDataIndex:
            # If the source stream is not exhausted, sample from it with
            # probability probFromSource; otherwise take the new instance
            # from the target stream.
            if self.source.data.shape[1] > sDataIndex:
                fromSource = random.uniform(0, 1) < probFromSource
            else:
                print("\nsource stream sampling not possible")
                fromSource = False

            if fromSource:
                print('.', end="")  # '.' means sampling from source
                # Slide the source window: drop the oldest instance...
                newSrcDataArr = self.source.data[:, sDataIndex][np.newaxis].T
                self.SDataBufferArr = self.SDataBufferArr[:, 1:]
                self.SDataLabels = self.SDataLabels[1:]
                kernelMatSrcData = kernelMatSrcData[1:, :]
                # ...and append the new one.
                self.SDataBufferArr = np.append(self.SDataBufferArr,
                                                newSrcDataArr, axis=1)
                self.SDataLabels.append(self.source.dataLabels[sDataIndex])
                # Kernel values of the new source instance vs. all ref points.
                dist_tmp = np.power(
                    np.tile(newSrcDataArr, (1, gmUpdated.refPoints.shape[1])) -
                    gmUpdated.refPoints, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernelSDataNewFromRefs = np.exp(
                    -dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                    dtype='float64')
                kernelMatSrcData = np.append(
                    kernelMatSrcData, kernelSDataNewFromRefs[np.newaxis],
                    axis=0)
                # Re-satisfy the KLIEP constraints after the window update.
                gmUpdated.alphah, kernelMatSrcData = self.kliep.satConstraints(
                    self.SDataBufferArr, self.TDataBufferArr,
                    gmUpdated.refPoints, gmUpdated.alphah, kernelMatSrcData)
                sDataIndex += 1
            else:
                print('#', end="")  # '#' indicates new point from target
                newTargetDataArr = self.target.data[:, tDataIndex][np.newaxis].T
                # Evaluate the ensemble on the new target instance.
                resTarget = self.ensemble.evaluateEnsembleKLIEP(
                    np.reshape(newTargetDataArr, (1, -1)))
                if isinstance(resTarget[0], float) and abs(
                        resTarget[0] -
                        self.target.dataLabels[tDataIndex]) < 0.0001:
                    trueTargetNum += 1
                elif resTarget[0] == self.target.dataLabels[tDataIndex]:
                    trueTargetNum += 1
                acc = float(trueTargetNum) / (tDataIndex + 1)
                if (tDataIndex % 100) == 0:
                    Properties.logger.info(
                        '\nTotal test instance: ' + str(tDataIndex + 1) +
                        ', correct: ' + str(trueTargetNum) +
                        ', accuracy: ' + str(acc))
                fAcc.write(str(acc) + "\n")
                conf = resTarget[1]  # confidence
                targetConfSum += conf
                fConf.write(str(float(targetConfSum) / (tDataIndex + 1)) + "\n")

                # Incrementally update alpha and satisfy the constraints.
                gmUpdated.alphah, kernelMatSrcData = self.kliep.updateAlpha(
                    self.SDataBufferArr, self.TDataBufferArr, newTargetDataArr,
                    gmUpdated.refPoints, gmUpdated.alphah, kernelMatSrcData)

                # Slide the target window and the reference points; drop the
                # kernel entries of the removed ref point / instance.
                self.TDataBufferArr = self.TDataBufferArr[:, 1:]
                gmUpdated.refPoints = gmUpdated.refPoints[:, 1:]
                kernelMatSrcData = kernelMatSrcData[:, 1:]
                kernelMatTrgData = kernelMatTrgData[1:, 1:]
                # The new target instance becomes a reference point.
                gmUpdated.refPoints = np.append(gmUpdated.refPoints,
                                                newTargetDataArr, axis=1)

                # Kernel column: new ref point vs. all source instances.
                dist_tmp = np.power(
                    np.tile(newTargetDataArr,
                            (1, self.SDataBufferArr.shape[1])) -
                    self.SDataBufferArr, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernel_dist_2 = np.exp(
                    -dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                    dtype='float64')
                kernelMatSrcData = np.append(kernelMatSrcData,
                                             kernel_dist_2[np.newaxis].T,
                                             axis=1)

                # Kernel column: new ref point vs. all current target instances.
                dist_tmp = np.power(
                    np.tile(newTargetDataArr,
                            (1, self.TDataBufferArr.shape[1])) -
                    self.TDataBufferArr, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernel_dist_2 = np.exp(
                    -dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                    dtype='float64')
                kernelMatTrgData = np.append(kernelMatTrgData,
                                             kernel_dist_2[np.newaxis].T,
                                             axis=1)

                # Append the new instance itself, plus its kernel row
                # against all reference points.
                self.TDataBufferArr = np.append(self.TDataBufferArr,
                                                newTargetDataArr, axis=1)
                dist_tmp = np.power(
                    np.tile(newTargetDataArr,
                            (1, gmUpdated.refPoints.shape[1])) -
                    gmUpdated.refPoints, 2)
                dist_2 = np.sum(dist_tmp, axis=0, dtype='float64')
                kernelTDataNewFromRefs = np.exp(
                    -dist_2 / (2 * math.pow(self.kliep.kliepDefSigma, 2)),
                    dtype='float64')
                kernelMatTrgData = np.append(
                    kernelMatTrgData, kernelTDataNewFromRefs[np.newaxis],
                    axis=0)
                tDataIndex += 1

            # Only attempt change detection when both windows hold enough
            # instances to estimate the weights.
            enoughInstToUpdate = (
                self.SDataBufferArr.shape[1] >= Properties.kliepParB and
                self.TDataBufferArr.shape[1] >= Properties.kliepParB)
            if enoughInstToUpdate:
                changeDetected, changeScore, kernelMatTrgData = \
                    self.kliep.changeDetection(
                        self.TDataBufferArr, gmOld.refPoints, gmOld.alphah,
                        gmUpdated.refPoints, gmUpdated.alphah,
                        kernelMatTrgData)

                # Instances from more than one class are needed for SVM
                # training; rebuild either on detected change or when the
                # force-update period has elapsed.
                if len(set(self.SDataLabels)) > 1 and (
                        changeDetected or
                        (self.enableForceUpdate and
                         (tDataIndex + sDataIndex - idxLastUpdate) >
                         self.forceUpdatePeriod)):
                    # Sentinel row marking a rebuild in the confidence file.
                    fConf.write(str(7777777.0) + "\n")
                    Properties.logger.info(
                        '\n-------------------------- Change of Distribution ------------------------------------'
                    )
                    Properties.logger.info('Change of distribution found')
                    Properties.logger.info('sDataIndex=' + str(sDataIndex) +
                                           '\ttDataIndex=' + str(tDataIndex))
                    Properties.logger.info(
                        'Change Detection Score: ' + str(changeScore) +
                        ', Threshold: ' + str(self.kliep.kliepParThreshold))

                    # Re-estimate the DRM from scratch and recompute the
                    # source-instance weights.
                    gmOld.alphah, kernelMatSrcData, kernelMatTrgData, \
                        gmOld.refPoints = self.kliep.KLIEP(
                            self.SDataBufferArr, self.TDataBufferArr)
                    gmUpdated.setAlpha(gmOld.alphah)
                    gmUpdated.setRefPoints(gmOld.refPoints)
                    weightSrcData = self.kliep.calcInstanceWeights(
                        kernelMatSrcData, gmUpdated.alphah)

                    Properties.logger.info(
                        'Training a model due to change detection')
                    SDataBufferArrTransposed = self.SDataBufferArr.T
                    TDataBufferArrTransposed = self.TDataBufferArr.T
                    if self.useSvmCVParams == 1:
                        params = {'gamma': [2**2, 2**-16],
                                  'C': [2**-6, 2**15]}
                        svr = svm.SVC()
                        opt = grid_search.GridSearchCV(svr, params)
                        opt.fit(SDataBufferArrTransposed.tolist(),
                                self.SDataLabels)
                        optParams = opt.best_params_
                        self.ensemble.generateNewModelKLIEP(
                            SDataBufferArrTransposed.tolist(),
                            self.SDataLabels,
                            TDataBufferArrTransposed.tolist(),
                            weightSrcData[0].tolist(),
                            optParams['C'], optParams['gamma'])
                    else:
                        self.ensemble.generateNewModelKLIEP(
                            SDataBufferArrTransposed.tolist(),
                            self.SDataLabels,
                            TDataBufferArrTransposed.tolist(),
                            weightSrcData[0].tolist(),
                            Properties.svmDefC, Properties.svmDefGamma,
                            Properties.svmKernel)
                    Properties.logger.info(self.ensemble.getEnsembleSummary())

                    idxLastUpdate = tDataIndex + sDataIndex
                    changeDetected = False

        fConf.close()
        fAcc.close()
        globalEndTime = time.time()
        Properties.logger.info(
            '\nGlobal Start Time: ' +
            datetime.datetime.fromtimestamp(globalEndTime)
            .strftime('%Y-%m-%d %H:%M:%S'))
        Properties.logger.info('Total Time Spent: ' +
                               str(globalEndTime - globalStartTime) +
                               ' seconds')
        Properties.logger.info('Done !!')
def start_trgonly(self, datasetName):
    """Run MDC with a single target-trained ensemble.

    Processes the source and target streams in lockstep, logs target
    accuracy and mean confidence per instance, and retrains the ensemble
    whenever drift is detected on either stream.

    datasetName: identifier used to name the output files.
    """
    # Initial data buffers.
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Single ensemble, initialized with one target model.
    trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)
    Properties.logger.info('Initializing Ensemble ...')
    trgEnsemble.generateNewModel(self.SInitialDataBuffer,
                                 self.TInitialDataBuffer, False)
    Properties.logger.info('Target Ensemble')
    Properties.logger.info(trgEnsemble.getEnsembleSummary())

    dataIndex = 0
    trueTargetNum = 0
    targetConfSum = 0
    Properties.logger.info('Starting trgonly-MDC ...')
    # NOTE(review): this method relies on self.SDataBuffer/SWindow/
    # TDataBuffer/TWindow/TPredictWindow and self.__detectDriftJava, which
    # are not set up by this class's visible __init__ — confirm against the
    # intended Manager variant.
    while len(self.source.data) > dataIndex:
        print('.', end="")

        # Source stream: record the ensemble's source-side prediction.
        sdata = self.source.data[dataIndex]
        self.SDataBuffer.append(sdata)
        resSource = trgEnsemble.evaluateEnsemble(sdata, True)
        self.SWindow.append(resSource[0])  # prediction of 0 or 1

        # Target stream.
        tdata = self.target.data[dataIndex]
        self.TDataBuffer.append(tdata)
        resTarget = trgEnsemble.evaluateEnsemble(tdata, False)
        conf = resTarget[1]  # confidence

        # If conf is very close to 0.0 or 1.0, the beta probability might
        # become zero, which can break change detection — clamp it.
        if conf < 0.1:
            self.TWindow.append(0.1)
        elif conf > 0.995:
            self.TWindow.append(0.995)
        else:
            self.TWindow.append(resTarget[1])
        self.TPredictWindow.append(resTarget[0])

        # Target accuracy so far.
        if resTarget[0] == tdata[-1]:
            trueTargetNum += 1
        acc = float(trueTargetNum) / (dataIndex + 1)
        self.__saveResult(acc, datasetName)

        # Running mean confidence.
        targetConfSum += conf
        self.__saveResult(float(targetConfSum) / (dataIndex + 1),
                          datasetName + '_confidence')

        # Drift detection on both streams.
        start = time.time()
        # srcCP = self.__detectDrift(self.SWindow, 0)
        # trgCP = self.__detectDrift(self.TWindow, 1)
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)
        end = time.time()

        if srcCP != -1:
            # Sentinel row marking a source drift in the confidence file.
            self.__saveResult(5555555.0, datasetName + '_confidence')
            Properties.logger.info(
                '-------------------------- S O U R C E D R I F T ------------------------------------'
            )
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) +
                                   '\tsrcCP=' + str(srcCP) +
                                   '\ttrgCP=' + str(trgCP))
            # Discard everything before the change point.  range() replaces
            # the Python 2-only xrange() used originally.
            for i in range(srcCP):
                del self.SDataBuffer[0]
                del self.SWindow[0]
            # srcCP == 0 means the window hit max size or the average error
            # is below the cutoff: keep at least CUSHION instances.
            if srcCP == 0:
                while len(self.SDataBuffer) > Properties.CUSHION:
                    del self.SDataBuffer[0]
                    del self.SWindow[0]
            Properties.logger.info(
                'Instances left in source sliding window : ' +
                str(len(self.SDataBuffer)))
            Properties.logger.info(
                'Instances left in target sliding window : ' +
                str(len(self.TDataBuffer)))

            Properties.logger.info('Updating source ensemble weights')
            trgEnsemble.updateWeight(self.SDataBuffer, True)
            Properties.logger.info('Training a model for source stream')
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer,
                                         False)
            Properties.logger.info('Source Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        if trgCP != -1:
            # Sentinel row marking a target drift in the confidence file.
            self.__saveResult(7777777.0, datasetName + '_confidence')
            Properties.logger.info(
                '-------------------------- T A R G E T D R I F T ------------------------------------'
            )
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) +
                                   '\tsrcCP=' + str(srcCP) +
                                   '\ttrgCP=' + str(trgCP))
            # Discard everything before the change point.
            for i in range(trgCP):
                del self.TDataBuffer[0]
                del self.TWindow[0]
                del self.TPredictWindow[0]
            # trgCP == 0: same max-window / low-error exception as above.
            if trgCP == 0:
                while len(self.TDataBuffer) > Properties.CUSHION:
                    del self.TDataBuffer[0]
                    del self.TWindow[0]
                    del self.TPredictWindow[0]
            Properties.logger.info(
                'Instances left in source sliding window : ' +
                str(len(self.SDataBuffer)))
            Properties.logger.info(
                'Instances left in target sliding window : ' +
                str(len(self.TDataBuffer)))

            Properties.logger.info('Updating target ensemble weights')
            trgEnsemble.updateWeight(self.TDataBuffer, False)
            Properties.logger.info('Training a model for target stream')
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer,
                                         False)
            Properties.logger.info('Target Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        dataIndex += 1
        if dataIndex % 100 == 0:
            print('')

    Properties.logger.info('Done !!')
class Manager(object):
    """Batch (non-incremental) multistream classification.

    Corrects covariate shift once, with the chosen method — KLIEP, KMM, or
    alpha-relative uLSIF — trains a single weighted SVM model, then
    evaluates every instance of the target stream.
    """

    def __init__(self, sourceFile, targetFile):
        # 2D array representations of the source/target buffers:
        # one column per instance, one row per feature.
        self.SDataBufferArr = None
        self.SDataLabels = None
        self.TDataBufferArr = None
        self.TDataLabels = None

        self.useKliepCVSigma = Properties.useKliepCVSigma
        self.arulsifAlpha = Properties.arulsifAlpha
        self.useSvmCVParams = Properties.useSvmCVParams

        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        self.initialWindowSize = int(Properties.INITIAL_DATA_SIZE)
        self.maxWindowSize = int(Properties.MAX_WINDOW_SIZE)

        self.enableForceUpdate = int(Properties.enableForceUpdate)
        self.forceUpdatePeriod = int(Properties.forceUpdatePeriod)

        # Simulate source and target streams from the corresponding files.
        print("Reading the Source Dataset")
        self.source = Stream(sourceFile, Properties.INITIAL_DATA_SIZE)
        print("Reading the Target Dataset")
        self.target = Stream(targetFile, Properties.INITIAL_DATA_SIZE)
        print("Finished Reading the Target Dataset")

        # Number of features (rows) in the column-per-instance layout.
        Properties.MAXVAR = self.source.data.shape[0]

    def __saveResult(self, acc, datasetName):
        """Append a value (accuracy or confidence) to a result file keyed
        by the dataset name."""
        # 'with' already closes the file; the original's explicit
        # f.close() inside the with-block was redundant.
        with open(datasetName + '_' + Properties.OUTFILENAME, 'a') as f:
            f.write(str(acc) + "\n")

    def convListOfDictToNDArray(self, listOfDict):
        """Convert a list of {featureIndex: value} dicts into a 2D ndarray
        with one column per instance.  Key -1 (the label) is skipped.

        Returns an empty list for empty input (kept for backward
        compatibility with the original implementation).
        """
        if not listOfDict:
            return []
        # Collect all columns first and build the array once; the original
        # called np.append per instance, copying the whole array each
        # iteration (O(n^2)).
        columns = [[float(v) for k, v in d.items() if k != -1]
                   for d in listOfDict]
        return np.array(columns).T

    def collectLabels(self, listOfDict):
        """Extract the label (key -1) of every instance as a string."""
        return [str(d[-1]) for d in listOfDict]

    def startClassification(self, datasetName, method='kliep'):
        """Correct covariate shift with the requested method, train one
        model, and evaluate the target stream.

        datasetName: identifier used to name the output files.
        method: substring-matched; one of 'kliep', 'kmm', or 'arulsif'.
        """
        globalStartTime = time.time()
        Properties.logger.info(
            'Global Start Time: ' +
            datetime.datetime.fromtimestamp(globalStartTime)
            .strftime('%Y-%m-%d %H:%M:%S'))

        # Files for saving accuracy and confidence over time.
        fAcc = open(datasetName + '_' + Properties.OUTFILENAME, 'w')
        fConf = open(
            datasetName + '_confidence' + '_' + Properties.OUTFILENAME, 'w')

        # Data buffers, trimmed to the maximum window size (keep newest).
        self.SDataBufferArr = self.source.data
        self.SDataLabels = self.source.dataLabels
        self.TDataBufferArr = self.target.data
        self.SDataBufferArr = self.SDataBufferArr[:, -Properties.MAX_WINDOW_SIZE:]
        self.SDataLabels = self.SDataLabels[-Properties.MAX_WINDOW_SIZE:]
        self.TDataBufferArr = self.TDataBufferArr[:, -Properties.MAX_WINDOW_SIZE:]

        # One weight per source INSTANCE (columns, shape[1]); the original
        # used len(...) which counts feature rows.  Every accepted method
        # branch overwrites this anyway.
        weightSrcData = np.zeros(shape=(1, self.SDataBufferArr.shape[1]))

        if 'kliep' in method:
            Properties.logger.info(
                'Using KLIEP method for covariate shift correction.')
            gmodel = gm.GaussianModel()
            # Choose a suitable kernel width, optionally by cross-validation.
            kliep = Kliep(Properties.kliepParEta, Properties.kliepParLambda,
                          Properties.kliepParB, Properties.kliepParThreshold,
                          Properties.kliepDefSigma)
            if self.useKliepCVSigma == 1:
                kliep.kliepDefSigma = kliep.chooseSigma(
                    self.SDataBufferArr, self.TDataBufferArr)
            Properties.logger.info('Estimating initial DRM')
            gmodel.alphah, kernelMatSrcData, kernelMatTrgData, \
                gmodel.refPoints = kliep.KLIEP(self.SDataBufferArr,
                                               self.TDataBufferArr)
            kernelMatSrcData = kernelMatSrcData[-Properties.MAX_WINDOW_SIZE:, :]
            kernelMatTrgData = kernelMatTrgData[-Properties.MAX_WINDOW_SIZE:, :]
            Properties.logger.info(
                'Initializing Ensemble with the first model')
            # Density-ratio weights for the source instances (row matrix).
            weightSrcData = kliep.calcInstanceWeights(kernelMatSrcData,
                                                      gmodel.alphah)
        elif 'kmm' in method:
            Properties.logger.info(
                'Using KMM method for covariate shift correction.')
            kmm = KMM()
            gammab = kmm.computeKernelWidth(self.SDataBufferArr)
            Xtrain = self.SDataBufferArr.T.tolist()
            Xtest = self.TDataBufferArr.T.tolist()
            beta = kmm.kmm(Xtrain, Xtest, gammab)
            weightSrcData = np.array(beta).reshape(1, len(beta))
        elif 'arulsif' in method:
            Properties.logger.info(
                'Using alpha-relative-uLSIF method for covariate shift correction.'
            )
            arulsif = Alpha_RULSIF()
            beta = arulsif.R_ULSIF(self.SDataBufferArr, self.TDataBufferArr,
                                   self.arulsifAlpha)
            weightSrcData = np.array(beta).reshape(1, len(beta))
        else:
            print('Incorrect method. Please try again')
            return

        SDataBufferArrTransposed = self.SDataBufferArr.T
        TDataBufferArrTransposed = self.TDataBufferArr.T
        if self.useSvmCVParams == 1:
            params = {'gamma': [2**2, 2**-16], 'C': [2**-6, 2**15]}
            svr = svm.SVC()
            opt = grid_search.GridSearchCV(svr, params)
            opt.fit(SDataBufferArrTransposed.tolist(), self.SDataLabels)
            optParams = opt.best_params_
            self.ensemble.generateNewModel(SDataBufferArrTransposed.tolist(),
                                           self.SDataLabels,
                                           TDataBufferArrTransposed,
                                           weightSrcData[0].tolist(),
                                           optParams['C'],
                                           optParams['gamma'],
                                           Properties.svmKernel)
        else:
            self.ensemble.generateNewModel(SDataBufferArrTransposed.tolist(),
                                           self.SDataLabels,
                                           TDataBufferArrTransposed,
                                           weightSrcData[0].tolist(),
                                           Properties.svmDefC,
                                           Properties.svmDefGamma,
                                           Properties.svmKernel)
        Properties.logger.info(self.ensemble.getEnsembleSummary())

        tDataIndex = 0
        trueTargetNum = 0
        targetConfSum = 0
        while self.target.data.shape[1] > tDataIndex:
            print('#', end="")  # '#' indicates new point from target
            newTargetDataArr = self.target.data[:, tDataIndex][np.newaxis].T
            # Evaluate on the new target instance.
            resTarget = self.ensemble.evaluateEnsemble(
                np.reshape(newTargetDataArr, (1, -1)))
            if isinstance(resTarget[0], float) and abs(
                    resTarget[0] -
                    self.target.dataLabels[tDataIndex]) < 0.0001:
                trueTargetNum += 1
            elif resTarget[0] == self.target.dataLabels[tDataIndex]:
                trueTargetNum += 1
            acc = float(trueTargetNum) / (tDataIndex + 1)
            if (tDataIndex % 100) == 0:
                Properties.logger.info(
                    '\nTotal test instance: ' + str(tDataIndex + 1) +
                    ', correct: ' + str(trueTargetNum) +
                    ', accuracy: ' + str(acc))
            fAcc.write(str(acc) + "\n")
            conf = resTarget[1]  # confidence
            targetConfSum += conf
            fConf.write(str(float(targetConfSum) / (tDataIndex + 1)) + "\n")
            tDataIndex += 1

        fConf.close()
        fAcc.close()
        globalEndTime = time.time()
        Properties.logger.info(
            '\nGlobal Start Time: ' +
            datetime.datetime.fromtimestamp(globalEndTime)
            .strftime('%Y-%m-%d %H:%M:%S'))
        Properties.logger.info('Total Time Spent: ' +
                               str(globalEndTime - globalStartTime) +
                               ' seconds')
        Properties.logger.info('Done !!')
class Manager(object): def __init__(self, sourceFile, targetFile): self.SWindow = [] self.TWindow = [] self.TPredictWindow = [] self.SDataBuffer = [] #Queue self.TDataBuffer = [] #Queue self.SInitialDataBuffer = [] self.TInitialDataBuffer = [] self.changeDetector = ChangeDetection(Properties.GAMMA, Properties.SENSITIVITY, Properties.MAX_WINDOW_SIZE) self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE) classNameList = [] self.source = Stream(sourceFile, classNameList, Properties.INITIAL_DATA_SIZE) self.target = Stream(targetFile, classNameList, Properties.INITIAL_DATA_SIZE) Properties.MAXVAR = self.source.MAXVAR self.gateway = JavaGateway( start_callback_server=True, gateway_parameters=GatewayParameters(port=Properties.PY4JPORT), callback_server_parameters=CallbackServerParameters( port=Properties.PY4JPORT + 1)) self.app = self.gateway.entry_point """ Detect drift on a given data stream. Returns the change point index on the stream array. """ def __detectDrift(self, slidingWindow, flagStream): changePoint = -1 if flagStream == 0: changePoint = self.changeDetector.detectSourceChange(slidingWindow) elif flagStream == 1: changePoint = self.changeDetector.detectTargetChange(slidingWindow) else: raise Exception('flagStream var has value ' + str(flagStream) + ' that is not supported.') return changePoint def __detectDriftJava(self, slidingWindow, flagStream): changePoint = -1 sw = self.gateway.jvm.java.util.ArrayList() for i in xrange(len(slidingWindow)): sw.append(float(slidingWindow[i])) if flagStream == 0: changePoint = self.app.detectSourceChange(sw) elif flagStream == 1: changePoint = self.app.detectTargetChange(sw) else: raise Exception('flagStream var has value ' + str(flagStream) + ' that is not supported.') # print('ChangePoint = ' + str(changePoint)) return changePoint """ Write value (accuracy or confidence) to a file with DatasetName as an identifier. 
""" def __saveResult(self, acc, datasetName): with open(datasetName + '_' + Properties.OUTFILENAME, 'a') as f: f.write(str(acc) + '\n') f.close() """ The main method handling MDC logic (using single ensemble). """ def start(self, datasetName): #Get initial data buffer self.SInitialDataBuffer = self.source.initialData self.TInitialDataBuffer = self.target.initialData Properties.logger.info('Initializing Ensemble ...') #source model self.ensemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, True) #target model self.ensemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, False) Properties.logger.info(self.ensemble.getEnsembleSummary()) dataIndex = 0 trueTargetNum = 0 targetConfSum = 0 Properties.logger.info('Starting MDC ...') while len(self.source.data) > dataIndex: print('.', end="") #Source Stream sdata = self.source.data[dataIndex] self.SDataBuffer.append(sdata) resSource = self.ensemble.evaluateEnsemble(sdata, True) self.SWindow.append(resSource[0]) #prediction of 0 or 1 #Target Stream tdata = self.target.data[dataIndex] self.TDataBuffer.append(tdata) resTarget = self.ensemble.evaluateEnsemble(tdata, False) conf = resTarget[1] #confidence # If conf is very close to 0.0 or 1.0, beta probability might become zero, which can make problems in change detection. Handling this scenario. 
if conf < 0.1: self.TWindow.append(0.1) elif conf > 0.995: self.TWindow.append(0.995) else: self.TWindow.append(resTarget[1]) self.TPredictWindow.append(resTarget[0]) #get Target Accuracy if resTarget[0] == tdata[-1]: trueTargetNum += 1 acc = float(trueTargetNum) / (dataIndex + 1) self.__saveResult(acc, datasetName) #save confidence targetConfSum += conf self.__saveResult( float(targetConfSum) / (dataIndex + 1), datasetName + '_confidence') #Drift detection start = time.time() # srcCP = self.__detectDrift(self.SWindow, 0) # trgCP = self.__detectDrift(self.TWindow, 1) srcCP = self.__detectDriftJava(self.SWindow, 0) trgCP = self.__detectDriftJava(self.TWindow, 1) end = time.time() # print(int(end - start), end="") if srcCP != -1: self.__saveResult(5555555.0, datasetName + '_confidence') Properties.logger.info( '-------------------------- S O U R C E D R I F T ------------------------------------' ) Properties.logger.info('\nDrift found on source stream.') Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP)) #remove data from buffer till srcCP for i in xrange(srcCP): del self.SDataBuffer[0] del self.SWindow[0] #Exception with srcCP=0 (windowsize hit max or avg error is less than cutoff). 
#Keep atleast cushion number of instances if srcCP == 0: while len(self.SDataBuffer) > Properties.CUSHION: del self.SDataBuffer[0] del self.SWindow[0] Properties.logger.info( 'Instances left in source sliding window : ' + str(len(self.SDataBuffer))) Properties.logger.info( 'Instances left in target sliding window : ' + str(len(self.TDataBuffer))) Properties.logger.info('Updating ensemble weights') self.ensemble.updateWeight(self.SDataBuffer, True) Properties.logger.info('Training a model for source stream') self.ensemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True) Properties.logger.info(self.ensemble.getEnsembleSummary()) if trgCP != -1: self.__saveResult(7777777.0, datasetName + '_confidence') Properties.logger.info( '-------------------------- T A R G E T D R I F T ------------------------------------' ) Properties.logger.info('Drift found on target stream.') Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP)) #remove data from buffer till trgCP for i in xrange(trgCP): del self.TDataBuffer[0] del self.TWindow[0] del self.TPredictWindow[0] #Exception with trgCP=0 (windowsize hit max or avg error is less than cutoff). 
#Keep atleast cushion number of instances if trgCP == 0: while len(self.TDataBuffer) > Properties.CUSHION: del self.TDataBuffer[0] del self.TWindow[0] del self.TPredictWindow[0] Properties.logger.info( 'Instances left in source sliding window : ' + str(len(self.SDataBuffer))) Properties.logger.info( 'Instances left in target sliding window : ' + str(len(self.TDataBuffer))) Properties.logger.info('Updating ensemble weights') self.ensemble.updateWeight(self.TDataBuffer, False) Properties.logger.info('Training a model for target stream') self.ensemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False) Properties.logger.info(self.ensemble.getEnsembleSummary()) dataIndex += 1 if dataIndex % 100 == 0: print('') Properties.logger.info('Done !!') """ Main module for MDC2 logic (using two separate ensembles) """ def start2(self, datasetName): #Get initial data buffer self.SInitialDataBuffer = self.source.initialData self.TInitialDataBuffer = self.target.initialData #Initialize Ensembles srcEnsemble = Ensemble(Properties.ENSEMBLE_SIZE) trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE) Properties.logger.info('Initializing Ensemble ...') #source model srcEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, True) Properties.logger.info('Source Ensemble') Properties.logger.info(srcEnsemble.getEnsembleSummary()) #target model trgEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, False) Properties.logger.info('Target Ensemble') Properties.logger.info(trgEnsemble.getEnsembleSummary()) dataIndex = 0 trueTargetNum = 0 targetConfSum = 0 Properties.logger.info('Starting MDC2 ...') while (len(self.source.data) > dataIndex): print('.', end="") #Source Stream sdata = self.source.data[dataIndex] self.SDataBuffer.append(sdata) resSource = srcEnsemble.evaluateEnsemble(sdata, True) self.SWindow.append(resSource[0]) #prediction of 0 or 1 #Target Stream tdata = self.target.data[dataIndex] self.TDataBuffer.append(tdata) resTarget = 
trgEnsemble.evaluateEnsemble(tdata, False) conf = resTarget[1] #confidence # If conf is very close to 0.0 or 1.0, beta probability might become zero, which can make problems in change detection. Handling this scenario. if conf < 0.1: self.TWindow.append(0.1) elif conf > 0.995: self.TWindow.append(0.995) else: self.TWindow.append(resTarget[1]) self.TPredictWindow.append(resTarget[0]) #get Target Accuracy if resTarget[0] == tdata[-1]: trueTargetNum += 1 acc = float(trueTargetNum) / (dataIndex + 1) self.__saveResult(acc, datasetName) #save confidence targetConfSum += conf self.__saveResult( float(targetConfSum) / (dataIndex + 1), datasetName + '_confidence') #Drift detection start = time.time() # srcCP = self.__detectDrift(self.SWindow, 0) # trgCP = self.__detectDrift(self.TWindow, 1) srcCP = self.__detectDriftJava(self.SWindow, 0) trgCP = self.__detectDriftJava(self.TWindow, 1) end = time.time() # print(int(end - start), end="") if srcCP != -1: self.__saveResult(5555555.0, datasetName + '_confidence') Properties.logger.info( '-------------------------- S O U R C E D R I F T ------------------------------------' ) Properties.logger.info('\nDrift found on source stream.') Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP)) #remove data from buffer till srcCP for i in xrange(srcCP): del self.SDataBuffer[0] del self.SWindow[0] #Exception with srcCP=0 (windowsize hit max or avg error is less than cutoff). 
#Keep atleast cushion number of instances if srcCP == 0: while len(self.SDataBuffer) > Properties.CUSHION: del self.SDataBuffer[0] del self.SWindow[0] Properties.logger.info( 'Instances left in source sliding window : ' + str(len(self.SDataBuffer))) Properties.logger.info( 'Instances left in target sliding window : ' + str(len(self.TDataBuffer))) #Updating source Ensemble Properties.logger.info('Updating source ensemble weights') srcEnsemble.updateWeight(self.SDataBuffer, True) Properties.logger.info('Training a model for source stream') srcEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True) Properties.logger.info('Source Ensemble') Properties.logger.info(srcEnsemble.getEnsembleSummary()) if trgCP != -1: self.__saveResult(7777777.0, datasetName + '_confidence') Properties.logger.info( '-------------------------- T A R G E T D R I F T ------------------------------------' ) Properties.logger.info('Drift found on target stream.') Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP)) #remove data from buffer till trgCP for i in xrange(trgCP): del self.TDataBuffer[0] del self.TWindow[0] del self.TPredictWindow[0] #Exception with trgCP=0 (windowsize hit max or avg error is less than cutoff). 
#Keep atleast cushion number of instances if trgCP == 0: while len(self.TDataBuffer) > Properties.CUSHION: del self.TDataBuffer[0] del self.TWindow[0] del self.TPredictWindow[0] Properties.logger.info( 'Instances left in source sliding window : ' + str(len(self.SDataBuffer))) Properties.logger.info( 'Instances left in target sliding window : ' + str(len(self.TDataBuffer))) Properties.logger.info('Updating target ensemble weights') trgEnsemble.updateWeight(self.TDataBuffer, False) Properties.logger.info('Training a model for target stream') trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False) Properties.logger.info('Target Ensemble') Properties.logger.info(trgEnsemble.getEnsembleSummary()) dataIndex += 1 if dataIndex % 100 == 0: print('') Properties.logger.info('Done !!') """ Baseline skmm (single target model with initial train only) """ def start_skmm(self, datasetName): #Get initial data buffer self.SInitialDataBuffer = self.source.initialData self.TInitialDataBuffer = self.target.initialData #Initialize Model model = Model() model.train(self.SInitialDataBuffer, self.TInitialDataBuffer, Properties.MAXVAR) dataIndex = 0 trueTargetNum = 0 Properties.logger.info('Starting skmm baseline ...') while (len(self.source.data) > dataIndex): print('.', end="") #Source Stream sdata = self.source.data[dataIndex] self.SDataBuffer.append(sdata) #Target Stream tdata = self.target.data[dataIndex] self.TDataBuffer.append(tdata) #test data instance in each model resTarget = model.test([tdata], Properties.MAXVAR) #get Target Accuracy if resTarget[0][0] == tdata[-1]: trueTargetNum += 1 acc = float(trueTargetNum) / (dataIndex + 1) self.__saveResult(acc, datasetName) dataIndex += 1 if dataIndex % 100 == 0: print('') Properties.logger.info('Done !!') """ Baseline mkmm (single target model trained periodically) """ def start_mkmm(self, datasetName): #Get initial data buffer self.SInitialDataBuffer = self.source.initialData self.TInitialDataBuffer = 
self.target.initialData #Initialize Model model = Model() model.train(self.SInitialDataBuffer, self.TInitialDataBuffer, Properties.MAXVAR) dataIndex = 0 trueTargetNum = 0 Properties.logger.info('Starting skmm baseline ...') while (len(self.source.data) > dataIndex): print('.', end="") #Source Stream sdata = self.source.data[dataIndex] self.SDataBuffer.append(sdata) #Target Stream tdata = self.target.data[dataIndex] self.TDataBuffer.append(tdata) #test data instance in each model resTarget = model.test([tdata], Properties.MAXVAR) #get Target Accuracy if resTarget[0][0] == tdata[-1]: trueTargetNum += 1 acc = float(trueTargetNum) / (dataIndex + 1) self.__saveResult(acc, datasetName) dataIndex += 1 if dataIndex % 100 == 0: print('') if dataIndex % Properties.MAX_WINDOW_SIZE == 0: model = Model() model.train(self.SDataBuffer, self.TDataBuffer, Properties.MAXVAR) self.SDataBuffer = [] self.TDataBuffer = [] Properties.logger.info('Done !!') """ Baseline srconly using an ensemble of only source classifiers. Target labels predicted from this ensemble using its target weights. 
""" def start_srconly(self, datasetName): #Get initial data buffer self.SInitialDataBuffer = self.source.initialData self.TInitialDataBuffer = self.target.initialData #Initialize Ensembles srcEnsemble = Ensemble(Properties.ENSEMBLE_SIZE) Properties.logger.info('Initializing Ensemble ...') #source model srcEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, True) Properties.logger.info('Source Ensemble') Properties.logger.info(srcEnsemble.getEnsembleSummary()) dataIndex = 0 trueTargetNum = 0 targetConfSum = 0 Properties.logger.info('Starting srconly-MDC ...') while (len(self.source.data) > dataIndex): print('.', end="") #Source Stream sdata = self.source.data[dataIndex] self.SDataBuffer.append(sdata) resSource = srcEnsemble.evaluateEnsemble(sdata, True) self.SWindow.append(resSource[0]) #prediction of 0 or 1 #Target Stream tdata = self.target.data[dataIndex] self.TDataBuffer.append(tdata) resTarget = srcEnsemble.evaluateEnsemble(tdata, False) conf = resTarget[1] #confidence # If conf is very close to 0.0 or 1.0, beta probability might become zero, which can make problems in change detection. Handling this scenario. 
if conf < 0.1: self.TWindow.append(0.1) elif conf > 0.995: self.TWindow.append(0.995) else: self.TWindow.append(resTarget[1]) self.TPredictWindow.append(resTarget[0]) #get Target Accuracy if resTarget[0] == tdata[-1]: trueTargetNum += 1 acc = float(trueTargetNum) / (dataIndex + 1) self.__saveResult(acc, datasetName) #save confidence targetConfSum += conf self.__saveResult( float(targetConfSum) / (dataIndex + 1), datasetName + '_confidence') #Drift detection start = time.time() # srcCP = self.__detectDrift(self.SWindow, 0) # trgCP = self.__detectDrift(self.TWindow, 1) srcCP = self.__detectDriftJava(self.SWindow, 0) trgCP = self.__detectDriftJava(self.TWindow, 1) end = time.time() # print(int(end - start), end="") if srcCP != -1: self.__saveResult(5555555.0, datasetName + '_confidence') Properties.logger.info( '-------------------------- S O U R C E D R I F T ------------------------------------' ) Properties.logger.info('\nDrift found on source stream.') Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP)) #remove data from buffer till srcCP for i in xrange(srcCP): del self.SDataBuffer[0] del self.SWindow[0] #Exception with srcCP=0 (windowsize hit max or avg error is less than cutoff). 
#Keep atleast cushion number of instances if srcCP == 0: while len(self.SDataBuffer) > Properties.CUSHION: del self.SDataBuffer[0] del self.SWindow[0] Properties.logger.info( 'Instances left in source sliding window : ' + str(len(self.SDataBuffer))) Properties.logger.info( 'Instances left in target sliding window : ' + str(len(self.TDataBuffer))) #Updating source Ensemble Properties.logger.info('Updating source ensemble weights') srcEnsemble.updateWeight(self.SDataBuffer, True) Properties.logger.info('Training a model for source stream') srcEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True) Properties.logger.info('Source Ensemble') Properties.logger.info(srcEnsemble.getEnsembleSummary()) if trgCP != -1: self.__saveResult(7777777.0, datasetName + '_confidence') Properties.logger.info( '-------------------------- T A R G E T D R I F T ------------------------------------' ) Properties.logger.info('Drift found on target stream.') Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP)) #remove data from buffer till trgCP for i in xrange(trgCP): del self.TDataBuffer[0] del self.TWindow[0] del self.TPredictWindow[0] #Exception with trgCP=0 (windowsize hit max or avg error is less than cutoff). 
#Keep atleast cushion number of instances if trgCP == 0: while len(self.TDataBuffer) > Properties.CUSHION: del self.TDataBuffer[0] del self.TWindow[0] del self.TPredictWindow[0] Properties.logger.info( 'Instances left in source sliding window : ' + str(len(self.SDataBuffer))) Properties.logger.info( 'Instances left in target sliding window : ' + str(len(self.TDataBuffer))) Properties.logger.info('Updating target ensemble weights') srcEnsemble.updateWeight(self.TDataBuffer, False) Properties.logger.info('Training a model for target stream') srcEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, True) Properties.logger.info('Target Ensemble') Properties.logger.info(srcEnsemble.getEnsembleSummary()) dataIndex += 1 if dataIndex % 100 == 0: print('') Properties.logger.info('Done !!') """ Baseline trgonly using an ensemble of only target classifiers. Target labels predicted from this ensemble using its target weights. Source drift is computed using source-weighted ensemble prediction. 
""" def start_trgonly(self, datasetName): #Get initial data buffer self.SInitialDataBuffer = self.source.initialData self.TInitialDataBuffer = self.target.initialData #Initialize Ensembles trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE) Properties.logger.info('Initializing Ensemble ...') #target model trgEnsemble.generateNewModel(self.SInitialDataBuffer, self.TInitialDataBuffer, False) Properties.logger.info('Target Ensemble') Properties.logger.info(trgEnsemble.getEnsembleSummary()) dataIndex = 0 trueTargetNum = 0 targetConfSum = 0 Properties.logger.info('Starting trgonly-MDC ...') while (len(self.source.data) > dataIndex): print('.', end="") #Source Stream sdata = self.source.data[dataIndex] self.SDataBuffer.append(sdata) resSource = trgEnsemble.evaluateEnsemble(sdata, True) self.SWindow.append(resSource[0]) #prediction of 0 or 1 #Target Stream tdata = self.target.data[dataIndex] self.TDataBuffer.append(tdata) resTarget = trgEnsemble.evaluateEnsemble(tdata, False) conf = resTarget[1] #confidence # If conf is very close to 0.0 or 1.0, beta probability might become zero, which can make problems in change detection. Handling this scenario. 
if conf < 0.1: self.TWindow.append(0.1) elif conf > 0.995: self.TWindow.append(0.995) else: self.TWindow.append(resTarget[1]) self.TPredictWindow.append(resTarget[0]) #get Target Accuracy if resTarget[0] == tdata[-1]: trueTargetNum += 1 acc = float(trueTargetNum) / (dataIndex + 1) self.__saveResult(acc, datasetName) #save confidence targetConfSum += conf self.__saveResult( float(targetConfSum) / (dataIndex + 1), datasetName + '_confidence') #Drift detection start = time.time() # srcCP = self.__detectDrift(self.SWindow, 0) # trgCP = self.__detectDrift(self.TWindow, 1) srcCP = self.__detectDriftJava(self.SWindow, 0) trgCP = self.__detectDriftJava(self.TWindow, 1) end = time.time() # print(int(end - start), end="") if srcCP != -1: self.__saveResult(5555555.0, datasetName + '_confidence') Properties.logger.info( '-------------------------- S O U R C E D R I F T ------------------------------------' ) Properties.logger.info('\nDrift found on source stream.') Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP)) #remove data from buffer till srcCP for i in xrange(srcCP): del self.SDataBuffer[0] del self.SWindow[0] #Exception with srcCP=0 (windowsize hit max or avg error is less than cutoff). 
#Keep atleast cushion number of instances if srcCP == 0: while len(self.SDataBuffer) > Properties.CUSHION: del self.SDataBuffer[0] del self.SWindow[0] Properties.logger.info( 'Instances left in source sliding window : ' + str(len(self.SDataBuffer))) Properties.logger.info( 'Instances left in target sliding window : ' + str(len(self.TDataBuffer))) #Updating source Ensemble Properties.logger.info('Updating source ensemble weights') trgEnsemble.updateWeight(self.SDataBuffer, True) Properties.logger.info('Training a model for source stream') trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False) Properties.logger.info('Source Ensemble') Properties.logger.info(trgEnsemble.getEnsembleSummary()) if trgCP != -1: self.__saveResult(7777777.0, datasetName + '_confidence') Properties.logger.info( '-------------------------- T A R G E T D R I F T ------------------------------------' ) Properties.logger.info('Drift found on target stream.') Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' + str(srcCP) + '\ttrgCP=' + str(trgCP)) #remove data from buffer till trgCP for i in xrange(trgCP): del self.TDataBuffer[0] del self.TWindow[0] del self.TPredictWindow[0] #Exception with trgCP=0 (windowsize hit max or avg error is less than cutoff). 
#Keep atleast cushion number of instances if trgCP == 0: while len(self.TDataBuffer) > Properties.CUSHION: del self.TDataBuffer[0] del self.TWindow[0] del self.TPredictWindow[0] Properties.logger.info( 'Instances left in source sliding window : ' + str(len(self.SDataBuffer))) Properties.logger.info( 'Instances left in target sliding window : ' + str(len(self.TDataBuffer))) Properties.logger.info('Updating target ensemble weights') trgEnsemble.updateWeight(self.TDataBuffer, False) Properties.logger.info('Training a model for target stream') trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer, False) Properties.logger.info('Target Ensemble') Properties.logger.info(trgEnsemble.getEnsembleSummary()) dataIndex += 1 if dataIndex % 100 == 0: print('') Properties.logger.info('Done !!')