def start_trgonly(self, datasetName):
    """Baseline trgonly: a single ensemble holding only target classifiers.

    Target labels are predicted with the ensemble's target weights; source
    drift is still monitored via the source-weighted prediction. Results
    (accuracy and running mean confidence) are appended to files keyed by
    datasetName.

    NOTE(review): this is a method of a class not visible in this chunk; it
    relies on self.source/self.target streams, the SDataBuffer/TDataBuffer
    queues, the sliding windows and the __saveResult/__detectDriftJava
    helpers defined on that class.
    """
    # Get initial data buffer
    self.SInitialDataBuffer = self.source.initialData
    self.TInitialDataBuffer = self.target.initialData

    # Initialize Ensembles
    trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)
    Properties.logger.info('Initializing Ensemble ...')
    # target model only (trgonly baseline)
    trgEnsemble.generateNewModel(self.SInitialDataBuffer,
                                 self.TInitialDataBuffer, False)
    Properties.logger.info('Target Ensemble')
    Properties.logger.info(trgEnsemble.getEnsembleSummary())

    dataIndex = 0
    trueTargetNum = 0
    targetConfSum = 0
    Properties.logger.info('Starting trgonly-MDC ...')
    while len(self.source.data) > dataIndex:
        print('.', end="")

        # Source Stream
        sdata = self.source.data[dataIndex]
        self.SDataBuffer.append(sdata)
        resSource = trgEnsemble.evaluateEnsemble(sdata, True)
        self.SWindow.append(resSource[0])  # prediction of 0 or 1

        # Target Stream
        tdata = self.target.data[dataIndex]
        self.TDataBuffer.append(tdata)
        resTarget = trgEnsemble.evaluateEnsemble(tdata, False)
        conf = resTarget[1]  # confidence

        # If conf is very close to 0.0 or 1.0, the beta probability might
        # become zero, which can break change detection: clip into [0.1, 0.995].
        if conf < 0.1:
            self.TWindow.append(0.1)
        elif conf > 0.995:
            self.TWindow.append(0.995)
        else:
            self.TWindow.append(resTarget[1])
        self.TPredictWindow.append(resTarget[0])

        # get Target Accuracy
        if resTarget[0] == tdata[-1]:
            trueTargetNum += 1
        acc = float(trueTargetNum) / (dataIndex + 1)
        self.__saveResult(acc, datasetName)

        # save running mean confidence
        targetConfSum += conf
        self.__saveResult(float(targetConfSum) / (dataIndex + 1),
                          datasetName + '_confidence')

        # Drift detection (delegated to the Java-side detector)
        srcCP = self.__detectDriftJava(self.SWindow, 0)
        trgCP = self.__detectDriftJava(self.TWindow, 1)

        if srcCP != -1:
            # sentinel value marks source drift in the confidence file
            self.__saveResult(5555555.0, datasetName + '_confidence')
            Properties.logger.info(
                '-------------------------- S O U R C E D R I F T ------------------------------------'
            )
            Properties.logger.info('\nDrift found on source stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' +
                                   str(srcCP) + '\ttrgCP=' + str(trgCP))
            # drop buffered data up to the change point
            del self.SDataBuffer[:srcCP]
            del self.SWindow[:srcCP]
            # srcCP == 0 means the window hit max size or avg error fell below
            # the cutoff: keep at least CUSHION instances in that case.
            if srcCP == 0:
                excess = len(self.SDataBuffer) - Properties.CUSHION
                if excess > 0:
                    del self.SDataBuffer[:excess]
                    del self.SWindow[:excess]
            Properties.logger.info('Instances left in source sliding window : ' +
                                   str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' +
                                   str(len(self.TDataBuffer)))
            # Updating source Ensemble
            Properties.logger.info('Updating source ensemble weights')
            trgEnsemble.updateWeight(self.SDataBuffer, True)
            Properties.logger.info('Training a model for source stream')
            # NOTE(review): retrains with the target flag (False) even on
            # source drift -- looks intentional for the trgonly baseline
            # (only target models are kept); confirm.
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer,
                                         False)
            Properties.logger.info('Source Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        if trgCP != -1:
            # sentinel value marks target drift in the confidence file
            self.__saveResult(7777777.0, datasetName + '_confidence')
            Properties.logger.info(
                '-------------------------- T A R G E T D R I F T ------------------------------------'
            )
            Properties.logger.info('Drift found on target stream.')
            Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' +
                                   str(srcCP) + '\ttrgCP=' + str(trgCP))
            # drop buffered data up to the change point
            del self.TDataBuffer[:trgCP]
            del self.TWindow[:trgCP]
            del self.TPredictWindow[:trgCP]
            # trgCP == 0: same max-size / cutoff exception; keep CUSHION instances.
            if trgCP == 0:
                excess = len(self.TDataBuffer) - Properties.CUSHION
                if excess > 0:
                    del self.TDataBuffer[:excess]
                    del self.TWindow[:excess]
                    del self.TPredictWindow[:excess]
            Properties.logger.info('Instances left in source sliding window : ' +
                                   str(len(self.SDataBuffer)))
            Properties.logger.info('Instances left in target sliding window : ' +
                                   str(len(self.TDataBuffer)))
            Properties.logger.info('Updating target ensemble weights')
            trgEnsemble.updateWeight(self.TDataBuffer, False)
            Properties.logger.info('Training a model for target stream')
            trgEnsemble.generateNewModel(self.SDataBuffer, self.TDataBuffer,
                                         False)
            Properties.logger.info('Target Ensemble')
            Properties.logger.info(trgEnsemble.getEnsembleSummary())

        dataIndex += 1
        if dataIndex % 100 == 0:
            print('')
    Properties.logger.info('Done !!')
class Manager(object):
    """Drives multistream classification with covariate-shift correction.

    A source stream (labeled) and a target stream (unlabeled at test time)
    are read from files; instance weights for the source data are estimated
    with KLIEP, KMM or alpha-relative uLSIF, then an SVM ensemble trained on
    the weighted source data is evaluated over the target stream.
    """

    def __init__(self, sourceFile, targetFile):
        self.SDataBufferArr = None  # 2D array representation of the source buffer
        self.SDataLabels = None
        self.TDataBufferArr = None  # 2D array representation of the target buffer
        self.TDataLabels = None

        self.useKliepCVSigma = Properties.useKliepCVSigma
        self.arulsifAlpha = Properties.arulsifAlpha
        self.useSvmCVParams = Properties.useSvmCVParams

        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        self.initialWindowSize = int(Properties.INITIAL_DATA_SIZE)
        self.maxWindowSize = int(Properties.MAX_WINDOW_SIZE)
        self.enableForceUpdate = int(Properties.enableForceUpdate)
        self.forceUpdatePeriod = int(Properties.forceUpdatePeriod)

        # Simulate source and target streams from the corresponding files.
        print("Reading the Source Dataset")
        self.source = Stream(sourceFile, Properties.INITIAL_DATA_SIZE)
        print("Reading the Target Dataset")
        self.target = Stream(targetFile, Properties.INITIAL_DATA_SIZE)
        print("Finished Reading the Target Dataset")
        # assumes data is laid out (features, instances), so MAXVAR is the
        # feature count -- TODO confirm against the Stream implementation
        Properties.MAXVAR = self.source.data.shape[0]

    def __saveResult(self, acc, datasetName):
        """Append a value (accuracy or confidence) to the result file,
        with datasetName as an identifier."""
        with open(datasetName + '_' + Properties.OUTFILENAME, 'a') as f:
            f.write(str(acc) + "\n")

    def convListOfDictToNDArray(self, listOfDict):
        """Convert a list of {featureIndex: value} dicts (label stored under
        key -1) to a (numFeatures, numInstances) float ndarray.

        Returns an empty list (not an empty array) for empty input, kept for
        caller compatibility.
        """
        if not listOfDict:
            return []
        # one column per instance; skip the label entry (key -1)
        columns = [[float(v) for k, v in d.items() if k != -1]
                   for d in listOfDict]
        return np.array(columns, dtype=float).T

    def collectLabels(self, listOfDict):
        """Extract the label (key -1) of every instance, as strings."""
        return [str(d[-1]) for d in listOfDict]

    def _computeSourceWeights(self, method):
        """Estimate instance weights for the source window.

        method is matched by substring: 'kliep', 'kmm' or 'arulsif'.
        Returns a weight array usable as weightSrcData, or None when the
        method is unknown.
        """
        if 'kliep' in method:
            Properties.logger.info(
                'Using KLIEP method for covariate shift correction.')
            gmodel = gm.GaussianModel()
            kliep = Kliep(Properties.kliepParEta, Properties.kliepParLambda,
                          Properties.kliepParB, Properties.kliepParThreshold,
                          Properties.kliepDefSigma)
            # optionally choose sigma by cross-validation
            if self.useKliepCVSigma == 1:
                kliep.kliepDefSigma = kliep.chooseSigma(
                    self.SDataBufferArr, self.TDataBufferArr)
            Properties.logger.info('Estimating initial DRM')
            gmodel.alphah, kernelMatSrcData, kernelMatTrgData, gmodel.refPoints = \
                kliep.KLIEP(self.SDataBufferArr, self.TDataBufferArr)
            # keep only the last MAX_WINDOW_SIZE rows of the kernel matrices
            kernelMatSrcData = kernelMatSrcData[-Properties.MAX_WINDOW_SIZE:, :]
            kernelMatTrgData = kernelMatTrgData[-Properties.MAX_WINDOW_SIZE:, :]
            Properties.logger.info('Initializing Ensemble with the first model')
            # weight for source instances; weightSrcData is a column matrix and
            # is converted to a list before generating the new model
            return kliep.calcInstanceWeights(kernelMatSrcData, gmodel.alphah)
        if 'kmm' in method:
            Properties.logger.info(
                'Using KMM method for covariate shift correction.')
            kmm = KMM()
            gammab = kmm.computeKernelWidth(self.SDataBufferArr)
            Xtrain = self.SDataBufferArr.T.tolist()
            Xtest = self.TDataBufferArr.T.tolist()
            beta = kmm.kmm(Xtrain, Xtest, gammab)
            return np.array(beta).reshape(1, len(beta))
        if 'arulsif' in method:
            Properties.logger.info(
                'Using alpha-relative-uLSIF method for covariate shift correction.'
            )
            arulsif = Alpha_RULSIF()
            beta = arulsif.R_ULSIF(self.SDataBufferArr, self.TDataBufferArr,
                                   self.arulsifAlpha)
            return np.array(beta).reshape(1, len(beta))
        return None

    def startClassification(self, datasetName, method='kliep'):
        """Run multistream classification using the given weighting method.

        method: 'kliep' (default), 'kmm' or 'arulsif' (substring match).
        Writes per-instance accuracy and running mean confidence to files
        keyed by datasetName.
        """
        # save the timestamp
        globalStartTime = time.time()
        Properties.logger.info('Global Start Time: ' +
                               datetime.datetime.fromtimestamp(globalStartTime)
                               .strftime('%Y-%m-%d %H:%M:%S'))
        # open files for saving accuracy and confidence; try/finally so the
        # early return on an unknown method no longer leaks the handles
        fAcc = open(datasetName + '_' + Properties.OUTFILENAME, 'w')
        fConf = open(
            datasetName + '_confidence' + '_' + Properties.OUTFILENAME, 'w')
        try:
            # Get data buffer
            self.SDataBufferArr = self.source.data
            self.SDataLabels = self.source.dataLabels
            self.TDataBufferArr = self.target.data

            # now resize the windows appropriately
            self.SDataBufferArr = self.SDataBufferArr[:, -Properties.MAX_WINDOW_SIZE:]
            self.SDataLabels = self.SDataLabels[-Properties.MAX_WINDOW_SIZE:]
            self.TDataBufferArr = self.TDataBufferArr[:, -Properties.MAX_WINDOW_SIZE:]

            weightSrcData = self._computeSourceWeights(method)
            if weightSrcData is None:
                print('Incorrect method. Please try again')
                return

            SDataBufferArrTransposed = self.SDataBufferArr.T
            TDataBufferArrTransposed = self.TDataBufferArr.T
            if self.useSvmCVParams == 1:
                params = {'gamma': [2**2, 2**-16], 'C': [2**-6, 2**15]}
                svr = svm.SVC()
                # NOTE(review): grid_search is the pre-0.20 sklearn module;
                # modern releases expose GridSearchCV from model_selection.
                opt = grid_search.GridSearchCV(svr, params)
                opt.fit(SDataBufferArrTransposed.tolist(), self.SDataLabels)
                optParams = opt.best_params_
                self.ensemble.generateNewModel(
                    SDataBufferArrTransposed.tolist(), self.SDataLabels,
                    TDataBufferArrTransposed, weightSrcData[0].tolist(),
                    optParams['C'], optParams['gamma'], Properties.svmKernel)
            else:
                self.ensemble.generateNewModel(
                    SDataBufferArrTransposed.tolist(), self.SDataLabels,
                    TDataBufferArrTransposed, weightSrcData[0].tolist(),
                    Properties.svmDefC, Properties.svmDefGamma,
                    Properties.svmKernel)
            Properties.logger.info(self.ensemble.getEnsembleSummary())

            tDataIndex = 0
            trueTargetNum = 0
            targetConfSum = 0
            while self.target.data.shape[1] > tDataIndex:
                # Target Stream; '#' indicates a new point from target
                print('#', end="")
                newTargetDataArr = self.target.data[:, tDataIndex][np.newaxis].T
                # get Target Accuracy on the new instance
                resTarget = self.ensemble.evaluateEnsemble(
                    np.reshape(newTargetDataArr, (1, -1)))
                # float predictions are compared with a tolerance
                if isinstance(resTarget[0], float) and abs(
                        resTarget[0] -
                        self.target.dataLabels[tDataIndex]) < 0.0001:
                    trueTargetNum += 1
                elif resTarget[0] == self.target.dataLabels[tDataIndex]:
                    trueTargetNum += 1
                acc = float(trueTargetNum) / (tDataIndex + 1)
                if (tDataIndex % 100) == 0:
                    Properties.logger.info('\nTotal test instance: ' +
                                           str(tDataIndex + 1) + ', correct: ' +
                                           str(trueTargetNum) + ', accuracy: ' +
                                           str(acc))
                fAcc.write(str(acc) + "\n")
                conf = resTarget[1]  # confidence
                # save running mean confidence
                targetConfSum += conf
                fConf.write(str(float(targetConfSum) / (tDataIndex + 1)) + "\n")
                tDataIndex += 1
        finally:
            fConf.close()
            fAcc.close()

        # save the timestamp (label fixed: this is the end time, not the start)
        globalEndTime = time.time()
        Properties.logger.info('\nGlobal End Time: ' +
                               datetime.datetime.fromtimestamp(globalEndTime).
                               strftime('%Y-%m-%d %H:%M:%S'))
        Properties.logger.info('Total Time Spent: ' +
                               str(globalEndTime - globalStartTime) +
                               ' seconds')
        Properties.logger.info('Done !!')
class Manager(object):
    """MDC manager: drift-aware multistream classification.

    Runs paired source/target streams through an SVM ensemble, monitors both
    streams for concept drift (change detection delegated to a Java service
    over py4j), and retrains/reweights the ensemble(s) when drift is found.
    Also provides the skmm/mkmm/srconly/trgonly baselines.
    """

    def __init__(self, sourceFile, targetFile):
        self.SWindow = []         # source predictions fed to change detection
        self.TWindow = []         # clipped target confidences fed to change detection
        self.TPredictWindow = []  # raw target predictions

        self.SDataBuffer = []  # Queue of raw source instances
        self.TDataBuffer = []  # Queue of raw target instances

        self.SInitialDataBuffer = []
        self.TInitialDataBuffer = []

        self.changeDetector = ChangeDetection(Properties.GAMMA,
                                              Properties.SENSITIVITY,
                                              Properties.MAX_WINDOW_SIZE)
        self.ensemble = Ensemble(Properties.ENSEMBLE_SIZE)

        # classNameList is shared between both streams, presumably so they
        # agree on a common class-name mapping -- verify against Stream
        classNameList = []
        self.source = Stream(sourceFile, classNameList,
                             Properties.INITIAL_DATA_SIZE)
        self.target = Stream(targetFile, classNameList,
                             Properties.INITIAL_DATA_SIZE)
        Properties.MAXVAR = self.source.MAXVAR

        # Java-side change detector, reachable through a py4j gateway.
        self.gateway = JavaGateway(
            start_callback_server=True,
            gateway_parameters=GatewayParameters(port=Properties.PY4JPORT),
            callback_server_parameters=CallbackServerParameters(
                port=Properties.PY4JPORT + 1))
        self.app = self.gateway.entry_point

    def __detectDrift(self, slidingWindow, flagStream):
        """Detect drift on a sliding window using the Python detector.

        flagStream: 0 = source stream, 1 = target stream.
        Returns the change-point index, or -1 when no change is found.
        Raises Exception for any other flagStream value.
        """
        if flagStream == 0:
            return self.changeDetector.detectSourceChange(slidingWindow)
        if flagStream == 1:
            return self.changeDetector.detectTargetChange(slidingWindow)
        raise Exception('flagStream var has value ' + str(flagStream) +
                        ' that is not supported.')

    def __detectDriftJava(self, slidingWindow, flagStream):
        """Detect drift on a sliding window via the Java gateway.

        Same contract as __detectDrift.
        """
        sw = self.gateway.jvm.java.util.ArrayList()
        for value in slidingWindow:
            sw.append(float(value))
        if flagStream == 0:
            return self.app.detectSourceChange(sw)
        if flagStream == 1:
            return self.app.detectTargetChange(sw)
        raise Exception('flagStream var has value ' + str(flagStream) +
                        ' that is not supported.')

    def __saveResult(self, acc, datasetName):
        """Append a value (accuracy or confidence) to the result file,
        with datasetName as an identifier."""
        with open(datasetName + '_' + Properties.OUTFILENAME, 'a') as f:
            f.write(str(acc) + '\n')

    def _clipConfidence(self, conf):
        """Clip a confidence into [0.1, 0.995].

        If conf is very close to 0.0 or 1.0 the beta probability might become
        zero, which can break change detection.
        """
        return min(max(conf, 0.1), 0.995)

    def _trimSourceWindow(self, srcCP):
        """Drop buffered source data up to change point srcCP.

        srcCP == 0 means the window hit max size or the avg error fell below
        the cutoff; in that case keep at least CUSHION instances.
        """
        del self.SDataBuffer[:srcCP]
        del self.SWindow[:srcCP]
        if srcCP == 0:
            excess = len(self.SDataBuffer) - Properties.CUSHION
            if excess > 0:
                del self.SDataBuffer[:excess]
                del self.SWindow[:excess]

    def _trimTargetWindow(self, trgCP):
        """Drop buffered target data up to change point trgCP (see
        _trimSourceWindow for the trgCP == 0 exception)."""
        del self.TDataBuffer[:trgCP]
        del self.TWindow[:trgCP]
        del self.TPredictWindow[:trgCP]
        if trgCP == 0:
            excess = len(self.TDataBuffer) - Properties.CUSHION
            if excess > 0:
                del self.TDataBuffer[:excess]
                del self.TWindow[:excess]
                del self.TPredictWindow[:excess]

    def _handleSourceDrift(self, datasetName, dataIndex, srcCP, trgCP,
                           ensemble, updateMsg, genSourceModel, summaryLabel):
        """Common source-drift bookkeeping: sentinel in the confidence file,
        logging, window trimming, weight update and retraining.

        genSourceModel is forwarded to generateNewModel; summaryLabel (or
        None) is an extra log line before the ensemble summary.
        """
        self.__saveResult(5555555.0, datasetName + '_confidence')
        Properties.logger.info(
            '-------------------------- S O U R C E D R I F T ------------------------------------'
        )
        Properties.logger.info('\nDrift found on source stream.')
        Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' +
                               str(srcCP) + '\ttrgCP=' + str(trgCP))
        self._trimSourceWindow(srcCP)
        Properties.logger.info('Instances left in source sliding window : ' +
                               str(len(self.SDataBuffer)))
        Properties.logger.info('Instances left in target sliding window : ' +
                               str(len(self.TDataBuffer)))
        Properties.logger.info(updateMsg)
        ensemble.updateWeight(self.SDataBuffer, True)
        Properties.logger.info('Training a model for source stream')
        ensemble.generateNewModel(self.SDataBuffer, self.TDataBuffer,
                                  genSourceModel)
        if summaryLabel is not None:
            Properties.logger.info(summaryLabel)
        Properties.logger.info(ensemble.getEnsembleSummary())

    def _handleTargetDrift(self, datasetName, dataIndex, srcCP, trgCP,
                           ensemble, updateMsg, genSourceModel, summaryLabel):
        """Common target-drift bookkeeping (mirror of _handleSourceDrift)."""
        self.__saveResult(7777777.0, datasetName + '_confidence')
        Properties.logger.info(
            '-------------------------- T A R G E T D R I F T ------------------------------------'
        )
        Properties.logger.info('Drift found on target stream.')
        Properties.logger.info('dataIndex=' + str(dataIndex) + '\tsrcCP=' +
                               str(srcCP) + '\ttrgCP=' + str(trgCP))
        self._trimTargetWindow(trgCP)
        Properties.logger.info('Instances left in source sliding window : ' +
                               str(len(self.SDataBuffer)))
        Properties.logger.info('Instances left in target sliding window : ' +
                               str(len(self.TDataBuffer)))
        Properties.logger.info(updateMsg)
        ensemble.updateWeight(self.TDataBuffer, False)
        Properties.logger.info('Training a model for target stream')
        ensemble.generateNewModel(self.SDataBuffer, self.TDataBuffer,
                                  genSourceModel)
        if summaryLabel is not None:
            Properties.logger.info(summaryLabel)
        Properties.logger.info(ensemble.getEnsembleSummary())

    def _runStream(self, datasetName, sEns, tEns,
                   srcUpdateMsg, srcGenFlag, srcSummaryLabel,
                   trgUpdateMsg, trgGenFlag, trgSummaryLabel):
        """Shared per-instance loop for start/start2/start_srconly/start_trgonly.

        sEns evaluates the source stream and handles source drift; tEns
        evaluates the target stream and handles target drift (the two may be
        the same object). The remaining arguments parameterize the log
        messages and the generateNewModel flags of the drift handlers.
        """
        dataIndex = 0
        trueTargetNum = 0
        targetConfSum = 0
        while len(self.source.data) > dataIndex:
            print('.', end="")

            # Source Stream
            sdata = self.source.data[dataIndex]
            self.SDataBuffer.append(sdata)
            resSource = sEns.evaluateEnsemble(sdata, True)
            self.SWindow.append(resSource[0])  # prediction of 0 or 1

            # Target Stream
            tdata = self.target.data[dataIndex]
            self.TDataBuffer.append(tdata)
            resTarget = tEns.evaluateEnsemble(tdata, False)
            conf = resTarget[1]  # confidence
            self.TWindow.append(self._clipConfidence(conf))
            self.TPredictWindow.append(resTarget[0])

            # get Target Accuracy
            if resTarget[0] == tdata[-1]:
                trueTargetNum += 1
            acc = float(trueTargetNum) / (dataIndex + 1)
            self.__saveResult(acc, datasetName)

            # save running mean confidence
            targetConfSum += conf
            self.__saveResult(float(targetConfSum) / (dataIndex + 1),
                              datasetName + '_confidence')

            # Drift detection
            srcCP = self.__detectDriftJava(self.SWindow, 0)
            trgCP = self.__detectDriftJava(self.TWindow, 1)

            if srcCP != -1:
                self._handleSourceDrift(datasetName, dataIndex, srcCP, trgCP,
                                        sEns, srcUpdateMsg, srcGenFlag,
                                        srcSummaryLabel)
            if trgCP != -1:
                self._handleTargetDrift(datasetName, dataIndex, srcCP, trgCP,
                                        tEns, trgUpdateMsg, trgGenFlag,
                                        trgSummaryLabel)

            dataIndex += 1
            if dataIndex % 100 == 0:
                print('')
        Properties.logger.info('Done !!')

    def start(self, datasetName):
        """Main MDC logic using a single shared ensemble."""
        # Get initial data buffer
        self.SInitialDataBuffer = self.source.initialData
        self.TInitialDataBuffer = self.target.initialData

        Properties.logger.info('Initializing Ensemble ...')
        # source model
        self.ensemble.generateNewModel(self.SInitialDataBuffer,
                                       self.TInitialDataBuffer, True)
        # target model
        self.ensemble.generateNewModel(self.SInitialDataBuffer,
                                       self.TInitialDataBuffer, False)
        Properties.logger.info(self.ensemble.getEnsembleSummary())

        Properties.logger.info('Starting MDC ...')
        self._runStream(datasetName, self.ensemble, self.ensemble,
                        'Updating ensemble weights', True, None,
                        'Updating ensemble weights', False, None)

    def start2(self, datasetName):
        """MDC2 logic using two separate (source and target) ensembles."""
        # Get initial data buffer
        self.SInitialDataBuffer = self.source.initialData
        self.TInitialDataBuffer = self.target.initialData

        # Initialize Ensembles
        srcEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)
        trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)
        Properties.logger.info('Initializing Ensemble ...')
        # source model
        srcEnsemble.generateNewModel(self.SInitialDataBuffer,
                                     self.TInitialDataBuffer, True)
        Properties.logger.info('Source Ensemble')
        Properties.logger.info(srcEnsemble.getEnsembleSummary())
        # target model
        trgEnsemble.generateNewModel(self.SInitialDataBuffer,
                                     self.TInitialDataBuffer, False)
        Properties.logger.info('Target Ensemble')
        Properties.logger.info(trgEnsemble.getEnsembleSummary())

        Properties.logger.info('Starting MDC2 ...')
        self._runStream(datasetName, srcEnsemble, trgEnsemble,
                        'Updating source ensemble weights', True,
                        'Source Ensemble',
                        'Updating target ensemble weights', False,
                        'Target Ensemble')

    def start_skmm(self, datasetName):
        """Baseline skmm: a single target model, trained once on the initial
        data only."""
        # Get initial data buffer
        self.SInitialDataBuffer = self.source.initialData
        self.TInitialDataBuffer = self.target.initialData

        # Initialize Model
        model = Model()
        model.train(self.SInitialDataBuffer, self.TInitialDataBuffer,
                    Properties.MAXVAR)

        dataIndex = 0
        trueTargetNum = 0
        Properties.logger.info('Starting skmm baseline ...')
        while len(self.source.data) > dataIndex:
            print('.', end="")
            # Source Stream
            sdata = self.source.data[dataIndex]
            self.SDataBuffer.append(sdata)
            # Target Stream
            tdata = self.target.data[dataIndex]
            self.TDataBuffer.append(tdata)

            # test data instance in the model
            resTarget = model.test([tdata], Properties.MAXVAR)

            # get Target Accuracy
            if resTarget[0][0] == tdata[-1]:
                trueTargetNum += 1
            acc = float(trueTargetNum) / (dataIndex + 1)
            self.__saveResult(acc, datasetName)

            dataIndex += 1
            if dataIndex % 100 == 0:
                print('')
        Properties.logger.info('Done !!')

    def start_mkmm(self, datasetName):
        """Baseline mkmm: a single target model retrained every
        MAX_WINDOW_SIZE instances on the buffered data."""
        # Get initial data buffer
        self.SInitialDataBuffer = self.source.initialData
        self.TInitialDataBuffer = self.target.initialData

        # Initialize Model
        model = Model()
        model.train(self.SInitialDataBuffer, self.TInitialDataBuffer,
                    Properties.MAXVAR)

        dataIndex = 0
        trueTargetNum = 0
        # log message fixed: this is the mkmm baseline (was 'skmm', copy-paste)
        Properties.logger.info('Starting mkmm baseline ...')
        while len(self.source.data) > dataIndex:
            print('.', end="")
            # Source Stream
            sdata = self.source.data[dataIndex]
            self.SDataBuffer.append(sdata)
            # Target Stream
            tdata = self.target.data[dataIndex]
            self.TDataBuffer.append(tdata)

            # test data instance in the model
            resTarget = model.test([tdata], Properties.MAXVAR)

            # get Target Accuracy
            if resTarget[0][0] == tdata[-1]:
                trueTargetNum += 1
            acc = float(trueTargetNum) / (dataIndex + 1)
            self.__saveResult(acc, datasetName)

            dataIndex += 1
            if dataIndex % 100 == 0:
                print('')
            # periodic retrain; buffers are rebound (not cleared in place)
            if dataIndex % Properties.MAX_WINDOW_SIZE == 0:
                model = Model()
                model.train(self.SDataBuffer, self.TDataBuffer,
                            Properties.MAXVAR)
                self.SDataBuffer = []
                self.TDataBuffer = []
        Properties.logger.info('Done !!')

    def start_srconly(self, datasetName):
        """Baseline srconly: an ensemble of only source classifiers; target
        labels are predicted from it using its target weights."""
        # Get initial data buffer
        self.SInitialDataBuffer = self.source.initialData
        self.TInitialDataBuffer = self.target.initialData

        # Initialize Ensembles
        srcEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)
        Properties.logger.info('Initializing Ensemble ...')
        # source model only
        srcEnsemble.generateNewModel(self.SInitialDataBuffer,
                                     self.TInitialDataBuffer, True)
        Properties.logger.info('Source Ensemble')
        Properties.logger.info(srcEnsemble.getEnsembleSummary())

        Properties.logger.info('Starting srconly-MDC ...')
        # NOTE(review): on target drift this baseline retrains with the source
        # flag (True) -- looks intentional (source-only models kept); confirm.
        self._runStream(datasetName, srcEnsemble, srcEnsemble,
                        'Updating source ensemble weights', True,
                        'Source Ensemble',
                        'Updating target ensemble weights', True,
                        'Target Ensemble')

    def start_trgonly(self, datasetName):
        """Baseline trgonly: an ensemble of only target classifiers; target
        labels are predicted from it using its target weights, and source
        drift is computed from the source-weighted ensemble prediction."""
        # Get initial data buffer
        self.SInitialDataBuffer = self.source.initialData
        self.TInitialDataBuffer = self.target.initialData

        # Initialize Ensembles
        trgEnsemble = Ensemble(Properties.ENSEMBLE_SIZE)
        Properties.logger.info('Initializing Ensemble ...')
        # target model only
        trgEnsemble.generateNewModel(self.SInitialDataBuffer,
                                     self.TInitialDataBuffer, False)
        Properties.logger.info('Target Ensemble')
        Properties.logger.info(trgEnsemble.getEnsembleSummary())

        Properties.logger.info('Starting trgonly-MDC ...')
        # NOTE(review): on source drift this baseline retrains with the target
        # flag (False) -- looks intentional (target-only models kept); confirm.
        self._runStream(datasetName, trgEnsemble, trgEnsemble,
                        'Updating source ensemble weights', False,
                        'Source Ensemble',
                        'Updating target ensemble weights', False,
                        'Target Ensemble')