def local_process(local_input_list, thread, q):
    """Worker that loads BSM JSON files and folds them into a data collector.

    Parameters
    ----------
    local_input_list : list
        ``[filesNames, dataPath, AIType]`` — file names to read, the
        directory containing them, and the AI-type string forwarded to
        ``MlMain.getNodeArray``.
    thread : bool
        When True the result is published through ``q`` (run as a
        subprocess); when False the collector is returned directly.
    q : mapping
        Shared dict-like object (e.g. ``multiprocessing.Manager().dict()``);
        the result is stored under key ``1`` as a one-element list.

    Returns
    -------
    MlDataCollector or None
        The filled collector when ``thread`` is False, otherwise None.
    """
    filesNames = local_input_list[0]
    dataPath = local_input_list[1]
    AIType = local_input_list[2]
    localMaMain = MlMain()
    localDataCollector = MlDataCollector()
    bsm_list = []
    # int(len(...) * 1.0) in the original was a no-op scaling knob; plain len() is equivalent.
    for i in tqdm(range(len(filesNames))):
        s = filesNames[i]
        if s.endswith(".bsm"):
            # Context manager closes the handle promptly (original leaked it).
            with open(dataPath + '/' + s, 'r') as bsm_file:
                bsm_list.append(json.loads(bsm_file.read()))
        if s.endswith(".lbsm"):
            # A .lbsm file holds a JSON list of BSM records.
            with open(dataPath + '/' + s, 'r') as lbsm_file:
                bsm_list.extend(json.loads(lbsm_file.read()))
    # Sort newest-first by generationTime; the back-to-front loop below then
    # consumes the list oldest-first while freeing entries as it goes.
    bsm_list.sort(key=localMaMain.extract_time, reverse=True)
    for i in tqdm(range(len(bsm_list) - 1, -1, -1)):
        curArray = localMaMain.getNodeArray(bsm_list[i], AIType)
        localDataCollector.collectData(curArray)
        del bsm_list[i]  # release memory per item; these lists can be large
    if thread:
        q[1] = [localDataCollector]
    else:
        return localDataCollector
class MlMain:
    """Misbehavior-detection ML pipeline (Mix / V1 variant).

    Loads or trains a classifier over BSM check vectors using an
    ``MlArrayStorage`` keyed by sender pseudonym.  The ``RTreadDataFromFile``
    flag and helper classes (``MlDataCollector``, ``MlTrainer``,
    ``MlArrayStorage``) are defined elsewhere in the project.

    NOTE(review): all of these are class attributes, so every instance shares
    the same collector/trainer/storage objects.
    """
    initiated = False
    # Timestamp used to tag saved model/data files.
    curDateStr = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    DataCollector = MlDataCollector()
    Trainer = MlTrainer()
    Storage = MlArrayStorage()
    arrayLength = 5  # history window length per pseudonym
    collectDur = 0
    deltaCall = 1000
    clf = None  # loaded classifier, None until a model is found/trained
    savePath = './saveFile/saveFile_Mix'
    #dataPath = '/media/sca-team/ef5ca73c-c8ef-4e03-a88c-a54bcbb15b0e/DataF2MD/Test'
    dataPath = '/media/sca-team/DATA/DataF2MD/IRT-BSMS-MIX-V1/MDBsms_2018-11-6_19:19:15'
    #dataPath = '/media/sca-team/ef5ca73c-c8ef-4e03-a88c-a54bcbb15b0e/DataF2MD/IRT-BSMS-MIX-V2/MDBsms_2018-11-5_15:22:52'

    def init(self, version, AIType):
        """One-time setup: wire save paths, load any saved model, optionally retrain from disk."""
        self.savePath = self.savePath + '_' + str(version)
        self.DataCollector.setCurDateSrt(self.curDateStr)
        self.DataCollector.setSavePath(self.savePath)
        self.Trainer.setCurDateSrt(self.curDateStr)
        self.Trainer.setSavePath(self.savePath)
        self.Trainer.setAIType(AIType)
        self.trainedModelExists(AIType)
        # RTreadDataFromFile is a module-level config flag defined elsewhere.
        if RTreadDataFromFile:
            self.ReadDataFromFile(AIType)

    def mlMain(self):
        """Entry point; lazily initializes with hard-coded version/AI type and always returns False."""
        version = "V1"
        AIType = "neural_network"
        if not self.initiated:
            self.init(version, AIType)
            self.initiated = True
        return False

    def trainedModelExists(self, AIType):
        """Scan savePath for a matching ``clf_<AIType>_*.pkl``; if found, load model and saved data."""
        #filesNames = [f.name for f in scandir(self.savePath) if isfile(join(self.savePath, f.name))]
        filesNames = [
            f for f in tqdm(os.listdir(self.savePath))
            if os.path.isfile(join(self.savePath, f))
        ]
        print("trainedModelExists?")
        for s in filesNames:
            if s.startswith('clf_' + AIType) and s.endswith(".pkl"):
                # File name ends with the 19-char date stamp + ".pkl"; slice it back out.
                self.curDateStr = s[-23:-4]
                print("Loading " + AIType + " " + self.curDateStr + " ...")
                self.clf = joblib.load(self.savePath + '/' + s)
                self.DataCollector.setCurDateSrt(self.curDateStr)
                self.Trainer.setCurDateSrt(self.curDateStr)
                self.DataCollector.loadData()
                self.Trainer.setValuesCollection(
                    self.DataCollector.getValuesCollection())
                self.Trainer.setTargetCollection(
                    self.DataCollector.getTargetCollection())
                # Retrain cadence: one-fifth of the loaded sample count.
                self.deltaCall = self.DataCollector.valuesCollection.shape[0] / 5
                print("Loading " + str(self.DataCollector.valuesCollection.shape)
                      + " Finished!")

    def ReadDataFromFile(self, AIType):
        """Read every ``.bsm`` file in dataPath, build value/target arrays, train, and reload the model."""
        print("DataSave And Training " + str(self.dataPath) + " Started ...")
        #filesNames = [f.name for f in tqdm(scandir(self.dataPath)) if f.is_file()]
        filesNames = [
            f for f in tqdm(os.listdir(self.dataPath))
            if os.path.isfile(join(self.dataPath, f))
        ]
        print("bsmDataExists?")
        ValuesData = []
        TargetData = []
        for i in tqdm(range(0, len(filesNames))):
            s = filesNames[i]
            if s.endswith(".bsm"):
                # NOTE(review): file handle is never closed (open(...).read()).
                bsmJsonString = open(self.dataPath + '/' + s, 'r').read()
                bsmJsom = json.loads(bsmJsonString)
                curArray = self.getNodeArray(bsmJsom)
                ValuesData.append(curArray[0])
                TargetData.append(curArray[1])
        self.DataCollector.initValuesData(ValuesData)
        self.DataCollector.initTargetData(TargetData)
        self.DataCollector.saveData()
        self.Trainer.setValuesCollection(
            self.DataCollector.getValuesCollection())
        self.Trainer.setTargetCollection(
            self.DataCollector.getTargetCollection())
        self.Trainer.train()
        # train() is expected to have written this pickle; reload it as the live model.
        self.clf = joblib.load(self.savePath + '/clf_' + AIType + '_'
                               + self.curDateStr + '.pkl')
        self.deltaCall = self.DataCollector.valuesCollection.shape[0] / 5
        print("DataSave And Training " + str(self.dataPath) + " Finished!")

    def getNodeArray(self, bsmJsom):
        """Store this BSM's feature array under its pseudonym and return the windowed history array."""
        cur_array = self.getArray(bsmJsom)
        pseudonym = bsmJsom['BsmPrint']['BSMs'][0]['pseudonym']
        time = bsmJsom['BsmPrint']['Metadata']['generationTime']
        self.Storage.add_array(pseudonym, time, cur_array)
        returnArray = self.Storage.get_array(pseudonym, self.arrayLength)
        #print "cur_array: " + str(cur_array)
        #print "returnArray: " + str(returnArray)
        return returnArray

    def getArray(self, bsmJsom):
        """Extract the 10 plausibility-check features and the binary label from one BSM record.

        Returns ``array([valuesArray, targetArray])`` where target is 0.0 for
        'Genuine' and 1.0 for everything else.
        """
        rP = bsmJsom['BsmPrint']['BsmCheck']['rP']
        pP = bsmJsom['BsmPrint']['BsmCheck']['pP']
        sP = bsmJsom['BsmPrint']['BsmCheck']['sP']
        pC = bsmJsom['BsmPrint']['BsmCheck']['pC']
        sC = bsmJsom['BsmPrint']['BsmCheck']['sC']
        psC = bsmJsom['BsmPrint']['BsmCheck']['psC']
        phC = bsmJsom['BsmPrint']['BsmCheck']['phC']
        sA = bsmJsom['BsmPrint']['BsmCheck']['sA']
        #sA = 1
        bF = bsmJsom['BsmPrint']['BsmCheck']['bF']
        # inT is the minimum 'uVal' over all intersection checks.
        inT = 1
        for x in bsmJsom['BsmPrint']['BsmCheck']['inT']:
            if inT > x['uVal']:
                inT = x['uVal']
        time = bsmJsom['BsmPrint']['Metadata']['generationTime']
        label = bsmJsom['BsmPrint']['Metadata']['mbType']
        #label = 0
        if (label == 'Genuine'):
            numLabel = 0.0
        else:
            numLabel = 1.0
        valuesArray = array([rP, pP, sP, pC, sC, psC, phC, sA, bF, inT])
        targetArray = array([numLabel])
        returnArray = array([valuesArray, targetArray])
        #print "returnArray: " + str(returnArray)
        #returnArray = returnArray.astype(np.float)
        return returnArray
class MlMain:
    """Misbehavior-detection ML pipeline (Mix_D20 variant, Python 2).

    Same structure as the Mix/V1 variant but with a 20-deep history window,
    inverted (1 - check) feature encoding plus a bias term, and an online
    collect/retrain loop driven from ``mlMain``.  Uses Python 2 ``print``
    statements throughout.
    """
    initiated = False
    curDateStr = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    DataCollector = MlDataCollector()
    Trainer = MlTrainer()
    Storage = MlArrayStorage()
    arrayLength = 20  # history window length per pseudonym
    collectDur = 0  # samples collected since last retrain
    deltaCall = 1000  # retrain every deltaCall collected samples
    clf = None
    savePath = './saveFile/saveFile_Mix_D20'
    dataPath = './MDBsms_Mix'

    def init(self, version, AIType):
        """One-time setup: wire save paths, load any saved model, optionally retrain from disk."""
        self.savePath = self.savePath + '_' + str(version)
        self.DataCollector.setCurDateSrt(self.curDateStr)
        self.DataCollector.setSavePath(self.savePath)
        self.Trainer.setCurDateSrt(self.curDateStr)
        self.Trainer.setSavePath(self.savePath)
        self.Trainer.setAIType(AIType)
        self.trainedModelExists(AIType)
        # RTreadDataFromFile is a module-level config flag defined elsewhere.
        if RTreadDataFromFile:
            self.ReadDataFromFile(version, AIType)

    def mlMain(self, version, bsmJsonString, AIType):
        """Process one incoming BSM: optionally collect/retrain, then predict misbehavior.

        Returns True when the classifier flags the BSM as misbehaving,
        False otherwise (or when no model is loaded yet).
        """
        if not self.initiated:
            self.init(version, AIType)
            self.initiated = True
        bsmJsom = json.loads(bsmJsonString)
        curArray = self.getNodeArray(bsmJsom)
        if RTcollectData:
            if self.collectDur < self.deltaCall:
                self.collectDur = self.collectDur + 1
                self.DataCollector.collectData(curArray)
            else:
                # Window full: persist data and (optionally) retrain the model.
                print "DataSave And Training " + str(
                    self.deltaCall) + " Started ..."
                self.collectDur = 0
                self.DataCollector.saveData()
                if RTtrain:
                    self.Trainer.setValuesCollection(
                        self.DataCollector.getValuesCollection())
                    self.Trainer.setTargetCollection(
                        self.DataCollector.getTargetCollection())
                    print self.Trainer.valuesCollection.shape
                    self.Trainer.train()
                    self.clf = joblib.load(self.savePath + '/clf_' + AIType +
                                           '_' + self.curDateStr + '.pkl')
                    # Scale the retrain cadence with the data set size.
                    self.deltaCall = self.DataCollector.valuesCollection.shape[0] / 5
                    print "DataSave And Training " + str(
                        self.deltaCall) + " Finished!"
        if self.clf is None:
            return False
        else:
            if RTpredict:
                prediction = self.clf.predict(array([curArray[0]]))
                #print "========================================"
                # Class 0.0 == Genuine, anything else == misbehaving.
                if prediction[0] == 0.0:
                    return False
                else:
                    return True
            #print prediction
            #print curArray[1]
            #print "========================================"
        return False

    def trainedModelExists(self, AIType):
        """Scan savePath for a matching ``clf_<AIType>_*.pkl``; if found, load model and saved data."""
        filesNames = [
            f for f in listdir(self.savePath)
            if isfile(join(self.savePath, f))
        ]
        print "trainedModelExists?"
        for s in filesNames:
            if s.startswith('clf_' + AIType) and s.endswith(".pkl"):
                # File name ends with the 19-char date stamp + ".pkl".
                self.curDateStr = s[-23:-4]
                print "Loading " + AIType + " " + self.curDateStr + " ..."
                self.clf = joblib.load(self.savePath + '/' + s)
                self.DataCollector.setCurDateSrt(self.curDateStr)
                self.Trainer.setCurDateSrt(self.curDateStr)
                self.DataCollector.loadData()
                self.Trainer.setValuesCollection(
                    self.DataCollector.getValuesCollection())
                self.Trainer.setTargetCollection(
                    self.DataCollector.getTargetCollection())
                #self.deltaCall = self.DataCollector.valuesCollection.shape[0]/5
                print "Loading " + str(
                    self.DataCollector.valuesCollection.shape) + " Finished!"

    def ReadDataFromFile(self, version, AIType):
        """Read every ``.bsm`` file in ``dataPath_<version>``, collect, train, and reload the model."""
        print "DataSave And Training " + str(self.dataPath + '_' +
                                             version) + " Started ..."
        filesNames = [
            f for f in tqdm(listdir(self.dataPath + '_' + version))
            if isfile(join(self.dataPath + '_' + version, f))
        ]
        print "bsmDataExists?"
        # NOTE(review): ValuesData/TargetData are declared but never used here;
        # samples go straight into the DataCollector.
        ValuesData = []
        TargetData = []
        for i in tqdm(range(0, len(filesNames))):
            s = filesNames[i]
            if s.endswith(".bsm"):
                # NOTE(review): file handle is never closed (open(...).read()).
                bsmJsonString = open(self.dataPath + '_' + version + '/' + s,
                                     'r').read()
                bsmJsom = json.loads(bsmJsonString)
                curArray = self.getNodeArray(bsmJsom)
                self.DataCollector.collectData(curArray)
        self.DataCollector.saveData()
        self.Trainer.setValuesCollection(
            self.DataCollector.getValuesCollection())
        self.Trainer.setTargetCollection(
            self.DataCollector.getTargetCollection())
        self.Trainer.train()
        self.clf = joblib.load(self.savePath + '/clf_' + AIType + '_' +
                               self.curDateStr + '.pkl')
        #self.deltaCall = self.DataCollector.valuesCollection.shape[0]/5
        print "DataSave And Training " + str(self.dataPath + '_' +
                                             version) + " Finished!"

    def getNodeArray(self, bsmJsom):
        """Store this BSM's feature array under its pseudonym and return the windowed history array."""
        cur_array = self.getArray(bsmJsom)
        pseudonym = bsmJsom['BsmPrint']['BSMs'][0]['pseudonym']
        time = bsmJsom['BsmPrint']['Metadata']['generationTime']
        self.Storage.add_array(pseudonym, time, cur_array)
        returnArray = self.Storage.get_array(pseudonym, self.arrayLength)
        #print "cur_array: " + str(cur_array)
        #print "returnArray: " + str(returnArray)
        return returnArray

    def getArray(self, bsmJsom):
        """Extract inverted (1 - check) features plus a constant bias, and the binary label.

        Returns ``array([valuesArray, targetArray])``; target is 0.0 for
        'Genuine' and 1.0 otherwise.
        """
        rP = bsmJsom['BsmPrint']['BsmCheck']['rP']
        pP = bsmJsom['BsmPrint']['BsmCheck']['pP']
        sP = bsmJsom['BsmPrint']['BsmCheck']['sP']
        pC = bsmJsom['BsmPrint']['BsmCheck']['pC']
        sC = bsmJsom['BsmPrint']['BsmCheck']['sC']
        psC = bsmJsom['BsmPrint']['BsmCheck']['psC']
        phC = bsmJsom['BsmPrint']['BsmCheck']['phC']
        sA = bsmJsom['BsmPrint']['BsmCheck']['sA']
        #sA = 1
        bF = bsmJsom['BsmPrint']['BsmCheck']['bF']
        # inT is the minimum 'uVal' over all intersection checks.
        inT = 1
        for x in bsmJsom['BsmPrint']['BsmCheck']['inT']:
            if inT > x['uVal']:
                inT = x['uVal']
        time = bsmJsom['BsmPrint']['Metadata']['generationTime']
        label = bsmJsom['BsmPrint']['Metadata']['mbType']
        #label = 0
        if (label == 'Genuine'):
            numLabel = 0.0
        else:
            numLabel = 1.0
        #valuesArray = array([rP,pP,sP,pC,sC,psC,phC,sA,bF,inT])
        # Inverted encoding (1 - check) with a trailing constant 1 bias term.
        valuesArray = array([
            1 - rP, 1 - pP, 1 - sP, 1 - pC, 1 - sC, 1 - psC, 1 - phC, 1 - sA,
            1 - bF, 1 - inT, 1
        ])
        targetArray = array([numLabel])
        returnArray = array([valuesArray, targetArray])
        #print "returnArray: " + str(returnArray)
        #returnArray = returnArray.astype(np.float)
        return returnArray
class MlMain:
    """Misbehavior-detection ML pipeline (D40 variant, Python 3).

    Uses an ``MlNodeStorage`` keyed by (receiverId, pseudonym), a 40-deep
    history window, and dispatches array extraction on the AI type
    (SVM / MLP_L1N15 / MLP_L3N25 / LSTM).  Splits data loading
    (``ReadDataFromFile``) from training (``TrainData``).
    """
    initiated = False
    curDateStr = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    DataCollector = MlDataCollector()
    Trainer = MlTrainer()
    Storage = MlNodeStorage()
    arrayLength = 40  # history window length per (receiver, pseudonym)
    collectDur = 0
    deltaCall = 1000
    clf = None
    savePath = './saveFile/saveFile_D40'
    dataPath = '/home/sca-team/Projects/F2MD/mdmSave/IRT-BSMs-Reports-V2/MDBsmsList_2018-11-29_18:18:23'

    def init(self, version, AIType):
        """One-time setup: wire paths, load any saved model, optionally load data and/or train."""
        self.savePath = self.savePath + '_' + str(version)
        self.DataCollector.setCurDateSrt(self.curDateStr)
        self.DataCollector.setSavePath(self.savePath)
        self.Trainer.setCurDateSrt(self.curDateStr)
        self.Trainer.setSavePath(self.savePath)
        self.Trainer.setAIType(AIType)
        self.trainedModelExists(AIType)
        # RTreadDataFromFile / RTtrainData are module-level config flags defined elsewhere.
        if RTreadDataFromFile:
            self.ReadDataFromFile(AIType)
        if RTtrainData:
            self.TrainData(AIType)

    def mlMain(self):
        """Entry point; lazily initializes with hard-coded version/AI type and always returns False."""
        version = "V2"
        AIType = "MLP_L3N25"
        if not self.initiated:
            self.init(version, AIType)
            self.initiated = True
        return False

    def trainedModelExists(self, AIType):
        """Scan savePath for a matching ``clf_<AIType>_*.pkl``; if found, load model and saved data."""
        #filesNames = [f.name for f in scandir(self.savePath) if isfile(join(self.savePath, f.name))]
        filesNames = [f for f in tqdm(os.listdir(self.savePath))
                      if os.path.isfile(join(self.savePath, f))]
        print("trainedModelExists?")
        for s in filesNames:
            if s.startswith('clf_' + AIType) and s.endswith(".pkl"):
                # File name ends with the 19-char date stamp + ".pkl".
                self.curDateStr = s[-23:-4]
                print("Loading " + AIType + " " + self.curDateStr + " ...")
                self.clf = joblib.load(self.savePath + '/' + s)
                self.DataCollector.setCurDateSrt(self.curDateStr)
                self.Trainer.setCurDateSrt(self.curDateStr)
                self.DataCollector.loadData()
                self.Trainer.setValuesCollection(self.DataCollector.getValuesCollection())
                self.Trainer.setTargetCollection(self.DataCollector.getTargetCollection())
                self.deltaCall = self.DataCollector.valuesCollection.shape[0] / 5
                print("Loading " + str(self.DataCollector.valuesCollection.shape) + " Finished!")

    def ReadDataFromFile(self, AIType):
        """Load every ``.bsm`` / ``.lbsm`` file in dataPath into the data collector and save it."""
        print("DataLoad " + str(self.dataPath) + " Started ...")
        #filesNames = [f.name for f in tqdm(scandir(self.dataPath)) if f.is_file()]
        filesNames = [f for f in tqdm(os.listdir(self.dataPath))
                      if os.path.isfile(join(self.dataPath, f))]
        print("bsmDataExists?")
        # NOTE(review): ValuesData/TargetData are declared but never used here.
        ValuesData = []
        TargetData = []
        for i in tqdm(range(0, len(filesNames))):
            #for i in tqdm(range(0,3000)):
            s = filesNames[i]
            if s.endswith(".bsm"):
                # NOTE(review): file handle is never closed (open(...).read()).
                bsmJsonString = open(self.dataPath + '/' + s, 'r').read()
                bsmJsom = json.loads(bsmJsonString)
                curArray = self.getNodeArray(bsmJsom, AIType)
                self.DataCollector.collectData(curArray)
            if s.endswith(".lbsm"):
                # A .lbsm file holds a JSON list of BSM records.
                bsmJsonString = open(self.dataPath + '/' + s, 'r').read()
                bsmJsom = json.loads(bsmJsonString)
                for bsmItem in bsmJsom:
                    curArray = self.getNodeArray(bsmItem, AIType)
                    self.DataCollector.collectData(curArray)
        self.DataCollector.saveData()
        print("DataLoad " + str(self.dataPath) + " Finished!")

    def TrainData(self, AIType):
        """Train on the collected data and reload the resulting pickled classifier."""
        print("Training " + str(self.dataPath) + " Started ...")
        self.Trainer.setValuesCollection(self.DataCollector.getValuesCollection())
        self.Trainer.setTargetCollection(self.DataCollector.getTargetCollection())
        self.Trainer.train()
        self.clf = joblib.load(self.savePath + '/clf_' + AIType + '_' + self.curDateStr + '.pkl')
        self.deltaCall = self.DataCollector.valuesCollection.shape[0] / 5
        print("Training " + str(self.dataPath) + " Finished!")

    def getNodeArray(self, bsmJsom, AIType):
        """Store this BSM and return the AI-type-specific windowed array for its sender.

        NOTE(review): if AIType matches none of the four cases, returnArray is
        unbound and this raises UnboundLocalError.
        """
        receiverId = bsmJsom['BsmPrint']['Metadata']['receiverId']
        pseudonym = bsmJsom['BsmPrint']['BSMs'][0]['pseudonym']
        time = bsmJsom['BsmPrint']['Metadata']['generationTime']
        self.Storage.add_bsm(receiverId, pseudonym, time, bsmJsom)
        # Dispatch on classifier family: each gets a differently-shaped array.
        if (AIType == 'SVM'):
            returnArray = self.Storage.get_array(receiverId, pseudonym, self.arrayLength)
        if (AIType == 'MLP_L1N15'):
            returnArray = self.Storage.get_array_MLP_L1N15(receiverId, pseudonym, self.arrayLength)
        if (AIType == 'MLP_L3N25'):
            returnArray = self.Storage.get_array_MLP_L3N25(receiverId, pseudonym, self.arrayLength)
        if (AIType == 'LSTM'):
            returnArray = self.Storage.get_array_lstm(receiverId, pseudonym, self.arrayLength)
        #print "cur_array: " + str(cur_array)
        #print "returnArray: " + str(returnArray)
        return returnArray
class MlMain:
    """Misbehavior-detection ML pipeline (D60 variant, Python 3).

    The most featureful variant: multi-class attack labels via
    ``MlLabelEncoder``, runtime statistics (``MlStats`` /
    ``MlVarThresholdLite``), batched "multi-predict" inference, COMBINED
    (LSTM+MLP) dual-input models, and multiprocess data loading through the
    module-level ``local_process`` worker.  Numerous ``RT*`` names and
    ``Positive_Threshold`` are module-level config defined elsewhere.
    """
    initiated = False
    curDateStr = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    DataCollector = MlDataCollector()
    Trainer = MlTrainer()
    Storage = MlNodeStorage()
    arrayLength = 60  # history window length per (receiver, pseudonym)
    collectDur = 0
    deltaCall = 100000
    clf = None
    savePath = './saveFile/saveFile_D60'
    dataPath = './MDBsmsList_V2_2019-8-27_17:35:33'
    RTTrainDataFromFile = False
    # Running averages of per-call wall time (total and predict-only).
    meanRuntime = 0
    meanRuntime_p = 0
    numRuntime = 0
    printRuntime = 10000 * 10000  # effectively disables periodic stats printing
    printRuntimeCnt = 0
    filterdelta = 0  # last generationTime at which old BSMs were filtered
    # Binary label set (legacy mode).
    labels_legacy = [
        "Genuine",
        "LocalAttacker",
    ]
    # Full multi-class attack-type label set.
    labels_attacks = [
        "Genuine",
        "ConstPos",
        "ConstPosOffset",
        "RandomPos",
        "RandomPosOffset",
        "ConstSpeed",
        "ConstSpeedOffset",
        "RandomSpeed",
        "RandomSpeedOffset",
        "EventualStop",
        "Disruptive",
        "DataReplay",
        "StaleMessages",
        "DoS",
        "DoSRandom",
        "DoSDisruptive",
        "GridSybil",
        "DataReplaySybil",
        "DoSRandomSybil",
        "DoSDisruptiveSybil",
    ]
    version_added = False  # guard so the version suffix is appended to savePath only once
    le = MlLabelEncoder()
    stats = MlStats()
    varthrelite = MlVarThresholdLite()
    RTmultipredict = False  # when True, predictions are batched (multi_predict_num at a time)
    multi_predict_num = 524288
    multi_predict_count = 0
    multi_predict_array = []
    multi_predict_array_combined = {}
    multi_predict_label = []
    multi_predict_label_combined = {}

    def init(self, version, AIType):
        """One-time setup: fit the label encoder, wire paths, load model, optionally train and exit."""
        if RTDetectAttackTypes:
            self.le.fit(self.labels_attacks)
        else:
            self.le.fit(self.labels_legacy)
        if not self.version_added:
            if RTDetectAttackTypes:
                self.savePath = self.savePath + '_Attacks_' + str(version)
            else:
                self.savePath = self.savePath + '_Legacy_' + str(version)
            self.version_added = True
        self.DataCollector.setCurDateSrt(self.curDateStr)
        self.DataCollector.setSavePath(self.savePath)
        self.Trainer.setCurDateSrt(self.curDateStr)
        self.Trainer.setSavePath(self.savePath)
        self.Trainer.setAIType(AIType)
        self.trainedModelExists(AIType)
        if self.RTTrainDataFromFile:
            if RTaddexistingweights:
                # Warm-start: load previous weights before retraining.
                self.clf = joblib.load(self.savePath + '/clf_' + AIType + '_'
                                       + self.curDateStr + '.pkl')
            self.ReadDataFromFile(AIType)
            #self.TrainData(AIType)
            # Hard process exit once offline training is done.
            os._exit(0)

    def mlMain(self, version, bsmJsonString, AIType):
        """Process one incoming BSM and return "True"/"False" (strings) for misbehaving/genuine.

        Optionally collects data and retrains; tracks mean runtime of the
        whole call and of the predict step.
        """
        if not self.initiated:
            self.init(version, AIType)
            self.initiated = True
        start_time = time.time()
        bsmJsom = json.loads(bsmJsonString)
        curArray = self.getNodeArray(bsmJsom, AIType)
        if RTcollectData:
            if self.collectDur < self.deltaCall:
                self.collectDur = self.collectDur + 1
                self.DataCollector.collectData(curArray)
            else:
                print("DataSave And Training " + str(self.deltaCall)
                      + " Started ...")
                self.collectDur = 0
                self.DataCollector.saveData()
                if RTtrain:
                    print(len(self.Trainer.dataCollector.ValuesData))
                    self.Trainer.train(self.DataCollector, self.le)
                    self.clf = joblib.load(self.savePath + '/clf_' + AIType
                                           + '_' + self.curDateStr + '.pkl')
                    self.deltaCall = len(
                        self.Trainer.dataCollector.ValuesData) / 2
                    #self.deltaCall = 10000000
                    print("DataSave And Training " + str(self.deltaCall)
                          + " Finished!")
        return_value = "False"
        #return return_value
        if self.clf is None:
            return_value = "False"
            start_time_p = 0.0
            end_time_p = 0.0
        else:
            #self.clf.save(self.savePath + "/model.h5")
            #self.clf.save_weights(self.savePath + "/model_weights.h5")
            #os._exit(0)
            # NOTE(review): if RTpredict is False (or the multipredict batch path
            # is taken without flushing), start_time_p/end_time_p may be unbound
            # when used below — TODO confirm the config guarantees RTpredict here.
            if RTpredict:
                if ('LSTM' in AIType):
                    # Stateful LSTM: clear state between independent samples.
                    self.clf.reset_states()
                if self.RTmultipredict:
                    start_time_p = 0.0
                    end_time_p = 0.0
                    if 'COMBINED' in AIType:
                        # COMBINED models take [lstm_input, mlp_input]; batch
                        # samples by their leading (sequence-length) dimension so
                        # each bucket can be stacked into one array.
                        cur_shape_0 = curArray[0][0].shape[0]
                        if cur_shape_0 in self.multi_predict_array_combined.keys():
                            self.multi_predict_array_combined[cur_shape_0].append(
                                [curArray[0][0], curArray[0][1]])
                            self.multi_predict_label_combined[cur_shape_0].append(
                                self.le.transform(
                                    [bsmJsom['BsmPrint']['Metadata']['mbType']])[0])
                        else:
                            self.multi_predict_array_combined[cur_shape_0] = []
                            self.multi_predict_label_combined[cur_shape_0] = []
                            self.multi_predict_array_combined[cur_shape_0].append(
                                [curArray[0][0], curArray[0][1]])
                            self.multi_predict_label_combined[cur_shape_0].append(
                                self.le.transform(
                                    [bsmJsom['BsmPrint']['Metadata']['mbType']])[0])
                    else:
                        self.multi_predict_array.append(curArray[0])
                        self.multi_predict_label.append(
                            self.le.transform(
                                [bsmJsom['BsmPrint']['Metadata']['mbType']])[0])
                    if self.multi_predict_count > self.multi_predict_num:
                        # Batch is full: run one predict per bucket and flush.
                        pred_array_list = []
                        if 'COMBINED' in AIType:
                            for cur_shape_0 in self.multi_predict_array_combined.keys():
                                multi_predict_array = self.multi_predict_array_combined[cur_shape_0]
                                lstm_arrays = np.array(
                                    [xi[0] for xi in multi_predict_array])
                                mlp_arrays = np.array(
                                    [xi[1] for xi in multi_predict_array])
                                #lstm_arrays = np.squeeze(lstm_arrays)
                                #mlp_arrays = np.squeeze(mlp_arrays)
                                pred_array_list.append(
                                    self.clf.predict([lstm_arrays, mlp_arrays]))
                                self.multi_predict_label.append(
                                    self.multi_predict_label_combined[cur_shape_0])
                        else:
                            pred_array_list.append(
                                self.clf.predict(array(self.multi_predict_array)))
                            self.multi_predict_label = [self.multi_predict_label]
                        for pred_array_index in range(0, len(pred_array_list)):
                            pred_array = pred_array_list[pred_array_index]
                            for index in range(0, len(pred_array)):
                                # Classic classifiers return class labels directly;
                                # neural nets return per-class probabilities.
                                if 'XGBoost' in AIType or 'SVM' in AIType or 'LogisticRegression' in AIType:
                                    prediction = pred_array[index]
                                else:
                                    prediction = pred_array[index][
                                        1 - self.le.transform(['Genuine'])[0]]
                                self.varthrelite.update_stats(
                                    prediction,
                                    self.multi_predict_label[pred_array_index][index])
                                if prediction > Positive_Threshold:
                                    #self.stats.update_stats(True,self.multi_predict_label[index])
                                    return_value = "True"
                                else:
                                    #self.stats.update_stats(False,self.multi_predict_label[index])
                                    return_value = "False"
                        # Reset all batch buffers for the next window.
                        del self.multi_predict_array[:]
                        del self.multi_predict_label[:]
                        self.multi_predict_array_combined.clear()
                        self.multi_predict_label_combined.clear()
                        self.multi_predict_count = 0
                    else:
                        self.multi_predict_count = self.multi_predict_count + 1
                else:
                    # Single-sample prediction path.
                    if 'COMBINED' in AIType:
                        array_npy = [
                            np.array([curArray[0][0]]),
                            np.array([curArray[0][1]])
                        ]
                    else:
                        array_npy = np.array([curArray[0]])
                    start_time_p = time.time()
                    pred_array = self.clf.predict(array_npy)
                    end_time_p = time.time()
                    gen_index = self.le.transform(['Genuine'])[0]
                    if 'XGBoost' in AIType or 'SVM' in AIType or 'LogisticRegression' in AIType:
                        prediction = pred_array[0]
                    else:
                        # Probability of the non-Genuine class (assumes a binary
                        # output head — TODO confirm for multi-class models).
                        prediction = pred_array[0][1 - gen_index]
                    label_index = self.le.transform(
                        [bsmJsom['BsmPrint']['Metadata']['mbType']])[0]
                    self.varthrelite.update_stats(prediction, label_index)
                    if prediction > Positive_Threshold:
                        self.stats.update_stats(True, label_index)
                        return_value = "True"
                    else:
                        self.stats.update_stats(False, label_index)
                        return_value = "False"
                    #print prediction
                    #print curArray[1]
                    #print "========================================"
        end_time = time.time()
        # Incremental running means over all calls so far.
        self.meanRuntime = (self.numRuntime * self.meanRuntime +
                            (end_time - start_time)) / (self.numRuntime + 1)
        self.meanRuntime_p = (self.numRuntime * self.meanRuntime_p +
                              (end_time_p - start_time_p)) / (self.numRuntime + 1)
        if self.printRuntimeCnt >= self.printRuntime:
            self.printRuntimeCnt = 0
            print('meanRuntime: ' + str(self.meanRuntime) + ' ' +
                  str(self.numRuntime) + ' predict:' + str(self.meanRuntime_p))
            self.stats.print_stats()
            self.varthrelite.print_stats()
            self.printRuntimeCnt = self.printRuntimeCnt + 1
        else:
            self.printRuntimeCnt = self.printRuntimeCnt + 1
        self.numRuntime = self.numRuntime + 1
        return return_value

    def trainedModelExists(self, AIType):
        """Scan savePath for a matching ``clf_<AIType>_*.pkl`` and load it.

        Saved data is reloaded only when RTcollectData is set; otherwise only
        the date strings are propagated.
        """
        filesNames = [
            f for f in listdir(self.savePath)
            if isfile(join(self.savePath, f))
        ]
        print("trainedModelExists?")
        for s in filesNames:
            if s.startswith('clf_' + AIType) and s.endswith(".pkl"):
                print("Loading " + s + " " + AIType + " " + self.curDateStr
                      + " ...")
                self.clf = joblib.load(self.savePath + '/' + s)
                if RTcollectData:
                    # Adopt the date stamp embedded in the model file name.
                    self.curDateStr = s[-23:-4]
                    self.DataCollector.setCurDateSrt(self.curDateStr)
                    self.Trainer.setCurDateSrt(self.curDateStr)
                    self.DataCollector.loadData()
                else:
                    self.DataCollector.setCurDateSrt(self.curDateStr)
                    self.Trainer.setCurDateSrt(self.curDateStr)
                #self.deltaCall = self.DataCollector.valuesCollection.shape[0]/5
                print("Loading " + str(len(self.DataCollector.ValuesData))
                      + " Finished!")

    def ReadDataFromFile(self, AIType):
        """Load the whole dataPath (optionally via 64 worker processes), then train.

        Each worker runs ``local_process`` over a slice of the file list and
        publishes its MlDataCollector through a Manager dict; results are
        merged into self.DataCollector as they arrive.
        """
        print("DataSave And Training " + str(self.dataPath) + " Started ...")
        print("bsmDataExists?")
        filesNames = [
            f for f in tqdm(listdir(self.dataPath))
            if isfile(join(self.dataPath, f))
        ]
        numberOfIters = 1
        numberOfThreads = 64
        multi_processing = True
        if not RTuseexistingdata:
            if multi_processing:
                range_start = 0
                range_end = range_start + int(
                    len(filesNames) / (numberOfThreads * numberOfIters))
                for it_i in range(0, numberOfIters):
                    print("Iteration " + str(it_i) + " Start ...")
                    input_data_list = []
                    process_list = []
                    queue_list = []
                    for i in range(0, numberOfThreads):
                        local_input_list = []
                        localfilesNames = filesNames[range_start:range_end]
                        range_start = range_end
                        range_end = range_start + int(
                            len(filesNames) / (numberOfThreads * numberOfIters))
                        # NOTE(review): the "take the remainder" guard fires on the
                        # second-to-last worker (numberOfThreads - 2), not the last
                        # — presumably off-by-one; verify intent.
                        if (i == numberOfThreads - 2) and (it_i == numberOfIters - 1):
                            range_end = len(filesNames)
                        local_input_list = [
                            localfilesNames, self.dataPath, AIType
                        ]
                        q = Queue()  # NOTE(review): unused; results flow via the Manager dict
                        m = Manager()
                        return_dict = m.dict()
                        return_dict[1] = []
                        p = Process(target=local_process,
                                    args=(local_input_list, True, return_dict))
                        p.start()
                        process_list.append(p)
                        queue_list.append(return_dict)
                        input_data_list.append(local_input_list)
                    #for p in process_list:
                    #    p.join()
                    #listDataCollectors=[]
                    print("Getting Results ....")
                    # Busy-poll each worker's dict until its result list is non-empty.
                    already_parsed = []
                    while (len(already_parsed) != len(queue_list)):
                        for i_q in range(0, len(queue_list)):
                            if (i_q not in already_parsed) and (len(
                                    queue_list[i_q][1]) > 0):
                                already_parsed.append(i_q)
                                tempDataCollector = queue_list[i_q][1][0]
                                print("Getting Results .... " + str(i_q) + " ... "
                                      + str(len(tempDataCollector.TargetData)))
                                # Merge back-to-front, deleting as we go to cap memory.
                                for i in range(
                                        len(tempDataCollector.TargetData) - 1,
                                        -1, -1):
                                    self.DataCollector.collectData([[
                                        tempDataCollector.ValuesData[0][i],
                                        tempDataCollector.ValuesData[1][i]
                                    ], tempDataCollector.TargetData[i]])
                                    del tempDataCollector.TargetData[i]
                                    del tempDataCollector.ValuesData[0][i]
                                    del tempDataCollector.ValuesData[1][i]
                    print("Getting Results Finished!")
                    #self.DataCollector.saveData(it_i)
                    print("Iteration " + str(it_i) + " End!")
            else:
                # NOTE(review): local_process takes 3 positional args
                # (local_input_list, thread, q); this 1-arg call would raise
                # TypeError — this path appears dead/untested.
                tempDataCollector = local_process(
                    [filesNames, self.dataPath, AIType])
                for i in tqdm(
                        range(len(tempDataCollector.TargetData) - 1, -1, -1)):
                    self.DataCollector.collectData([
                        tempDataCollector.ValuesData[i],
                        tempDataCollector.TargetData[i]
                    ])
                    del tempDataCollector.TargetData[i]
                    del tempDataCollector.ValuesData[i]
            self.DataCollector.saveData(0)
        if RTaddexistingweights:
            self.Trainer.setSavedModel(self.clf)
        self.Trainer.train(self.DataCollector, self.le)
        self.clf = joblib.load(self.savePath + '/clf_' + AIType + '_'
                               + self.curDateStr + '.pkl')
        #self.deltaCall = self.DataCollector.valuesCollection.shape[0]/5
        print("DataSave And Training " + str(self.dataPath) + " Finished!")

    def extract_time(self, json):
        """Sort key: a BSM record's generationTime as float, or 0 when the key path is missing."""
        try:
            return float(json['BsmPrint']['Metadata']['generationTime'])
        except KeyError:
            return 0

    def TrainData(self, AIType):
        """Train on already-collected data and reload the resulting pickled classifier."""
        print("Training " + str(self.dataPath) + " Started ...")
        self.Trainer.train(self.DataCollector, self.le)
        self.clf = joblib.load(self.savePath + '/clf_' + AIType + '_'
                               + self.curDateStr + '.pkl')
        self.deltaCall = self.DataCollector.valuesCollection.shape[0] / 5
        print("Training " + str(self.dataPath) + " Finished!")

    def getNodeArray(self, bsmJsom, AIType):
        """Store this BSM (with its encoded label) and return the AI-type-specific array.

        Also periodically evicts stale BSMs from storage based on
        RTFilterTime / RTFilterKeepTime (module-level config).
        NOTE(review): uses key 'Pseudonym' (capital P) here, unlike the
        lowercase 'pseudonym' in the other variants — verify the data schema.
        If AIType matches none of the cases, returnArray is unbound.
        """
        receiverId = bsmJsom['BsmPrint']['Metadata']['receiverId']
        pseudonym = bsmJsom['BsmPrint']['BSMs'][0]['Pseudonym']
        time = bsmJsom['BsmPrint']['Metadata']['generationTime']
        if RTDetectAttackTypes:
            label = bsmJsom['BsmPrint']['Metadata']['attackType']
        else:
            label = bsmJsom['BsmPrint']['Metadata']['mbType']
            # Global attackers are treated as genuine in binary (legacy) mode.
            if label == 'GlobalAttacker':
                label = 'Genuine'
        numLabel = np.array(self.le.transform([label])[0], dtype=np.int8)
        self.Storage.add_bsm(receiverId, pseudonym, time, bsmJsom,
                             self.arrayLength, numLabel)
        if time - self.filterdelta > RTFilterTime:
            self.filterdelta = time
            self.Storage.filter_bsms(time, RTFilterKeepTime)
        # Dispatch on substrings of the AI type name.
        if ('SINGLE' in AIType):
            returnArray = self.Storage.get_array(receiverId, pseudonym)
        if ('FEATURES' in AIType):
            returnArray = self.Storage.get_array_features(receiverId, pseudonym)
        if ('AVEFEAT' in AIType):
            returnArray = self.Storage.get_array_MLP_features(
                receiverId, pseudonym, self.arrayLength)
        if ('AVERAGE' in AIType):
            returnArray = self.Storage.get_array_MLP(receiverId, pseudonym,
                                                     self.arrayLength)
        if ('RECURRENT' in AIType):
            returnArray = self.Storage.get_array_lstm(receiverId, pseudonym,
                                                      self.arrayLength)
        if ('RECUFEAT' in AIType):
            returnArray = self.Storage.get_array_lstm_feat(
                receiverId, pseudonym, self.arrayLength)
        if ('RECUSIN' in AIType):
            returnArray = self.Storage.get_array_lstm_sin(
                receiverId, pseudonym, self.arrayLength)
        if ('RECUMIX' in AIType):
            returnArray = self.Storage.get_array_lstm_mix(
                receiverId, pseudonym, self.arrayLength)
        if ('RECUALL' in AIType):
            returnArray = self.Storage.get_array_lstm_all(
                receiverId, pseudonym, self.arrayLength)
        if ('COMBINED' in AIType):
            returnArray = self.Storage.get_array_combined(
                receiverId, pseudonym, self.arrayLength)
        #print("cur_array: " + str(cur_array))
        #print("returnArray: " + str(returnArray))
        return returnArray
class MlMain:
    """Misbehavior-detection ML pipeline (D20 node-storage variant, Python 2).

    Like the Mix_D20 variant but keyed by (receiverId, pseudonym) through an
    ``MlNodeStorage``, with AI-type dispatch (SVM / MLP_L1N15 / MLP_L3N25 /
    LSTM) and mean-runtime tracking.  Uses Python 2 ``print`` statements.
    """
    initiated = False
    curDateStr = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    DataCollector = MlDataCollector()
    Trainer = MlTrainer()
    Storage = MlNodeStorage()
    arrayLength = 20  # history window length per (receiver, pseudonym)
    collectDur = 0
    deltaCall = 1000
    clf = None
    savePath = './saveFile/saveFile_D20'
    dataPath = './MDBsms_Mix'
    # Running mean of per-call wall time and print throttling counters.
    meanRuntime = 0
    numRuntime = 0
    printRuntime = 10000
    printRuntimeCnt = 0

    def init(self, version, AIType):
        """One-time setup: wire save paths, load any saved model, optionally retrain from disk."""
        self.savePath = self.savePath + '_' + str(version)
        self.DataCollector.setCurDateSrt(self.curDateStr)
        self.DataCollector.setSavePath(self.savePath)
        self.Trainer.setCurDateSrt(self.curDateStr)
        self.Trainer.setSavePath(self.savePath)
        self.Trainer.setAIType(AIType)
        self.trainedModelExists(AIType)
        # RTreadDataFromFile is a module-level config flag defined elsewhere.
        if RTreadDataFromFile:
            self.ReadDataFromFile(version, AIType)

    def mlMain(self, version, bsmJsonString, AIType):
        """Process one incoming BSM: optionally collect/retrain, predict, track runtime.

        Returns True when the classifier's class-1 score beats class-0,
        False otherwise (or when no model is loaded).
        """
        if not self.initiated:
            self.init(version, AIType)
            self.initiated = True
        start_time = time.time()
        bsmJsom = json.loads(bsmJsonString)
        curArray = self.getNodeArray(bsmJsom, AIType)
        if RTcollectData:
            if self.collectDur < self.deltaCall:
                self.collectDur = self.collectDur + 1
                self.DataCollector.collectData(curArray)
            else:
                print "DataSave And Training " + str(
                    self.deltaCall) + " Started ..."
                self.collectDur = 0
                self.DataCollector.saveData()
                if RTtrain:
                    self.Trainer.setValuesCollection(
                        self.DataCollector.getValuesCollection())
                    self.Trainer.setTargetCollection(
                        self.DataCollector.getTargetCollection())
                    print self.Trainer.valuesCollection.shape
                    self.Trainer.train()
                    self.clf = joblib.load(self.savePath + '/clf_' + AIType +
                                           '_' + self.curDateStr + '.pkl')
                    # Scale the retrain cadence with the data set size.
                    self.deltaCall = self.DataCollector.valuesCollection.shape[0] / 5
                    #self.deltaCall = 10000000
                    print "DataSave And Training " + str(
                        self.deltaCall) + " Finished!"
        return_value = False
        if self.clf is None:
            return_value = False
        else:
            if RTpredict:
                prediction = self.clf.predict(array([curArray[0]]))
                #print "======================================== " + str(prediction) + str(prediction[0][0]) + str(prediction[0][1])
                # Two-class score vector: index 0 genuine, index 1 misbehaving.
                if prediction[0][0] > prediction[0][1]:
                    return_value = False
                else:
                    return_value = True
            #print prediction
            #print curArray[1]
            #print "========================================"
        end_time = time.time()
        # Incremental running mean over all calls so far.
        self.meanRuntime = (self.numRuntime * self.meanRuntime +
                            (end_time - start_time)) / (self.numRuntime + 1)
        self.numRuntime = self.numRuntime + 1
        if self.printRuntimeCnt > self.printRuntime:
            self.printRuntimeCnt = 0
            print 'meanRuntime: ' + str(self.meanRuntime) + ' ' + str(
                self.numRuntime)
        else:
            self.printRuntimeCnt = self.printRuntimeCnt + 1
        return return_value

    def trainedModelExists(self, AIType):
        """Scan savePath for a matching ``clf_<AIType>_*.pkl``; if found, load model and saved data."""
        filesNames = [
            f for f in listdir(self.savePath)
            if isfile(join(self.savePath, f))
        ]
        print "trainedModelExists?"
        for s in filesNames:
            if s.startswith('clf_' + AIType) and s.endswith(".pkl"):
                # File name ends with the 19-char date stamp + ".pkl".
                self.curDateStr = s[-23:-4]
                print "Loading " + AIType + " " + self.curDateStr + " ..."
                self.clf = joblib.load(self.savePath + '/' + s)
                self.DataCollector.setCurDateSrt(self.curDateStr)
                self.Trainer.setCurDateSrt(self.curDateStr)
                self.DataCollector.loadData()
                self.Trainer.setValuesCollection(
                    self.DataCollector.getValuesCollection())
                self.Trainer.setTargetCollection(
                    self.DataCollector.getTargetCollection())
                #self.deltaCall = self.DataCollector.valuesCollection.shape[0]/5
                print "Loading " + str(
                    self.DataCollector.valuesCollection.shape) + " Finished!"

    def ReadDataFromFile(self, version, AIType):
        """Read every ``.bsm`` file in ``dataPath_<version>``, collect, train, and reload the model."""
        print "DataSave And Training " + str(self.dataPath + '_' +
                                             version) + " Started ..."
        filesNames = [
            f for f in tqdm(listdir(self.dataPath + '_' + version))
            if isfile(join(self.dataPath + '_' + version, f))
        ]
        print "bsmDataExists?"
        # NOTE(review): ValuesData/TargetData are declared but never used here.
        ValuesData = []
        TargetData = []
        for i in tqdm(range(0, len(filesNames))):
            s = filesNames[i]
            if s.endswith(".bsm"):
                # NOTE(review): file handle is never closed (open(...).read()).
                bsmJsonString = open(self.dataPath + '_' + version + '/' + s,
                                     'r').read()
                bsmJsom = json.loads(bsmJsonString)
                curArray = self.getNodeArray(bsmJsom, AIType)
                self.DataCollector.collectData(curArray)
        self.DataCollector.saveData()
        self.Trainer.setValuesCollection(
            self.DataCollector.getValuesCollection())
        self.Trainer.setTargetCollection(
            self.DataCollector.getTargetCollection())
        self.Trainer.train()
        self.clf = joblib.load(self.savePath + '/clf_' + AIType + '_' +
                               self.curDateStr + '.pkl')
        #self.deltaCall = self.DataCollector.valuesCollection.shape[0]/5
        print "DataSave And Training " + str(self.dataPath + '_' +
                                             version) + " Finished!"

    def getNodeArray(self, bsmJsom, AIType):
        """Store this BSM and return the AI-type-specific windowed array for its sender.

        NOTE(review): if AIType matches none of the four cases, returnArray is
        unbound and this raises an error.
        """
        receiverId = bsmJsom['BsmPrint']['Metadata']['receiverId']
        pseudonym = bsmJsom['BsmPrint']['BSMs'][0]['pseudonym']
        time = bsmJsom['BsmPrint']['Metadata']['generationTime']
        self.Storage.add_bsm(receiverId, pseudonym, time, bsmJsom)
        # Dispatch on classifier family: each gets a differently-shaped array.
        if (AIType == 'SVM'):
            returnArray = self.Storage.get_array(receiverId, pseudonym,
                                                 self.arrayLength)
        if (AIType == 'MLP_L1N15'):
            returnArray = self.Storage.get_array_MLP_L1N15(
                receiverId, pseudonym, self.arrayLength)
        if (AIType == 'MLP_L3N25'):
            returnArray = self.Storage.get_array_MLP_L3N25(
                receiverId, pseudonym, self.arrayLength)
        if (AIType == 'LSTM'):
            returnArray = self.Storage.get_array_lstm(receiverId, pseudonym,
                                                      self.arrayLength)
        #print "cur_array: " + str(cur_array)
        #print "returnArray: " + str(returnArray)
        return returnArray