def run(self): logging.info("Running Source Master Manager ...") method_name = "run()" # Get configuration config = Configure() timer = config.get_config()['GeneralParams']['dataSourcesScheduling'] timeout = config.get_config( )['GeneralParams']['dataSourcesNotReadyWaitingTime'] try: # Monitoring interval counter c_interval = 1 # Doing until stop request while not self._stopped_event.isSet(): # init of the monitoring interval t_init_interval = datetime.now() # end of the monitoring interval t_end_interval = t_init_interval + timedelta(seconds=timer) # max time for waiting a source t_max_interval = t_end_interval + timedelta(seconds=timeout) # ts associated to the current monitoring interval ts = dateutils.get_timestamp() # Start a thread to manage the sources ready for the current monitoring interval intervalSourceMonitoringThread = IntervalMonitoringSourceManagerThread( self._sourceManager_instance, t_init_interval, t_end_interval, t_max_interval, ts) intervalSourceMonitoringThread.setName("IntervalThread_" + str(c_interval)) intervalSourceMonitoringThread.start() # Wait for the end of the interval logging.debug("Waiting for the next interval ...") sleep(timer) # Monitoring interval counter c_interval = c_interval + 1 except Exception as detail: logging.error( "Error in processing the data sources. Type: %s, msg: %s", sys.exc_info()[0], detail) exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback, limit=5, file=sys.stdout) raise DataSourceError(self, detail, method_name)
def manage_data(self, data, client_address):
    """
    Depending on the type of the received packet, this method selects the
    corresponding procedure to process it.

    Parameters
    ----------
    data: str
        the received serialized data
    client_address:
        client address
    """

    # Reception timestamp
    ts_rec = dateutils.get_timestamp()

    # Client IP
    client_IP = client_address[0]

    # De-serialize the received data
    pack = pickle.loads(data)

    logging.info("Data received from sensor: %s (%s) at %s. Package type: %s",
                 pack._header['sid'], client_IP, ts_rec, pack._type)

    # Data packet
    if pack._type == Packet.TYPE_D:
        # Save the packet
        self.save_packet(pack, client_address, ts_rec)
    # Command packet
    elif pack._type == Packet.TYPE_C:
        # TODO fill additional package fields
        # Do diagnosis
        pass
    else:
        # Error
        logging.warn("The received packet is not a valid type. Type: %s", pack._type)

    return pack
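# --- Sender-side sketch (illustrative, not part of the class) ----------------
# manage_data() expects a pickled packet built the same way launch_monitoring()
# builds its DataPacket. A minimal sketch with placeholder values, assuming
# DataPacket, Packet and dateutils are importable as in the surrounding code:
import pickle

packet = DataPacket()
packet.fill_header({'id': 1,
                    'sid': 'sensorA',                  # placeholder sensor id
                    'ts': dateutils.get_timestamp(),
                    'type': Packet.TYPE_D})
packet.fill_body({'Q': 0.5, 'D': 1.2})                  # placeholder statistics
payload = pickle.dumps(packet, 2)
# manage_data(payload, ('127.0.0.1', 5000)) would unpickle this payload, log the
# sender and, being a TYPE_D packet, store it via save_packet().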
def do_dynamic_calibration(self, **kwargs):
    """
    Starting point for the dynamic calibration procedure

    Raises
    ------
    SensorError, MSNMError
    """

    method_name = "do_dynamic_calibration()"

    logging.info("Doing dynamic calibration ...")

    # Time stamp
    ts = dateutils.get_timestamp()

    # Get configuration
    config = Configure()
    model_backup_path = config.get_config()['Sensor']['model']
    model_backup_file = model_backup_path + "model_" + ts + ".json"

    try:
        # Model calibration init
        self._model.calibrate_dynamically(self._data, **kwargs)

        # Get the JSON of the model
        logging.debug("Saving the current model")
        json_model = datautils.model2json(self._model, ts)

        # Save the model
        datautils.save2json(json_model, model_backup_file)

    except ModelError as me:
        logging.error("Error doing dynamic calibration: %s", me.get_msg())
        raise SensorError(self, me.get_msg(), method_name)
    except MSNMError as emsnm:
        logging.error("Error doing dynamic calibration: %s", emsnm.get_msg())
        raise emsnm

    logging.info("End of doing dynamic calibration ...")
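# --- Usage sketch (illustrative) ----------------------------------------------
# In launch_monitoring(), dynamic calibration is triggered once B batched
# observations are available: the 1xM observations are stacked into an NxM
# array and handed to the sensor. Minimal sketch, assuming `sensor` is the
# sensor instance and `batch` maps timestamps to {'data': 1xM ndarray} entries:
import numpy as np

x = np.vstack([batch[ts]['data'] for ts in batch])       # NxM calibration matrix
sensor.set_data(x)
sensor.do_dynamic_calibration(phase=2, lv=3, lamda=0.1)  # lamda: EWMA forgetting factor (example value)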
def do_calibration(self, **kwargs):
    """
    Starting point of the calibration procedure

    Raises
    ------
    SensorError, MSNMError
    """

    method_name = "do_calibration()"

    # Time stamp
    ts = dateutils.get_timestamp()

    # Get configuration
    config = Configure()
    # Get root path for creating data files
    rootDataPath = config.get_config()['GeneralParams']['rootPath']
    model_backup_path = config.get_config()['Sensor']['model']
    model_backup_file = rootDataPath + model_backup_path + "model_" + ts + ".json"

    try:
        # Model calibration init
        self._model.calibrate(self._data, **kwargs)

        # Get the JSON of the model
        json_model = datautils.model2json(self._model, ts)

        # Save the model
        datautils.save2json(json_model, model_backup_file)

    except ModelError as eme:
        raise SensorError(self, eme.msg, method_name)
    except MSNMError as emsnm:
        raise emsnm
def send_response(self, request, msg):
    """
    Sending the response to the client

    Parameters
    ----------
    request:
        client request
    msg: str
        response msg
    """

    # Sensor ID. The current sensor ID that sends the response
    config = Configure()
    sid = config.get_config()['Sensor']['sid']

    # sent timestamp
    ts = dateutils.get_timestamp()

    # build a response message
    pack_resp = ResponsePacket()
    pack_resp.fill_header({'id': self._packet_sent,
                           'sid': sid,
                           'ts': ts,
                           'type': Packet.TYPE_R})
    pack_resp.fill_body({'resp': msg})

    # Response packet
    p_serialized = pickle.dumps(pack_resp, 2)
    request.send(p_serialized)

    # increment the number of packets sent
    self._packet_sent = self._packet_sent + 1
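# --- Client-side sketch (illustrative) ----------------------------------------
# The response is a pickled ResponsePacket. A minimal receiving sketch, assuming
# `sock` is the connected client socket and the whole payload fits in a single
# recv() call (simplification):
import pickle

raw = sock.recv(4096)
response = pickle.loads(raw)
# response._header carries 'id', 'sid', 'ts' and 'type' (Packet.TYPE_R); the
# acknowledgement text filled via fill_body({'resp': msg}) travels in the body.
sender_sid = response._header['sid']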
def run(self):

    method_name = "run()"

    # Paths and parser configuration for the iptables source
    iptables_log = self.config.get_config()['DataSources'][
        self._iptables_instance._type][
        self._iptables_instance.__class__.__name__]['captures']
    iptables_log_raw_folder = self.rootDataPath + self.config.get_config()['DataSources'][
        self._iptables_instance._type][
        self._iptables_instance.__class__.__name__]['raw']
    iptables_log_processed_folder = self.rootDataPath + self.config.get_config()['DataSources'][
        self._iptables_instance._type][
        self._iptables_instance.__class__.__name__]['processed']
    iptables_log_parsed_folder = self.rootDataPath + self.config.get_config()['DataSources'][
        self._iptables_instance._type][
        self._iptables_instance.__class__.__name__]['parsed']
    # Parser configuration file for iptables
    iptables_flow_parser_config_file = self.config.get_config()['DataSources'][
        self._iptables_instance._type][
        self._iptables_instance.__class__.__name__]['parserConfig']

    timer = self.config.get_config()['GeneralParams']['dataSourcesScheduling']

    try:
        # Run until a stop is requested
        while not self._stopped_event.isSet():
            logging.info("Running iptables thread ...")
            logging.debug("Getting lines from file %s during %s seconds.", iptables_log, timer)

            # Get the iptables logs
            log_lines = self._iptables_instance.get_file_to_parse_time(iptables_log, timer)

            # Time stamp
            ts = dateutils.get_timestamp()

            # Path for the backup
            iptables_raw_log_file = iptables_log_raw_folder + "iptables_" + ts + ".log"
            self._iptables_instance.save_file(log_lines, iptables_raw_log_file)

            # Parse it into *.csv format
            logging.debug("Parsing file %s", iptables_raw_log_file)
            iptables_log_processed_file = iptables_log_processed_folder + "iptables_" + ts + ".csv"
            self._iptables_instance.parse(iptables_raw_log_file, iptables_log_processed_file)

            # Copy the CSV file to the parsed folder so the flow parser can process it
            iptables_log_parsed_file = iptables_log_parsed_folder + "iptables_" + ts + ".csv"
            logging.debug("Copying file %s to %s ", iptables_log_processed_file, iptables_log_parsed_file)
            shutil.copyfile(iptables_log_processed_file, iptables_log_parsed_file)

            # Flow parser
            logging.debug("Running flow parser for %s file config.", iptables_flow_parser_config_file)
            self._iptables_instance.launch_flow_parser(iptables_flow_parser_config_file)

            # Add the *.dat output from the parser to the dict of generated files
            self._iptables_instance._files_generated[ts] = \
                iptables_log_parsed_folder + "output-iptables_" + ts + ".dat"

            # Remove the CSV file once it is parsed successfully
            logging.debug("Deleting file %s", iptables_log_parsed_file)
            os.remove(iptables_log_parsed_file)

    except DataSourceError as edse:
        logging.error("Error processing iptables source: %s", edse.get_msg())
        raise edse
    except IOError as ioe:
        logging.error("Error processing iptables source: %s", ioe)
        raise DataSourceError(self, sys.exc_info()[0], method_name)
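# --- _files_generated sketch (illustrative) ------------------------------------
# Each source thread registers its parsed *.dat output keyed by the interval
# timestamp; launch_monitoring() later reads it back with the same ts. Minimal
# sketch, assuming `source` is the iptables source instance and `ts` comes from
# dateutils.get_timestamp():
import numpy as np

i_parsed_file = source._files_generated[ts]
observation_part = np.loadtxt(i_parsed_file, comments="#", delimiter=",")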
def run(self):

    # Get configuration
    config = Configure()

    # Load local data sources running in static mode
    local_dict = {}

    try:
        src_local = config.get_config()['DataSources']['local']
        logging.debug("Loading %s local sources %s.", len(src_local), src_local.keys())

        for i in src_local.keys():
            if src_local[i]['staticMode']:
                logging.debug("Local source %s is running in static mode.", i)
                local_dict[i] = src_local[i]
    except KeyError as ke:
        logging.warning("There are no local sources configured: %s", ke)

    # Test observations per source
    obsBySource = {}

    # Get external files
    for i in local_dict.keys():
        if i == 'Netflow':
            # Get all parsed *.dat files from a specific folder
            staticFiles = local_dict[i]['staticParsedFilesPath']

            # Get the names of the files, ordered
            filesOrdered = np.sort(os.listdir(staticFiles))

            logging.debug("Got %s files from source %s ", len(filesOrdered), i)

            # Remove auxiliary files weights.dat and stats.log
            filesOrdered = filesOrdered[np.logical_not('stats.log' == filesOrdered)]
            filesOrdered = filesOrdered[np.logical_not('weights.dat' == filesOrdered)]

            logging.debug("Removed auxiliary files from source %s. Total files to process: %s ", i, len(filesOrdered))

            # Generate a dataframe containing all *.dat files (all observations),
            # using a date range as index
            # get the initial timestamp from the first file name, e.g., output-20160209t1249.dat
            dstart = filesOrdered[0][7:][0:-4]
            # get the ending timestamp from the last file name, e.g., output-20160209t1249.dat
            dend = filesOrdered[-1][7:][0:-4]
            d_range = pd.date_range(dstart, dend, freq='1min')
            dfAllObs = pd.DataFrame(filesOrdered, d_range, columns=['obs'])

            logging.debug("Got all obs from %s to %s", dstart, dend)

            # Get the test date range
            date_range_start = local_dict[i]['staticObsRangeStart']
            date_range_end = local_dict[i]['staticObsRangeEnd']
            obsBySource[i] = dfAllObs[date_range_start:date_range_end]

            logging.debug("%s observations filtered from %s to %s", len(obsBySource[i]), date_range_start, date_range_end)
        else:
            logging.debug("TODO: managing %s local sources", i)

    # Scheduling and netflow path configuration
    schedulingTimer = config.get_config()['GeneralParams']['dataSourcesScheduling']
    nfcapdTimeFormat = config.get_config()['GeneralParams']['dateFormatNfcapdFiles']
    netflow_parsed_folder = config.get_config()['DataSources']['local']['Netflow']['parsed']
    netflow_dat_manual_folder = config.get_config()['DataSources']['local']['Netflow']['staticParsedFilesPath']
    netflow_captures = config.get_config()['DataSources']['local']['Netflow']['captures']

    # flag to finish the simulation
    end = False
    # Observations counter
    obsCounter = 0
    # list of observations
    obsList = list(obsBySource['Netflow']['obs'])

    while not end and not self._stopped_event.isSet():
        try:
            # Process the observation
            obsToProcess = obsList[obsCounter]
            logging.debug("Observation to process: %s", obsToProcess)

            # ts from the obs file
            tsFile = obsToProcess[7:][0:-4]
            # ts from the current host
            ts = dateutils.get_timestamp()

            # TODO ts for the nfcapd file
            tsdatetime = parser.parse(ts)
            tsdatetimeFile = parser.parse(tsFile)
            tsFormatted = tsdatetime.strftime(nfcapdTimeFormat)
            tsFormattedFile = tsdatetimeFile.strftime(nfcapdTimeFormat)

            logging.debug("Creating nfcapd.current.%s", tsFormatted)

            # Generate a synthetic nfcapd current file with the same timestamp as the observation
            nfcapdCurrent = netflow_captures + os.sep + "nfcapd.current." + tsFormatted

            with open(nfcapdCurrent, 'w') as f:
                f.write("Dummy nfcapd for static mode current ")

            # Move the current nfcapd file to emulate the on_moved event in the netflow source.
            # In static mode the emulated nfcapd files have names like nfcapd.201701302000_2016001182312,
            # where the ts after '_' is the ts of the static *.dat file
            nfcapdDummy = netflow_captures + os.sep + "nfcapd." + tsFormatted + "_" + tsFormattedFile
            logging.debug("Renaming %s to %s", nfcapdCurrent, nfcapdDummy)
            shutil.move(nfcapdCurrent, nfcapdDummy)

            # Copy the *.dat file to the 'parsed' folder
            netflow_dat_manual_file = netflow_dat_manual_folder + obsToProcess
            netflow_parsed_file = netflow_parsed_folder + "output-netflow_" + tsFormattedFile + ".dat"
            logging.debug("Copying netflow manually generated file %s to %s ", netflow_dat_manual_file, netflow_parsed_file)
            shutil.copyfile(netflow_dat_manual_file, netflow_parsed_file)

            logging.debug("Waiting for %s s ...", schedulingTimer)
            sleep(schedulingTimer)

            obsCounter = obsCounter + 1

        except KeyboardInterrupt:
            logging.info("KeyboardInterrupt received. Exiting ...")
            end = True
        except Exception:
            logging.warning("Probably we have reached the end of the observations. ERROR: %s", sys.exc_info()[1])
            end = True
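# --- Filename/timestamp sketch (illustrative) ----------------------------------
# The static *.dat files are named like "output-20160209t1249.dat" (example taken
# from the comment above); the slice [7:][0:-4] used in the loop strips the
# "output-" prefix and the ".dat" suffix to recover the timestamp:
obs_name = "output-20160209t1249.dat"
ts_file = obs_name[7:][0:-4]
assert ts_file == "20160209t1249"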
def launch_monitoring(self, ts):
    """
    Once the parsing (flow parser) procedure is done, this method is in charge
    of starting the monitoring process.

    Raises
    ------
    MSNMError
    """

    method_name = "launch_monitoring()"

    # Configuration
    config = Configure()
    # Get root path for creating data files
    rootDataPath = config.get_config()['GeneralParams']['rootPath']

    # path to save the complete observation joining all data sources
    obs_generated_path = rootDataPath + config.get_config()['Sensor']['observation']
    # number of observations in a batch for EWMA calibration
    batch_obs = config.get_config()['Sensor']['dynamiCalibration']['B']
    # forgetting parameter for EWMA calibration
    lambda_param = config.get_config()['Sensor']['dynamiCalibration']['lambda']
    # is the dynamic calibration activated?
    dyn_cal_enabled = config.get_config()['Sensor']['dynamiCalibration']['enabled']
    # path to save the Q and D statistics obtained from the previous observation
    output_generated_path = rootDataPath + config.get_config()['Sensor']['output']
    # Missing data available methods
    missingDataMethods = config.get_config()['Sensor']['missingData']['missingDataMethods']
    # Get the selected missing data method
    missingDataSelectedMethod = config.get_config()['Sensor']['missingData']['selected']
    # Module implementing the missing data methods
    missingDataModule = config.get_config()['Sensor']['missingData']['missingDataModule']
    # how the variables of the complete observation are saved
    valuesFormat = config.get_config()['GeneralParams']['valuesFormat']

    logging.debug("Launch monitoring for %s ", ts)

    try:
        logging.debug("Building the observation at %s for %s sources.", ts, self._sources.keys())

        # Build the observation for monitoring
        test = []

        for i in self._sources.keys():
            # Get the number of variables of source i
            i_variables = self.get_number_source_variables(self._sources[i], i)
            logging.debug("Source %s has %s variables.", i, i_variables)

            # Get the source output parsed file for the current timestamp
            i_parsed_file = self._sources[i]._files_generated[ts]
            logging.debug("File generated of source %s at %s: %s", i, ts, i_parsed_file)

            if i_parsed_file:
                # Load the file
                if self._sources[i]._type == Source.TYPE_L:
                    # static mode?
                    # TODO: next version
                    #staticMode = config.get_config()['DataSources'][self._sources[i]._type][i]['staticMode'];
                    staticMode = False

                    if not staticMode:  # online or dynamic mode
                        i_test = np.loadtxt(i_parsed_file, comments="#", delimiter=",")
                    else:  # offline or static mode
                        # TODO: this is just a patch to remove in_npackets_verylow and
                        # in_nbytes_verylow like in the matlab experiment, and just for Netflow!
                        # Look for a smarter way to do this, e.g., by configuration params
                        i_test = np.loadtxt(i_parsed_file, comments="#", delimiter=",",
                                            usecols=range(1, i_variables + 1 + 2))

                        logging.debug("Offline mode for source %s. Observation size of %s", i, i_test.shape)

                        mask = np.ones(i_test.shape, dtype=bool)
                        # in_npackets_verylow index in matlab is 119 --> 118 in numpy
                        # in_nbytes_verylow index in matlab is 129 --> 128 in numpy
                        mask[118] = False
                        mask[128] = False
                        i_test = i_test[mask]

                        logging.debug("Offline mode for source %s. Observation size of %s after removing unneeded variables.", i, i_test.shape)

                elif self._sources[i]._type == Source.TYPE_R:
                    i_test = np.loadtxt(i_parsed_file, comments="#", delimiter=",")
                else:
                    logging.warn("Source %s does not have a valid type. Type: %s", i, self._sources[i]._type)
            else:
                # Missing values are replaced with NaN values
                i_test = np.empty(i_variables)
                i_test[:] = np.nan

            # Test observation
            test = np.concatenate((test, i_test), axis=0)

        # 1xM array
        test = test.reshape((1, test.size))

        # Dynamic invocation of the selected data imputation method if needed
        if np.isnan(test).any():
            missingDataMethod = getattr(importlib.import_module(missingDataModule),
                                        missingDataMethods[missingDataSelectedMethod])
            logging.debug("Invoking %s method for data imputation for observation at %s",
                          missingDataMethod.func_name, ts)
            # Calling the corresponding method
            test = missingDataMethod(obs=test, model=self._sensor._model)

        obs_generate_file = obs_generated_path + "obs_" + ts + ".dat"
        np.savetxt(obs_generate_file, test, fmt=valuesFormat, delimiter=",",
                   header=str(datautils.getAllVarNames()), comments="#")

        logging.debug("Observation generated of %s variables at %s.", test.size, ts)

        # Is the dynamic calibration enabled?
        if dyn_cal_enabled:
            # Increment the number of observations
            self._current_batch_obs = self._current_batch_obs + 1
            logging.debug("obs %s added to the batch as number %s.", ts, self._current_batch_obs)

            # Add the observation
            self._batch[ts] = {}
            self._batch[ts]['file'] = obs_generate_file
            self._batch[ts]['data'] = test

            # Once we have reached the number of batch observations, we can do the dynamic calibration
            if self._current_batch_obs == batch_obs:
                # data for calibration
                x = np.array([])
                x = x.reshape((0, test.size))

                # Build the [NxM] data for the calibration
                #print(self._batch.keys())
                for i in self._batch.keys():
                    logging.debug("batch at %s -> %s", i, self._batch[i]['data'].shape)
                    x = np.vstack((x, self._batch[i]['data']))

                #print(x)
                #print(type(x))

                # Build the model
                self._sensor.set_data(x)
                self._sensor.do_dynamic_calibration(phase=2, lv=3, lamda=lambda_param)

                # Reset the counter
                self._current_batch_obs = 0

                # Remove all batch observations
                self._batch.clear()

        # Do monitoring
        Qst, Dst = self._sensor.do_monitoring(test)

    except SensorError as ese:
        raise MSNMError(self, ese.get_msg(), method_name)
    except MSNMError as emsnme:
        raise emsnme

    logging.debug("MONITORING --> UCLd: %s | Dst: %s",
                  self._sensor.get_model().get_mspc().getUCLD(),
                  self._sensor.get_mspc().getDst())
    logging.debug("MONITORING --> UCLq: %s | Qst: %s",
                  self._sensor.get_model().get_mspc().getUCLQ(),
                  self._sensor.get_mspc().getQst())

    # Save the generated statistics
    output_generated_file = output_generated_path + "output_" + ts + ".dat"
    header = "UCLq:" + str(self._sensor.get_model().get_mspc().getUCLQ()) + \
             ", UCLd:" + str(self._sensor.get_model().get_mspc().getUCLD())
    list_array = [self._sensor.get_mspc().getQst(), self._sensor.get_mspc().getDst()]
    statistics = np.array(list_array)
    statistics = statistics.reshape((1, statistics.size))
    np.savetxt(output_generated_file, statistics, fmt=valuesFormat, delimiter=",",
               header=header, comments="#")

    # Get the remote sensor addresses to send the packet to
    remote_addresses = config.get_config()['Sensor']['remote_addresses']

    # Send packets only if there is someone to send them to
    if remote_addresses:
        # Send the data packet to the corresponding sensor
        dataPacket = DataPacket()
        # Packet sent counter increments
        self._packet_sent = self._packet_sent + 1
        dataPacket.fill_header({'id': self._packet_sent,
                                'sid': config.get_config()['Sensor']['sid'],
                                'ts': dateutils.get_timestamp(),
                                'type': Packet.TYPE_D})
        dataPacket.fill_body({'Q': self._sensor.get_mspc().getQst(),
                              'D': self._sensor.get_mspc().getDst()})

        logging.debug("Remote sources to send the packet #%s: %s", self._packet_sent, remote_addresses)

        for i in remote_addresses.keys():
            ip = remote_addresses[i]['ip']
            port = remote_addresses[i]['port']
            tcpClient = TCPClient()
            tcpClient.set_server_address((ip, port))
            tcpClient.set_packet_to_send(dataPacket)
            TCPClientThread(tcpClient).start()

    return test, Qst, Dst
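# --- Missing-data method sketch (illustrative) ---------------------------------
# launch_monitoring() resolves the imputation function by name from the module
# configured under Sensor.missingData and calls it as method(obs=..., model=...).
# A minimal, hypothetical implementation with that signature (simple mean
# imputation over the observed variables; not the project's own method):
import numpy as np

def average_imputation(obs, model=None):
    # Replace each NaN in the 1xM observation with the mean of its observed values
    obs = obs.copy()
    obs[np.isnan(obs)] = np.nanmean(obs)
    return obs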