Example #1
    def run(self):

        logging.info("Running Source Master Manager ...")

        method_name = "run()"

        # Get configuration
        config = Configure()
        timer = config.get_config()['GeneralParams']['dataSourcesScheduling']
        timeout = config.get_config(
        )['GeneralParams']['dataSourcesNotReadyWaitingTime']

        try:
            # Monitoring interval counter
            c_interval = 1

            # Doing until stop request
            while not self._stopped_event.isSet():

                # init of the monitoring interval
                t_init_interval = datetime.now()

                # end of the monitoring interval
                t_end_interval = t_init_interval + timedelta(seconds=timer)

                # max time for waiting a source
                t_max_interval = t_end_interval + timedelta(seconds=timeout)

                # ts associated to the current monitoring interval
                ts = dateutils.get_timestamp()

                # Start a thread to manage the sources ready for the current monitoring interval
                intervalSourceMonitoringThread = IntervalMonitoringSourceManagerThread(
                    self._sourceManager_instance, t_init_interval,
                    t_end_interval, t_max_interval, ts)
                intervalSourceMonitoringThread.setName("IntervalThread_" +
                                                       str(c_interval))
                intervalSourceMonitoringThread.start()

                # Wait for the end of the interval
                logging.debug("Waiting for the next interval ...")
                sleep(timer)

                # Monitoring interval counter
                c_interval = c_interval + 1

        except Exception as detail:
            logging.error(
                "Error in processing the data sources. Type: %s, msg: %s",
                sys.exc_info()[0], detail)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type,
                                      exc_value,
                                      exc_traceback,
                                      limit=5,
                                      file=sys.stdout)
            raise DataSourceError(self, detail, method_name)
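
run() above relies on a cooperative stop flag (self._stopped_event) supplied by the surrounding thread class. A minimal sketch of that scaffolding, with a hypothetical class name; only the _stopped_event attribute is taken from the example:

import threading

class StoppableWorker(threading.Thread):
    # Hypothetical scaffolding; only the _stopped_event attribute is taken
    # from the example above.
    def __init__(self):
        super(StoppableWorker, self).__init__()
        self._stopped_event = threading.Event()

    def stop(self):
        # Request a cooperative shutdown; the run() loop polls this flag.
        self._stopped_event.set()

    def run(self):
        while not self._stopped_event.isSet():  # is_set() in modern Python
            pass  # one monitoring interval per iteration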
Example #2
    def manage_data(self, data, client_address):
        """
        Depending on the type of the received package this method selects the corresponding procedure to process it.
            
        Parameters
        ----------
        data: str
            the received serialized data
        client_address: 
            client address
    
        """

        # Reception timestamp
        ts_rec = dateutils.get_timestamp()

        # Client IP
        client_IP = client_address[0]

        # De-serialize the received data
        pack = pickle.loads(data)

        logging.info(
            "Data received from sensor: %s (%s) at %s. Package type: %s",
            pack._header['sid'], client_IP, ts_rec, pack._type)

        # Data packet
        if pack._type == Packet.TYPE_D:
            # Save the packet
            self.save_packet(pack, client_address, ts_rec)

        # Command packet
        elif pack._type == Packet.TYPE_C:
            # TODO fill additional package fields
            # Do diagnosis
            pass
        else:
            # Error
            logging.warn("The received packet is not a valid type. Type: %s",
                         pack._type)

        return pack
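
For context, the data argument arrives as a pickled packet. A minimal sketch of the sending side, with a hypothetical Packet stand-in that models only the fields manage_data() reads (_header and _type):

import pickle

class Packet(object):
    # Hypothetical stand-in for the project's Packet class.
    TYPE_D = 'D'  # data packet
    TYPE_C = 'C'  # command packet

    def __init__(self, sid, ptype):
        self._header = {'sid': sid}
        self._type = ptype

pack = Packet('sensor-1', Packet.TYPE_D)
data = pickle.dumps(pack, 2)  # protocol 2, as used elsewhere in these examples
# 'data' is what the server ends up passing to manage_data()

Note that pickle.loads() trusts the sender completely; with untrusted peers a safer serialization format (e.g., JSON) would be preferable.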
Example #3
    def do_dynamic_calibration(self, **kwargs):
        """
        Starting point for the dynamic calibration procedure

        Raises
        ------
        SensorError, MSNMError

        """

        method_name = "do_dynamic_calibration()"

        logging.info("Doing dynamic calibration ...")

        # Time stamp
        ts = dateutils.get_timestamp()

        # Get configuration
        config = Configure()
        model_backup_path = config.get_config()['Sensor']['model']
        model_backup_file = model_backup_path + "model_" + ts + ".json"

        try:
            # Model calibration init
            self._model.calibrate_dynamically(self._data, **kwargs)

            # Get the JSON of the model
            logging.debug("Saving the current model")
            json_model = datautils.model2json(self._model, ts)

            # Save the model
            datautils.save2json(json_model, model_backup_file)

        except ModelError as me:
            logging.error("Error doing dynamic calibration: %s", me.get_msg())
            raise SensorError(self, me.get_msg(), method_name)
        except MSNMError as emsnm:
            logging.error("Error doing dynamic calibration: %s",
                          emsnm.get_msg())
            raise emsnm

        logging.info("End of doing dynamic calibration...")
Example #4
    def do_calibration(self, **kwargs):
        """
        Starting point of the calibration procedure

        Raises
        ------
        SensorError, MSNMError

        """

        method_name = "do_calibration()"

        # Time stamp
        ts = dateutils.get_timestamp()

        # Get configuration
        config = Configure()
        # Get root path for creating data files
        rootDataPath = config.get_config()['GeneralParams']['rootPath']
        model_backup_path = config.get_config()['Sensor']['model']

        model_backup_file = rootDataPath + model_backup_path + "model_" + ts + ".json"

        try:
            # Model calibration init
            self._model.calibrate(self._data, **kwargs)

            # Get the JSON of the model
            json_model = datautils.model2json(self._model, ts)

            # Save the model
            datautils.save2json(json_model, model_backup_file)

        except ModelError as eme:
            raise SensorError(self, eme.msg, method_name)
        except MSNMError as emsnm:
            raise emsnm
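
Both calibration methods build file paths by string concatenation (rootDataPath + model_backup_path + ...), which silently produces a wrong path if a configured folder lacks its trailing separator. A small sketch of the more defensive alternative; the values here are made up for illustration:

import os

rootDataPath = '/var/msnm'   # assumed config values, for illustration only
model_backup_path = 'models'
ts = '20160209t1249'

model_backup_file = os.path.join(rootDataPath, model_backup_path, 'model_' + ts + '.json')
print(model_backup_file)  # /var/msnm/models/model_20160209t1249.json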
Example #5
    def send_response(self, request, msg):
        """
        Sending the response to the client
        
        Parameters
        ----------
        request:
            client request
        msg: str
            response msg
            
        """

        # Sensor ID. The current sensor ID that sends the response
        config = Configure()
        sid = config.get_config()['Sensor']['sid']

        # sent timestamp
        ts = dateutils.get_timestamp()

        # build a response message
        pack_resp = ResponsePacket()
        pack_resp.fill_header({
            'id': self._packet_sent,
            'sid': sid,
            'ts': ts,
            'type': Packet.TYPE_R
        })
        pack_resp.fill_body({'resp': msg})

        # Response packet
        p_serialized = pickle.dumps(pack_resp, 2)
        request.send(p_serialized)

        # increment the number of packets sent
        self._packet_sent = self._packet_sent + 1
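
The receiving end of send_response() would read the serialized packet back from the socket and unpickle it. A minimal sketch, assuming fill_body() stores its dict in a _body attribute (mirroring the _header attribute read in example #2) and that the whole pickle arrives in one recv():

import pickle

def receive_response(sock, bufsize=4096):
    # Read one serialized response and rebuild the packet object.
    data = sock.recv(bufsize)
    pack_resp = pickle.loads(data)
    return pack_resp._header['sid'], pack_resp._body['resp']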
Example #6
    def run(self):

        method_name = "run()"

        iptables_log = self.config.get_config()['DataSources'][
            self._iptables_instance._type][
                self._iptables_instance.__class__.__name__]['captures']
        iptables_log_raw_folder = self.rootDataPath + self.config.get_config(
        )['DataSources'][self._iptables_instance._type][
            self._iptables_instance.__class__.__name__]['raw']
        iptables_log_processed_folder = self.rootDataPath + self.config.get_config(
        )['DataSources'][self._iptables_instance._type][
            self._iptables_instance.__class__.__name__]['processed']
        iptables_log_parsed_folder = self.rootDataPath + self.config.get_config(
        )['DataSources'][self._iptables_instance._type][
            self._iptables_instance.__class__.__name__]['parsed']
        # Parser configuration file for iptables
        iptables_flow_parser_config_file = self.config.get_config(
        )['DataSources'][self._iptables_instance._type][
            self._iptables_instance.__class__.__name__]['parserConfig']
        # Scheduling interval for the data sources
        timer = self.config.get_config(
        )['GeneralParams']['dataSourcesScheduling']

        try:

            # Doing until stop request
            while not self._stopped_event.isSet():

                logging.info("Running iptables thread ...")

                logging.debug("Getting lines from file %s during %s seconds.",
                              iptables_log, timer)
                # Get the iptables logs
                log_lines = self._iptables_instance.get_file_to_parse_time(
                    iptables_log, timer)

                # Time stamp
                ts = dateutils.get_timestamp()

                # Path for the backup
                iptables_raw_log_file = iptables_log_raw_folder + "iptables_" + ts + ".log"
                self._iptables_instance.save_file(log_lines,
                                                  iptables_raw_log_file)

                # Parse it in *.csv format
                logging.debug("Parsing file %s", iptables_raw_log_file)
                iptables_log_processed_file = iptables_log_processed_folder + "iptables_" + ts + ".csv"
                self._iptables_instance.parse(iptables_raw_log_file,
                                              iptables_log_processed_file)

                # Copy the CSV file to the parsed folder to be parsed by the flow parser
                iptables_log_parsed_file = iptables_log_parsed_folder + "iptables_" + ts + ".csv"
                logging.debug("Copying file %s to %s ",
                              iptables_log_processed_file,
                              iptables_log_parsed_file)
                shutil.copyfile(iptables_log_processed_file,
                                iptables_log_parsed_file)

                # Flow parser
                logging.debug("Running flow parser for %s file config.",
                              iptables_flow_parser_config_file)
                self._iptables_instance.launch_flow_parser(
                    iptables_flow_parser_config_file)

                # Add the *.dat output from parser to the dict of generated files
                self._iptables_instance._files_generated[
                    ts] = iptables_log_parsed_folder + "output-iptables_" + ts + ".dat"

                # Remove CSV file once it is parsed successfully
                logging.debug("Deleting file %s", iptables_log_parsed_file)
                os.remove(iptables_log_parsed_file)

        except DataSourceError as edse:
            logging.error("Error processing iptables source: %s",
                          edse.get_msg())
            raise edse
        except IOError as ioe:
            logging.error("Error processing iptables source: %s", ioe)
            raise DataSourceError(self, sys.exc_info()[0], method_name)
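
run() above repeats the same five-level config lookup for every key it needs. A small helper, sketched here under the naming conventions visible above, would fetch the per-source section once:

def source_config(config, source):
    # Fetch the per-source configuration section once; 'source' is any
    # object with the _type attribute and class-name keying used above.
    return config.get_config()['DataSources'][source._type][source.__class__.__name__]

# Usage sketch inside run():
# src_cfg = source_config(self.config, self._iptables_instance)
# iptables_log = src_cfg['captures']
# iptables_log_raw_folder = self.rootDataPath + src_cfg['raw']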
Example #7
 def run(self):
     # Get configuration
     config = Configure()
     
     # Load local data sources running in static Mode
     local_dict = {}        
     try:
         src_local = config.get_config()['DataSources']['local']
             
         logging.debug("Loading %s local sources %s.",len(src_local),src_local.keys())
             
         for i in src_local.keys():
             
             if src_local[i]['staticMode']:
                 
                 logging.debug("Local source %s is running in static mode.",i)                        
                 local_dict[i] = src_local[i]
                 
     except KeyError as ke:
         logging.warning("There are no local sources configured: %s", ke)
     
     
     # Test observations per source
     obsBySource = {}
     
     # Get external files
     for i in local_dict.keys():
         if i == 'Netflow':
             # Get all parsed *.dat files from a specific folder
             staticFiles = local_dict[i]['staticParsedFilesPath']
             
             # Get the name of the files ordered
             filesOrdered = np.sort(os.listdir(staticFiles))
             
             logging.debug("Got %s files from source %s ",len(filesOrdered),i)
             
             # Remove the auxiliary files weights.dat and stats.log
             filesOrdered = filesOrdered[np.logical_not('stats.log' == filesOrdered)]
             filesOrdered = filesOrdered[np.logical_not('weights.dat' == filesOrdered)]

             logging.debug("Removed auxiliary files from source %s. Total files to process: %s", i, len(filesOrdered))
             
             # Generate a dataframe containing all *.dat (all observations)
             # Date range as index
             dstart = filesOrdered[0][7:][0:-4]  # get the initial timestamp from the first file name, e.g., output-20160209t1249.dat
             dend = filesOrdered[-1][7:][0:-4]  # get the ending timestamp from the last file name
             d_range = pd.date_range(dstart,dend,freq='1min')
             dfAllObs = pd.DataFrame(filesOrdered,d_range,columns=['obs'])
             
             logging.debug("Got all obs from %s to %s",dstart,dend)
             
             # Get the test date range
             date_range_start = local_dict[i]['staticObsRangeStart']
             date_range_end = local_dict[i]['staticObsRangeEnd']
             obsBySource[i] = dfAllObs[date_range_start:date_range_end]
                         
             logging.debug("%s observations filtered from %s to %s",len(obsBySource[i]),date_range_start,date_range_end)
             
         else:
             logging.debug("TODO: managing %s local sources",i)
             
     
     # dataSourcesScheduling
     schedulingTimer = config.get_config()['GeneralParams']['dataSourcesScheduling']
     nfcapdTimeFormat = config.get_config()['GeneralParams']['dateFormatNfcapdFiles']
     netflow_parsed_folder = config.get_config()['DataSources']['local']['Netflow']['parsed']
     netflow_dat_manual_folder = config.get_config()['DataSources']['local']['Netflow']['staticParsedFilesPath']
     netflow_captures = config.get_config()['DataSources']['local']['Netflow']['captures']
 
     
     # flag to finish the simulation
     end = False
     
     # Observations counter
     obsCounter = 0
     
     # list of observations
     obsList = list(obsBySource['Netflow']['obs'])
     
     while not end and not self._stopped_event.isSet():
         
         try:
             # Process the observation
             obsToProcess = obsList[obsCounter]
             
             logging.debug("Observation to process: %s",obsToProcess)
             
             # ts from obs file
             tsFile = obsToProcess[7:][0:-4]
             
             # ts from the current host
             ts = dateutils.get_timestamp()
             
             # TODO ts for nfcap file 
             tsdatetime = parser.parse(ts)
             tsdatetimeFile = parser.parse(tsFile)
             tsFormatted = tsdatetime.strftime(nfcapdTimeFormat)
             tsFormattedFile = tsdatetimeFile.strftime(nfcapdTimeFormat)
             
             logging.debug("Creating nfcapd.current.%s", tsFormatted)               
                         
             # Generate a synthetic "current" nfcapd file with the same timestamp as the observation
             nfcapdCurrent = netflow_captures + os.sep + "nfcapd.current." + tsFormatted            
             with open(nfcapdCurrent,'w') as f:
                 f.write("Dummy nfcapd for static mode current ")
             
             # Move the current nfcapd file to emulate an on_moved event in the netflow source.
             # In static mode the emulated nfcapd files are named like nfcapd.201701302000_2016001182312,
             # where the ts after '_' is the ts of the static *.dat file
             nfcapdDummy = netflow_captures + os.sep + "nfcapd." + tsFormatted  + "_" + tsFormattedFile
             logging.debug("Renaming %s to %s",nfcapdCurrent,nfcapdDummy)
             shutil.move(nfcapdCurrent, nfcapdDummy)
             
             # Copy *.dat file to 'parsed' folder
             netflow_dat_manual_file = netflow_dat_manual_folder + obsToProcess
             netflow_parsed_file = netflow_parsed_folder + "output-netflow_" + tsFormattedFile + ".dat"
             logging.debug("Copying netflow manually generated file %s to %s ",netflow_dat_manual_file, netflow_parsed_file)
             shutil.copyfile(netflow_dat_manual_file, netflow_parsed_file)                
         
             logging.debug("Waiting for %s s ...",schedulingTimer)
             sleep(schedulingTimer)
             obsCounter = obsCounter + 1
             
             
         except KeyboardInterrupt:
             logging.info("KeyboardInterrupt received. Exiting ...")
             end = True
         except Exception:
             logging.warning("Probably we have reached the end of the observations. ERROR: %s", sys.exc_info()[1])
             end = True
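
The core trick in the Netflow branch above is indexing the file names by a DatetimeIndex so that the test range can be selected with plain date-string slicing. Isolated, with made-up file names:

import numpy as np
import pandas as pd

# Hypothetical files following the output-<ts>.dat naming used above
files = np.array(['output-201602091200.dat',
                  'output-201602091201.dat',
                  'output-201602091202.dat'])
index = pd.date_range('2016-02-09 12:00', periods=len(files), freq='1min')
dfAllObs = pd.DataFrame(files, index, columns=['obs'])

# Label-based slicing on a DatetimeIndex includes both endpoints
subset = dfAllObs['2016-02-09 12:00':'2016-02-09 12:01']
print(len(subset))  # 2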
Example #8
    def launch_monitoring(self, ts):
        """
        Once the parsing (flow parser) procedure is done, this method is in charge of starting
        the monitoring process

        Raises
        ------
        MSNMError

        """

        method_name = "launch_monitoring()"

        # Configuration
        config = Configure()
        # Get root path for creating data files
        rootDataPath = config.get_config()['GeneralParams']['rootPath']

        obs_generated_path = rootDataPath + config.get_config()['Sensor'][
            'observation']  # path to save the complete observation joining all data sources
        batch_obs = config.get_config()['Sensor']['dynamiCalibration'][
            'B']  # number of observations in a batch for EWMA calibration
        lambda_param = config.get_config()['Sensor']['dynamiCalibration'][
            'lambda']  # forgetting parameter for EWMA calibration
        dyn_cal_enabled = config.get_config()['Sensor']['dynamiCalibration'][
            'enabled']  # is the dynamic calibration activated?
        output_generated_path = rootDataPath + config.get_config()['Sensor'][
            'output']  # path to save the Q and T statistics obtained from the previous observation
        missingDataMethods = config.get_config()['Sensor']['missingData'][
            'missingDataMethods']  # Missing data available methods
        missingDataSelectedMethod = config.get_config()['Sensor'][
            'missingData']['selected']  # Get the selected missing data method
        missingDataModule = config.get_config()['Sensor']['missingData'][
            'missingDataModule']  # Module that implements the missing data methods
        valuesFormat = config.get_config()['GeneralParams'][
            'valuesFormat']  # how the variables of the complete observation are saved

        logging.debug("Launch monitoring for %s ", ts)

        try:
            logging.debug("Building the observation at %s for %s sources.", ts,
                          self._sources.keys())
            # Build the observation for monitoring
            test = []
            for i in self._sources.keys():
                # Get the number of variables of source i
                i_variables = self.get_number_source_variables(
                    self._sources[i], i)
                logging.debug("Source %s has %s variables.", i, i_variables)
                # Get the source output parsed file for the current timestamp
                i_parsed_file = self._sources[i]._files_generated[ts]
                logging.debug("File generated of source %s at %s: %s", i, ts,
                              i_parsed_file)

                if i_parsed_file:
                    # Load the file
                    if self._sources[i]._type == Source.TYPE_L:

                        # static mode?
                        # TODO: next version
                        #staticMode = config.get_config()['DataSources'][self._sources[i]._type][i]['staticMode'];
                        staticMode = False

                        if not staticMode:  # online or dynamic mode
                            i_test = np.loadtxt(i_parsed_file,
                                                comments="#",
                                                delimiter=",")
                        else:  # offline or static mode
                            # TODO: this is just a patch to remove in_npackets_verylow and in_nbytes_verylow, as in the matlab experiment, and just for Netflow!
                            # Look for a smarter way to do this, e.g., by configuration params
                            i_test = np.loadtxt(i_parsed_file,
                                                comments="#",
                                                delimiter=",",
                                                usecols=range(
                                                    1, i_variables + 1 + 2))

                            logging.debug(
                                "Offline mode for source %s. Observation size of %s",
                                i, i_test.shape)

                            mask = np.ones(i_test.shape, dtype=bool)
                            # in_npackets_verylow index in matlab is 119 --> 118 in numpy
                            # in_nbytes_verylow index in matlab is 129 --> 128 in numpy
                            mask[118] = False
                            mask[128] = False
                            i_test = i_test[mask]

                            logging.debug(
                                "Offline mode for source %s. Observation size of %s after removing unused variables.",
                                i, i_test.shape)

                    elif self._sources[i]._type == Source.TYPE_R:
                        i_test = np.loadtxt(i_parsed_file,
                                            comments="#",
                                            delimiter=",")
                    else:
                        logging.warning(
                            "Source %s does not have a valid type. Type: %s", i,
                            self._sources[i]._type)
                else:
                    # Missing values are replaced with NaN values
                    i_test = np.empty(i_variables)
                    i_test[:] = np.nan

                # Test observation
                test = np.concatenate((test, i_test), axis=0)

            # 1xM array
            test = test.reshape((1, test.size))

            # Dynamic invocation of the selected data imputation method if needed
            if np.isnan(test).any():
                missingDataMethod = getattr(
                    importlib.import_module(missingDataModule),
                    missingDataMethods[missingDataSelectedMethod])
                logging.debug(
                    "Invoking %s method for data imputation for observation at %s",
                    missingDataMethod.__name__, ts)
                # Calling the corresponding method
                test = missingDataMethod(obs=test, model=self._sensor._model)

            obs_generate_file = obs_generated_path + "obs_" + ts + ".dat"
            np.savetxt(obs_generate_file,
                       test,
                       fmt=valuesFormat,
                       delimiter=",",
                       header=str(datautils.getAllVarNames()),
                       comments="#")

            logging.debug("Observation generated of %s variables at %s.",
                          test.size, ts)

            # Is dynamic calibration enabled?
            if dyn_cal_enabled:

                # Increments the number of observation
                self._current_batch_obs = self._current_batch_obs + 1

                logging.debug("obs %s added to the batch as number %s.", ts,
                              self._current_batch_obs)

                # Add the observation
                self._batch[ts] = {}
                self._batch[ts]['file'] = obs_generate_file
                self._batch[ts]['data'] = test

                # Once we reached the number of batch observations, we can do the dynamic calibration
                if self._current_batch_obs == batch_obs:
                    # data for calibration
                    x = np.array([])
                    x = x.reshape((0, test.size))

                    # Build the [NxM] data for the calibration
                    #print(self._batch.keys())
                    for i in self._batch.keys():
                        logging.debug("batch at %s -> %s", i,
                                      self._batch[i]['data'].shape)
                        x = np.vstack((x, self._batch[i]['data']))

                    #print(x)
                    #print(type(x))

                    # Build the model
                    self._sensor.set_data(x)
                    self._sensor.do_dynamic_calibration(phase=2,
                                                        lv=3,
                                                        lamda=lambda_param)

                    # Reset the counter
                    self._current_batch_obs = 0

                    # Removing all batch observations
                    self._batch.clear()

            # Do monitoring
            Qst, Dst = self._sensor.do_monitoring(test)

        except SensorError as ese:
            raise MSNMError(self, ese.get_msg(), method_name)
        except MSNMError as emsnme:
            raise emsnme

        logging.debug("MONITORING --> UCLd: %s | Dst: %s",
                      self._sensor.get_model().get_mspc().getUCLD(),
                      self._sensor.get_mspc().getDst())
        logging.debug("MONITORING --> UCLq: %s | Qst: %s",
                      self._sensor.get_model().get_mspc().getUCLQ(),
                      self._sensor.get_mspc().getQst())

        # Save the generated statistics
        output_generated_file = output_generated_path + "output_" + ts + ".dat"
        header = "UCLq:" + str(
            self._sensor.get_model().get_mspc().getUCLQ()) + ", UCLd:" + str(
                self._sensor.get_model().get_mspc().getUCLD())
        list_array = [
            self._sensor.get_mspc().getQst(),
            self._sensor.get_mspc().getDst()
        ]
        statistics = np.array(list_array)
        statistics = statistics.reshape((1, statistics.size))
        np.savetxt(output_generated_file,
                   statistics,
                   fmt=valuesFormat,
                   delimiter=",",
                   header=header,
                   comments="#")

        # Get the remote sensors addressed to send the packet to
        remote_addresses = config.get_config()['Sensor']['remote_addresses']

        # Send packets only if there is someone to send them to!
        if remote_addresses:

            # Send the data packet to the corresponding sensor.
            dataPacket = DataPacket()
            # Packet sent counter increments
            self._packet_sent = self._packet_sent + 1
            dataPacket.fill_header({
                'id': self._packet_sent,
                'sid': config.get_config()['Sensor']['sid'],
                'ts': dateutils.get_timestamp(),
                'type': Packet.TYPE_D
            })
            dataPacket.fill_body({
                'Q': self._sensor.get_mspc().getQst(),
                'D': self._sensor.get_mspc().getDst()
            })

            logging.debug("Remote sources to send the packet #%s: %s",
                          self._packet_sent, remote_addresses)

            for i in remote_addresses.keys():
                ip = remote_addresses[i]['ip']
                port = remote_addresses[i]['port']
                tcpClient = TCPClient()
                tcpClient.set_server_address((ip, port))
                tcpClient.set_packet_to_send(dataPacket)
                TCPClientThread(tcpClient).start()

        return test, Qst, Dst
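
The data-imputation dispatch above (getattr over importlib.import_module) is worth seeing in isolation: the module and method names come purely from configuration, so imputation strategies can be swapped without touching this code. A self-contained sketch with a hypothetical mean-imputation method:

import importlib

import numpy as np

def mean_imputation(obs=None, model=None):
    # Hypothetical imputation method: replace NaNs with the mean of the
    # observed values (the real project resolves its methods from config).
    obs = obs.copy()
    obs[np.isnan(obs)] = np.nanmean(obs)
    return obs

# What the configuration-driven lookup boils down to:
module_name = '__main__'         # stands in for missingDataModule
method_name = 'mean_imputation'  # stands in for the selected method
method = getattr(importlib.import_module(module_name), method_name)

test = np.array([[1.0, np.nan, 3.0]])
print(method(obs=test, model=None))  # [[1. 2. 3.]]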