Example no. 1
    def run(self):

        logging.info("Running Source Master Manager ...")

        method_name = "run()"

        # Get configuration
        config = Configure()
        timer = config.get_config()['GeneralParams']['dataSourcesScheduling']
        timeout = config.get_config(
        )['GeneralParams']['dataSourcesNotReadyWaitingTime']

        try:
            # Monitoring interval counter
            c_interval = 1

            # Run until a stop is requested
            while not self._stopped_event.isSet():

                # init of the monitoring interval
                t_init_interval = datetime.now()

                # end of the monitoring interval
                t_end_interval = t_init_interval + timedelta(seconds=timer)

                # max time for waiting a source
                t_max_interval = t_end_interval + timedelta(seconds=timeout)

                # ts associated to the current monitoring interval
                ts = dateutils.get_timestamp()

                # Start a thread to manage the sources ready for the current monitoring interval
                intervalSourceMonitoringThread = IntervalMonitoringSourceManagerThread(
                    self._sourceManager_instance, t_init_interval,
                    t_end_interval, t_max_interval, ts)
                intervalSourceMonitoringThread.setName("IntervalThread_" +
                                                       str(c_interval))
                intervalSourceMonitoringThread.start()

                # Wait for the end of the interval
                logging.debug("Waiting for the next interval ...")
                sleep(timer)

                # Monitoring interval counter
                c_interval = c_interval + 1

        except Exception as detail:
            logging.error(
                "Error in processing the data sources. Type: %s, msg: %s",
                sys.exc_info()[0], detail)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type,
                                      exc_value,
                                      exc_traceback,
                                      limit=5,
                                      file=sys.stdout)
            raise DataSourceError(self, detail, method_name)
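The loop above spawns one worker thread per monitoring interval and then sleeps until the next interval starts. Below is a minimal, self-contained sketch of that scheduling pattern, assuming a caller-provided worker callable and stop event (the names are illustrative, not the project's API):

from datetime import datetime, timedelta
from threading import Thread
from time import sleep


def schedule_intervals(scheduling, timeout, worker, stop_event):
    """Start one worker thread per interval of 'scheduling' seconds."""
    c_interval = 1
    while not stop_event.is_set():
        t_init = datetime.now()                          # start of the interval
        t_end = t_init + timedelta(seconds=scheduling)   # end of the interval
        t_max = t_end + timedelta(seconds=timeout)       # max time to wait for sources
        t = Thread(target=worker, args=(t_init, t_end, t_max),
                   name="IntervalThread_" + str(c_interval))
        t.start()
        sleep(scheduling)                                # wait for the next interval
        c_interval += 1

A caller would pass a threading.Event() as stop_event and set it to stop the loop.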
Example no. 2
    def run_nfdump(self, nfcapd_file_path, output_file_path):
        """
        From a new netflow file, it is in charge of to call nfdump to transform the byte based
        netflow files to a *.csv file as input of the flow parser.

        Raises
        ------
        MSNMError

        """

        method_name = "run_nfdump()"

        logging.info("Running nfdump ...")

        # Call to nfdump process.
        retcode = call("nfdump -r " + str(nfcapd_file_path) +
                       " -q -o csv >> " + output_file_path,
                       shell=True)

        if retcode != 0:
            raise DataSourceError(self, "Error calling nfdump", method_name)

        logging.debug("New nfdump csv generated: %s", output_file_path)

        try:
            # add new id column to merge in parser
            # TODO: build a more elaborate method to do this, e.g., from a dataframe utils package
            df = pd.read_csv(output_file_path, header=None, index_col=0)
            df.loc[:, df.shape[1]] = range(100000, 100000 + df.shape[0])
            df.to_csv(output_file_path, encoding='utf-8', header=False)

        except ValueError:
            # FIXME: Sometimes nfcapd generates an empty file :( I do not know why :(
            logging.warning("Nfdump file is empty, skipping ... ERROR: %s ",
                            sys.exc_info()[0])
            raise DataSourceError(self, "Nfdump file is empty ....",
                                  method_name)

        except Exception:
            raise DataSourceError(self, sys.exc_info()[0], method_name)
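A self-contained sketch of the pandas step above, i.e. appending a sequential id column to a headerless CSV; the function name, file paths and starting id are assumptions for illustration, and the CSV is read here without an index column so the new column label does not collide with an existing one:

import pandas as pd


def append_id_column(csv_in, csv_out, start_id=100000):
    # Read without a header so the columns get integer labels 0..n-1
    df = pd.read_csv(csv_in, header=None)
    # Append one more column holding consecutive ids
    df[df.shape[1]] = range(start_id, start_id + df.shape[0])
    df.to_csv(csv_out, header=False, index=False, encoding='utf-8')

An empty input file makes read_csv raise EmptyDataError (a ValueError subclass), which is what the except ValueError clause above guards against.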
Example no. 3
    def save_file(self, log_lines, path_to_save):

        method_name = "save_file()"

        try:
            logging.debug("Saving file lines in %s", path_to_save)
            # Save the piece of file that is going to be parsed
            with open(path_to_save, 'w') as fw:
                fw.writelines(log_lines)

        except Exception:
            raise DataSourceError(self, sys.exc_info()[0], method_name)
Example no. 4
    def get_file_to_parse_time(self, file_to_parse, timer):
        """
        Getting last lines of specific file during a certain of the iptables logs to be parsed.
        Although this method is initially conceived to work with iptables logs it could be
        used with similar files.

        Parameters
        ----------
        file_to_parse: str
            Path to the whole iptables log file
        backups_path: str
            Path to save the 'n' lines extracted
        nlastlines: int
            The 'n' last lines to be extracted

        Return
        ------
        d:
            A string accordingly formated.

        Example
        -------
        >>> formatted_string = format_tcp_flags_as_netflow(['ACK','PSH','SYN'])
        >>> print(formatted_string)
        >>> # it should returns '.AP.S.' as nfdump formats of the TCP flags


        """

        method_name = "get_file_to_parse_time()"

        try:

            # Log lines gathered from the file
            log_lines = []

            # Read only the n last lines of the file
            with open(file_to_parse, 'r') as f:
                # Goes to the end of the file
                f.seek(0, 2)
                # Computes the ending time in seconds
                t_end = time.time() + timer
                while time.time() < t_end:
                    line = f.readline()
                    if line:
                        log_lines.append(line)

        except Exception:
            raise DataSourceError(self, sys.exc_info()[0], method_name)

        return log_lines
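Below is a standalone sketch of the same time-bounded tail read, with a short sleep when no new line is available so the loop does not spin at full CPU while the log is idle (the method above polls without sleeping); the function name and poll interval are illustrative:

import time


def read_new_lines_for(path, duration, poll=0.1):
    """Collect lines appended to 'path' during 'duration' seconds."""
    log_lines = []
    with open(path, 'r') as f:
        f.seek(0, 2)                     # jump to the end of the file
        t_end = time.time() + duration
        while time.time() < t_end:
            line = f.readline()
            if line:
                log_lines.append(line)
            else:
                time.sleep(poll)         # nothing new yet; back off briefly
    return log_lines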
Example no. 5
    def get_synchronized_file_from_netflow_dump(self, df, netflow_dumps_path):

        method_name = "get_synchronized_file_from_netflow_dump()"

        try:
            with open(netflow_dumps_path, 'r') as f:
                # Get the ts of the first line in the nfdump *.csv file
                init_ts = f.readline().split(',')[0]
                # Get the ts of the last line in the nfdump *.csv file
                d = deque(f, 1)  # Keep only the last line
                end_ts = list(d)[0].split(',')[0]  # Get the ts

        except Exception:
            raise DataSourceError(self, sys.exc_info()[0], method_name)
        # Get the dataframe rows according to the ts_master_source
        return df[str(init_ts):str(end_ts)]
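Two pieces are combined above: collections.deque with maxlen=1 to grab the last CSV row cheaply, and label-based slicing of a DataFrame indexed by timestamp. A hedged sketch with an illustrative file layout (the first CSV field is assumed to be the timestamp):

from collections import deque


def first_and_last_ts(csv_path):
    """Return the timestamps of the first and last rows of a CSV file."""
    with open(csv_path, 'r') as f:
        init_ts = f.readline().split(',')[0]    # ts of the first row
        end_ts = deque(f, 1)[0].split(',')[0]   # deque keeps only the last row
    return init_ts, end_ts

The slice df[str(init_ts):str(end_ts)] returned above assumes df is indexed by a sorted, timestamp-like column (e.g. after set_index('DATE')), so label slicing keeps only the rows falling inside the netflow dump interval.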
Example no. 6
    def get_file_to_parse(self, file_to_parse, backups_path, nlastlines):
        """
        Obtaining and saving the 'n' last lines of the iptables logs to be parsed.
        Although this method is initially conceived to work with iptables logs it could be
        used with similar log files where

        Parameters
        ----------
        file_to_parse: str
            Path to the whole iptables log file
        backups_path: str
            Path to save the 'n' lines extracted
        nlastlines: int
            The 'n' last lines to be extracted

        Return
        ------
        d:
            A string accordingly formated.

        Example
        -------
        >>> formatted_string = format_tcp_flags_as_netflow(['ACK','PSH','SYN'])
        >>> print(formatted_string)
        >>> # it should returns '.AP.S.' as nfdump formats of the TCP flags


        """

        method_name = "get_file_to_parse()"

        try:

            # Read only the n last lines of the file
            with open(file_to_parse, 'r') as f:
                d = deque(f, nlastlines)
                #print d

            # Save the piece of file that is going to be parsed
            with open(backups_path, 'w') as fw:
                fw.writelines(list(d))

        except Exception:
            raise DataSourceError(self, sys.exc_info()[0], method_name)

        return d
Example no. 7
    def launch_flow_parser(self, flow_parser_config):
        """
        Launch the parsing procedure (flow parser)

        Raises
        ------
        MSNMError

        """

        method_name = "launch_flow_parser()"

        #TODO: modify parser source to raise ParseError or some like that as in the MSNM sensor.

        try:
            logging.debug("Parsing from %s configuration.", flow_parser_config)
            fcparser.main(call='internal', configfile=flow_parser_config)
        except Exception:
            logging.error("Error parsing data: %s", sys.exc_info()[1])
            traceback.print_exc()
            raise DataSourceError(self, sys.exc_info()[1], method_name)
Example no. 8
    def run_nfcapd(self):
        """
        Runs the nfcapd daemon. This is in charge of to capture the traffic flow and generates the
        correspondiente nfcapd* files every certain scheduling time. This time is set up on demand.

        Raises
        ------
        DataSourceError

        """

        method_name = "run_nfcapd()"

        nfcapd_captures_folder = self.config.get_config()['DataSources'][
            self._type][self.__class__.__name__][
                'captures']  # Folder where the nfcapd will be generated
        timer = self.config.get_config()['GeneralParams'][
            'dataSourcesScheduling']  # data source captures scheduling

        # TODO: compatibility for windows machines
        # If nfcapd is already started we kill it!
        retcode = os.system("killall -9 nfcapd")
        if retcode == 0:
            logging.debug("nfcapd was successfully killed ...")

        logging.debug(
            "Running nfcapd application. Captures will be generated in %s every %s seconds.",
            nfcapd_captures_folder, timer)

        # Call to nfcapd process.
        retcode = call("nfcapd -w -D -l " + str(nfcapd_captures_folder) +
                       " -p 2055 -T +8 -t " + str(timer),
                       shell=True)

        if retcode != 0:
            raise DataSourceError(self, "Error calling nfcapd ...",
                                  method_name)
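A minimal sketch of the same invocation using an argument list instead of a shell string, which sidesteps shell quoting issues; the flags mirror the call above and the helper name is an assumption:

from subprocess import call


def start_nfcapd(captures_folder, scheduling_seconds):
    cmd = ["nfcapd", "-w", "-D", "-l", str(captures_folder),
           "-p", "2055", "-T", "+8", "-t", str(scheduling_seconds)]
    retcode = call(cmd)              # no shell=True needed with a list of args
    if retcode != 0:
        raise RuntimeError("nfcapd exited with code %d" % retcode)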
Example no. 9
    def parse(self, file_to_parse, file_parsed, **kwargs):
        '''
        CSV parsing of an iptables log file portion given as the input param ``file_to_parse``.
        The parsed CSV file is saved at the path given by the ``file_parsed`` param.
        The log is parsed following the format of the nfdump tool.

        Parameters
        ----------
        file_to_parse: str
            Path to the iptables log file portion to parse
        file_parsed: str
            Path to the CSV output parsed file

        Raises
        ------
        DataSourceError

        '''

        method_name = "parse()"

        dateFormat = self.config.get_config()['GeneralParams']['dateFormat']
        # super(IPTables, self).parse(file_to_parse)

        TS_IP = re.compile(
            r"(\w+\s+\d+\s\d+:\d+:\d+).+SRC=([\d.]+)\s+DST=([\d.]+)"
        )  # search for ts and IPs
        PORTS = re.compile(r"SPT=(.*?(?=\s))\s+DPT=(\d+)")  # search for ports
        PROTO = re.compile(
            r"PROTO=(.*?(?=\s))"
        )  # search for the PROTO= string followed by any characters until the first white space
        MAC = re.compile(
            r"MAC=(.*?(?=\s))"
        )  # search for the MAC= string followed by any characters until the first white space

        try:

            # contains all parsed csv lines
            parsed_lines = []

            # csv parsed file columns
            columns = 'DATE,SRC,DST,SMAC,DMAC,TMAC,SPT,DPT,PROTO,TCP_FLAGS,EVENT'

            # Note: the 'with' statement opens and closes the file
            with open(file_to_parse) as my_file:

                #df = pd.DataFrame(columns=columns, dtype=int)

                # Registered firewall events
                event_id = 1

                for line in my_file:

                    try:

                        # Search for IPs
                        ts_ip_port_match = TS_IP.search(line)

                        # Get timestamp
                        date = ts_ip_port_match.group(1)

                        # Format the date of the log
                        # date = datetime.strptime(date, '%b %d %H:%M:%S')
                        # parser.parse handles multiple date formats; solves the issue found when using 'Aug  3 18:20:19'
                        date = parser.parse(date)
                        date = date.replace(year=datetime.now().year)
                        date = date.strftime(dateFormat)

                        # Time stamp and Ips and ports
                        # Get the fields from *.log according to the groups in the regexp
                        src_addr = ts_ip_port_match.group(2)
                        dst_addr = ts_ip_port_match.group(3)

                        # Search for ports
                        ports_match = PORTS.search(line)
                        src_port = ports_match.group(1)
                        dst_port = ports_match.group(2)

                        # Search for MAC
                        mac_match = MAC.search(line)

                        # Check if MAC field exists
                        if mac_match:
                            # Get MAC
                            mac = mac_match.group(1)
                            dst_mac, src_mac, type_mac = self.get_params_from_mac(
                                mac)
                        else:
                            dst_mac = ""
                            src_mac = ""
                            type_mac = ""

                        # Search for the protocol
                        protocol_match = PROTO.search(line)
                        protocol = protocol_match.group(1)

                        if protocol == self.TCP:
                            # Call super method format_tcp_flags()
                            tcp_flags = self.format_tcp_flags(
                                self.get_tcp_flags(line),
                                format_type='netflow')
                        else:
                            tcp_flags = ""

                    except AttributeError:
                        logging.warning(
                            "Attribute error: the iptables event cannot be parsed. Skipping line: %s",
                            line)
                        continue

                    # Add new row to the dataframe
                    #df.loc[len(df)] = [date, src_addr, dst_addr, src_mac, dst_mac, type_mac, src_port, dst_port, protocol, tcp_flags, str(event_id)]

                    # Add new row to the list
                    parsed_lines.append(date + "," + src_addr + "," +
                                        dst_addr + "," + src_mac + "," +
                                        dst_mac + "," + type_mac + "," +
                                        src_port + "," + dst_port + "," +
                                        protocol + "," + tcp_flags + "," +
                                        str(event_id))
                    # Registered firewall events (one per line)
                    event_id = event_id + 1

            #df2 = df.set_index('DATE')

            # If there are optional parameters
            #TODO: activate if needed --> NOTE, this block works with DATAFRAMES!


            # if len(kwargs):
            #     # Get the corresponding lines of iptables.log from the first to the last timestamp in the netflow file
            #     if 'netflow_dumps_path' in kwargs:
            #         # Get the synchronized lines
            #         df2 = self.get_synchronized_file_from_netflow_dump(df2, kwargs['netflow_dumps_path'])
            #     else:
            #         # Get the corresponding lines of iptables.log from a custom ts and sampling_rate minutes before.
            #         if 'ts_master_source' in kwargs:
            #             ts_master_source = kwargs['ts_master_source']
            #         else:
            #             # Current date by default
            #             ts_master_source = datetime.now().strftime(dateFormat)
            #
            #         # Check the optional parameter sampling rate to get a temporal slice of lines of the log file
            #         if 'sampling_rate' in kwargs:
            #             sampling_rate = kwargs['sampling_rate']
            #         else:
            #             # 1 minute by default
            #             sampling_rate = 1
            #         # Get the synchronized lines
            #         df2 = self.get_synchronized_file(df2, ts_master_source, sampling_rate)
            #
            # # Save in csv as input of the PARSER (dataframe version)
            # df2.to_csv(file_parsed, encoding='utf-8', header=False)

            # Save in csv as input of the PARSER
            with open(file_parsed, 'w') as fl:
                fl.write(columns + "\n")
                fl.write("\n".join(parsed_lines))

        except DataSourceError as dse:
            raise dse
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type,
                                      exc_value,
                                      exc_traceback,
                                      limit=5,
                                      file=sys.stdout)
            raise DataSourceError(self, sys.exc_info()[0], method_name)
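The regular expressions above can be exercised on a single line to see what each group captures. The sample log line below is made up for illustration only (it is not project data), but it follows the usual iptables LOG layout so every pattern matches:

import re

TS_IP = re.compile(r"(\w+\s+\d+\s\d+:\d+:\d+).+SRC=([\d.]+)\s+DST=([\d.]+)")
PORTS = re.compile(r"SPT=(.*?(?=\s))\s+DPT=(\d+)")
PROTO = re.compile(r"PROTO=(.*?(?=\s))")
MAC = re.compile(r"MAC=(.*?(?=\s))")

sample = ("Aug  3 18:20:19 host kernel: IN=eth0 OUT= "
          "MAC=00:11:22:33:44:55:66:77:88:99:aa:bb:08:00 "
          "SRC=192.168.1.10 DST=10.0.0.5 LEN=60 PROTO=TCP SPT=51515 DPT=80 SYN ")

ts, src, dst = TS_IP.search(sample).groups()
spt, dpt = PORTS.search(sample).groups()
proto = PROTO.search(sample).group(1)
mac = MAC.search(sample).group(1)
print(ts, src, dst, spt, dpt, proto, mac)
# Aug  3 18:20:19 192.168.1.10 10.0.0.5 51515 80 TCP 00:11:22:33:44:55:66:77:88:99:aa:bb:08:00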
Example no. 10
    def run(self):

        method_name = "run()"

        iptables_log = self.config.get_config()['DataSources'][
            self._iptables_instance._type][
                self._iptables_instance.__class__.__name__]['captures']
        iptables_log_raw_folder = self.rootDataPath + self.config.get_config(
        )['DataSources'][self._iptables_instance._type][
            self._iptables_instance.__class__.__name__]['raw']
        iptables_log_processed_folder = self.rootDataPath + self.config.get_config(
        )['DataSources'][self._iptables_instance._type][
            self._iptables_instance.__class__.__name__]['processed']
        iptables_log_parsed_folder = self.rootDataPath + self.config.get_config(
        )['DataSources'][self._iptables_instance._type][
            self._iptables_instance.__class__.__name__]['parsed']
        iptables_flow_parser_config_file = self.config.get_config(
        )['DataSources'][self._iptables_instance._type][
            self._iptables_instance.__class__.__name__]['parserConfig']
        # Parser configuration file for iptables
        timer = self.config.get_config(
        )['GeneralParams']['dataSourcesScheduling']

        try:

            # Run until a stop is requested
            while not self._stopped_event.isSet():

                logging.info("Running iptables thread ...")

                logging.debug("Getting lines from file %s during %s seconds.",
                              iptables_log, timer)
                # Get the iptables logs
                log_lines = self._iptables_instance.get_file_to_parse_time(
                    iptables_log, timer)

                # Time stamp
                ts = dateutils.get_timestamp()

                # Path for the backup
                iptables_raw_log_file = iptables_log_raw_folder + "iptables_" + ts + ".log"
                self._iptables_instance.save_file(log_lines,
                                                  iptables_raw_log_file)

                # Parse it in *.csv format
                logging.debug("Parsing file %s", iptables_raw_log_file)
                iptables_log_processed_file = iptables_log_processed_folder + "iptables_" + ts + ".csv"
                self._iptables_instance.parse(iptables_raw_log_file,
                                              iptables_log_processed_file)

                # Copy the CSV file to the parsed folder to be processed by the flow parser
                iptables_log_parsed_file = iptables_log_parsed_folder + "iptables_" + ts + ".csv"
                logging.debug("Copying file %s to %s ",
                              iptables_log_processed_file,
                              iptables_log_parsed_file)
                shutil.copyfile(iptables_log_processed_file,
                                iptables_log_parsed_file)

                # Flow parser
                logging.debug("Running flow parser for %s file config.",
                              iptables_flow_parser_config_file)
                self._iptables_instance.launch_flow_parser(
                    iptables_flow_parser_config_file)

                # Add the *.dat output from parser to the dict of generated files
                self._iptables_instance._files_generated[
                    ts] = iptables_log_parsed_folder + "output-iptables_" + ts + ".dat"

                # Remove CSV file once it is parsed successfully
                logging.debug("Deleting file %s", iptables_log_parsed_file)
                os.remove(iptables_log_parsed_file)

        except DataSourceError as edse:
            logging.error("Error processing iptables source: %s",
                          edse.get_msg())
            raise edse
        except IOError as ioe:
            logging.error("Error processing iptables source: %s", str(ioe))
            raise DataSourceError(self, sys.exc_info()[0], method_name)
Example no. 11
    def run(self):

        logging.info(
            "Monitoring sources from %s to %s with maximum time until %s",
            self._t_init, self._t_end, self._t_max)

        method_name = "run()"

        # Get configuration
        config = Configure()
        # root path for the data
        rootDataPath = config.get_config()['GeneralParams']['rootPath']

        timer = config.get_config()['GeneralParams']['dataSourcesPolling']
        diagnosis_backup_path = rootDataPath + config.get_config()['Sensor'][
            'diagnosis']  # path to save diagnosis vector output
        valuesFormat = config.get_config()['GeneralParams'][
            'valuesFormat']  # how the variables of the complete observation are saved

        try:

            # Set all data sources as non-ready
            for i in self._sourceManager_instance._sources.keys():
                self._sources_ready[i] = False

            logging.debug("Checking sources at %s time interval.", self._ts)

            # End thread
            finish = False

            while not finish:

                logging.debug("Data sources not ready at interval %s: %s",
                              self._ts,
                              self.get_not_ready().keys())

                # Current time
                tc = datetime.now()

                # for each source
                for i in self._sourceManager_instance._sources.keys():

                    # If the max time to wait is not reached and not all sources are ready
                    if tc <= self._t_max and not self.are_ready(self._ts):
                        # Source i is ready?
                        if self.is_ready(i, self._ts):
                            # Source 'i' is ready
                            self._sources_ready[i] = True

                    else:
                        # Get not ready sources for that ts
                        src_not_ready = self.get_not_ready()

                        # Create an empty dummy *.dat file for the missing sources
                        logging.debug("Data sources not ready: %s",
                                      src_not_ready.keys())

                        for i in src_not_ready.keys():
                            if src_not_ready[i]._type == Source.TYPE_R:
                                parsed_file_path = rootDataPath + config.get_config(
                                )['DataSources'][Source.TYPE_R][i]['parsed']
                            else:
                                parsed_file_path = rootDataPath + config.get_config(
                                )['DataSources'][Source.TYPE_L][i]['parsed']

                            dummy_file = parsed_file_path + "dummy_" + self._ts + ".dat"

                            # Creates a dummy empty file
                            with open(dummy_file, 'w') as fw:
                                fw.write(
                                    "Empty dummy file indicating that there was no data available for that source at "
                                    + self._ts)

                            # For this ts the source is not ready
                            self._sourceManager_instance._sources[
                                i]._files_generated[self._ts] = None

                            #logging.debug("Dummy file created : %s", dummy_file)
                            logging.debug(
                                "Files generated for source %s at %s: %s", i,
                                self._ts, self._sourceManager_instance.
                                _sources[i]._files_generated)

                        # If the sensor has no remote sensor to send the statistics to, it is the root
                        # of the sensor hierarchy, so launch_monitoring is not necessary
                        remote_addresses = config.get_config(
                        )['Sensor']['remote_addresses']

                        # Do monitoring
                        test, Qst, Dst = self._sourceManager_instance.launch_monitoring(
                            self._ts)

                        # Set up which observations are compared
                        dummy = np.zeros((1, test.shape[0]))
                        # We evaluate the observation 1
                        dummy[0, 0] = 1

                        # Do diagnosis
                        diagnosis_vec = self._sourceManager_instance._sensor.do_diagnosis(
                            test, dummy)

                        # Save the diagnosis
                        diagnosis_backup_file = diagnosis_backup_path + "diagnosis_" + self._ts + ".dat"
                        #datautils.save2json(diagnosis_vec.tolist(), diagnosis_backup_file)
                        np.savetxt(diagnosis_backup_file,
                                   diagnosis_vec,
                                   fmt=valuesFormat,
                                   delimiter=",",
                                   header=str(datautils.getAllVarNames()),
                                   comments="#")

                        if not remote_addresses:
                            logging.warning(
                                "There are no remote addresses configured. This sensor should be the root in the sensor hierarchy."
                            )

                        # Finish the thread after launch the monitoring procedure
                        finish = True

                        # Exit to start the new monitoring interval
                        break

                # wait for polling time: waiting time for checking if the sources are ready
                sleep(timer)  # TODO: to customize on demand

        except Exception as detail:
            logging.error(
                "Error in processing the data sources. Type: %s, msg: %s",
                sys.exc_info()[0], detail)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type,
                                      exc_value,
                                      exc_traceback,
                                      limit=5,
                                      file=sys.stdout)
            raise DataSourceError(self, detail, method_name)
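Finally, a small self-contained sketch of the diagnosis-saving step above with np.savetxt; the vector values, variable names and the '%.4f' value format are illustrative assumptions, not project output:

import numpy as np

diagnosis_vec = np.array([[0.12, 0.87, 0.03]])    # 1 x N diagnosis vector
var_names = ['var_1', 'var_2', 'var_3']           # hypothetical variable names
np.savetxt("diagnosis_example.dat", diagnosis_vec,
           fmt="%.4f", delimiter=",",
           header=str(var_names), comments="#")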