コード例 #1
0
    def process(self):
        """Parse one received HTML report and emit an event per table row.

        The report's base64-encoded ``raw`` field is decoded and parsed with
        the configured parser. One ``<table>`` is selected (optionally
        filtered by ``self.attr_name`` / ``self.attr_value``, then indexed by
        ``self.table_index``), the first ``self.skip_row`` rows are dropped,
        and every remaining row is mapped cell-by-cell onto the field names
        in ``self.columns``. A column entry may list several candidate field
        names separated by ``|``; the first one that accepts the value wins.

        Rows from which no value could be added are skipped. The message is
        acknowledged after all rows have been handled.

        Raises:
            IndexError: if ``self.table_index`` is out of range for the
                tables that were found.
            ValueError: if the loop over a column's candidate field names
                finishes without any of them accepting the cell value.
        """
        report = self.receive_message()
        raw_report = utils.base64_decode(report["raw"])

        soup = bs(raw_report, self.parser)
        if self.attr_name:
            tables = soup.find_all('table',
                                   attrs={self.attr_name: self.attr_value})
            # Pass the values as separate positional arguments: wrapping them
            # in a single tuple makes the '%d' placeholder receive the tuple
            # itself and the logging call fails to format the message.
            self.logger.debug('Found %d table(s) by attribute %r: %r.',
                              len(tables), self.attr_name, self.attr_value)
        else:
            tables = soup.find_all('table')
            self.logger.debug('Found %d table(s).', len(tables))
        table = tables[self.table_index]

        # Drop the leading rows (e.g. headers) as configured by skip_row.
        rows = table.find_all('tr')[self.skip_row:]
        self.logger.debug('Handling %d row(s).', len(rows))

        for feed in rows:

            event = self.new_event(report)
            tdata = [cell.text for cell in feed.find_all('td')]

            data_added = False
            # zip() stops at the shortest of the three sequences, so cells
            # without a configured column / ignore value are not processed.
            for key, data, ignore_value in zip(self.columns, tdata,
                                               self.ignore_values):
                # str.split returns [key] when there is no '|' in key.
                keys = key.split('|')
                data = data.strip()
                if data == ignore_value:
                    continue
                for field in keys:
                    # Empty string is never valid. This is re-checked on
                    # every pass because the split below may empty the value.
                    if isinstance(data, str) and not data:
                        break

                    # Placeholder column names: the cell is skipped.
                    if field in ["__IGNORE__", ""]:
                        break

                    if self.split_column and field == self.split_column:
                        data = data.split(self.split_separator)[int(
                            self.split_index)]
                        data = data.strip()

                    if field in ["time.source", "time.destination"]:
                        # Numeric strings are handed over as integers,
                        # anything else is passed through unchanged.
                        try:
                            data = int(data)
                        except ValueError:
                            pass
                        data = DateTime.convert(data, format=self.time_format)

                    elif field.endswith('.url'):
                        if not data:
                            continue
                        # Values without a scheme get the default protocol.
                        if '://' not in data:
                            data = self.default_url_protocol + data

                    # NOTE(review): relies on event.add() returning a truthy
                    # value on success when raise_failure=False - confirm.
                    if event.add(field, data, raise_failure=False):
                        data_added = True
                        break
                else:
                    # Reached only when the loop ran out of candidates
                    # without hitting a break.
                    raise ValueError(
                        "Could not add value %r to %s, all invalid."
                        "" % (data, keys))

            if not data_added:
                # we added nothing from this row, so skip it
                continue
            if (hasattr(self.parameters, 'type')
                    and "classification.type" not in event):
                event.add('classification.type', self.parameters.type)
            event.add('raw', feed)
            self.send_message(event)

        self.acknowledge_message()