def process(self):
    """Parse one HTML report: pick the configured <table>, walk its rows,
    and emit one event per row that contributes at least one valid field.

    Configuration attributes read: ``parser``, ``attr_name``, ``attr_value``,
    ``table_index``, ``skip_row``, ``columns``, ``ignore_values``,
    ``split_column``, ``split_separator``, ``split_index``, ``time_format``,
    ``default_url_protocol``.

    Raises:
        ValueError: if a non-empty cell value is rejected by every one of
            its candidate keys.
    """
    report = self.receive_message()
    raw_report = utils.base64_decode(report["raw"])
    soup = bs(raw_report, self.parser)

    if self.attr_name:
        # Narrow the search to tables carrying the configured attribute.
        table = soup.find_all('table', attrs={self.attr_name: self.attr_value})
        # BUG FIX: pass the format arguments individually, not as a single
        # tuple — with a tuple, logging fails with
        # "not enough arguments for format string".
        self.logger.debug('Found %d table(s) by attribute %r: %r.',
                          len(table), self.attr_name, self.attr_value)
    else:
        table = soup.find_all('table')
        self.logger.debug('Found %d table(s).', len(table))
    table = table[self.table_index]

    rows = table.find_all('tr')[self.skip_row:]
    self.logger.debug('Handling %d row(s).', len(rows))

    for feed in rows:
        event = self.new_event(report)
        tdata = [data.text for data in feed.find_all('td')]
        data_added = False
        for key, data, ignore_value in zip(self.columns, tdata,
                                           self.ignore_values):
            # A column spec may list alternative keys separated by '|';
            # the first key that accepts the value wins.
            keys = key.split('|') if '|' in key else [key, ]
            data = data.strip()
            if data == ignore_value:
                continue
            for key in keys:
                if isinstance(data, str) and not data:  # empty string is never valid
                    break
                if key in ["__IGNORE__", ""]:
                    break
                if self.split_column and key == self.split_column:
                    data = data.split(self.split_separator)[int(
                        self.split_index)]
                    data = data.strip()
                if key in ["time.source", "time.destination"]:
                    # Purely numeric cells are treated as epoch timestamps
                    # (int conversion); otherwise keep the string as-is.
                    try:
                        data = int(data)
                    except ValueError:
                        pass
                    data = DateTime.convert(data, format=self.time_format)
                elif key.endswith('.url'):
                    if not data:
                        continue
                    if '://' not in data:
                        data = self.default_url_protocol + data
                if event.add(key, data, raise_failure=False):
                    data_added = True
                    break
            else:
                # The inner loop exhausted every alternative key without a
                # break, i.e. no harvester accepted the value.
                raise ValueError(
                    "Could not add value %r to %s, all invalid."
                    "" % (data, keys))
        if not data_added:
            # we added nothing from this row, so skip it
            continue
        if hasattr(self.parameters, 'type')\
                and "classification.type" not in event:
            event.add('classification.type', self.parameters.type)
        # BUG FIX: 'raw' must be a string; `feed` is a bs4 Tag, so
        # serialize it back to its HTML markup first.
        event.add('raw', str(feed))
        self.send_message(event)
    self.acknowledge_message()