class MatchTable(object):
    """Lookup table of ``MatchRow`` objects that is narrowed column by column.

    A table is created with the ordered list of column names to match on and
    is filled through ``add_row``.  ``match_table`` / ``match_table_optimised``
    filter the rows against an input record one column at a time, prune the
    survivors by match priority and return the values of the best row.

    ``MatchRow`` and ``Perf`` are defined elsewhere in the project.  This
    class relies on rows exposing ``match_row(colname, value)``,
    ``get_value(colname)``, ``get_values()`` and an ``idx`` attribute.
    """

    # Selects the implementation used by ``_match``.  The fast variant keeps
    # only the rows of the highest matched priority; the slow variant keeps
    # every matching row.  The slow path has always been the one in use.
    USE_FAST_MATCH = False

    def __init__(self, cols):
        """Create an empty table matching on the column names in ``cols``."""
        self._rows = []
        self.logger = logging.getLogger(self.__class__.__name__)
        self.cols = cols
        # Counter of how many row tests were performed to build this table.
        self._passes = 0
        # perf counter to see how many tests we do on a given table
        self.perf = Perf(self.logger)

    def passes(self):
        """Return the number of row tests accumulated for this table."""
        # ``_passes`` is a plain integer counter, not a callable.
        return self._passes

    def len(self):
        """Return the number of rows currently held by the table."""
        return len(self._rows)

    def add_row(self, row):
        """Wrap ``row`` in a ``MatchRow`` and append it to the table.

        The row's ordinal is its insertion position in this table.
        """
        self._add_match_row(
            MatchRow(row, cols=self.cols, ordinal=len(self._rows)))

    def _add_match_row(self, match_row, prio=0, colname=None):
        """Append an already constructed match row.

        ``prio`` and ``colname`` are accepted for interface compatibility but
        are not stored.
        """
        self._rows.append(match_row)

    def get_row(self, pos):
        """Return the values of the row at position ``pos``."""
        return self._rows[pos].get_values()

    def match_table(self, record):
        """Return the values of the best matching row for ``record``.

        Every column in ``self.cols`` is matched in order, each step
        narrowing the table produced by the previous one.  Low priority rows
        are pruned once all columns have been matched.

        params:
            record: mapping of column name to value (the event to match)

        returns the values of the best row, or None when nothing matches or
        when ``record`` lacks one of the columns.
        """
        self.logger.debug("match table %s", self.cols)
        self.logger.debug("match table cols:%s record:%s", self.cols, record)

        tbl = self
        try:
            for col in self.cols:
                tbl = tbl._match(col, record[col])
                if tbl.len() == 0:
                    # nothing left to do ... we didn't match
                    self.logger.info("match table has no rows to return")
                    return None
        except KeyError:
            # key doesn't exist in message is a failed match
            self.logger.warning(
                "Key(s) '%s' doesn't exist in input message %s",
                self.cols, record)
            return None

        tbl.drop_low_prio_rows()
        self.logger.info("match_table Passes: %s", tbl._passes)
        if tbl.len() == 0:
            # Only reachable when there were no columns to match on and the
            # table itself is empty; get_first_row() would hand back a dict.
            self.logger.info("match table has no rows to return")
            return None
        return tbl.get_first_row().get_values()

    def match_table_optimised(self, record):
        """Variant of ``match_table`` that prunes after every column.

        Low priority rows are dropped after each column match and the search
        stops as soon as a single row remains.

        params:
            record: mapping of column name to value (the event to match)

        returns the values of the best row, or None when nothing matches or
        when ``record`` lacks one of the columns.
        """
        self.logger.debug("match table %s", self.cols)
        self.logger.info(
            "match table cols:%s record:%s", self.cols, record)

        tbl = self
        try:
            for col in self.cols:
                tbl = tbl._match(col, record[col])
                if tbl.len() == 0:
                    # nothing left to do ... we didn't match
                    self.logger.info("match table has no rows to return")
                    return None
                tbl.drop_low_prio_rows()
                if tbl.len() == 1:
                    # a single candidate is left, no need to test more columns
                    self.logger.info("match table match 1 row found")
                    break
        except KeyError:
            # key doesn't exist in message is a failed match
            self.logger.warning(
                "Key(s) '%s' doesn't exist in input message %s",
                self.cols, record)
            return None

        self.logger.info("match_table_optimised Passes: %s", tbl._passes)
        if tbl.len() == 0:
            # Only reachable when there were no columns to match on and the
            # table itself is empty; get_first_row() would hand back a dict.
            self.logger.info("match table has no rows to return")
            return None
        return tbl.get_first_row().get_values()

    def get_first_row(self):
        """Return the row with the lowest ``idx``.

        When several rows remain they are sorted in place by ``idx`` and the
        first one is returned.  An empty table yields an empty dict (not a
        row), so callers must check ``len()`` before using row methods.
        """
        if self.len() == 0:
            self.logger.warning(
                "match table is empty return empty dictionary")
            return {}
        if self.len() > 1:
            # if more than 1 remain take the one that was inserted first
            self._rows.sort(key=lambda t: t.idx)
        return self._rows[0]

    def drop_low_prio_rows(self):
        """Prune the table, column by column, down to the best rows.

        For each column in order, rows whose score is below the best score
        for that column are dropped.  Stops early once one row is left.
        An empty table is left untouched.
        """
        self.perf.start("drop_low_prio_rows")
        before = self.len()
        for col in self.cols:
            if self.len() == 1:
                # we are done when there is just 1 left
                self.logger.info("matched table 1 row remains")
                self.perf.end("drop_low_prio_rows",
                              "before {}/ after 1".format(before))
                return
            if self.len() == 0:
                # nothing to prune
                break
            self.prune_rows(col)
        self.logger.info(
            "before/after dropping low prio rows %s/%s", before, self.len())
        self.perf.end("drop_low_prio_rows",
                      "before: {}/ after: {}".format(before, self.len()))

    def prune_rows(self, colname):
        """Keep only the rows with the best score for ``colname``.

        By this stage the table should contain just the rows that matched
        the input data.  Each row is scored from its cell for ``colname`` as
        ``10 ** (prio ** 2) * (length - 1)``; rows whose score is below the
        maximum are discarded.  Surviving rows keep their relative order.
        A cell of length 1 therefore always scores 0 (presumably the
        wildcard case - confirm against MatchRow).
        """
        scored = []
        for row in self._rows:
            cell = row.get_value(colname)
            scored.append(
                (row, pow(10, pow(cell.prio, 2)) * (cell.length - 1)))
        self.logger.info(
            "prune rows, colname=%s, row_count=%s", colname, len(scored))
        if not scored:
            # empty table: there is no best score to compare against
            return

        best = max(score for _, score in scored)
        self._rows = []
        for row, score in scored:
            if score == best:
                self.logger.info(
                    "prune is adding row %s", row.get_values())
                self._add_match_row(row)

    def _match(self, colname, value):
        """Return a new table with the rows matching ``value`` in ``colname``.

        Dispatches to the fast or slow implementation depending on
        ``USE_FAST_MATCH``.
        """
        if self.USE_FAST_MATCH:
            return self._match_fast(colname, value)
        return self._match_slow(colname, value)

    def _match_fast(self, colname, value):
        """Return a new table holding only the highest priority matches.

        ``row.match_row`` returns a sequence whose first item says whether
        the row matched and whose second item is used as a priority index
        (0 .. 3).  Matched rows are bucketed by that priority and only the
        non-empty bucket with the highest priority is kept, so lower
        priority matches are dropped early.
        """
        new_table = MatchTable(self.cols)
        self.logger.debug("Matching %s to %s", colname, value)
        buckets = [[], [], [], []]
        for row in self._rows:
            matched = row.match_row(colname, value)
            if matched[0]:
                buckets[matched[1]].append(row)
        for bucket in reversed(buckets):
            if bucket:
                new_table._rows = bucket
                break
        new_table._passes = self._passes + len(self._rows)
        return new_table

    def _match_slow(self, colname, value):
        """Return a new table holding every row that matches.

        A row is kept when the first item returned by ``row.match_row`` is
        truthy, regardless of the match priority.
        """
        new_table = MatchTable(self.cols)
        self.logger.debug("Matching %s to %s", colname, value)
        for row in self._rows:
            matched = row.match_row(colname, value)
            if matched[0]:
                new_table._add_match_row(row)
        new_table._passes = self._passes + len(self._rows)
        return new_table