Esempio n. 1
0
class MatchTable(object):
    """
    An ordered list of ``MatchRow`` objects that can be narrowed column by
    column until the single best-matching row for an input record remains.

    Rows are added with ``add_row``.  ``match_table`` /
    ``match_table_optimised`` then filter the rows against a record, one
    column at a time, each step producing a new ``MatchTable`` that carries a
    running count of how many row tests have been performed (``_passes``).
    """

    # Selects the strategy used by ``_match``.  False keeps the exhaustive
    # ``_match_slow`` strategy that was previously hard-wired; set to True
    # (on the class or an instance) to use ``_match_fast``.
    _use_fast_match = False

    def __init__(self, cols):
        """
        params:
        cols: ordered list of column names used for matching and pruning
        """
        self._rows = []
        self.logger = logging.getLogger(self.__class__.__name__)
        self.cols = cols

        # perf counter to see how many tests we do on a given table
        self._passes = 0
        self.perf = Perf(self.logger)

    def passes(self):
        """
        returns the number of row tests accumulated while building this table
        """
        # _passes is a plain integer counter, not a callable
        return self._passes

    def len(self):
        """
        returns the number of rows currently held by the table
        """
        return len(self._rows)

    def add_row(self, row):
        """
        wrap a raw row in a MatchRow and append it to the table; the row's
        ordinal is its insertion position
        """
        self._add_match_row(
            MatchRow(row, cols=self.cols, ordinal=len(self._rows)))

    def _add_match_row(self, match_row, prio=0, colname=None):
        """
        append an already constructed MatchRow

        prio and colname are accepted for backward compatibility only; they
        are not stored or used.
        """
        self._rows.append(match_row)

    def get_row(self, pos):
        """
        returns whatever ``get_values()`` yields for the row at index pos
        (the key/value pairs of that row); raises IndexError when pos is out
        of range
        """
        return self._rows[pos].get_values()

    def match_table(self, record):
        """
        search for all items in cols and find the matching rows
        params:
        record: a splunk event to match to the table, indexable by column name

        Every column in ``self.cols`` is matched first; low priority rows are
        dropped once at the end.

        returns the values of the single row that was the best match for the
        input passed; if nothing matches, or the record lacks one of the
        columns, we return None
        """
        # lazy %-style arguments: nothing is formatted unless the level is on
        self.logger.debug("match table %s", self.cols)
        self.logger.debug("match table cols:%s record:%s", self.cols, record)

        tbl = self
        # NOTE: the try deliberately spans the whole loop, so a KeyError
        # raised while matching a row is also reported as a failed match.
        try:
            for col in self.cols:
                tbl = tbl._match(col, record[col])
                if tbl.len() == 0:
                    # nothing left to do ... we didn't match
                    self.logger.info("match table has no rows to return")
                    return None
        except KeyError:
            # key doesn't exist in message is a failed match
            self.logger.warning(
                "Key(s) '%s' doesn't exist in input message %s",
                str(self.cols), record)
            return None

        if tbl.len() == 0:
            # only reachable when there were no columns to match on and the
            # table itself holds no rows
            self.logger.info("match table has no rows to return")
            return None

        tbl.drop_low_prio_rows()
        self.logger.info("match_table Passes: %s", tbl._passes)

        return tbl.get_first_row().get_values()

    def match_table_optimised(self, record):
        """
        search for all items in cols and find the matching rows
        params:
        record: a splunk event to match to the table, indexable by column name

        Same contract as ``match_table`` but low priority rows are dropped
        after every column and the search stops as soon as one row remains.

        returns the values of the single row that was the best match for the
        input passed; if nothing matches, or the record lacks one of the
        columns, we return None
        """
        info = self.logger.info
        self.logger.debug("match table %s", self.cols)
        info("match table cols:%s record:%s", self.cols, record)

        tbl = self
        # NOTE: the try deliberately spans the whole loop, so a KeyError
        # raised while matching or pruning is also reported as a failed match.
        try:
            for col in self.cols:
                tbl = tbl._match(col, record[col])
                if tbl.len() == 0:
                    # nothing left to do ... we didn't match
                    info("match table has no rows to return")
                    return None
                tbl.drop_low_prio_rows()
                if tbl.len() == 1:
                    # a single candidate is left, no need to test more columns
                    info("match table match 1 row found")
                    break
        except KeyError:
            # key doesn't exist in message is a failed match
            self.logger.warning(
                "Key(s) '%s' doesn't exist in input message %s",
                str(self.cols), record)
            return None

        if tbl.len() == 0:
            # only reachable when there were no columns to match on and the
            # table itself holds no rows
            info("match table has no rows to return")
            return None

        self.logger.info("match_table_optimised Passes: %s", tbl._passes)
        return tbl.get_first_row().get_values()

    def get_first_row(self):
        """
        returns the row with the lowest ``idx``; when several rows remain the
        table is re-sorted in place by ``idx`` first

        For an empty table a warning is logged and an empty dict is returned
        (not a MatchRow), so callers must check ``len()`` before using the
        result as a row.
        """
        if self.len() == 0:
            self.logger.warning(
                "match table is empty return empty dictionary")
            return {}

        if self.len() > 1:
            # if more than 1 remain take the one that was inserted first
            # NOTE(review): rows are created with ``ordinal=``; confirm that
            # MatchRow exposes it as ``idx``.
            self._rows.sort(key=lambda t: t.idx)

        return self._rows[0]

    def drop_low_prio_rows(self):
        """
        prune the rows down based on priority rules: for each column, in
        order, drop any rows that do not have the highest score for that
        column (see ``prune_rows``); stops early once one row is left

        The table is modified in place.
        """
        self.perf.start("drop_low_prio_rows")
        before = self.len()
        for col in self.cols:
            if self.len() == 1:
                # we are done when there is just 1 left
                self.logger.info("matched table 1 row remains")
                self.perf.end("drop_low_prio_rows",
                              "before {}/ after 1".format(before))
                return
            self.prune_rows(col)
        self.logger.info("before/after dropping low prio rows %s/%s",
                         before, self.len())
        self.perf.end("drop_low_prio_rows",
                      "before: {}/ after: {}".format(before, self.len()))

    def prune_rows(self, colname):
        """
        By this stage the table should contain just the rows that matched the
        input data.  Discard any rows that have a lower match score for
        colname than the best one.

        Each row is scored from its cell for colname as
        10^(priority^2) * (length - 1); a cell of length 1 therefore always
        scores 0 (presumably the "*" wildcard - verify against MatchRow).

        Rows sharing the highest score are kept in their current order.  An
        empty table is left untouched.
        """
        scored = []
        for row in self._rows:
            cell = row.get_value(colname)
            scored.append(
                (row, pow(10, pow(cell.prio, 2)) * (cell.length - 1)))
        self.logger.info("prune rows, colname=%s, row_count=%s",
                         colname, len(scored))
        if not scored:
            # nothing to prune
            return

        best = max(score for _, score in scored)
        self._rows = []
        for row, score in scored:
            if score == best:
                self.logger.info("prune is adding row %s", row.get_values())
                self._add_match_row(row)

    def _match(self, colname, value):
        """
        returns a new table holding the rows whose colname cell matches value,
        using the strategy selected by ``_use_fast_match``
        """
        if self._use_fast_match:
            return self._match_fast(colname, value)
        return self._match_slow(colname, value)

    def _match_fast(self, colname, value):
        """
        find all rows where the column matches and return a new table that
        holds only the matches from the highest non-empty priority bucket

        Assumes ``match_row`` returns (matched, priority) with priority in
        0..3 - TODO confirm against MatchRow.
        """
        new_table = MatchTable(self.cols)
        self.logger.debug("Matching %s to %s", colname, value)
        # one bucket per priority; taking only the highest populated bucket
        # drops lower priority (e.g. wildcard) matches early
        buckets = [[], [], [], []]
        for row in self._rows:
            matched = row.match_row(colname, value)
            if matched[0]:
                buckets[matched[1]].append(row)
        for bucket in reversed(buckets):
            if bucket:
                new_table._rows = bucket
                break

        new_table._passes = self._passes + len(self._rows)
        return new_table

    def _match_slow(self, colname, value):
        """
        find all rows where the column matches and return a new table holding
        every match, whatever its priority
        """
        new_table = MatchTable(self.cols)
        self.logger.debug("Matching %s to %s", colname, value)
        for row in self._rows:
            if row.match_row(colname, value)[0]:
                new_table._add_match_row(row)
        new_table._passes = self._passes + len(self._rows)
        return new_table