class TestMultiDict(TestCase):
    """Exercise MultiDict's add/discard/contains/iteration behaviour."""

    def setUp(self):
        super(TestMultiDict, self).setUp()
        self.index = MultiDict()

    def test_add_single(self):
        # One key/value pair should be visible via contains() and iteration.
        index = self.index
        index.add("k", "v")
        self.assertTrue(index.contains("k", "v"))
        self.assertEqual(set(index.iter_values("k")), {"v"})

    def test_add_remove_single(self):
        # Discarding the only value must leave the backing dict empty.
        index = self.index
        index.add("k", "v")
        index.discard("k", "v")
        self.assertFalse(index.contains("k", "v"))
        self.assertEqual(index._index, {})

    def test_empty(self):
        # A fresh MultiDict is falsy and reports nothing for any key.
        index = self.index
        self.assertFalse(bool(index))
        self.assertEqual(index.num_items("k"), 0)
        self.assertEqual(list(index.iter_values("k")), [])

    def test_add_multiple(self):
        index = self.index
        all_values = {"v", "v2", "v3"}

        index.add("k", "v")
        self.assertTrue(bool(index))
        self.assertEqual(index.num_items("k"), 1)

        # Re-adding "v" must not change the count; new values each add one.
        for value, expected_count in (("v", 1), ("v2", 2), ("v3", 3)):
            index.add("k", value)
            self.assertEqual(index.num_items("k"), expected_count)

        self.assertIn("k", index)
        self.assertNotIn("k2", index)
        for value in ("v", "v2", "v3"):
            self.assertTrue(index.contains("k", value))
        self.assertEqual(index._index, {"k": all_values})
        self.assertEqual(set(index.iter_values("k")), all_values)

        # Remove all but one value; the survivor must still be present.
        for value in ("v", "v2"):
            index.discard("k", value)
        self.assertTrue(index.contains("k", "v3"))

        # Removing the last value must drop the key entirely.
        index.discard("k", "v3")
        self.assertEqual(index._index, {})
class LabelValueIndex(LinearScanLabelIndex):
    """
    LabelNode index that indexes the values of labels, allowing for efficient
    (re)calculation of the matches for selectors of the form
    'a == "b" && c == "d" && ...', which are the mainline.
    """

    # An index holding fewer items than this is considered "good enough" by
    # _find_best_index(), which then stops looking for a smaller one.
    GOOD_ENOUGH_INDEX_SIZE = 10

    def __init__(self):
        super(LabelValueIndex, self).__init__()
        # Maps (label, value) tuples to the IDs of the items whose labels
        # contain that pair.
        self.item_ids_by_key_value = MultiDict()
        # Maps tuples of (a, b) to the set of expressions that are trivially
        # satisfied by label dicts with label a = value b.  For example,
        # trivial expressions of the form a == "b", and a in {"b", "c", ...}
        # can be evaluated by look-up in this dict.
        self.literal_exprs_by_kv = MultiDict()
        # Mapping from expression ID to any expressions that can't be
        # represented in the way described above.
        self.non_kv_expressions_by_id = {}

    def on_labels_update(self, item_id, new_labels):
        """
        Called to update a particular set of labels.

        Triggers events for match changes.

        :param item_id: an opaque (hashable) ID to associate with the
               labels.  There can only be one set of labels per ID.
        :param new_labels: The labels dict to add to the index or None to
               remove it.
        """
        _log.debug("Updating labels for %s to %s", item_id, new_labels)

        # Find any old labels associated with this item_id and remove the
        # ones that have changed from the index.
        old_labels = self.labels_by_item_id.get(item_id, {})
        for k_v in old_labels.iteritems():
            k, v = k_v
            if new_labels is None or new_labels.get(k) != v:
                _log.debug("Removing old key/value (%s, %s) from index",
                           k, v)
                self.item_ids_by_key_value.discard(k_v, item_id)

        # Check all the old matches for updates.  Record that we've already
        # re-evaluated these expressions so we can skip them later.  The
        # matches are copied into a list first because _update_matches() is
        # called while we walk them.
        seen_expr_ids = set()
        old_matches = list(self.matches_by_item_id.iter_values(item_id))
        for expr_id in old_matches:
            seen_expr_ids.add(expr_id)
            self._update_matches(expr_id, self.expressions_by_id[expr_id],
                                 item_id, new_labels)

        if new_labels is not None:
            # Spin through the new labels, storing them in the index and
            # looking for expressions of the form 'k == "v"', which we have
            # indexed.
            for k_v in new_labels.iteritems():
                _log.debug("Adding (%s, %s) to index", *k_v)
                self.item_ids_by_key_value.add(k_v, item_id)
                for expr_id in self.literal_exprs_by_kv.iter_values(k_v):
                    if expr_id in seen_expr_ids:
                        continue
                    self._store_match(expr_id, item_id)
                    seen_expr_ids.add(expr_id)

        # Spin through the remaining expressions, which we can't optimize.
        for expr_id, expr in self.non_kv_expressions_by_id.iteritems():
            if expr_id in seen_expr_ids:
                continue
            _log.debug("Checking updated labels against non-indexed expr: %s",
                       expr_id)
            self._update_matches(expr_id, expr, item_id, new_labels)

        # Finally, store the update.
        self._store_labels(item_id, new_labels)

    def on_expression_update(self, expr_id, expr):
        """
        Called to update a particular expression.

        Triggers events for match changes.

        :param expr_id: an opaque (hashable) ID to associate with the
               expression.  There can only be one expression per ID.
        :param expr: The SelectorExpression to add to the index or None to
               remove it.
        """
        old_expr = self.expressions_by_id.get(expr_id)
        if expr == old_expr:
            _log.debug("Expression %s unchanged, ignoring", expr_id)
            return

        # Remove any old value from the indexes.  We'll then add the
        # expression back in if it's suitable below.
        _log.debug("Expression %s updated to %s", expr_id, expr)
        self._unindex_expression(expr_id, old_expr)

        if not expr:
            # Deletion, clean up the matches.
            self._remove_all_matches(expr_id)
        elif self._is_literal_lookup(expr):
            # Either an expression of the form a == "b", or one of the form
            # a in {"b", "c", ...}.  We can optimise these forms so that
            # they can be evaluated by an exact lookup.
            self._index_literal_expression(expr_id, expr)
        else:
            # The expression isn't a super-simple k == "v", let's see if we
            # can still use the index...
            self._evaluate_non_literal_expression(expr_id, expr)
            # Either way, it has to be re-checked on every labels update.
            self.non_kv_expressions_by_id[expr_id] = expr

        # Finally, store the update.
        self._store_expression(expr_id, expr)

    @staticmethod
    def _is_literal_lookup(expr):
        """
        :returns True if expr is of the form a == "b" or
                 a in {"b", "c", ...}, i.e. can be evaluated by exact lookup.
        """
        return isinstance(expr.expr_op,
                          (LabelToLiteralEqualityNode, LabelInSetLiteralNode))

    @staticmethod
    def _literal_index_keys(expr):
        """
        Calculates the index keys for a literal-lookup expression.

        :param expr: an expression for which _is_literal_lookup() is True.
        :returns list of (label name, value) tuples, one per value that
                 satisfies the expression.
        """
        expr_op = expr.expr_op
        label_name = expr_op.lhs
        if isinstance(expr_op, LabelToLiteralEqualityNode):
            values = [expr_op.rhs]
        else:
            values = expr_op.rhs
        return [(label_name, value) for value in values]

    def _unindex_expression(self, expr_id, old_expr):
        """
        Removes the old version of an expression from literal_exprs_by_kv
        and non_kv_expressions_by_id.  Leaves the recorded matches alone.

        :param expr_id: ID of the expression being replaced or deleted.
        :param old_expr: the previous expression for that ID, or None.
        """
        if old_expr and self._is_literal_lookup(old_expr):
            # Either an expression of the form a == "b", or one of the form
            # a in {"b", "c", ...}.  Undo our index for the old entry.
            _log.debug("Old expression was indexed, removing")
            for k_v in self._literal_index_keys(old_expr):
                self.literal_exprs_by_kv.discard(k_v, expr_id)
        self.non_kv_expressions_by_id.pop(expr_id, None)

    def _remove_all_matches(self, expr_id):
        """
        Re-evaluates every current match of a deleted expression against
        None so that the matches are cleaned up.
        """
        # Copy to a list: the match index is updated as we go.
        for item_id in list(self.matches_by_expr_id.iter_values(expr_id)):
            _log.debug("Expression deleted, removing old match: %s", item_id)
            self._update_matches(expr_id, None, item_id,
                                 self.labels_by_item_id[item_id])

    def _index_literal_expression(self, expr_id, expr):
        """
        Stores matches for a literal-lookup expression straight from the
        key/value index and records the expression in literal_exprs_by_kv.
        """
        # Get the old matches as a set.  Then we can discard the items
        # that still match, leaving us with the ones that no longer
        # match.
        old_matches = set(self.matches_by_expr_id.iter_values(expr_id))
        _log.debug("New expression is a LabelToLiteralEqualityNode, using "
                   "index")
        for k_v in self._literal_index_keys(expr):
            for item_id in self.item_ids_by_key_value.iter_values(k_v):
                _log.debug("From index, %s matches %s", expr_id, item_id)
                old_matches.discard(item_id)
                self._store_match(expr_id, item_id)
            self.literal_exprs_by_kv.add(k_v, expr_id)
        # old_matches now contains only the items that this expression
        # previously matched but no longer does.  Remove them.
        for item_id in old_matches:
            _log.debug("Removing old match %s, %s", expr_id, item_id)
            self._discard_match(expr_id, item_id)

    def _evaluate_non_literal_expression(self, expr_id, expr):
        """
        Recalculates the matches for an expression that cannot be evaluated
        by exact lookup, using the key/value index to narrow the candidates
        when the expression has required key/value pairs.
        """
        required_kvs = expr.required_kvs
        if required_kvs:
            # The expression has some required k == "v" constraints, let's
            # try to find an index that reduces the work we need to do.
            _log.debug("New expression requires these values: %s",
                       required_kvs)
            best_kv = self._find_best_index(required_kvs)
            # Scan over the best index that we found.
            old_matches = set(self.matches_by_expr_id.iter_values(expr_id))
            for item_id in self.item_ids_by_key_value.iter_values(best_kv):
                old_matches.discard(item_id)
                self._update_matches(expr_id, expr, item_id,
                                     self.labels_by_item_id[item_id])
            # Clean up any left-over old matches.
            for item_id in old_matches:
                self._update_matches(expr_id, None, item_id,
                                     self.labels_by_item_id[item_id])
        else:
            # The expression was just too complex to index.  Give up and
            # do a linear scan.
            _log.debug("%s too complex to use indexes, doing linear scan",
                       expr_id)
            self._scan_all_labels(expr_id, expr)

    def _find_best_index(self, required_kvs):
        """
        Finds the smallest index for the given set of key/value requirements.

        For example, an expression "env == 'prod' && type == 'foo'" would have
        requirements [("env", "prod"), ("type", "foo")].  Suppose type=="foo"
        only applies to a handful of items but env=="prod" applies to many;
        this method would return ("type", "foo") as the best index.

        The search stops early at the first index with fewer than
        GOOD_ENOUGH_INDEX_SIZE items, so the result is not always the
        global minimum.

        :param required_kvs: iterable of (key, value) tuples.
        :returns the key, value tuple for the best index to use, or None if
                 required_kvs is empty.
        """
        min_kv = None
        min_num = None
        for k_v in required_kvs:
            num = self.item_ids_by_key_value.num_items(k_v)
            if min_num is None or num < min_num:
                min_kv = k_v
                min_num = num
                if num < self.GOOD_ENOUGH_INDEX_SIZE:
                    # Good enough, let's get on with evaluating the
                    # expressions rather than spending more time looking for
                    # a better index.
                    break
        _log.debug("Best index: %s, %s items", min_kv, min_num)
        return min_kv
class LabelValueIndex(LinearScanLabelIndex):
    """
    LabelNode index that indexes the values of labels, allowing for efficient
    (re)calculation of the matches for selectors of the form
    'a == "b" && c == "d" && ...', which are the mainline.
    """

    # An index holding fewer items than this is considered "good enough" by
    # _find_best_index(), which then stops looking for a smaller one.
    GOOD_ENOUGH_INDEX_SIZE = 10

    def __init__(self):
        super(LabelValueIndex, self).__init__()
        # Maps (label, value) tuples to the IDs of the items whose labels
        # contain that pair.
        self.item_ids_by_key_value = MultiDict()
        # Maps tuples of (a, b) to the set of expressions that are trivially
        # satisfied by label dicts with label a = value b.  For example,
        # trivial expressions of the form a == "b", and a in {"b", "c", ...}
        # can be evaluated by look-up in this dict.
        self.literal_exprs_by_kv = MultiDict()
        # Mapping from expression ID to any expressions that can't be
        # represented in the way described above.
        self.non_kv_expressions_by_id = {}

    def on_labels_update(self, item_id, new_labels):
        """
        Called to update a particular set of labels.

        Triggers events for match changes.

        :param item_id: an opaque (hashable) ID to associate with the
               labels.  There can only be one set of labels per ID.
        :param new_labels: The labels dict to add to the index or None to
               remove it.
        """
        _log.debug("Updating labels for %s to %s", item_id, new_labels)

        # Find any old labels associated with this item_id and remove the
        # ones that have changed from the index.
        old_labels = self.labels_by_item_id.get(item_id, {})
        for k_v in old_labels.iteritems():
            k, v = k_v
            if new_labels is None or new_labels.get(k) != v:
                _log.debug("Removing old key/value (%s, %s) from index",
                           k, v)
                self.item_ids_by_key_value.discard(k_v, item_id)

        # Check all the old matches for updates.  Record that we've already
        # re-evaluated these expressions so we can skip them later.  The
        # matches are copied into a list first because _update_matches() is
        # called while we walk them.
        seen_expr_ids = set()
        old_matches = list(self.matches_by_item_id.iter_values(item_id))
        for expr_id in old_matches:
            seen_expr_ids.add(expr_id)
            self._update_matches(expr_id, self.expressions_by_id[expr_id],
                                 item_id, new_labels)

        if new_labels is not None:
            # Spin through the new labels, storing them in the index and
            # looking for expressions of the form 'k == "v"', which we have
            # indexed.
            for k_v in new_labels.iteritems():
                _log.debug("Adding (%s, %s) to index", *k_v)
                self.item_ids_by_key_value.add(k_v, item_id)
                for expr_id in self.literal_exprs_by_kv.iter_values(k_v):
                    if expr_id in seen_expr_ids:
                        continue
                    self._store_match(expr_id, item_id)
                    seen_expr_ids.add(expr_id)

        # Spin through the remaining expressions, which we can't optimize.
        for expr_id, expr in self.non_kv_expressions_by_id.iteritems():
            if expr_id in seen_expr_ids:
                continue
            _log.debug("Checking updated labels against non-indexed expr: %s",
                       expr_id)
            self._update_matches(expr_id, expr, item_id, new_labels)

        # Finally, store the update.
        self._store_labels(item_id, new_labels)

    def on_expression_update(self, expr_id, expr):
        """
        Called to update a particular expression.

        Triggers events for match changes.

        :param expr_id: an opaque (hashable) ID to associate with the
               expression.  There can only be one expression per ID.
        :param expr: The SelectorExpression to add to the index or None to
               remove it.
        """
        old_expr = self.expressions_by_id.get(expr_id)
        if expr == old_expr:
            _log.debug("Expression %s unchanged, ignoring", expr_id)
            return

        # Remove any old value from the indexes.  We'll then add the
        # expression back in if it's suitable below.
        _log.debug("Expression %s updated to %s", expr_id, expr)
        self._unindex_expression(expr_id, old_expr)

        if not expr:
            # Deletion, clean up the matches.
            self._remove_all_matches(expr_id)
        elif self._is_literal_lookup(expr):
            # Either an expression of the form a == "b", or one of the form
            # a in {"b", "c", ...}.  We can optimise these forms so that
            # they can be evaluated by an exact lookup.
            self._index_literal_expression(expr_id, expr)
        else:
            # The expression isn't a super-simple k == "v", let's see if we
            # can still use the index...
            self._evaluate_non_literal_expression(expr_id, expr)
            # Either way, it has to be re-checked on every labels update.
            self.non_kv_expressions_by_id[expr_id] = expr

        # Finally, store the update.
        self._store_expression(expr_id, expr)

    @staticmethod
    def _is_literal_lookup(expr):
        """
        :returns True if expr is of the form a == "b" or
                 a in {"b", "c", ...}, i.e. can be evaluated by exact lookup.
        """
        return isinstance(expr.expr_op,
                          (LabelToLiteralEqualityNode, LabelInSetLiteralNode))

    @staticmethod
    def _literal_index_keys(expr):
        """
        Calculates the index keys for a literal-lookup expression.

        :param expr: an expression for which _is_literal_lookup() is True.
        :returns list of (label name, value) tuples, one per value that
                 satisfies the expression.
        """
        expr_op = expr.expr_op
        label_name = expr_op.lhs
        if isinstance(expr_op, LabelToLiteralEqualityNode):
            values = [expr_op.rhs]
        else:
            values = expr_op.rhs
        return [(label_name, value) for value in values]

    def _unindex_expression(self, expr_id, old_expr):
        """
        Removes the old version of an expression from literal_exprs_by_kv
        and non_kv_expressions_by_id.  Leaves the recorded matches alone.

        :param expr_id: ID of the expression being replaced or deleted.
        :param old_expr: the previous expression for that ID, or None.
        """
        if old_expr and self._is_literal_lookup(old_expr):
            # Either an expression of the form a == "b", or one of the form
            # a in {"b", "c", ...}.  Undo our index for the old entry.
            _log.debug("Old expression was indexed, removing")
            for k_v in self._literal_index_keys(old_expr):
                self.literal_exprs_by_kv.discard(k_v, expr_id)
        self.non_kv_expressions_by_id.pop(expr_id, None)

    def _remove_all_matches(self, expr_id):
        """
        Re-evaluates every current match of a deleted expression against
        None so that the matches are cleaned up.
        """
        # Copy to a list: the match index is updated as we go.
        for item_id in list(self.matches_by_expr_id.iter_values(expr_id)):
            _log.debug("Expression deleted, removing old match: %s", item_id)
            self._update_matches(expr_id, None, item_id,
                                 self.labels_by_item_id[item_id])

    def _index_literal_expression(self, expr_id, expr):
        """
        Stores matches for a literal-lookup expression straight from the
        key/value index and records the expression in literal_exprs_by_kv.
        """
        # Get the old matches as a set.  Then we can discard the items
        # that still match, leaving us with the ones that no longer
        # match.
        old_matches = set(self.matches_by_expr_id.iter_values(expr_id))
        _log.debug("New expression is a LabelToLiteralEqualityNode, using "
                   "index")
        for k_v in self._literal_index_keys(expr):
            for item_id in self.item_ids_by_key_value.iter_values(k_v):
                _log.debug("From index, %s matches %s", expr_id, item_id)
                old_matches.discard(item_id)
                self._store_match(expr_id, item_id)
            self.literal_exprs_by_kv.add(k_v, expr_id)
        # old_matches now contains only the items that this expression
        # previously matched but no longer does.  Remove them.
        for item_id in old_matches:
            _log.debug("Removing old match %s, %s", expr_id, item_id)
            self._discard_match(expr_id, item_id)

    def _evaluate_non_literal_expression(self, expr_id, expr):
        """
        Recalculates the matches for an expression that cannot be evaluated
        by exact lookup, using the key/value index to narrow the candidates
        when the expression has required key/value pairs.
        """
        required_kvs = expr.required_kvs
        if required_kvs:
            # The expression has some required k == "v" constraints, let's
            # try to find an index that reduces the work we need to do.
            _log.debug("New expression requires these values: %s",
                       required_kvs)
            best_kv = self._find_best_index(required_kvs)
            # Scan over the best index that we found.
            old_matches = set(self.matches_by_expr_id.iter_values(expr_id))
            for item_id in self.item_ids_by_key_value.iter_values(best_kv):
                old_matches.discard(item_id)
                self._update_matches(expr_id, expr, item_id,
                                     self.labels_by_item_id[item_id])
            # Clean up any left-over old matches.
            for item_id in old_matches:
                self._update_matches(expr_id, None, item_id,
                                     self.labels_by_item_id[item_id])
        else:
            # The expression was just too complex to index.  Give up and
            # do a linear scan.
            _log.debug("%s too complex to use indexes, doing linear scan",
                       expr_id)
            self._scan_all_labels(expr_id, expr)

    def _find_best_index(self, required_kvs):
        """
        Finds the smallest index for the given set of key/value requirements.

        For example, an expression "env == 'prod' && type == 'foo'" would have
        requirements [("env", "prod"), ("type", "foo")].  Suppose type=="foo"
        only applies to a handful of items but env=="prod" applies to many;
        this method would return ("type", "foo") as the best index.

        The search stops early at the first index with fewer than
        GOOD_ENOUGH_INDEX_SIZE items, so the result is not always the
        global minimum.

        :param required_kvs: iterable of (key, value) tuples.
        :returns the key, value tuple for the best index to use, or None if
                 required_kvs is empty.
        """
        min_kv = None
        min_num = None
        for k_v in required_kvs:
            num = self.item_ids_by_key_value.num_items(k_v)
            if min_num is None or num < min_num:
                min_kv = k_v
                min_num = num
                if num < self.GOOD_ENOUGH_INDEX_SIZE:
                    # Good enough, let's get on with evaluating the
                    # expressions rather than spending more time looking for
                    # a better index.
                    break
        _log.debug("Best index: %s, %s items", min_kv, min_num)
        return min_kv