Example #1
def test_basic_rule_tree():
    rules = [
        ("abc", "d"),
        ("abc", "f"),
        ("ac", "g"),
        ("bc", "d"),
        ("de", "g"),
    ]
    rules = [(ItemSet(a), ItemSet(c)) for (a, c) in rules]

    tree = RuleTree(4)
    for (antecedent, consequent) in rules:
        tree.insert(antecedent, consequent)

    # (ItemSet to record, expected match vector after recording it)
    test_cases = [
        ("abcd", [1, 0, 0, 1, 0]),
        ("geabcd", [1, 0, 0.5, 1, 0.5]),
        ("abc", [2 / 3, 0.0, 1 / 3, 2 / 3, 1 / 3]),
        ("bcd", [0.5, 0.0, 0.25, 0.75, 0.25]),
        ("def", [0.25, 0.0, 0.25, 0.5, 0.25]),
        ("geab", [0.0, 0.0, 0.0, 0.25, 0.0]),
    ]
    test_cases = [(ItemSet(items), expected) for (items, expected) in test_cases]
    print()
    for (itemset, expected_results) in test_cases:
        print("Adding {}".format(itemset))
        tree.record_matches(itemset)
        for (a, c) in tree.rules():
            print("  {} -> {} ; {}".format(a, c, tree.match_count_of(a, c)))
        assert expected_results == tree.match_vector()
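
The expected vectors above are consistent with RuleTree(4) behaving as a sliding window over the four most recent transactions, where each entry of match_vector() is the fraction of windowed transactions containing both the antecedent and the consequent of the corresponding rule. The sketch below reproduces those values under that assumption; MiniRuleTree is a hypothetical stand-in, not the project's RuleTree.

from collections import deque


class MiniRuleTree:
    def __init__(self, window_size, rules):
        self.rules = [(set(a), set(c)) for (a, c) in rules]
        self.window = deque(maxlen=window_size)

    def record_matches(self, transaction):
        self.window.append(set(transaction))

    def match_vector(self):
        def support(antecedent, consequent):
            matched = sum(1 for t in self.window
                          if antecedent <= t and consequent <= t)
            return matched / len(self.window)
        return [support(a, c) for (a, c) in self.rules]


mini = MiniRuleTree(4, [("abc", "d"), ("abc", "f"), ("ac", "g"),
                        ("bc", "d"), ("de", "g")])
mini.record_matches("abcd")
assert mini.match_vector() == [1, 0, 0, 1, 0]
mini.record_matches("geabcd")
assert mini.match_vector() == [1, 0, 0.5, 1, 0.5]
for transaction in ("abc", "bcd", "def", "geab"):
    mini.record_matches(transaction)
# Only the last four transactions remain in the window at this point.
assert mini.match_vector() == [0.0, 0.0, 0.0, 0.25, 0.0]
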
Example #2
from copy import deepcopy

# RuleTree, RollingMean, Drift, hellinger, hoeffding_bound, SAMPLE_INTERVAL
# and SAMPLE_THRESHOLD are provided elsewhere in the project.


class DriftDetector:
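    """Detects drift by comparing the rules' supports (the match vector) and
    the rag bag between the training window and the incoming test stream."""
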
    def __init__(self, volatility_detector):
        self.volatility_detector = volatility_detector

    def train(self, window, rules):
        assert len(rules) > 0
        assert len(window) > 0
        self.training_rule_tree = RuleTree(len(window))
        for (antecedent, consequent, _, _, _) in rules:
            self.training_rule_tree.insert(antecedent, consequent)

        # Populate the training rule tree with the rule frequencies from
        # the training window.
        for transaction in window:
            self.training_rule_tree.record_matches(transaction)

        # Populate the test rule tree with a deep copy of the training set.
        self.test_rule_tree = deepcopy(self.training_rule_tree)

        # Record the match vector: the vector of the rules' supports in the
        # training window.
        self.training_match_vec = self.training_rule_tree.match_vector()

        self.num_test_transactions = 0
        self.rule_vec_mean = RollingMean()
        self.rag_bag_mean = RollingMean()

    def check_for_drift(self, transaction, transaction_num):
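        """Record `transaction` and, every SAMPLE_INTERVAL transactions, test
        for drift. Returns a Drift describing the change, or None."""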
        self.test_rule_tree.record_matches(transaction)
        self.num_test_transactions += 1
        if self.num_test_transactions < SAMPLE_INTERVAL:
            return None

        # Sample and test for drift.
        self.num_test_transactions = 0

        if (self.rule_vec_mean.n + 1 > SAMPLE_THRESHOLD
                or self.rag_bag_mean.n + 1 > SAMPLE_THRESHOLD):
            # We'll need the drift confidence below. Calculate it.
            # Note: the +1 is there because of the add_sample() call below.
            gamma = self.volatility_detector.drift_confidence(transaction_num)
            print("gamma at transaction {} is {}".format(
                transaction_num, gamma))
            drift_confidence = 2.5 - gamma

        # Detect whether the rules' supports in the test window differ
        # from the rules' supports in the training window.
        distance = hellinger(self.training_match_vec,
                             self.test_rule_tree.match_vector())
        self.rule_vec_mean.add_sample(distance)
        if self.rule_vec_mean.n > SAMPLE_THRESHOLD:
            conf = self.rule_vec_mean.std_dev() * drift_confidence
            mean = self.rule_vec_mean.mean()
            if distance > mean + conf or distance < mean - conf:
                return Drift("rule-match-vector", distance, conf, mean)

        # Detect whether the rag bag differs between the training and
        # test windows.
        if not hoeffding_bound(self.training_rule_tree.rag_bag(),
                               self.training_rule_tree.transaction_count,
                               self.test_rule_tree.rag_bag(),
                               self.test_rule_tree.transaction_count, 0.05):
            return Drift(drift_type="rag-bag")

        return None
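
The hellinger() and hoeffding_bound() helpers used by check_for_drift() are defined elsewhere in the project and are not shown here. As a rough orientation only, sketches assuming the textbook definitions (Hellinger distance between two non-negative vectors, and a two-sample Hoeffding bound on means of [0, 1] variables) might look like the following; the project's actual helpers may normalise or parameterise these differently.

import math


def hellinger(p, q):
    # Hellinger-style distance between two equal-length non-negative vectors.
    return math.sqrt(sum((math.sqrt(a) - math.sqrt(b)) ** 2
                         for a, b in zip(p, q))) / math.sqrt(2)


def hoeffding_bound(mean_a, n_a, mean_b, n_b, delta):
    # True when the two sample means lie within the Hoeffding epsilon for
    # confidence parameter delta, i.e. their difference is not significant.
    epsilon = math.sqrt(0.5 * (1.0 / n_a + 1.0 / n_b) * math.log(2.0 / delta))
    return abs(mean_a - mean_b) <= epsilon

Under these definitions, the rag-bag check above reports drift whenever the training and test rag-bag rates differ by more than epsilon at delta = 0.05.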