def rule_generator(dataset, minsup, minconf):
    """Mine rule items level by level and collect the rules they yield.

    The last element of every row in ``dataset`` is read as the class
    label; every other position is read as a condition column.

    Args:
        dataset: Indexable collection of rows; ``dataset[0]`` must exist.
        minsup: Minimum ``support`` a rule item needs to be kept.
        minconf: Threshold forwarded to ``Car.gen_rules`` / ``Car.append``.

    Returns:
        The ``Car`` instance created for the first level, after the rule
        sets of all later levels have been appended to it.
    """
    max_total_rules = 2000  # stop once more rules than this are collected
    min_new_rules = 10      # stop once a pass adds fewer rules than this

    frequent_ruleitems = FrequentRuleitems()
    cars = Car()

    # Level 1: one condition per rule item, paired with every class label.
    labels = {row[-1] for row in dataset}
    attribute_count = len(dataset[0]) - 1
    for column in range(attribute_count):
        for value in {row[column] for row in dataset}:
            # The same dict object is handed to every label's RuleItem.
            cond_set = {column: value}
            for label in labels:
                single_item = ruleitem.RuleItem(cond_set, label, dataset)
                if single_item.support >= minsup:
                    frequent_ruleitems.add(single_item)
    cars.gen_rules(frequent_ruleitems, minsup, minconf)

    # Later levels: build candidates from the previous frequent set, keep
    # those meeting minsup, and fold the resulting rules into ``cars``.
    previous_total = 0
    total = len(cars.rules)
    while (
        frequent_ruleitems.get_size() > 0
        and total <= max_total_rules
        and (total - previous_total) >= min_new_rules
    ):
        candidate = candidate_gen(frequent_ruleitems, dataset)
        frequent_ruleitems = FrequentRuleitems()
        level_car = Car()
        for item in candidate.frequent_ruleitems_set:
            if item.support >= minsup:
                frequent_ruleitems.add(item)
        level_car.gen_rules(frequent_ruleitems, minsup, minconf)
        cars.append(level_car, minsup, minconf)
        previous_total, total = total, len(cars.rules)

    return cars
def join(item1, item2, dataset):
    """Merge two rule items into one over the union of their conditions.

    Args:
        item1: Object exposing ``class_label`` and a ``cond_set`` mapping.
        item2: Object exposing ``class_label`` and a ``cond_set`` mapping.
        dataset: Passed through unchanged to ``ruleitem.RuleItem``.

    Returns:
        ``None`` if the class labels differ, if both items have exactly the
        same condition keys, or if they disagree on the value of any shared
        key. Otherwise a new ``ruleitem.RuleItem`` whose condition set is
        the union of both inputs, labelled with ``item1.class_label``.
    """
    label = item1.class_label
    if label != item2.class_label:
        return None

    conds1, conds2 = item1.cond_set, item2.cond_set
    keys1, keys2 = set(conds1), set(conds2)
    if keys1 == keys2:
        return None

    # Shared keys must map to equal values in both items.
    if any(conds1[key] != conds2[key] for key in keys1 & keys2):
        return None

    # Values for keys present in item1 are taken from item1.
    merged = {
        key: conds1[key] if key in keys1 else conds2[key]
        for key in keys1 | keys2
    }
    return ruleitem.RuleItem(merged, label, dataset)
def prune(rule, dataset): import sys min_rule_error = sys.maxsize pruned_rule = rule # prune rule recursively def find_prune_rule(this_rule): nonlocal min_rule_error nonlocal pruned_rule # calculate how many errors the rule r make in the dataset def errors_of_rule(r): import apr_cb_m1 errors_number = 0 for case in dataset: if apr_cb_m1.is_satisfy(case, r) == False: errors_number += 1 return errors_number rule_error = errors_of_rule(this_rule) if rule_error < min_rule_error: min_rule_error = rule_error pruned_rule = this_rule this_rule_cond_set = list(this_rule.cond_set) if len(this_rule_cond_set) >= 2: for attribute in this_rule_cond_set: temp_cond_set = dict(this_rule.cond_set) temp_cond_set.pop(attribute) temp_rule = ruleitem.RuleItem(temp_cond_set, this_rule.class_label, dataset) temp_rule_error = errors_of_rule(temp_rule) if temp_rule_error <= min_rule_error: min_rule_error = temp_rule_error pruned_rule = temp_rule if len(temp_cond_set) >= 2: find_prune_rule(temp_rule)