Esempio n. 1
0
File: cba_rg.py Progetto: munif/CBA
def rule_generator(dataset, minsup, minconf):
    frequent_ruleitems = FrequentRuleitems()
    car = Car()

    # get large 1-ruleitems and generate rules
    class_label = set([x[-1] for x in dataset])
    for column in range(0, len(dataset[0]) - 1):
        distinct_value = set([x[column] for x in dataset])
        for value in distinct_value:
            cond_set = {column: value}
            for classes in class_label:
                rule_item = ruleitem.RuleItem(cond_set, classes, dataset)
                if rule_item.support >= minsup:
                    frequent_ruleitems.add(rule_item)
    car.gen_rules(frequent_ruleitems, minsup, minconf)
    cars = car

    last_cars_number = 0
    current_cars_number = len(cars.rules)
    while frequent_ruleitems.get_size() > 0 and current_cars_number <= 2000 and \
                    (current_cars_number - last_cars_number) >= 10:
        candidate = candidate_gen(frequent_ruleitems, dataset)
        frequent_ruleitems = FrequentRuleitems()
        car = Car()
        for item in candidate.frequent_ruleitems_set:
            if item.support >= minsup:
                frequent_ruleitems.add(item)
        car.gen_rules(frequent_ruleitems, minsup, minconf)
        cars.append(car, minsup, minconf)
        last_cars_number = current_cars_number
        current_cars_number = len(cars.rules)

    return cars
Esempio n. 2
0
File: cba_rg.py Progetto: munif/CBA
def join(item1, item2, dataset):
    if item1.class_label != item2.class_label:
        return None
    category1 = set(item1.cond_set)
    category2 = set(item2.cond_set)
    if category1 == category2:
        return None
    intersect = category1 & category2
    for item in intersect:
        if item1.cond_set[item] != item2.cond_set[item]:
            return None
    category = category1 | category2
    new_cond_set = dict()
    for item in category:
        if item in category1:
            new_cond_set[item] = item1.cond_set[item]
        else:
            new_cond_set[item] = item2.cond_set[item]
    new_ruleitem = ruleitem.RuleItem(new_cond_set, item1.class_label, dataset)
    return new_ruleitem
def prune(rule, dataset):
    import sys
    min_rule_error = sys.maxsize
    pruned_rule = rule

    # prune rule recursively
    def find_prune_rule(this_rule):
        nonlocal min_rule_error
        nonlocal pruned_rule

        # calculate how many errors the rule r make in the dataset
        def errors_of_rule(r):
            import apr_cb_m1

            errors_number = 0
            for case in dataset:
                if apr_cb_m1.is_satisfy(case, r) == False:
                    errors_number += 1
            return errors_number

        rule_error = errors_of_rule(this_rule)
        if rule_error < min_rule_error:
            min_rule_error = rule_error
            pruned_rule = this_rule
        this_rule_cond_set = list(this_rule.cond_set)
        if len(this_rule_cond_set) >= 2:
            for attribute in this_rule_cond_set:
                temp_cond_set = dict(this_rule.cond_set)
                temp_cond_set.pop(attribute)
                temp_rule = ruleitem.RuleItem(temp_cond_set,
                                              this_rule.class_label, dataset)
                temp_rule_error = errors_of_rule(temp_rule)
                if temp_rule_error <= min_rule_error:
                    min_rule_error = temp_rule_error
                    pruned_rule = temp_rule
                    if len(temp_cond_set) >= 2:
                        find_prune_rule(temp_rule)