예제 #1
0
    def fit(self, data, label):
        """Build the rules trie from the training input and derive per-class rules.

        For every class index ``i`` in ``range(self.number_of_classes)``:

        1. candidate rules are selected from the trie with
           ``self.threshold_for_rules[i]``;
        2. a ``RulesImportance`` object is built from those candidates with
           ``self.threshold_for_growth_rate[i]``.

        The trie is stored in ``self.trie``, the ``RulesImportance`` objects
        are appended to ``self.rules_class`` in class order, and
        ``self.model`` is set to ``True``.

        Args:
            data: forwarded unchanged as the first argument of ``RulesTrie``.
            label: forwarded unchanged as the second argument of ``RulesTrie``.

        Returns:
            None. The results are stored on the instance.
        """
        rules_tree = RulesTrie(data, label)

        # ``range`` instead of ``xrange``: the latter exists only on Python 2,
        # while ``range`` iterates identically on both Python 2 and 3.
        class_ids = range(self.number_of_classes)

        # Select all candidate rules before any RulesImportance is built, so
        # the call order on the trie is the same as in a plain two-pass loop.
        imp_rules = [
            rules_tree.important_rules_selection(self.threshold_for_rules[i], label=i)
            for i in class_ids
        ]

        self.trie = rules_tree

        # Materialise the whole list first: if a constructor raises,
        # ``self.rules_class`` is left untouched.
        inf = [
            RulesImportance(imp_rules[i], rules_tree, self.threshold_for_growth_rate[i], label=i)
            for i in class_ids
        ]

        # NOTE(review): this extends the existing list without clearing it, so
        # anything already in ``self.rules_class`` (e.g. from an earlier call)
        # is kept -- confirm that this accumulation is intended.
        self.rules_class.extend(inf)

        self.model = True
# Build a plain rules trie and a closure rules trie from the same input.
rules_trie = RulesTrie(list_of_sequences=data, label=label)
closure_rules_trie = ClosureRulesTrie(list_of_sequences=data, label=label)

# For each trie, show the full trie structure and then the support of the
# rule [['1']] for class 1 followed by class 0.
for _trie in (rules_trie, closure_rules_trie):
    print(_trie.node_full_sequence_dict)
    for _class_id in (1, 0):
        print(_trie.support_t(rule=[['1']], label=_class_id))
print("")

# Rules can also be selected by a minimum-support threshold.
print("Rules with min support 0.2")
for _class_id in (0, 1):
    print(rules_trie.important_rules_selection(min_threshold=0.2, label=_class_id))
print("")
for _class_id in (0, 1):
    print(closure_rules_trie.important_rules_selection(min_threshold=0.2, label=_class_id))


# Classification-oriented selection, step 1: collect candidate rules per
# class using a low minimum support (class 1 first, then class 0).
rules_candidates_for1 = rules_trie.important_rules_selection(
    min_threshold=0.01,
    label=1,
)
rules_candidates_for0 = rules_trie.important_rules_selection(
    min_threshold=0.01,
    label=0,
)

# Step 2: keep only the candidates that pass the growth-rate threshold.
important_rules_for0 = RulesImportance(
    rules=rules_candidates_for0,
    trie=rules_trie,
    threshold=2,
    label=0,
)
important_rules_for1 = RulesImportance(
    rules=rules_candidates_for1,
    trie=rules_trie,
    threshold=2,
    label=1,
)
print("Important Rules")
예제 #3
0
# Two tries over the same labelled sequences: a plain one and a closure one.
rules_trie = RulesTrie(list_of_sequences=data, label=label)
closure_rules_trie = ClosureRulesTrie(list_of_sequences=data, label=label)

# Inspect each trie in turn: its full structure first, then the support of
# the rule [['1']] for label 1 and for label 0.
for _current_trie in (rules_trie, closure_rules_trie):
    print(_current_trie.node_full_sequence_dict)
    for _lbl in (1, 0):
        print(_current_trie.support_t(rule=[['1']], label=_lbl))
print("")

# Select the important rules at a minimum threshold of 0.2, label 0 before
# label 1; the two tries' results are separated by an empty line.
print("Rules with min support 0.2")
for _lbl in (0, 1):
    print(rules_trie.important_rules_selection(min_threshold=0.2, label=_lbl))
print("")
for _lbl in (0, 1):
    print(closure_rules_trie.important_rules_selection(min_threshold=0.2, label=_lbl))

# For the classification use case, first gather per-label candidate rules
# with a small minimum threshold; they are filtered by growth rate afterwards.
rules_candidates_for1 = rules_trie.important_rules_selection(min_threshold=0.01, label=1)
rules_candidates_for0 = rules_trie.important_rules_selection(min_threshold=0.01, label=0)

# from candidates select important rules by threshold
important_rules_for0 = RulesImportance(rules=rules_candidates_for0,
                                       trie=rules_trie,