def fit(self, data, label):
    """Build the rules trie for the training data and select rules per class.

    For every class index ``i`` in ``range(self.number_of_classes)`` this:

    1. asks the trie for candidate rules via ``important_rules_selection``
       using ``self.threshold_for_rules[i]``;
    2. wraps those candidates in a ``RulesImportance`` built with
       ``self.threshold_for_growth_rate[i]``.

    Side effects: sets ``self.trie``, appends one ``RulesImportance`` per
    class to ``self.rules_class`` and sets ``self.model`` to ``True``.

    :param data: training sequences, passed straight to ``RulesTrie``.
    :param label: labels for ``data``, passed straight to ``RulesTrie``.
    :return: None
    """
    rules_tree = RulesTrie(data, label)

    # ``range`` instead of ``xrange``: works on both Python 2 and Python 3
    # (``xrange`` raises NameError on Python 3).
    class_ids = range(self.number_of_classes)

    # Candidate rules for every class are collected before anything else,
    # keeping the original call order against the trie.
    imp_rules = [
        rules_tree.important_rules_selection(self.threshold_for_rules[i], label=i)
        for i in class_ids
    ]
    self.trie = rules_tree

    inf = [
        RulesImportance(imp_rules[i], rules_tree,
                        self.threshold_for_growth_rate[i], label=i)
        for i in class_ids
    ]

    # NOTE(review): results are appended to the existing ``self.rules_class``
    # rather than replacing it, so calling ``fit`` twice accumulates entries
    # -- confirm this is intended.
    for importance in inf:
        self.rules_class.append(importance)

    self.model = True
# Build both trie variants from the same labelled sequences.
rules_trie = RulesTrie(list_of_sequences=data, label=label)
closure_rules_trie = ClosureRulesTrie(list_of_sequences=data, label=label)

# For each trie, show its `node_full_sequence_dict` and the support of the
# rule [['1']] for label 1 and then label 0.
for _trie in (rules_trie, closure_rules_trie):
    print(_trie.node_full_sequence_dict)
    for _lbl in (1, 0):
        print(_trie.support_t(rule=[['1']], label=_lbl))
print("")

# Rules can also be selected by a minimum-support threshold.
print("Rules with min support 0.2")
for _lbl in (0, 1):
    print(rules_trie.important_rules_selection(min_threshold=0.2, label=_lbl))
print("")
for _lbl in (0, 1):
    print(closure_rules_trie.important_rules_selection(min_threshold=0.2, label=_lbl))

# For classification, rules are additionally filtered by a growth-rate
# threshold. First gather candidates with a low minimum support ...
rules_candidates_for1 = rules_trie.important_rules_selection(
    min_threshold=0.01,
    label=1,
)
rules_candidates_for0 = rules_trie.important_rules_selection(
    min_threshold=0.01,
    label=0,
)

# ... then keep only the candidates that pass the threshold.
important_rules_for0 = RulesImportance(
    rules=rules_candidates_for0,
    trie=rules_trie,
    threshold=2,
    label=0,
)
important_rules_for1 = RulesImportance(
    rules=rules_candidates_for1,
    trie=rules_trie,
    threshold=2,
    label=1,
)
print("Important Rules")
rules_trie = RulesTrie(list_of_sequences=data, label=label) closure_rules_trie = ClosureRulesTrie(list_of_sequences=data, label=label) # we can see for example the full trie structure and support for some sequence print(rules_trie.node_full_sequence_dict) print(rules_trie.support_t(rule=[['1']], label=1)) print(rules_trie.support_t(rule=[['1']], label=0)) print(closure_rules_trie.node_full_sequence_dict) print(closure_rules_trie.support_t(rule=[['1']], label=1)) print(closure_rules_trie.support_t(rule=[['1']], label=0)) print("") # also we can take important rules by some threshold print("Rules with min support 0.2") print(rules_trie.important_rules_selection(min_threshold=0.2, label=0)) print(rules_trie.important_rules_selection(min_threshold=0.2, label=1)) print("") print(closure_rules_trie.important_rules_selection(min_threshold=0.2, label=0)) print(closure_rules_trie.important_rules_selection(min_threshold=0.2, label=1)) # also we can use a tool that uses in classification task, it takes rules with some growth rate threshold # create some candidates by min support rules_candidates_for1 = rules_trie.important_rules_selection( min_threshold=0.01, label=1) rules_candidates_for0 = rules_trie.important_rules_selection( min_threshold=0.01, label=0) # from candidates select important rules by threshold important_rules_for0 = RulesImportance(rules=rules_candidates_for0, trie=rules_trie,