def fit(self, data, label):
    """Fit the model: build the closure rules trie and per-class importances.

    A ``ClosureRulesTrie`` is built from ``data`` and ``label``. For every
    class index ``i`` in ``range(self.number_of_classes)`` the important
    rules are selected from the trie with ``self.threshold_for_rules[i]``,
    and each selection is wrapped in a ``RulesImportance`` created with
    ``self.threshold_for_growth_rate[i]``.

    Side effects:
        * ``self.trie`` is set to the freshly built trie.
        * One ``RulesImportance`` per class is appended to
          ``self.rules_class`` in class-index order.
        * ``self.model`` is set to ``True``.

    Args:
        data: Passed unchanged to ``ClosureRulesTrie`` as its first argument
            (presumably the list of sequences -- confirm against the trie).
        label: Passed unchanged to ``ClosureRulesTrie`` as its second
            argument (presumably the class label of each sequence --
            TODO confirm).

    Returns:
        None.
    """
    rules_tree = ClosureRulesTrie(data, label)

    # ``range`` instead of the Python-2-only ``xrange`` so the method runs on
    # both Python 2 and Python 3; the class count is small, so the eager
    # Python 2 list is harmless.
    class_ids = range(self.number_of_classes)

    # All rule selections are done before any RulesImportance is built,
    # exactly as in the original two-loop version.
    imp_rules = [
        rules_tree.important_rules_selection(self.threshold_for_rules[i], label=i)
        for i in class_ids
    ]
    self.trie = rules_tree

    importances = [
        RulesImportance(
            imp_rules[i],
            rules_tree,
            self.threshold_for_growth_rate[i],
            label=i,
        )
        for i in class_ids
    ]

    # NOTE(review): ``self.rules_class`` is appended to and not reset in this
    # method, so a second ``fit`` call would add to whatever is already
    # stored -- confirm that is intended.
    for importance in importances:
        self.rules_class.append(importance)

    self.model = True
# Read the sequences and their labels from the CSV file. ``coding_dict`` maps
# event names to integer codes (how ReadFromCSV applies it is not visible
# from this script).
event_codes = {
    'work': 1,
    'separation': 2,
    'partner': 3,
    'marriage': 4,
    'children': 5,
    'parting': 6,
    'divorce': 7,
    'education': 8,
}
sequence_reader = ReadFromCSV(
    file_name='./data/full_data_shuffle.csv',
    coding_dict=event_codes,
)
data, label = sequence_reader.from_file_to_data_list(label_name='label')
print(len(data))

# The same data and labels feed both a Rules Trie and a Closure Rules Trie.
rules_trie = RulesTrie(list_of_sequences=data, label=label)
closure_rules_trie = ClosureRulesTrie(list_of_sequences=data, label=label)

# For each trie, print its full node/sequence structure, then the support of
# the rule [['1']] for label 1 followed by label 0.
for current_trie in (rules_trie, closure_rules_trie):
    print(current_trie.node_full_sequence_dict)
    for class_label in (1, 0):
        print(current_trie.support_t(rule=[['1']], label=class_label))
print("")

# Important rules can also be selected by a minimum threshold, per label.
print("Rules with min support 0.2")
for class_label in (0, 1):
    print(rules_trie.important_rules_selection(min_threshold=0.2, label=class_label))
coding_dict={ 'work': 1, 'separation': 2, 'partner': 3, 'marriage': 4, 'children': 5, 'parting': 6, 'divorce': 7, 'education': 8 }) data, label = sequence_reader.from_file_to_data_list(label_name='label') print(len(data)) # using data and label we can build Rules Trie and Closure Rules Trie rules_trie = RulesTrie(list_of_sequences=data, label=label) closure_rules_trie = ClosureRulesTrie(list_of_sequences=data, label=label) # we can see for example the full trie structure and support for some sequence print(rules_trie.node_full_sequence_dict) print(rules_trie.support_t(rule=[['1']], label=1)) print(rules_trie.support_t(rule=[['1']], label=0)) print(closure_rules_trie.node_full_sequence_dict) print(closure_rules_trie.support_t(rule=[['1']], label=1)) print(closure_rules_trie.support_t(rule=[['1']], label=0)) print("") # also we can take important rules by some threshold print("Rules with min support 0.2") print(rules_trie.important_rules_selection(min_threshold=0.2, label=0)) print(rules_trie.important_rules_selection(min_threshold=0.2, label=1))