コード例 #1
0
    def fit(self, data, label):
        """Build the closure rules trie and the per-class rule objects.

        A ``ClosureRulesTrie`` is built from ``data`` and ``label`` and stored
        on ``self.trie``. For every class index ``i`` in
        ``range(self.number_of_classes)`` the rules returned by
        ``important_rules_selection`` (with ``self.threshold_for_rules[i]``)
        are wrapped in a ``RulesImportance`` object (with
        ``self.threshold_for_growth_rate[i]``), and each of those objects is
        appended to ``self.rules_class``. Finally ``self.model`` is set to
        ``True``.

        Args:
            data: Passed unchanged to ``ClosureRulesTrie``; presumably the
                training sequences -- confirm against that class.
            label: Passed unchanged to ``ClosureRulesTrie``; presumably the
                class labels matching ``data`` -- confirm against that class.

        NOTE(review): results are appended to ``self.rules_class`` without
        clearing it first, so a second call would add further entries unless
        the attribute is reset elsewhere -- confirm this is intended.
        """
        rules_tree = ClosureRulesTrie(data, label)

        # ``range`` instead of the Python 2-only ``xrange`` so the method
        # runs on both Python 2 and Python 3.
        class_ids = range(self.number_of_classes)

        imp_rules = [
            rules_tree.important_rules_selection(self.threshold_for_rules[i], label=i)
            for i in class_ids
        ]

        self.trie = rules_tree

        # Every RulesImportance is created before self.rules_class is touched,
        # so a failure while building them leaves that attribute unmodified.
        inf = [
            RulesImportance(imp_rules[i], rules_tree, self.threshold_for_growth_rate[i], label=i)
            for i in class_ids
        ]

        for class_rules in inf:
            self.rules_class.append(class_rules)

        self.model = True
# Name -> integer code mapping handed to ReadFromCSV as ``coding_dict``.
event_codes = {
    'work': 1,
    'separation': 2,
    'partner': 3,
    'marriage': 4,
    'children': 5,
    'parting': 6,
    'divorce': 7,
    'education': 8,
}
sequence_reader = ReadFromCSV(file_name='./data/full_data_shuffle.csv',
                              coding_dict=event_codes)

# Load the sequences together with the column named 'label'.
data, label = sequence_reader.from_file_to_data_list(label_name='label')
print(len(data))

# Both trie variants are built from the same data and labels.
rules_trie = RulesTrie(list_of_sequences=data, label=label)
closure_rules_trie = ClosureRulesTrie(list_of_sequences=data, label=label)

# For each trie, show its node/sequence dictionary and then the support of
# the rule [['1']], first for label 1 and then for label 0.
for current_trie in (rules_trie, closure_rules_trie):
    print(current_trie.node_full_sequence_dict)
    for class_id in (1, 0):
        print(current_trie.support_t(rule=[['1']], label=class_id))
print("")

# Rules selected from the plain trie with a minimum threshold of 0.2,
# for label 0 and then label 1.
print("Rules with min support 0.2")
for class_id in (0, 1):
    print(rules_trie.important_rules_selection(min_threshold=0.2, label=class_id))
コード例 #3
0
                              coding_dict={
                                  'work': 1,
                                  'separation': 2,
                                  'partner': 3,
                                  'marriage': 4,
                                  'children': 5,
                                  'parting': 6,
                                  'divorce': 7,
                                  'education': 8
                              })

# Load the sequences together with the column named 'label'.
data, label = sequence_reader.from_file_to_data_list(label_name='label')
print(len(data))

# Both trie variants are built from the same data and labels.
rules_trie = RulesTrie(list_of_sequences=data, label=label)
closure_rules_trie = ClosureRulesTrie(list_of_sequences=data, label=label)

# Print each trie's node/sequence dictionary followed by the support of the
# rule [['1']] for label 1 and then label 0.
for shown_trie in (rules_trie, closure_rules_trie):
    print(shown_trie.node_full_sequence_dict)
    for shown_label in (1, 0):
        print(shown_trie.support_t(rule=[['1']], label=shown_label))
print("")

# Rules selected from the plain trie with a minimum threshold of 0.2,
# for label 0 and then label 1.
print("Rules with min support 0.2")
for shown_label in (0, 1):
    print(rules_trie.important_rules_selection(min_threshold=0.2, label=shown_label))