예제 #1
0
import treepredict
import preprocessor
import postprocessor
import arff
import copy

# Shared settings: the label count handed to the preprocessor and the
# ARFF files used for training and testing.
label_count = 6
train_data_file = '.\\scene\\scene-train-tiny.arff'
test_data_file = '.\\scene\\scene-test-tiny.arff'

# The prompt offers "1" (single label) or "2" (multiple binary classifiers).
method = input('1 单标签;2 多个二类分类')
if method == '1':
    # Read the training set, convert the multi-label data to single-label,
    # then build and print the tree.
    attributes_list, label_value_list, train_data = preprocessor.read_data(
        train_data_file, label_count, arff.DENSE)
    train_data = preprocessor.translate_label_multiclass(
        train_data, label_count)
    tree = treepredict.buildtree(
        train_data, attributes_list, label_value_list)
    treepredict.printtree(tree)

    # Read the test set and evaluate the tree on it. A deep copy of the
    # test data is taken before classification and is what gets scored.
    test_attributes_list, test_label_value_list, test_data = (
        preprocessor.read_data(test_data_file, label_count, arff.DENSE))
    test_data_copy = copy.deepcopy(test_data)

    # classify -> post_classify -> label_decoding, one prediction per row.
    predicted_labels_list = [
        preprocessor.label_decoding(
            treepredict.post_classify(
                treepredict.classify(test_row, tree, test_attributes_list)))
        for test_row in test_data
    ]

    hamming_loss = postprocessor.hamming_loss(
        test_data_copy, predicted_labels_list)
    print('hamming loss of merging labels:', hamming_loss)
else :
    # Treat the task as multiple independent binary classification problems
예제 #2
0
            index)
        decision_tree_in_list = preprocessor.load_tree(decision_tree_filename)
        decision_tree = preprocessor.list2tree(decision_tree_in_list)
        with open(attributes_index_filename,
                  mode='r') as attributes_index_file:
            attributes_index = json.loads(attributes_index_file.read())
        random_trees.append({
            TREE: decision_tree,
            ATTRIBUTES_INDEX: attributes_index
        })

    return random_trees


# Read the training set and derive the per-tree attribute budget:
# floor(2 * sqrt(attribute_count)) attributes per tree, and enough trees per
# sample copy to span all attributes at that budget.
origin_attribute_list, label_list, train_data = preprocessor.read_data(
    train_data_file, label_count, arff.DENSE)
attribute_count = len(origin_attribute_list)
attribute_count_per_tree = math.floor(math.sqrt(attribute_count) * 2)
tree_count_per_sample_copy = math.ceil(attribute_count /
                                       attribute_count_per_tree)

# Convert to a single-label dataset.
train_data = preprocessor.translate_label_multiclass(train_data,
                                                     label_count)

random_trees = train_random_trees(train_data, origin_attribute_list,
                                  label_list, sample_copy_count,
                                  attribute_count_per_tree)

forest_count = len(random_trees)
forest_path_prefix = '.\\my_forest\\my_random_forest_'
store_random_trees(random_trees, forest_path_prefix)
# Load back exactly as many trees as were just stored instead of a
# hard-coded 27, so the store/load round trip stays consistent whenever the
# forest size changes (different dataset or sample_copy_count).
loaded_random_trees = load_random_trees(forest_path_prefix, forest_count)
예제 #3
0
    def __init__(self, training_path):
        """Load the dataset and split it into training and validation parts.

        The first 30000 items returned by ``read_data`` become
        ``training_data``; everything after them becomes ``validation_data``.

        Args:
            training_path: Stored on the instance as ``training_path``.
                NOTE(review): the data is read from ``train_csv_path``, a
                name defined outside this method, not from this argument --
                confirm that is intended.
        """
        split_at = 30000
        dataset = read_data(train_csv_path)
        self.training_data, self.validation_data = (
            dataset[:split_at], dataset[split_at:])

        self.training_path = training_path
예제 #4
0
 def __init__(self, training_path):
     """Load the dataset, split it, and prepare the first batch.

     Items ``[:30000]`` of the ``read_data`` result become
     ``training_data`` and items ``[33000:]`` become ``validation_data``.
     NOTE(review): items 30000..32999 end up in neither split -- confirm
     the gap is intended.

     Args:
         training_path: Stored on the instance as ``training_path``.
             NOTE(review): the data is read from ``train_csv_path``, a
             name defined outside this method, not from this argument --
             confirm that is intended.
     """
     train_end, validation_start = 30000, 33000
     loaded = read_data(train_csv_path)
     self.training_data = loaded[:train_end]
     self.validation_data = loaded[validation_start:]
     self.training_path = training_path
     # Runs last, after the data attributes above have been assigned.
     self.batch_data = self.randomize_batch()
예제 #5
0
# Label count handed to the preprocessor when reading the ARFF data.
label_count = 6

# Disabled: read the training set and build the tree (converting the
# multi-label data to single-label), then exercise decision-tree file
# writing. Kept for reference.
# (attributes_list, label_list,train_data) = preprocessor.read_data('.\\scene\\scene-train-tiny.arff',
#                                                                   label_count, arff.DENSE)
# train_data = preprocessor.translate_label_multiclass(train_data, label_count)
# tree = treepredict.buildtree(train_data, attributes_list, label_list)
# treepredict.printtree(tree)
#
# # Test decision-tree file read/write
# tree_list = preprocessor.tree2array(tree)
# preprocessor.store_tree('.\\my_tree', tree_list)

# Load the decision tree from file and rebuild the tree from its list form.
loaded_tree_list = preprocessor.load_tree('.\\my_tree')
loaded_tree = preprocessor.list2tree(loaded_tree_list)

# Read the test set and evaluate the loaded tree on it.
test_attributes_list, test_label_value_list, test_data = (
    preprocessor.read_data('.\\scene\\scene-test-tiny.arff',
                           label_count, arff.DENSE))

results = []
for test_case in test_data:
    # NOTE(review): the third argument here is the label-value list, not
    # the attribute list -- confirm this matches what classify expects.
    prediction = treepredict.classify(
        test_case, loaded_tree, test_label_value_list)
    print('predict result:', prediction, 'test case', test_case)
    decoded = preprocessor.label_decoding(
        treepredict.post_classify(prediction))
    results.append(decoded)

hammingloss = postprocessor.hamming_loss(test_data, results)
print('hamming loss:', hammingloss)