import treepredict
import preprocessor
import postprocessor
import arff
import copy

label_count = 6
train_data_file = '.\\scene\\scene-train-tiny.arff'
test_data_file = '.\\scene\\scene-test-tiny.arff'

method = input('1: single label; 2: multiple binary classifiers ')
if method == '1':
    # Read the training set and build the tree (multi-label converted to single-label).
    (attributes_list, label_value_list, train_data) = preprocessor.read_data(train_data_file,
                                                                             label_count, arff.DENSE)
    train_data = preprocessor.translate_label_multiclass(train_data, label_count)
    tree = treepredict.buildtree(train_data, attributes_list, label_value_list)
    treepredict.printtree(tree)

    # Read the test set and check how well the tree does.
    (test_attributes_list, test_label_value_list, test_data) = preprocessor.read_data(test_data_file,
                                                                                      label_count, arff.DENSE)
    test_data_copy = copy.deepcopy(test_data)
    predicted_labels_list = []
    for row in test_data:
        result = treepredict.classify(row, tree, test_attributes_list)
        post_result = treepredict.post_classify(result)
        decoded_result = preprocessor.label_decoding(post_result)
        predicted_labels_list.append(decoded_result)
    hamming_loss = postprocessor.hamming_loss(test_data_copy, predicted_labels_list)
    print('hamming loss of merging labels:', hamming_loss)
else:
    # Treat the task as multiple binary classification problems (one classifier per label).
    pass
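# Sketch of what the "multiple binary classifiers" branch could look like (binary
# relevance): train one tree per label and collect the per-label predictions. This
# is an assumption about the intended design, not code from the project;
# project_to_single_label() and binary_label_values are hypothetical helpers that
# assume each row ends with the label_count binary label columns.
binary_label_values = ['0', '1']

def project_to_single_label(data, label_index, label_count):
    # Keep the feature columns and only the label column for label_index.
    projected = []
    for row in data:
        features = row[:-label_count]
        projected.append(features + [row[len(row) - label_count + label_index]])
    return projected

def binary_relevance_predict(train_data, test_data, attributes_list, label_count):
    predictions = [[] for _ in test_data]
    for label_index in range(label_count):
        single_label_train = project_to_single_label(train_data, label_index, label_count)
        tree = treepredict.buildtree(single_label_train, attributes_list, binary_label_values)
        for row_index, row in enumerate(test_data):
            result = treepredict.classify(row, tree, attributes_list)
            predictions[row_index].append(treepredict.post_classify(result))
    return predictions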
def load_random_trees(filename_prefix, tree_count):
    # Reload a stored forest: each tree is paired with the attribute-index list
    # recording which attribute columns it was trained on.
    # NOTE: the function header and filename scheme here are assumptions.
    random_trees = []
    for index in range(tree_count):
        decision_tree_filename = filename_prefix + str(index)
        attributes_index_filename = filename_prefix + 'attributes_' + str(index)
        decision_tree_in_list = preprocessor.load_tree(decision_tree_filename)
        decision_tree = preprocessor.list2tree(decision_tree_in_list)
        with open(attributes_index_filename, mode='r') as attributes_index_file:
            attributes_index = json.loads(attributes_index_file.read())
        random_trees.append({
            TREE: decision_tree,
            ATTRIBUTES_INDEX: attributes_index
        })
    return random_trees

(origin_attribute_list, label_list, train_data) = preprocessor.read_data(train_data_file,
                                                                         label_count, arff.DENSE)
attribute_count = len(origin_attribute_list)
attribute_count_per_tree = math.floor(math.sqrt(attribute_count) * 2)
tree_count_per_sample_copy = math.ceil(attribute_count / attribute_count_per_tree)

train_data = preprocessor.translate_label_multiclass(train_data, label_count)  # convert to a single-label data set
random_trees = train_random_trees(train_data, origin_attribute_list, label_list,
                                  sample_copy_count, attribute_count_per_tree)
forest_count = len(random_trees)
store_random_trees(random_trees, '.\\my_forest\\my_random_forest_')
loaded_random_trees = load_random_trees('.\\my_forest\\my_random_forest_', 27)
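# The script above only trains, stores, and reloads the forest; combining the trees
# at prediction time is not shown. Below is a minimal majority-vote sketch, assuming
# ATTRIBUTES_INDEX holds the column indices each tree was trained on and that
# treepredict.classify / post_classify are used as in the single-tree scripts;
# forest_classify itself is a hypothetical helper, not a function from the project.
from collections import Counter

def forest_classify(row, random_trees, origin_attribute_list):
    votes = []
    for entry in random_trees:
        indices = entry[ATTRIBUTES_INDEX]
        sub_row = [row[i] for i in indices]                           # restrict the row to this tree's attributes
        sub_attributes = [origin_attribute_list[i] for i in indices]
        result = treepredict.classify(sub_row, entry[TREE], sub_attributes)
        votes.append(treepredict.post_classify(result))
    # the label predicted by the most trees wins
    return Counter(votes).most_common(1)[0][0]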
def __init__(self, training_path):
    data = read_data(train_csv_path)
    self.training_data = data[:30000]      # first 30,000 rows for training
    self.validation_data = data[30000:]    # remaining rows for validation
    self.training_path = training_path
def __init__(self, training_path):
    data = read_data(train_csv_path)
    self.training_data = data[:30000]      # first 30,000 rows for training
    self.validation_data = data[33000:]    # rows from 33,000 on for validation
    self.training_path = training_path
    self.batch_data = self.randomize_batch()
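# randomize_batch() is referenced above but not shown. A minimal sketch of what such
# a batch sampler commonly does; the implementation and the batch size are assumptions,
# not taken from the project.
import random

def randomize_batch(training_data, batch_size=128):
    # Draw one random mini-batch (without replacement) from the training rows.
    return random.sample(training_data, batch_size)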
# Read the training set and build the tree (multi-label converted to single-label).
label_count = 6
# (attributes_list, label_list, train_data) = preprocessor.read_data('.\\scene\\scene-train-tiny.arff',
#                                                                    label_count, arff.DENSE)
# train_data = preprocessor.translate_label_multiclass(train_data, label_count)
# tree = treepredict.buildtree(train_data, attributes_list, label_list)
# treepredict.printtree(tree)
#
# # Test writing and reading the decision-tree file.
# tree_list = preprocessor.tree2array(tree)
# preprocessor.store_tree('.\\my_tree', tree_list)

# Load the decision tree from the file.
loaded_tree_list = preprocessor.load_tree('.\\my_tree')
loaded_tree = preprocessor.list2tree(loaded_tree_list)

# Read the test set and evaluate the loaded tree.
(test_attributes_list, test_label_value_list, test_data) = preprocessor.read_data('.\\scene\\scene-test-tiny.arff',
                                                                                  label_count, arff.DENSE)
results = []
for row in test_data:
    result = treepredict.classify(row, loaded_tree, test_attributes_list)
    print('predict result:', result, 'test case', row)
    post_result = treepredict.post_classify(result)
    results.append(preprocessor.label_decoding(post_result))
hammingloss = postprocessor.hamming_loss(test_data, results)
print('hamming loss:', hammingloss)
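# For reference, Hamming loss is the fraction of label positions predicted incorrectly,
# averaged over all test instances. The sketch below is a generic implementation of that
# formula, not the code of postprocessor.hamming_loss, and it assumes the true and
# predicted labels are given as equal-length 0/1 lists per instance.

def hamming_loss_example(true_label_sets, predicted_label_sets):
    wrong = 0
    total = 0
    for true_labels, predicted_labels in zip(true_label_sets, predicted_label_sets):
        for t, p in zip(true_labels, predicted_labels):
            wrong += int(t != p)
            total += 1
    return wrong / total

# Example: two instances with three labels each and one wrong position out of six -> 1/6.
print(hamming_loss_example([[1, 0, 1], [0, 1, 0]], [[1, 0, 0], [0, 1, 0]]))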