import treepredict
import preprocessor
import postprocessor
import arff
import copy

label_count = 6
train_data_file = '.\\scene\\scene-train-tiny.arff'
test_data_file = '.\\scene\\scene-test-tiny.arff'

method = input('1: single label; 2: multiple binary classifiers ')
if method == '1':
    # Read the training set and build the tree (multi-label converted to single-label).
    (attributes_list, label_value_list, train_data) = preprocessor.read_data(train_data_file,
                                                                             label_count, arff.DENSE)
    train_data = preprocessor.translate_label_multiclass(train_data, label_count)
    tree = treepredict.buildtree(train_data, attributes_list, label_value_list)
    treepredict.printtree(tree)

    # Read the test set and check how well the tree does.
    (test_attributes_list, test_label_value_list, test_data) = preprocessor.read_data(test_data_file,
                                                                                      label_count, arff.DENSE)
    test_data_copy = copy.deepcopy(test_data)
    predicted_labels_list = []
    for row in test_data:
        result = treepredict.classify(row, tree, test_attributes_list)
        post_result = treepredict.post_classify(result)
        decoded_result = preprocessor.label_decoding(post_result)
        predicted_labels_list.append(decoded_result)
    hamming_loss = postprocessor.hamming_loss(test_data_copy, predicted_labels_list)
    print('hamming loss of merging labels:', hamming_loss)
else:
    # Treat the task as multiple binary classification problems (one classifier per label).
    pass
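# Sketch of what the "multiple binary classifiers" branch could look like (binary
# relevance): train one tree per label and collect the per-label predictions. This
# is an assumption about the intended design, not code from the project;
# project_to_single_label() and binary_label_values are hypothetical helpers that
# assume each row ends with the label_count binary label columns.
binary_label_values = ['0', '1']

def project_to_single_label(data, label_index, label_count):
    # Keep the feature columns and only the label column for label_index.
    projected = []
    for row in data:
        features = row[:-label_count]
        projected.append(features + [row[len(row) - label_count + label_index]])
    return projected

def binary_relevance_predict(train_data, test_data, attributes_list, label_count):
    predictions = [[] for _ in test_data]
    for label_index in range(label_count):
        single_label_train = project_to_single_label(train_data, label_index, label_count)
        tree = treepredict.buildtree(single_label_train, attributes_list, binary_label_values)
        for row_index, row in enumerate(test_data):
            result = treepredict.classify(row, tree, attributes_list)
            predictions[row_index].append(treepredict.post_classify(result))
    return predictions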
def load_random_trees(filename_prefix, tree_count):
    # Reload a stored forest: each tree is paired with the attribute-index list
    # recording which attribute columns it was trained on.
    # NOTE: the function header and filename scheme here are assumptions.
    random_trees = []
    for index in range(tree_count):
        decision_tree_filename = filename_prefix + str(index)
        attributes_index_filename = filename_prefix + 'attributes_' + str(index)
        decision_tree_in_list = preprocessor.load_tree(decision_tree_filename)
        decision_tree = preprocessor.list2tree(decision_tree_in_list)
        with open(attributes_index_filename, mode='r') as attributes_index_file:
            attributes_index = json.loads(attributes_index_file.read())
        random_trees.append({
            TREE: decision_tree,
            ATTRIBUTES_INDEX: attributes_index
        })
    return random_trees

(origin_attribute_list, label_list, train_data) = preprocessor.read_data(train_data_file,
                                                                         label_count, arff.DENSE)
attribute_count = len(origin_attribute_list)
attribute_count_per_tree = math.floor(math.sqrt(attribute_count) * 2)
tree_count_per_sample_copy = math.ceil(attribute_count / attribute_count_per_tree)

train_data = preprocessor.translate_label_multiclass(train_data, label_count)  # convert to a single-label data set
random_trees = train_random_trees(train_data, origin_attribute_list, label_list,
                                  sample_copy_count, attribute_count_per_tree)
forest_count = len(random_trees)
store_random_trees(random_trees, '.\\my_forest\\my_random_forest_')
loaded_random_trees = load_random_trees('.\\my_forest\\my_random_forest_', 27)
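# The script above only trains, stores, and reloads the forest; combining the trees
# at prediction time is not shown. Below is a minimal majority-vote sketch, assuming
# ATTRIBUTES_INDEX holds the column indices each tree was trained on and that
# treepredict.classify / post_classify are used as in the single-tree scripts;
# forest_classify itself is a hypothetical helper, not a function from the project.
from collections import Counter

def forest_classify(row, random_trees, origin_attribute_list):
    votes = []
    for entry in random_trees:
        indices = entry[ATTRIBUTES_INDEX]
        sub_row = [row[i] for i in indices]                           # restrict the row to this tree's attributes
        sub_attributes = [origin_attribute_list[i] for i in indices]
        result = treepredict.classify(sub_row, entry[TREE], sub_attributes)
        votes.append(treepredict.post_classify(result))
    # the label predicted by the most trees wins
    return Counter(votes).most_common(1)[0][0]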
def __init__(self, training_path):
    data = read_data(train_csv_path)
    self.training_data = data[:30000]      # first 30,000 rows for training
    self.validation_data = data[30000:]    # remaining rows for validation
    self.training_path = training_path
def __init__(self, training_path):
    data = read_data(train_csv_path)
    self.training_data = data[:30000]      # first 30,000 rows for training
    self.validation_data = data[33000:]    # rows from 33,000 on for validation
    self.training_path = training_path
    self.batch_data = self.randomize_batch()
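# randomize_batch() is referenced above but not shown. A minimal sketch of what such
# a batch sampler commonly does; the implementation and the batch size are assumptions,
# not taken from the project.
import random

def randomize_batch(training_data, batch_size=128):
    # Draw one random mini-batch (without replacement) from the training rows.
    return random.sample(training_data, batch_size)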
# Read the training set and build the tree (multi-label converted to single-label).
label_count = 6
# (attributes_list, label_list, train_data) = preprocessor.read_data('.\\scene\\scene-train-tiny.arff',
#                                                                    label_count, arff.DENSE)
# train_data = preprocessor.translate_label_multiclass(train_data, label_count)
# tree = treepredict.buildtree(train_data, attributes_list, label_list)
# treepredict.printtree(tree)
#
# # Test writing and reading the decision-tree file.
# tree_list = preprocessor.tree2array(tree)
# preprocessor.store_tree('.\\my_tree', tree_list)

# Load the decision tree from the file.
loaded_tree_list = preprocessor.load_tree('.\\my_tree')
loaded_tree = preprocessor.list2tree(loaded_tree_list)

# Read the test set and evaluate the loaded tree.
(test_attributes_list, test_label_value_list, test_data) = preprocessor.read_data('.\\scene\\scene-test-tiny.arff',
                                                                                  label_count, arff.DENSE)
results = []
for row in test_data:
    result = treepredict.classify(row, loaded_tree, test_attributes_list)
    print('predict result:', result, 'test case', row)
    post_result = treepredict.post_classify(result)
    results.append(preprocessor.label_decoding(post_result))
hammingloss = postprocessor.hamming_loss(test_data, results)
print('hamming loss:', hammingloss)
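# For reference, Hamming loss is the fraction of label positions predicted incorrectly,
# averaged over all test instances. The sketch below is a generic implementation of that
# formula, not the code of postprocessor.hamming_loss, and it assumes the true and
# predicted labels are given as equal-length 0/1 lists per instance.

def hamming_loss_example(true_label_sets, predicted_label_sets):
    wrong = 0
    total = 0
    for true_labels, predicted_labels in zip(true_label_sets, predicted_label_sets):
        for t, p in zip(true_labels, predicted_labels):
            wrong += int(t != p)
            total += 1
    return wrong / total

# Example: two instances with three labels each and one wrong position out of six -> 1/6.
print(hamming_loss_example([[1, 0, 1], [0, 1, 0]], [[1, 0, 0], [0, 1, 0]]))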