def __init__(self, category_list_path, training_set_path):
    """Load the category list and the training material from disk paths.

    Args:
        category_list_path: Path handed to ``system.get_content_list`` to
            obtain the category list.
        training_set_path: Path handed to ``system.get_content_list`` to
            obtain the training set material.
    """
    # Both loads are evaluated before either attribute is assigned, so a
    # failing second load leaves the instance without partial state.
    self.category_list, self.training_set_material = (
        system.get_content_list(category_list_path),
        system.get_content_list(training_set_path),
    )
# -*- coding:utf-8 -*- from lib.baseClass import NB from py_utility import system if __name__ == "__main__": model_path = "model/nb_model.model" verification_samples_path = "data/verification_set.txt" verification_samples = system.get_content_list(verification_samples_path) n_verification_samples = len(verification_samples) global_counter = 0 cate_counter = dict() precision_counter = dict() # {"互联网":[33, 87], "体育":[14, 53] ..... } for ele in verification_samples: ele_list = ele.split("\t") text = ele_list[0] label = ele_list[1] model = system.json_loads(model_path) cl = NB(model["prior_table"], model["posterior_table"]) result = cl.predict(text) print(result) if label not in cate_counter.keys(): cate_counter[label] = [0, 0] if result not in precision_counter.keys():
# -*- coding:utf-8 -*-
from lib.baseClass import NB
from py_utility import system

if __name__ == "__main__":
    # Runs the stored model over the verification set: each sample line is
    # split on tabs into a text and a label, the text is classified, the
    # prediction is printed, and counter slots are prepared for the true
    # label and for the predicted label.
    model_path = "model/nb_model.model"
    verification_samples_path = "data/verification_set.txt"

    verification_samples = system.get_content_list(verification_samples_path)
    n_verification_samples = len(verification_samples)

    # The model path never changes between samples, so the model is loaded
    # and the classifier is built once here instead of once per sample.
    # NOTE(review): assumes NB.predict does not mutate the classifier and
    # that system.json_loads reads the same content on every call - confirm.
    model = system.json_loads(model_path)
    cl = NB(model["prior_table"], model["posterior_table"])

    global_counter = 0
    cate_counter = dict()
    # Example shape: {"Internet": [33, 87], "Sports": [14, 53], ...}
    precision_counter = dict()

    for ele in verification_samples:
        # Field 0 is the text and field 1 is the label; a line without a
        # tab raises IndexError on the label lookup.
        ele_list = ele.split("\t")
        text = ele_list[0]
        label = ele_list[1]

        result = cl.predict(text)
        print(result)

        # Create a two-element counter the first time a label or a
        # predicted category is seen.
        if label not in cate_counter:
            cate_counter[label] = [0, 0]
        if result not in precision_counter:
            precision_counter[result] = [0, 0]
__author__ = 'roy'

from py_utility.dataset.preprocess import dataset_split
from py_utility import system


def main():
    """Split the full dataset and write the two parts to their own files.

    The dataset entries are read with ``system.get_content_list``, passed to
    ``dataset_split`` with a ratio of 0.8 (presumably the training share -
    confirm against ``dataset_split``), joined with newlines and written out.
    """
    source_path = r"data/open_test/no_meiti/dataset.txt"
    output_paths = (r"data/training_set.txt", r"data/verification_set.txt")
    line_separator = "\n"

    entries = system.get_content_list(source_path)
    training_part, verification_part = dataset_split(entries, 0.8)

    # Render both parts before writing anything, so no file is touched
    # unless both strings could be built.
    rendered = [
        system.to_string(part, line_separator)
        for part in (training_part, verification_part)
    ]
    for target_path, text in zip(output_paths, rendered):
        system.write_content(target_path, text)


if __name__ == "__main__":
    main()