def get_classification(feature, incr=False):
    """
    Obtain a classifier fitted on the key words produced by ``feature``.

    :param feature: object providing ``get_key_words``,
        ``cal_weight_improve`` and a ``subjective`` attribute
    :param incr: when truthy, an ``IncrBayes`` model is used and the
        incremental step is run on the data from ``Load.load_incr_datas``;
        otherwise a plain ``Bayes`` model is used
    :return: the ``Classification`` instance after ``get_classificator``
        (and, if ``incr``, ``get_incr_classificator``) has been called
    """

    def _as_bayes_input(datas, labels):
        # Build a data set suitable for bayes classification: sparse input
        # is used as-is, anything else goes through cal_weight_improve.
        if sp.issparse(datas):
            return datas
        return feature.cal_weight_improve(datas, labels)

    raw_train, class_label, _ = feature.get_key_words()
    train = _as_bayes_input(raw_train, class_label)

    model = IncrBayes() if incr else Bayes()
    clf = Classification(bayes=model, subjective=feature.subjective)
    clf.get_classificator(train, class_label)

    if not incr:
        return clf

    # Build the incremental set suitable for bayes classification.
    raw_incr = Load.load_incr_datas()
    incr_keys, incr_class_label, _ = feature.get_key_words(raw_incr)
    incr_train = _as_bayes_input(incr_keys, incr_class_label)

    # NOTE(review): "five" selects one of the incremental strategies of
    # get_incr_classificator; its meaning is not visible from here.
    clf.get_incr_classificator(
        incr_train, incr_class_label, train, class_label, method="five"
    )
    return clf
def get_classification(feature, incr=False):
    """
    Create a ``Classification`` object and fit it on the feature key words.

    :param feature: source of the training data; its ``get_key_words``,
        ``cal_weight_improve`` and ``subjective`` members are used
    :param incr: truthy to wrap an ``IncrBayes`` model and follow up with
        the incremental fitting step, falsy to wrap a ``Bayes`` model only
    :return: the ``Classification`` instance that was fitted
    """
    key_words, class_label, _ = feature.get_key_words()

    # Build a data set suitable for bayes classification: non-sparse data
    # is passed through cal_weight_improve, sparse data is kept unchanged.
    train = (
        key_words
        if sp.issparse(key_words)
        else feature.cal_weight_improve(key_words, class_label)
    )

    bayes_cls = IncrBayes if incr else Bayes
    clf = Classification(bayes=bayes_cls(), subjective=feature.subjective)
    clf.get_classificator(train, class_label)

    if incr:
        incr_key_words, incr_class_label, _ = feature.get_key_words(
            Load.load_incr_datas()
        )

        # Build the incremental set suitable for bayes classification,
        # using the same sparse / non-sparse rule as for the training set.
        incr_train = (
            incr_key_words
            if sp.issparse(incr_key_words)
            else feature.cal_weight_improve(incr_key_words, incr_class_label)
        )

        # NOTE(review): the semantics of method="five" are defined by
        # get_incr_classificator, which is outside this snippet.
        clf.get_incr_classificator(
            incr_train,
            incr_class_label,
            train,
            class_label,
            method="five",
        )

    return clf
if not sp.issparse(train_datas): train = feature.cal_weight_improve(train_datas, class_label) test = Load.load_test_balance() test_datas, test_label, _ = feature.get_key_words(test) test = test_datas # 构建适合 bayes 分类的数据集 if not sp.issparse(test_datas): test = feature.cal_weight_improve(test_datas, test_label) crossvalidate = False # 若不交叉验证 记得修改 load_sample.py 中加载 train 的比例 if crossvalidate: out = os.path.join(TEXT_OUT, "best_train_test_index/test_index.txt") if not FileUtil.isexist(out) or FileUtil.isempty(out): clf0 = Classification() clf0.cross_validation(train, class_label, score="recall") test_index = np.loadtxt(out, dtype=int) test = train[test_index] test_label = np.asanyarray(class_label)[test_index].tolist() method_options = ("second", "four", "five") method_options_0 = ("B", "C", "D") linestyle = (':', '--', '-') plot.get_instance() for i in range(len(method_options)): bayes = IncrBayes() clf = Classification(bayes=bayes) clf.get_classificator(train, class_label, iscrossvalidate=crossvalidate, isbalance=False, minority_target=EMOTION_CLASS.keys()) # clf.get_classificator(train, class_label, isbalance=True, minority_target=["anger", "fear", "surprise"])