def get_classification(feature, incr=False):
    """
    Obtain a classifier built from the key words supplied by ``feature``.

    :param feature: object providing ``get_key_words()``,
        ``cal_weight_improve()`` and a ``subjective`` attribute.
    :param incr: when true, an ``IncrBayes`` model is used instead of
        ``Bayes`` and the classifier is additionally given the data
        returned by ``Load.load_incr_datas()``.
    :return: the ``Classification`` object after ``get_classificator``
        (and, if ``incr`` is true, ``get_incr_classificator``) was called.
    """
    base_datas, class_label, _ = feature.get_key_words()

    # Build a data set suitable for bayes classification: sparse input is
    # used unchanged, anything else goes through the weighting step.
    if sp.issparse(base_datas):
        train = base_datas
    else:
        train = feature.cal_weight_improve(base_datas, class_label)

    model = IncrBayes() if incr else Bayes()
    clf = Classification(bayes=model, subjective=feature.subjective)
    clf.get_classificator(train, class_label)

    # Nothing more to do for the non-incremental classifier.
    if not incr:
        return clf

    raw_incr_datas = Load.load_incr_datas()
    incr_train, incr_class_label, _ = feature.get_key_words(raw_incr_datas)

    # Build an incremental set suitable for bayes classification,
    # following the same sparse / non-sparse rule as above.
    if not sp.issparse(incr_train):
        incr_train = feature.cal_weight_improve(incr_train, incr_class_label)

    clf.get_incr_classificator(
        incr_train, incr_class_label, train, class_label, method="five")
    return clf
예제 #2
0
def get_classification(feature, incr=False):
    """
    Obtain a classifier built from the key words supplied by ``feature``.

    :param feature: object providing ``get_key_words()``,
        ``cal_weight_improve()`` and a ``subjective`` attribute.
    :param incr: when true, an ``IncrBayes`` model is used instead of
        ``Bayes`` and the classifier is additionally given the data
        returned by ``Load.load_incr_datas()``.
    :return: the ``Classification`` object after ``get_classificator``
        (and, if ``incr`` is true, ``get_incr_classificator``) was called.
    """
    def to_bayes_input(datas, labels):
        # Build a data set suitable for bayes classification: sparse
        # input is returned unchanged, anything else is weighted first.
        if sp.issparse(datas):
            return datas
        return feature.cal_weight_improve(datas, labels)

    key_words, class_label, _ = feature.get_key_words()
    train = to_bayes_input(key_words, class_label)

    estimator = IncrBayes() if incr else Bayes()
    clf = Classification(bayes=estimator, subjective=feature.subjective)
    clf.get_classificator(train, class_label)

    if incr:
        loaded = Load.load_incr_datas()
        incr_key_words, incr_class_label, _ = feature.get_key_words(loaded)
        # The incremental set gets the same treatment as the base set.
        incr_train = to_bayes_input(incr_key_words, incr_class_label)
        clf.get_incr_classificator(
            incr_train, incr_class_label, train, class_label, method="five")

    return clf
예제 #3
0
            pred = clf.predict(test)
            pred_unknow = clf.predict_unknow(test)

            print "origin precision:", clf.metrics_precision(test_label, pred_unknow)
            print "origin recall:", clf.metrics_recall(test_label, pred_unknow)
            print "origin f1:", clf.metrics_f1(test_label, pred_unknow)
            print "origin accuracy:", clf.metrics_accuracy(test_label, pred_unknow)
            print "origin zero_one_loss:", clf.metrics_zero_one_loss(test_label, pred_unknow)
            test_proba = clf.predict_max_proba(test)
            print "origin my_zero_one_loss:", clf.metrics_my_zero_one_loss(test_proba)
            print
            clf.metrics_correct(test_label, pred_unknow)
#            plot.plot_roc(test_label, clf.predict_proba(test), classes=clf.bayes.classes_.tolist(), text='origin')

#        bayes.update(c_pred[0], test_datas[0].get("sentence"))
        incr_train_datas = Load.load_incr_datas()
        incr_train, incr_class_label, _ = feature.get_key_words(incr_train_datas)
        # 构建适合 bayes 分类的增量集
        fit_incr_train = incr_train
        if not sp.issparse(incr_train):
            fit_incr_train = feature.cal_weight_improve(incr_train, incr_class_label)

        clf.get_incr_classificator(fit_incr_train, incr_class_label, train, class_label, method=method_options[i])
        pred_unknow = clf.predict_unknow(test)

        print "incr precision:", clf.metrics_precision(test_label, pred_unknow)
        print "incr recall:", clf.metrics_recall(test_label, pred_unknow)
        print "incr f1:", clf.metrics_f1(test_label, pred_unknow)
        print "incr accuracy:", clf.metrics_accuracy(test_label, pred_unknow)
        print "incr zero_one_loss:", clf.metrics_zero_one_loss(test_label, pred_unknow)
        test_proba = clf.predict_max_proba(test)