예제 #1
0
def train_test(n_folds: int = 10, random_state=None) -> None:
    """Run stratified k-fold cross-validation and print the averaged metrics.

    Feature vectors and labels are obtained from ``create_training_vectors()``.
    Every fold is handed to ``classify``, which must return the five values
    ``(acc, pre, recall, macro_f1, micro_f1)``. The per-fold values are
    averaged with ``np.mean`` and printed; nothing is returned.

    Args:
        n_folds: Number of stratified folds to evaluate.
        random_state: Forwarded to ``StratifiedKFold`` so the shuffled split
            can be reproduced. ``None`` leaves the shuffle unseeded.
    """
    X_features, y = create_training_vectors()
    kf = StratifiedKFold(n_splits=n_folds, shuffle=True,
                         random_state=random_state)

    # One bucket of per-fold values for each metric, in classify()'s order.
    metric_names = ("acc", "pre", "recall", "macro_f1", "micro_f1")
    per_fold = [[] for _ in metric_names]

    for train_index, test_index in kf.split(X_features, y):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X_features[train_index], X_features[test_index]
        y_train, y_test = y[train_index], y[test_index]
        acc, pre, recall, macro_f1, micro_f1 = classify(train_X=X_train,
                                                        train_y=y_train,
                                                        test_X=X_test,
                                                        test_y=y_test)
        for bucket, value in zip(per_fold,
                                 (acc, pre, recall, macro_f1, micro_f1)):
            bucket.append(value)
        # Drop the fold's slices before the next iteration allocates new ones.
        del X_train, X_test, y_train, y_test

    print("======================")
    for name, values in zip(metric_names, per_fold):
        print("avg " + name + ":", np.mean(values))
    print("======================")
예제 #2
0
def train_test(
    n_folds: int = 10,
    random_state=None,
    tfidf_model_path: str = "/media/iiip/Elements/数据集/user_profiling/weibo/cache/tfidf_model.m",
) -> None:
    """Cross-validate a classifier on TF-IDF vectors and print averaged metrics.

    Documents and labels come from ``create_training_content()``. The
    documents are turned into vectors by ``MYTFIDF.get_tfidf_vector`` and
    then split with a shuffled ``StratifiedKFold``. Every fold is handed to
    ``classify``, which must return the five values
    ``(acc, pre, recall, macro_f1, micro_f1)``. The per-fold values are
    averaged with ``np.mean`` and printed; nothing is returned.

    Args:
        n_folds: Number of stratified folds to evaluate.
        random_state: Forwarded to ``StratifiedKFold`` so the shuffled split
            can be reproduced. ``None`` leaves the shuffle unseeded.
        tfidf_model_path: Value assigned to ``MYTFIDF.tfidf_model_path``
            before the vectors are requested.
    """
    contents, y = create_training_content()

    tfidf = MYTFIDF()
    tfidf.tfidf_model_path = tfidf_model_path
    # NOTE(review): the vectors are built from the full corpus before the
    # split. If get_tfidf_vector fits its weights on `corpus`, documents from
    # the test folds influence the training features -- confirm.
    X_features = tfidf.get_tfidf_vector(corpus=contents)
    print(np.shape(X_features))
    # The raw documents are no longer needed once they are vectorised.
    del contents

    kf = StratifiedKFold(n_splits=n_folds, shuffle=True,
                         random_state=random_state)

    # One bucket of per-fold values for each metric, in classify()'s order.
    metric_names = ("acc", "pre", "recall", "macro_f1", "micro_f1")
    per_fold = [[] for _ in metric_names]

    for train_index, test_index in kf.split(X_features, y):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X_features[train_index], X_features[test_index]
        y_train, y_test = y[train_index], y[test_index]
        acc, pre, recall, macro_f1, micro_f1 = classify(train_X=X_train,
                                                        train_y=y_train,
                                                        test_X=X_test,
                                                        test_y=y_test)
        for bucket, value in zip(per_fold,
                                 (acc, pre, recall, macro_f1, micro_f1)):
            bucket.append(value)
        # Drop the fold's slices before the next iteration allocates new ones.
        del X_train, X_test, y_train, y_test

    print("======================")
    for name, values in zip(metric_names, per_fold):
        print("avg " + name + ":", np.mean(values))
    print("======================")
예제 #3
0
def train_chi2_model(
    n_folds: int = 10,
    random_state=None,
    cv_model_path: str = "/media/iiip/Elements/数据集/user_profiling/weibo/cache/cv_model.m",
    chi2_model_path: str = "/media/iiip/Elements/数据集/user_profiling/weibo/cache/chi2_model.m",
) -> None:
    """Cross-validate a classifier on chi2 features and print averaged metrics.

    Documents and labels come from ``create_training_content()``. Inside each
    fold, ``MYCHI2.train_chi2`` is called on the training documents only, and
    both the training and test documents are then transformed with
    ``MYCHI2.get_chi2_vector``. Every fold is handed to ``classify``, which
    must return the five values ``(acc, pre, recall, macro_f1, micro_f1)``.
    The per-fold values are averaged with ``np.mean`` and printed; nothing is
    returned.

    Args:
        n_folds: Number of stratified folds to evaluate.
        random_state: Forwarded to ``StratifiedKFold`` so the shuffled split
            can be reproduced. ``None`` leaves the shuffle unseeded.
        cv_model_path: Value assigned to ``MYCHI2.cv_model_path``.
        chi2_model_path: Value assigned to ``MYCHI2.chi2_model_path``.
    """
    contents, y = create_training_content()
    # Both sequences are indexed with integer index arrays below, which needs
    # ndarray-style indexing. asarray does nothing if y is already an ndarray.
    contents = np.array(contents)
    y = np.asarray(y)

    chi2 = MYCHI2()
    chi2.cv_model_path = cv_model_path
    chi2.chi2_model_path = chi2_model_path

    kf = StratifiedKFold(n_splits=n_folds, shuffle=True,
                         random_state=random_state)

    # One bucket of per-fold values for each metric, in classify()'s order.
    metric_names = ("acc", "pre", "recall", "macro_f1", "micro_f1")
    per_fold = [[] for _ in metric_names]

    for train_index, test_index in kf.split(contents, y):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = contents[train_index], contents[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Train the selector on the training fold only, then transform both.
        chi2.train_chi2(X_train, y_train)
        X_chi2_train = chi2.get_chi2_vector(X_train)
        X_chi2_test = chi2.get_chi2_vector(X_test)
        # The raw document slices are not needed after vectorisation.
        del X_train, X_test

        acc, pre, recall, macro_f1, micro_f1 = classify(train_X=X_chi2_train,
                                                        train_y=y_train,
                                                        test_X=X_chi2_test,
                                                        test_y=y_test)
        for bucket, value in zip(per_fold,
                                 (acc, pre, recall, macro_f1, micro_f1)):
            bucket.append(value)
        # Drop the fold's data before the next iteration allocates new ones.
        del X_chi2_train, X_chi2_test, y_train, y_test

    print("======================")
    for name, values in zip(metric_names, per_fold):
        print("avg " + name + ":", np.mean(values))
    print("======================")