# Example #1
def precision_micro_score(y_true, y_pred, labels):
    """Return the micro-averaged precision for the given label set.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        labels: Labels to include when computing the average.

    Returns:
        float: micro-averaged precision score.
    """
    # Pass `labels` by keyword: in scikit-learn >= 1.0 every parameter
    # after y_pred is keyword-only, so the positional call would raise.
    return precision_score(y_true, y_pred, labels=labels, average="micro")


def recall_macro_score(y_true, y_pred, labels):
    """Return the macro-averaged recall for the given label set.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        labels: Labels to include when computing the average.

    Returns:
        float: macro-averaged recall score.
    """
    # Pass `labels` by keyword: in scikit-learn >= 1.0 every parameter
    # after y_pred is keyword-only, so the positional call would raise.
    return recall_score(y_true, y_pred, labels=labels, average="macro")


def recall_micro_score(y_true, y_pred, labels):
    """Return the micro-averaged recall for the given label set.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        labels: Labels to include when computing the average.

    Returns:
        float: micro-averaged recall score.
    """
    # Pass `labels` by keyword: in scikit-learn >= 1.0 every parameter
    # after y_pred is keyword-only, so the positional call would raise.
    return recall_score(y_true, y_pred, labels=labels, average="micro")


if __name__ == "__main__":
    # Load pre-encoded training data (features + labels) from JSON.
    training_encoded_data_path = "./Dataset/encoded_training_data_4362.json"
    X_train, y_train = FeatureTransformer.load_encoded_data(training_encoded_data_path)

    # Load raw validation/test data; expects records with `content` and
    # `label` fields, turned into DataFrame columns below.
    test_data_path = "./Dataset/valid_data_1091.json"
    test_data = utils.load_data(test_data_path)
    df = pd.DataFrame(test_data)
    X_test = df.content.values
    y_test = df.label.values

    # Encode test content into the model's feature space.
    # NOTE(review): fit_transform is called on X_test with y_train — fitting
    # on test content (rather than calling transform with the trained
    # transformer) looks like train/test leakage or a mismatch; confirm the
    # vocabulary at VOCAB_PATH makes this encoding consistent with training.
    ft = FeatureTransformer()
    X_test = ft.fit_transform(X_test, y_train, vocab_path=VOCAB_PATH)

    # Define models (the block continues beyond this excerpt).
    mnb = MultinomialNB(alpha=0.004)
# Example #2
import os, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import utils
from preprocessing import FeatureTransformer

if __name__ == "__main__":
    # Exploratory script: load encoded training data and print label stats.
    training_file_path = "./Dataset/New_Data_v2/encoded_training_data_6751.json"
    # test_file_path = "./Dataset/data_sent.json"

    # Load pre-encoded features and labels from JSON.
    # training_data = utils.load_data(training_file_path)
    training_data, labels = FeatureTransformer.load_encoded_data(
        training_file_path)
    # training_size = len(training_data)
    # test_data = utils.load_data(test_file_path)
    # test_size = len(test_data)

    # print("Training data size : ", training_size)
    # print("Test data size : ", test_size)

    print("========================================")

    # training_df = utils.convert_original_data_to_df(training_data)

    # print(training_df.info())

    # Build a per-label count table (statement continues past this excerpt).
    print("\nStatistic")
    # stats_by_label = training_df.label.value_counts().sort_index().reset_index()
    stats_by_label = pd.DataFrame(