def precision_micro_score(y_true, y_pred, labels):
    """Micro-averaged precision of *y_pred* against *y_true* over *labels*."""
    # FIX: `labels` must be passed by keyword — scikit-learn >= 1.3 makes all
    # arguments after (y_true, y_pred) keyword-only, so the old positional
    # call raises TypeError on current versions.
    return precision_score(y_true, y_pred, labels=labels, average="micro")


def recall_macro_score(y_true, y_pred, labels):
    """Macro-averaged recall of *y_pred* against *y_true* over *labels*."""
    return recall_score(y_true, y_pred, labels=labels, average="macro")


def recall_micro_score(y_true, y_pred, labels):
    """Micro-averaged recall of *y_pred* against *y_true* over *labels*."""
    return recall_score(y_true, y_pred, labels=labels, average="micro")


if __name__ == "__main__":
    # Load training data (pre-encoded feature vectors + labels).
    training_encoded_data_path = "./Dataset/encoded_training_data_4362.json"
    X_train, y_train = FeatureTransformer.load_encoded_data(
        training_encoded_data_path)

    # Load raw test data into a DataFrame; `content` holds the text,
    # `label` the target class.
    test_data_path = "./Dataset/valid_data_1091.json"
    test_data = utils.load_data(test_data_path)
    df = pd.DataFrame(test_data)
    X_test = df.content.values
    y_test = df.label.values

    # Transform test data into the encoded feature space.
    # NOTE(review): fit_transform is called on X_test together with y_train —
    # fitting a transformer on test data (and with training labels) looks
    # wrong; presumably the vocabulary loaded via vocab_path makes this a
    # pure transform. Verify against FeatureTransformer's implementation.
    ft = FeatureTransformer()
    X_test = ft.fit_transform(X_test, y_train, vocab_path=VOCAB_PATH)

    # Define models. (The script appears to continue beyond this chunk.)
    mnb = MultinomialNB(alpha=0.004)
import os, time import numpy as np import pandas as pd import matplotlib.pyplot as plt import utils from preprocessing import FeatureTransformer if __name__ == "__main__": # Load data to explore training_file_path = "./Dataset/New_Data_v2/encoded_training_data_6751.json" # test_file_path = "./Dataset/data_sent.json" # training_data = utils.load_data(training_file_path) training_data, labels = FeatureTransformer.load_encoded_data( training_file_path) # training_size = len(training_data) # test_data = utils.load_data(test_file_path) # test_size = len(test_data) # print("Training data size : ", training_size) # print("Test data size : ", test_size) print("========================================") # training_df = utils.convert_original_data_to_df(training_data) # print(training_df.info()) print("\nStatistic") # stats_by_label = training_df.label.value_counts().sort_index().reset_index() stats_by_label = pd.DataFrame(