import data_preprocess, embeddings
import nn_models, multi_models, awekar_models
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
import random
import math
import matplotlib.pyplot as plt

# --- Load the pickled dataset ------------------------------------------------
print("Loading Data ...")
data_source = 'data/twitter_data.pkl'

print("preprocessing Data ...")
# Bi-class view: texts plus their binary labels.
data, classification = data_preprocess.pickleData(data_source)
# Multi-class view of the same source; only its labels are kept, the texts
# returned alongside them are discarded right away.
mult_data, mult_class = data_preprocess.pickleData_multi(data_source)
#bully_data, bully_class = data_preprocess.pickleData_multi(data_source)
del mult_data

# --- Oversampling ------------------------------------------------------------
# Every sample whose bi-class label is non-zero is appended a second time.
# The binary label decides which rows are duplicated; the multi-class label at
# the same position is duplicated with it so the three sequences stay aligned.
_nonzero_positions = [
    position
    for position, label in enumerate(classification)
    if label != 0
]
ostext = [data[position] for position in _nonzero_positions]
oslabel = [classification[position] for position in _nonzero_positions]
oslabelmult = [mult_class[position] for position in _nonzero_positions]

# Concatenate into new objects (no in-place extension of the loaded data).
data = data + ostext
classification = classification + oslabel
mult_class = mult_class + oslabelmult
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
import math
import matplotlib.pyplot as plt
import random

# Output files the results are written to.
gru_res = 'results/multiclass/gru.txt'
lstm_res = 'results/multiclass/lstm.txt'
blstm_res = 'results/multiclass/blstm.txt'
all_res = 'results/multiclass/all.txt'

# --- Load the pickled dataset (multi-class labels) ---------------------------
# NOTE(review): data_preprocess is not imported in this section; it relies on
# an import made earlier in the file -- confirm.
print("Loading Data ...")
data_source = 'data/twitter_data.pkl'

print("preprocessing Data ...")
data, classification = data_preprocess.pickleData_multi(data_source)

# --- Oversampling to counter class bias --------------------------------------
# Each sample with a non-zero label is appended one extra time (a fixed
# duplication, not a random draw); the randomness comes from the shuffle below.
_duplicated = [
    (data[position], label)
    for position, label in enumerate(classification)
    if label != 0
]
ostext = [text for text, _ in _duplicated]
oslabel = [label for _, label in _duplicated]

data = data + ostext
classification = classification + oslabel

# Shuffle texts and labels together so each text keeps its own label.
oversampled = list(zip(data, classification))
random.shuffle(oversampled)
# Unzipping yields tuples, so data and classification are tuples from here on.
data, classification = zip(*oversampled)