# Example #1
# 0
import data_preprocess, embeddings
import nn_models, multi_models, awekar_models
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
import random
import math
import matplotlib.pyplot as plt

# Stage 1: read the dataset file under both labelling schemes.
data_source = 'data/twitter_data.pkl'
print("Loading Data ...")
print("preprocessing Data ...")

# Two-class ("bi-class") view. `data` and `classification` are treated as
# parallel sequences further down (same index -> same sample).
data, classification = data_preprocess.pickleData(data_source)

# Multi-class view of the same file. Only the labels are kept; the texts
# returned alongside them are discarded immediately.
# NOTE(review): mult_class is later indexed with the same positions as
# `classification`, which presumes both loaders return samples in the same
# order -- confirm in data_preprocess.
_multi_texts, mult_class = data_preprocess.pickleData_multi(data_source)
del _multi_texts
# Disabled variant retained from the original script:
#bully_data, bully_class = data_preprocess.pickleData_multi(data_source)

# Oversampling pass: every sample whose binary label is non-zero is appended
# to the dataset one extra time, together with its multi-class label, so the
# three sequences stay aligned index-for-index.
# NOTE(review): label 0 is presumably the majority class -- confirm.
minority_rows = [row for row, label in enumerate(classification) if label != 0]
ostext = [data[row] for row in minority_rows]
oslabel = [classification[row] for row in minority_rows]
oslabelmult = [mult_class[row] for row in minority_rows]

# Rebind each name to "original followed by the duplicated samples".
data, classification, mult_class = (
    data + ostext,
    classification + oslabel,
    mult_class + oslabelmult,
)
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
import math
import matplotlib.pyplot as plt
import random

# Destination files for results (gru / lstm / blstm / all), all under
# results/multiclass/.
gru_res, lstm_res, blstm_res, all_res = (
    'results/multiclass/gru.txt',
    'results/multiclass/lstm.txt',
    'results/multiclass/blstm.txt',
    'results/multiclass/all.txt',
)

data_source = 'data/twitter_data.pkl'
print("Loading Data ...")
print("preprocessing Data ...")
# Rebinds `data` and `classification` to the output of pickleData_multi;
# whatever those two names held before this line is discarded.
data, classification = data_preprocess.pickleData_multi(data_source)

# Oversampling to counter class bias: each sample with a non-zero label is
# appended one extra time. The duplication itself is deterministic; the only
# random step is the shuffle further down.
ostext = [data[pos] for pos, label in enumerate(classification) if label != 0]
oslabel = [label for label in classification if label != 0]
data, classification = data + ostext, classification + oslabel

# Shuffle texts and labels as (text, label) pairs so each pair stays matched.
oversampled = list(zip(data, classification))
random.shuffle(oversampled)
# Unzip back into two parallel sequences; zip(*...) yields tuples, so `data`
# and `classification` are tuples after this line.
data, classification = zip(*oversampled)