# Pair every training tweet with its label as (tweet, label) tuples.
# The two columns are walked positionally with zip() instead of being looked
# up as data_train.tweet[i]: on a Series, [i] is a label-based lookup, so it
# raises KeyError (or pairs the wrong rows) whenever the frame's index is not
# the default 0..n-1, e.g. after filtering or dropping rows.
train_set = list(zip(data_train.tweet, data_train.label))

# Parallel lists holding only the tweets and only the labels, in row order.
tweet_train_set = [tweet for tweet, _ in train_set]
label_train_set = [label for _, label in train_set]


## Initialize the LSF values for the training data ##
lsf_train = LSF.LSF(data_train)

# Every LSF entry is paired with itself as a 2-tuple, and the list of pairs
# is then converted to a NumPy array.
# NOTE(review): both tuple slots hold the same value -- confirm that this
# duplication is intended and not a placeholder for a second feature.
train_lsf = np.array(
    [(lsf_train[pos], lsf_train[pos]) for pos in range(len(lsf_train))]
)

#%%
## Initialize the test data ##
test_set = []

# Walk the test rows by index and read each tweet.
# NOTE(review): only the tweet lookup is visible in this loop body; nothing
# is appended to test_set here -- confirm against the complete script.
for i in range(len(data_testing)):
    data3 = data_testing.tweet[i]
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 24 23:33:53 2018

@author: Latifah
"""

import pandas as pd
import LSF

# Load the preprocessed testing data.
dataset = pd.read_csv("testing_prepros(01).csv", encoding='utf-8')
#%%
# Run LSF over the whole dataset and wrap the result in a Series.
score = pd.Series(LSF.LSF(dataset))

#%%
# Threshold the LSF output: entries >= 1 become label 1, all others 0.
label = [1 if value >= 1 else 0 for value in score]

#%%
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
)

# Compare the thresholded labels against the labels stored in the dataset.
# The three scalar metrics are scaled to percentages.
accuracy = 100 * accuracy_score(dataset.label, label)
precision = 100 * precision_score(dataset.label, label)
recall = 100 * recall_score(dataset.label, label)

# NOTE: this rebinds the imported function's name to its result, so from
# here on `confusion_matrix` refers to the computed matrix, not the function.
confusion_matrix = confusion_matrix(dataset.label, label)