Esempio n. 1
0
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB,BernoulliNB
from sklearn.linear_model import LogisticRegression,SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
import nltk
import random
import nltk.corpus
import sklearn
import numpy
import csv
from sklearn import metrics
import sys
from nltk.stem.snowball import SnowballStemmer

nomeBase = "data/train.csv"
base = readBase(nomeBase)

tamBase = len(base)
print("Tamanho da base: "+str(tamBase))

#tokenização
i=0
documents = []
while (i<tamBase):
    conteudoLista = (base[i][0].split(),base[i][1])
    documents.append(conteudoLista)
    i += 1

#Misturando a base
random.shuffle(documents)
Esempio n. 2
0
from tensorflow import keras
from Util import readBase, trata, arrays

import numpy as np

# readBase('colecao_dourada_2_class_unbalanced.csv')

print(tf.__version__)

imdb = keras.datasets.imdb

# (train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

(train_data, train_labels), (test_data, test_labels) = trata(
    readBase('colecao_dourada_2_class_unbalanced.csv'), 0.7)

# print("Training entries: {}, labels: {}".format(len(train_data), len(train_labels)))

len(train_data[0]), len(train_data[1])

# A dictionary mapping words to an integer index
'''
word_index = imdb.get_word_index()

# The first indices are reserved
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3
Esempio n. 3
0
def executar(experimento, nome_Base, acento):
    nomeBase = nome_Base
    path = experimento + nomeBase
    print('executando:\n' + path)
    print('Sem acento:\n' + ('Sim' if (acento) else 'Não'))

    base = readBase(nomeBase)

    tamBase = len(base)
    i = 0
    documents = []
    #print base[0][0].split()
    tknzr = nltk.tokenize.TweetTokenizer()

    while (i < tamBase):
        if (acento):
            w = remocaoacento(tknzr.tokenize(base[i][0]))
        else:
            w = tknzr.tokenize(base[i][0])
        w = remocaopontos(w)
        conteudoLista = (w, base[i][1])
        documents.append(conteudoLista)
        i += 1

    ################################ Pre Processamento
    stopwords = nltk.corpus.stopwords.words('portuguese')

    stemmer = nltk.stem.RSLPStemmer()

    #h=0
    #j=len(documents)
    #while (h<j):
    #    g=len(documents[h][0])
    #    f=0
    #    while(f<g):
    #        stemmer.stem(documents[h][0][f])
    #        f+=1
    #    h += 1

    ################################

    random.shuffle(documents)

    all_words = []

    k = 0
    l = len(documents)
    while (k < l):
        m = len(documents[k][0])
        n = 0
        while (n < m):
            all_words.append(documents[k][0][n])
            n += 1
        k += 1
    print(str(len(all_words)))

    all_words = [w.lower() for w in all_words if w not in stopwords]

    all_words = [stemmer.stem(w) for w in all_words]
    # all_words = remocaopontos(all_words)

    #all_words = nltk.FreqDist(all_words) #calcula frequencia de palavras, definir o limite de palavras
    #all_words = nltk.LaplaceProbDist(nltk.FreqDist(all_words))
    #all_words = nltk.SimpleGoodTuringProbDist(nltk.FreqDist(all_words))
    #all_words = nltk.LidstoneProbDist(nltk.FreqDist(all_words), 0.1)
    #all_words = nltk.WittenBellProbDist(nltk.FreqDist(all_words))
    #nltk.WittenBellProbDist() procurar como mudar o ngram
    #all_words = nltk.MLEProbDist(nltk.FreqDist(all_words))
    #all_words = nltk.SimpleGoodTuringProbDist(nltk.FreqDist(w.lower() for w in all_words if w not in stopwords))

    #all_words = nltk.LidstoneProbDist(nltk.FreqDist(w.lower() for w in all_words if w not in stopwords), 0.1)

    ##all_words = nltk.FreqDist(nltk.FreqDist(w.lower() for w in all_words if w not in stopwords))
    all_words = nltk.FreqDist(nltk.FreqDist(all_words))

    #word_features = list(all_words.samples()) #se usando FreqDistlista com palavras que aparecem mais de 3000
    '''aqui que modifiquei
    word_features = list(all_words.keys())

    def find_features(document):
        words = set(document)
        features = {}
        for w in word_features:
            features[w] = (w in words)

        return features
    '''

    print(str(len(all_words.keys())))

    def wordbig(word_feature):
        words = []
        i = 0
        l = len(word_feature) - 1
        while (i < l):
            words.append(tuple([word_feature[i], word_feature[i + 1]]))
            i += 1
        return words

    def removerpalavras(todas_palavras, document):
        #remover as palavras que não estãoem todas as palavras
        linha = []
        for w in document:
            if (w in todas_palavras):
                linha.append(w)
        return linha

    def wordFeature(documents):
        #cria um dicionario de dados
        dicionario = []
        for w in documents:
            for q in w[0]:
                if (not q in dicionario):
                    dicionario.append(q)
        return dicionario

    documents = [[removerpalavras(all_words.keys(), w[0]), w[1]]
                 for w in documents]
    documents = [[wordbig(w[0]), w[1]] for w in documents]
    word_features = wordFeature(documents)

    def find_features(document):
        features = {}
        # i = 0
        # l = len(word_features)
        # while i<l:
        #     features[str(i)] = (w in document)
        #     i+=1
        for w in word_features:
            features[w] = (w in document)
        return features

    #aquii
    featuresets = [(find_features(rev), category)
                   for (rev, category) in documents]

    for w in featuresets:
        print(w)

    exit()

    kfold = 4

    baseInteira = featuresets

    tamT = len(featuresets)
    divisor = tamT // kfold

    ###### ajustar divisao
    baseDividida1 = featuresets[0:divisor]
    baseDividida2 = featuresets[divisor:(divisor * 2)]
    baseDividida3 = featuresets[(divisor * 2):(divisor * 3)]
    baseDividida4 = featuresets[(divisor * 3):tamT]

    #tamT = len(featuresets)
    #umQuarto = tamBase/4

    #training_set = featuresets[umQuarto:]
    #testing_set = featuresets[:umQuarto]

    #training_set = featuresets[100:]
    #testing_set = featuresets[0:100]

    ########################## 1 rodada
    #print "## RODADA 1 ##"

    training_set = baseDividida2 + baseDividida3 + baseDividida4
    testing_set = baseDividida1

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc1 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa1 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp1 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB1 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB1 = 0
    while (g < len(precisaoMNB1)):
        somaPMNB1 = somaPMNB1 + precisaoMNB1[g]
        g = g + 1
    MNBpt1 = (somaPMNB1 / len(precisaoMNB1)) * 100
    MNBrp1 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB1 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB1 = 0
    while (g < len(recallMNB1)):
        somaRMNB1 = somaRMNB1 + recallMNB1[g]
        g = g + 1
    MNBrt1 = (somaRMNB1 / len(recallMNB1)) * 100
    MNBfp1 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB1 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB1 = 0
    while (g < len(f1MNB1)):
        somaFMNB1 = somaFMNB1 + f1MNB1[g]
        g = g + 1
    MNBft1 = (somaFMNB1 / len(f1MNB1)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc1 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra1 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp1 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR1 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR1 = 0
    while (g < len(precisaoR1)):
        somaPR1 = somaPR1 + precisaoR1[g]
        g = g + 1
    Rpt1 = (somaPR1 / len(precisaoR1)) * 100
    Rrp1 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR1 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR1 = 0
    while (g < len(recallR1)):
        somaRR1 = somaRR1 + recallR1[g]
        g = g + 1
    Rrt1 = (somaRR1 / len(recallR1)) * 100
    Rfp1 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R1 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR1 = 0
    while (g < len(f1R1)):
        somaFR1 = somaFR1 + f1R1[g]
        g = g + 1
    Rft1 = (somaFR1 / len(f1R1)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc1 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La1 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp1 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL1 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL1 = 0
    while (g < len(precisaoL1)):
        somaPL1 = somaPL1 + precisaoL1[g]
        g = g + 1
    Lpt1 = (somaPL1 / len(precisaoL1)) * 100
    Lrp1 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL1 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL1 = 0
    while (g < len(recallL1)):
        somaRL1 = somaRL1 + recallL1[g]
        g = g + 1
    Lrt1 = (somaRL1 / len(recallL1)) * 100
    Lfp1 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L1 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL1 = 0
    while (g < len(f1L1)):
        somaFL1 = somaFL1 + f1L1[g]
        g = g + 1
    Lft1 = (somaFL1 / len(f1L1)) * 100

    ######################## Rodada 2
    #print "## RODADA 2 ##"

    training_set = baseDividida1 + baseDividida3 + baseDividida4
    testing_set = baseDividida2

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc2 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa2 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp2 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB2 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB2 = 0
    while (g < len(precisaoMNB2)):
        somaPMNB2 = somaPMNB2 + precisaoMNB2[g]
        g = g + 1
    MNBpt2 = (somaPMNB2 / len(precisaoMNB2)) * 100
    MNBrp2 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB2 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB2 = 0
    while (g < len(recallMNB2)):
        somaRMNB2 = somaRMNB2 + recallMNB2[g]
        g = g + 1
    MNBrt2 = (somaRMNB2 / len(recallMNB2)) * 100
    MNBfp2 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB2 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB2 = 0
    while (g < len(f1MNB2)):
        somaFMNB2 = somaFMNB2 + f1MNB2[g]
        g = g + 1
    MNBft2 = (somaFMNB2 / len(f1MNB2)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc2 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra2 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp2 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR2 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR2 = 0
    while (g < len(precisaoR2)):
        somaPR2 = somaPR2 + precisaoR2[g]
        g = g + 1
    Rpt2 = (somaPR2 / len(precisaoR2)) * 100
    Rrp2 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR2 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR2 = 0
    while (g < len(recallR2)):
        somaRR2 = somaRR2 + recallR2[g]
        g = g + 1
    Rrt2 = (somaRR2 / len(recallR2)) * 100
    Rfp2 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R2 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR2 = 0
    while (g < len(f1R2)):
        somaFR2 = somaFR2 + f1R2[g]
        g = g + 1
    Rft2 = (somaFR2 / len(f1R2)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc2 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La2 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp2 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL2 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL2 = 0
    while (g < len(precisaoL2)):
        somaPL2 = somaPL2 + precisaoL2[g]
        g = g + 1
    Lpt2 = (somaPL2 / len(precisaoL2)) * 100
    Lrp2 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL2 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL2 = 0
    while (g < len(recallL2)):
        somaRL2 = somaRL2 + recallL2[g]
        g = g + 1
    Lrt2 = (somaRL2 / len(recallL2)) * 100
    Lfp2 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L2 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL2 = 0
    while (g < len(f1L2)):
        somaFL2 = somaFL2 + f1L2[g]
        g = g + 1
    Lft2 = (somaFL2 / len(f1L2)) * 100

    ##################### rodada 3
    #print "## RODADA 3 ##"

    training_set = baseDividida1 + baseDividida2 + baseDividida4
    testing_set = baseDividida3

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc3 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa3 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp3 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB3 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB3 = 0
    while (g < len(precisaoMNB3)):
        somaPMNB3 = somaPMNB3 + precisaoMNB3[g]
        g = g + 1
    MNBpt3 = (somaPMNB3 / len(precisaoMNB3)) * 100
    MNBrp3 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB3 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB3 = 0
    while (g < len(recallMNB3)):
        somaRMNB3 = somaRMNB3 + recallMNB3[g]
        g = g + 1
    MNBrt3 = (somaRMNB3 / len(recallMNB3)) * 100
    MNBfp3 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB3 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB3 = 0
    while (g < len(f1MNB3)):
        somaFMNB3 = somaFMNB3 + f1MNB3[g]
        g = g + 1
    MNBft3 = (somaFMNB3 / len(f1MNB3)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc3 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra3 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp3 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR3 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR3 = 0
    while (g < len(precisaoR3)):
        somaPR3 = somaPR3 + precisaoR3[g]
        g = g + 1
    Rpt3 = (somaPR3 / len(precisaoR3)) * 100
    Rrp3 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR3 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR3 = 0
    while (g < len(recallR3)):
        somaRR3 = somaRR3 + recallR3[g]
        g = g + 1
    Rrt3 = (somaRR3 / len(recallR3)) * 100
    Rfp3 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R3 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR3 = 0
    while (g < len(f1R3)):
        somaFR3 = somaFR3 + f1R3[g]
        g = g + 1
    Rft3 = (somaFR3 / len(f1R3)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc3 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La3 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp3 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL3 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL3 = 0
    while (g < len(precisaoL3)):
        somaPL3 = somaPL3 + precisaoL3[g]
        g = g + 1
    Lpt3 = (somaPL3 / len(precisaoL3)) * 100
    Lrp3 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL3 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL3 = 0
    while (g < len(recallL3)):
        somaRL3 = somaRL3 + recallL3[g]
        g = g + 1
    Lrt3 = (somaRL2 / len(recallL2)) * 100
    Lfp3 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L3 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL3 = 0
    while (g < len(f1L3)):
        somaFL3 = somaFL3 + f1L3[g]
        g = g + 1
    Lft3 = (somaFL3 / len(f1L3)) * 100

    ############################ rodada 4
    #print "## RODADA 4 ##"

    training_set = baseDividida1 + baseDividida2 + baseDividida3
    testing_set = baseDividida4

    MNB_classifier = SklearnClassifier(MultinomialNB())
    MNB_classifier.train(training_set)
    testclas = MNB_classifier.classify_many([fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    MNBmc4 = sklearn.metrics.confusion_matrix(testgold, testclas)
    MNBa4 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    MNBpp4 = sklearn.metrics.precision_score(testgold, testclas,
                                             average=None) * 100
    precisaoMNB4 = sklearn.metrics.precision_score(testgold,
                                                   testclas,
                                                   average=None)
    g = 0
    somaPMNB4 = 0
    while (g < len(precisaoMNB4)):
        somaPMNB4 = somaPMNB4 + precisaoMNB4[g]
        g = g + 1
    MNBpt4 = (somaPMNB4 / len(precisaoMNB4)) * 100
    MNBrp4 = (sklearn.metrics.recall_score(testgold, testclas,
                                           average=None)) * 100
    recallMNB4 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRMNB4 = 0
    while (g < len(recallMNB4)):
        somaRMNB4 = somaRMNB4 + recallMNB4[g]
        g = g + 1
    MNBrt4 = (somaRMNB4 / len(recallMNB4)) * 100
    MNBfp4 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1MNB4 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFMNB4 = 0
    while (g < len(f1MNB4)):
        somaFMNB4 = somaFMNB4 + f1MNB4[g]
        g = g + 1
    MNBft4 = (somaFMNB4 / len(f1MNB4)) * 100
    '''
    BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
    BernoulliNB_classifier.train(training_set)
    BernoulliNB_classifierRodada2 = nltk.classify.accuracy(BernoulliNB_classifier, testing_set)
    print("BernoulliNB_classifier accuracy percent:", BernoulliNB_classifierRodada2*100)
    '''
    LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
    LogisticRegression_classifier.train(training_set)
    testclas = LogisticRegression_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Rmc4 = sklearn.metrics.confusion_matrix(testgold, testclas)
    Ra4 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Rpp4 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoR4 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPR4 = 0
    while (g < len(precisaoR4)):
        somaPR4 = somaPR4 + precisaoR4[g]
        g = g + 1
    Rpt4 = (somaPR4 / len(precisaoR4)) * 100
    Rrp4 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallR4 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRR4 = 0
    while (g < len(recallR4)):
        somaRR4 = somaRR4 + recallR4[g]
        g = g + 1
    Rrt4 = (somaRR4 / len(recallR4)) * 100
    Rfp4 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1R4 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFR4 = 0
    while (g < len(f1R4)):
        somaFR4 = somaFR4 + f1R4[g]
        g = g + 1
    Rft4 = (somaFR4 / len(f1R4)) * 100
    '''
    SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
    SGDClassifier_classifier.train(training_set)
    SGDClassifier_classifierRodada2 = nltk.classify.accuracy(SGDClassifier_classifier, testing_set)
    print("SGDClassifier_classifier accuracy percent:", SGDClassifier_classifierRodada2*100)

    SVC_classifier = SklearnClassifier(SVC())
    SVC_classifier.train(training_set)
    SVC_classifierRodada2 = nltk.classify.accuracy(SVC_classifier, testing_set)
    print("SVC_classifier accuracy percent:", SVC_classifierRodada2*100)
    '''
    LinearSVC_classifier = SklearnClassifier(LinearSVC())
    LinearSVC_classifier.train(training_set)
    testclas = LinearSVC_classifier.classify_many(
        [fs for (fs, l) in testing_set])
    testgold = [l for (fs, l) in testing_set]
    Lmc4 = sklearn.metrics.confusion_matrix(testgold, testclas)
    La4 = (sklearn.metrics.accuracy_score(testgold, testclas)) * 100
    Lpp4 = sklearn.metrics.precision_score(testgold, testclas,
                                           average=None) * 100
    precisaoL4 = sklearn.metrics.precision_score(testgold,
                                                 testclas,
                                                 average=None)
    g = 0
    somaPL4 = 0
    while (g < len(precisaoL4)):
        somaPL4 = somaPL4 + precisaoL4[g]
        g = g + 1
    Lpt4 = (somaPL4 / len(precisaoL4)) * 100
    Lrp4 = (sklearn.metrics.recall_score(testgold, testclas,
                                         average=None)) * 100
    recallL4 = sklearn.metrics.recall_score(testgold, testclas, average=None)
    g = 0
    somaRL4 = 0
    while (g < len(recallL4)):
        somaRL4 = somaRL4 + recallL4[g]
        g = g + 1
    Lrt4 = (somaRL4 / len(recallL4)) * 100
    Lfp4 = (sklearn.metrics.f1_score(testgold, testclas, average=None))
    f1L4 = sklearn.metrics.f1_score(testgold, testclas, average=None)
    g = 0
    somaFL4 = 0
    while (g < len(f1L4)):
        somaFL4 = somaFL4 + f1L4[g]
        g = g + 1
    Lft4 = (somaFL4 / len(f1L4)) * 100

    ################# medias
    #print "## MEDIA ##"

    #MULTINOMINAL
    # print(MNBmc1)
    # print(MNBmc2)
    # print(MNBmc3)
    # print(MNBmc4)
    MNBmc = (MNBmc1 + MNBmc2 + MNBmc3 + MNBmc4) / 4
    MNBa = (MNBa1 + MNBa2 + MNBa3 + MNBa4) / 4
    MNBamax = max([MNBa1, MNBa2, MNBa3, MNBa4])
    MNBamin = min([MNBa1, MNBa2, MNBa3, MNBa4])
    MNBpp = (MNBpp4 + MNBpp4 + MNBpp4 + MNBpp4) / 4
    MNBpt = (MNBpt1 + MNBpt2 + MNBpt3 + MNBpt4) / 4
    MNBpmax = max([MNBpt1, MNBpt2, MNBpt3, MNBpt4])
    MNBpmin = min([MNBpt1, MNBpt2, MNBpt3, MNBpt4])
    MNBrp = (MNBrp1 + MNBrp2 + MNBrp3 + MNBrp4) / 4
    MNBrt = (MNBrt1 + MNBrt2 + MNBrt3 + MNBrt4) / 4
    MNBrmax = max([MNBrt1, MNBrt2, MNBrt3, MNBrt4])
    MNBrmin = min([MNBrt1, MNBrt2, MNBrt3, MNBrt4])
    MNBfp = (MNBfp1 + MNBfp2 + MNBfp3 + MNBfp4) / 4
    MNBft = (MNBft1 + MNBft2 + MNBft3 + MNBft4) / 4
    MNBfmax = max([MNBft1, MNBft2, MNBft3, MNBft4])
    MNBfmin = min([MNBft1, MNBft2, MNBft3, MNBft4])
    '''
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    ax.set_aspect('equal')
    plt.imshow(MNBmc, interpolation='nearest', cmap=plt.cm.ocean)
    plt.colorbar()
    plt.show()
    '''

    #REGRESSAO LINEAR
    Rmc = (Rmc1 + Rmc2 + Rmc3 + Rmc4) / 4
    Ra = (Ra1 + Ra2 + Ra3 + Ra4) / 4
    Ramax = max([Ra1, Ra2, Ra3, Ra4])
    Ramin = min([Ra1, Ra2, Ra3, Ra4])
    Rpp = (Rpp4 + Rpp4 + Rpp4 + Rpp4) / 4  #
    Rpt = (Rpt1 + Rpt2 + Rpt3 + Rpt4) / 4
    Rpmax = max([Rpt1, Rpt2, Rpt3, Rpt4])
    Rpmin = min([Rpt1, Rpt2, Rpt3, Rpt4])
    Rrp = (Rrp1 + Rrp2 + Rrp3 + Rrp4) / 4
    Rrt = (Rrt1 + Rrt2 + Rrt3 + Rrt4) / 4
    Rrmax = max([Rrt1, Rrt2, Rrt3, Rrt4])
    Rrmin = min([Rrt1, Rrt2, Rrt3, Rrt4])
    Rfp = (Rfp1 + Rfp2 + Rfp3 + Rfp4) / 4
    Rft = (Rft1 + Rft2 + Rft3 + Rft4) / 4
    Rfmax = max([Rft1, Rft2, Rft3, Rft4])
    Rfmin = min([Rft1, Rft2, Rft3, Rft4])

    #SVC LINEAR
    Lmc = (Lmc1 + Lmc2 + Lmc3 + Lmc4) / 4
    La = (La1 + La2 + La3 + La4) / 4
    Lamax = max([La1, La2, La3, La4])
    Lamin = min([La1, La2, La3, La4])
    Lpp = (Lpp4 + Lpp4 + Lpp4 + Lpp4) / 4
    Lpt = (Lpt1 + Lpt2 + Lpt3 + Lpt4) / 4
    Lpmax = max([Lpt1, Lpt2, Lpt3, Lpt4])
    Lpmin = min([Lpt1, Lpt2, Lpt3, Lpt4])
    Lrp = (Lrp1 + Lrp2 + Lrp3 + Lrp4) / 4
    Lrt = (Lrt1 + Lrt2 + Lrt3 + Lrt4) / 4
    Lrmax = max([Lrt1, Lrt2, Lrt3, Lrt4])
    Lrmin = min([Lrt1, Lrt2, Lrt3, Lrt4])
    Lfp = (Lfp1 + Lfp2 + Lfp3 + Lfp4) / 4
    Lft = (Lft1 + Lft2 + Lft3 + Lft4) / 4
    Lfmax = max([Lft1, Lft2, Lft3, Lft4])
    Lfmin = min([Lft1, Lft2, Lft3, Lft4])
    '''
    print "SVC Linear"
    print "Matriz de confusão: ", Lmc
    print "Acuracia: ", La
    print "Precisão parcial: ", Lpp
    print "Precisão total: ", Lpt
    print "Recall parcial: ", Lrp
    print "Recall total: ", Lrt
    print "F-medida parcial: ", Lfp
    print "F-medida total: ", Lft
    '''

    with open(path, mode='w') as csv_file:
        #writer = csv.writer(csv_file)
        csv_file.writelines('Algoritmo' + ';' + 'Multinominal Naïve-Bayes' +
                            '\n')
        csv_file.writelines('Iteração' + ';' + 'Acurácia' + ';' +
                            'Precisão parcial' + ';' + 'Precisão total' + ';' +
                            'revocação parcial' + ';' + 'revocação total' +
                            ';' + 'f-medida parcial' + ';' + 'f-medida total' +
                            '\n')
        csv_file.writelines('1;' + str(MNBa1) + ';' + str(MNBpp1) + ';' +
                            str(MNBpt1) + ';' + str(MNBrp1) + ';' +
                            str(MNBrt1) + ';' + str(MNBfp1) + ';' +
                            str(MNBft1) + '\n')
        csv_file.writelines('2;' + str(MNBa2) + ';' + str(MNBpp2) + ';' +
                            str(MNBpt2) + ';' + str(MNBrp2) + ';' +
                            str(MNBrt2) + ';' + str(MNBfp2) + ';' +
                            str(MNBft2) + '\n')
        csv_file.writelines('3;' + str(MNBa3) + ';' + str(MNBpp3) + ';' +
                            str(MNBpt3) + ';' + str(MNBrp3) + ';' +
                            str(MNBrt3) + ';' + str(MNBfp3) + ';' +
                            str(MNBft3) + '\n')
        csv_file.writelines('4;' + str(MNBa4) + ';' + str(MNBpp4) + ';' +
                            str(MNBpt4) + ';' + str(MNBrp4) + ';' +
                            str(MNBrt4) + ';' + str(MNBfp4) + ';' +
                            str(MNBft4) + '\n')
        csv_file.writelines('==================' + '\n')
        csv_file.writelines('Total' + '\n')
        csv_file.writelines('Média;' + str(MNBa) + ';' + str(MNBpp) + ';' +
                            str(MNBpt) + ';' + str(MNBrp) + ';' + str(MNBrt) +
                            ';' + str(MNBfp) + ';' + str(MNBft) + '\n')
        csv_file.writelines('Máximo;' + str(MNBamax) + "" + ';' +
                            str(MNBpmax) + "" + ';' + str(MNBrmax) + "" + ';' +
                            str(MNBfmax) + '\n')
        csv_file.writelines('Mínimo;' + str(MNBamin) + "" + ';' +
                            str(MNBpmin) + "" + ';' + str(MNBrmin) + "" + ';' +
                            str(MNBfmin) + '\n')
        csv_file.writelines('==================' + '\n')
        csv_file.writelines('Algoritmo' + ';' + 'Regressão Linear' + '\n')
        csv_file.writelines('Iteração' + ';' + 'Acurácia' + ';' +
                            'Precisão parcial' + ';' + 'Precisão total' + ';' +
                            'revocação parcial' + ';' + 'revocação total' +
                            ';' + 'f-medida parcial' + ';' + 'f-medida total' +
                            '\n')
        csv_file.writelines('1;' + str(Ra1) + ';' + str(Rpp1) + ';' +
                            str(Rpt1) + ';' + str(Rrp1) + ';' + str(Rrt1) +
                            ';' + str(Rfp1) + ';' + str(Rft1) + '\n')
        csv_file.writelines('2;' + str(Ra2) + ';' + str(Rpp2) + ';' +
                            str(Rpt2) + ';' + str(Rrp2) + ';' + str(Rrt2) +
                            ';' + str(Rfp2) + ';' + str(Rft2) + '\n')
        csv_file.writelines('3;' + str(Ra3) + ';' + str(Rpp3) + ';' +
                            str(Rpt3) + ';' + str(Rrp3) + ';' + str(Rrt3) +
                            ';' + str(Rfp3) + ';' + str(Rft3) + '\n')
        csv_file.writelines('4;' + str(Ra4) + ';' + str(Rpp4) + ';' +
                            str(Rpt4) + ';' + str(Rrp4) + ';' + str(Rrt4) +
                            ';' + str(Rfp4) + ';' + str(Rft4) + '\n')
        csv_file.writelines('==================' + '\n')
        csv_file.writelines('Total' + '\n')
        csv_file.writelines('Média;' + str(Ra) + ';' + str(Rpp) + ';' +
                            str(Rpt) + ';' + str(Rrp) + ';' + str(Rrt) + ';' +
                            str(Rfp) + ';' + str(Rft) + '\n')
        csv_file.writelines('Máximo;' + str(Ramax) + "" + ';' + str(Rpmax) +
                            "" + ';' + str(Rrmax) + "" + ';' + str(Rfmax) +
                            '\n')
        csv_file.writelines('Mínimo;' + str(Ramin) + "" + ';' + str(Rpmin) +
                            "" + ';' + str(Rrmin) + "" + ';' + str(Rfmin) +
                            '\n')
        csv_file.writelines('==================' + '\n')
        csv_file.writelines('Algoritmo' + ';' + 'SVC Linear' + '\n')
        csv_file.writelines('Iteração' + ';' + 'Acurácia' + ';' +
                            'Precisão parcial' + ';' + 'Precisão total' + ';' +
                            'revocação parcial' + ';' + 'revocação total' +
                            ';' + 'f-medida parcial' + ';' + 'f-medida total' +
                            '\n')
        csv_file.writelines('1;' + str(La1) + ';' + str(Lpp1) + ';' +
                            str(Lpt1) + ';' + str(Lrp1) + ';' + str(Lrt1) +
                            ';' + str(Lfp1) + ';' + str(Lft1) + '\n')
        csv_file.writelines('2;' + str(La2) + ';' + str(Lpp2) + ';' +
                            str(Lpt2) + ';' + str(Lrp2) + ';' + str(Lrt2) +
                            ';' + str(Lfp2) + ';' + str(Lft2) + '\n')
        csv_file.writelines('3;' + str(La3) + ';' + str(Lpp3) + ';' +
                            str(Lpt3) + ';' + str(Lrp3) + ';' + str(Lrt3) +
                            ';' + str(Lfp3) + ';' + str(Lft3) + '\n')
        csv_file.writelines('4;' + str(La4) + ';' + str(Lpp4) + ';' +
                            str(Lpt4) + ';' + str(Lrp4) + ';' + str(Lrt4) +
                            ';' + str(Lfp4) + ';' + str(Lft4) + '\n')
        csv_file.writelines('==================' + '\n')
        csv_file.writelines('Total' + '\n')
        csv_file.writelines('Média;' + str(La) + ';' + str(Lpp) + ';' +
                            str(Lpt) + ';' + str(Lrp) + ';' + str(Lrt) + ';' +
                            str(Lfp) + ';' + str(Lft) + '\n')
        csv_file.writelines('Máximo;' + str(Lamax) + "" + ';' + str(Lpmax) +
                            "" + ';' + str(Lrmax) + "" + ';' + str(Lfmax) +
                            '\n')
        csv_file.writelines('Mínimo;' + str(Lamin) + "" + ';' + str(Lpmin) +
                            "" + ';' + str(Lrmin) + "" + ';' + str(Lfmin) +
                            '\n')