#!/usr/bin/env python
# -*- coding: utf-8 -*-

import read_dataset as rd

# Dump every training text to news.txt, one text per line.
train, test = rd.read()
dataset = rd.get_text(train)

# Append mode is preserved, so repeated runs keep adding to the same file.
# The context manager guarantees the handle is flushed and closed even if a
# write raises part-way through the loop.
with open('news.txt', 'a') as news_file:
    for text in dataset:
        news_file.write(text + '\n')
  # NOTE(review): this is the tail of a results-logging routine whose header
  # is outside this excerpt. `file` and the *_mean / *_std values are
  # presumably bound earlier in that routine -- confirm against the full source.
  print('Escrevendo arquivo de log\n')
  # Each metric is written on its own line as "<name>: <mean> (+/-) <2 * std>".
  file.write('Recall Macro: ' + str(recall_mean) + ' (+/-) ' + str(recall_std * 2) + '\n' )
  file.write('Precision Macro: ' + str(precision_mean) + ' (+/-) ' + str(precision_std * 2) + '\n' )
  file.write('F1 Macro: ' + str(f1_mean) + ' (+/-) ' +str(f1_std * 2) + '\n' )
  file.write('Accuracy: ' + str(accuracy_mean) + ' (+/-) ' +str(accuracy_std * 2) + '\n' )

  # Visual separator written after the metrics block.
  file.write('\n\n#############################################\n\n')
  # Close the log handle once the entry has been written.
  file.close() 



# --- Load the dataset -------------------------------------------------
train, test = rd.read()
categories = ['fake', 'real']

# Texts and labels of the training split (text is extracted first, then
# the targets).
train_text, train_target = rd.get_text(train), rd.get_target(train)

# Extraction for the test split is currently disabled:
# test_text = rd.get_text(test)
# test_target = rd.get_target(test)
# ----------------------------------------------------------------------

# Settings to iterate over; the commented line is an alternative source.
combinations = get_combinations()
# combinations = use_custom()


for combination in combinations:
  analisar_features(train_text,
                    stem=combination['stem'],
                    remove_stop_words=combination['remove_stop_words'], 
                    remove_punct=combination['remove_punct'], 
Ejemplo n.º 3
0
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn import metrics
import numpy as np
from sklearn.datasets import fetch_20newsgroups

import read_dataset as rd

# --- Load the dataset -------------------------------------------------
train, test = rd.read(percent_train=0.5)
categories = ['fake', 'real']

# Texts and labels for each split, extracted in the order
# train text, train target, test text, test target.
train_text, train_target = rd.get_text(train), rd.get_target(train)
test_text, test_target = rd.get_text(test), rd.get_target(test)
# ----------------------------------------------------------------------

# --- Train the Naive Bayes model --------------------------------------

print('Treinando modelo com Naive bayes...')
text_clf = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),