import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

import read_dataset as rd
from text_processor import Preprocessor

# Read the dataset, then extract the training text and its targets.
train, test = rd.read()
train_text = rd.get_text(train)
y = rd.get_target(train)

# Process the training text. In this configuration only stop-word removal,
# punctuation removal and the `ent` option are switched on; every other
# option is disabled and no vectorizer is passed in.
processor = Preprocessor()
train_text = processor.process_dataset(
    train_text,
    n_gram=0,
    stem=False,
    tags=False,
    remove_stop_words=True,
    remove_punct=True,
    pos=False,
    dep=False,
    alpha=False,
    ent=True,
    vectorizer=None,
)
file.write('Recall Macro: ' + str(recall_mean) + ' (+/-) ' + str(recall_std * 2) + '\n' ) file.write('Precision Macro: ' + str(precision_mean) + ' (+/-) ' + str(precision_std * 2) + '\n' ) file.write('F1 Macro: ' + str(f1_mean) + ' (+/-) ' +str(f1_std * 2) + '\n' ) file.write('Accuracy: ' + str(accuracy_mean) + ' (+/-) ' +str(accuracy_std * 2) + '\n' ) file.write('\n\n#############################################\n\n') file.close() ## LENDO DATASET ###################### train,test = rd.read() categories = ['fake', 'real'] train_text = rd.get_text(train) train_target = rd.get_target(train) # test_text = rd.get_text(test) # test_target = rd.get_target(test) ################################################# combinations = get_combinations() # combinations = use_custom() for combination in combinations: analisar_features(train_text, stem=combination['stem'], remove_stop_words=combination['remove_stop_words'], remove_punct=combination['remove_punct'], n_gram=combination['n_gram'],
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn import metrics
import numpy as np
from sklearn.datasets import fetch_20newsgroups

import read_dataset as rd

# --- Reading the dataset ---------------------------------------------------
# NOTE(review): percent_train=0.5 presumably assigns half of the data to the
# training split -- confirm against read_dataset.read.
train, test = rd.read(percent_train=0.5)
categories = ['fake', 'real']

train_text = rd.get_text(train)
train_target = rd.get_target(train)
test_text = rd.get_text(test)
test_target = rd.get_target(test)

# --- Training Naive Bayes --------------------------------------------------
print('Treinando modelo com Naive bayes...')

# Three-stage pipeline: token counts -> TF-IDF transform -> multinomial
# Naive Bayes classifier, fitted on the training text and targets.
# NOTE(review): CountVectorizer and TfidfTransformer are not imported in the
# visible part of this file -- confirm they are imported elsewhere.
text_clf = Pipeline(steps=[
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
])
text_clf.fit(train_text, train_target)