Ejemplo n.º 1
0
"""
cliente = base.iniciar_conexao()
con_colecao = base.iniciar_colecao(cliente, "colecao_processada")


def load_base():
    all_textos = base.carrega_colecao_completo("brumadinhoinflux",
                                               "colecao_completa")
    return all_textos


# print('spaCy Version: %s' % (spacy.__version__))
spacy_nlp = spacy.load('pt')
nlp = spacy.load("pt_core_news_sm")
spacy_stopwords = spacy.lang.pt.stop_words.STOP_WORDS
set_stop = stopWords.load_stop_words()
# combina as duas bases de stopWords
set_stop.union(spacy_stopwords)

all_textos = load_base()


def to_int_str(data):
    return str(int(data))


def remover_acentos(txt):
    return normalize('NFKD', txt).encode('ASCII', 'ignore').decode('ASCII')


def common_words(tokens):
"""
import ast
import re
# import base
import string
from collections import Counter
from unicodedata import normalize

import emoji
import spacy
import stopWords.StopWords as stopWords

# Module-level setup: load the spaCy pipeline and build the combined
# stop-word collection `set_stop` used by the rest of this module.
# Debug aid, kept disabled:
# print('spaCy Version: %s' % (spacy.__version__))
nlp = spacy.load("pt_core_news_sm")
# NOTE(review): `spacy.lang.pt` is reached by attribute access with no explicit
# import; presumably it is available only because `spacy.load` above has
# already imported it -- confirm before reordering these statements.
spacy_stopwords = spacy.lang.pt.stop_words.STOP_WORDS
# Project stop words from the helper's default source.
set_stop = stopWords.load_stop_words()
# Load the adjectives list.
set_adjetivos = stopWords.load_stop_words("adjetivos.txt")
# Merge the spaCy stop words and the adjectives into the project stop words.
# `union` returns a new object, so the result is rebound each time.
set_stop = set_stop.union(spacy_stopwords)
set_stop = set_stop.union(set_adjetivos)


def remove_emoji(text):
    """Return ``text`` with every emoji removed.

    ``emoji.get_emoji_regexp`` was removed in emoji 2.0, so calling it
    unconditionally raises ``AttributeError`` on current releases. This
    function prefers ``emoji.replace_emoji`` when the installed package
    provides it and only falls back to the regex API on older releases.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` in which each emoji has been replaced by the
        empty string.
    """
    replace_emoji = getattr(emoji, "replace_emoji", None)
    if replace_emoji is not None:
        return replace_emoji(text, replace="")
    # Older emoji releases: strip every match of the library's emoji regex.
    return emoji.get_emoji_regexp().sub("", text)


def to_int_str(data):
    """Convert ``data`` to an integer and return that integer as a string.

    The conversion is done by ``int()``, so floats are truncated toward
    zero and anything ``int()`` rejects raises its usual exception.
    """
    as_integer = int(data)
    return str(as_integer)