def DefinePontuacao(incluir=None, excluir=None):
    """Return Python's predefined punctuation marks as a customised list.

    Starts from ``string.punctuation`` (one list item per character),
    appends the items of *incluir*, and then drops every item found in
    *excluir*. Exclusion is applied last, so an item present in both
    arguments is removed from the result.

    Args:
        incluir: Optional iterable of extra symbols to append. ``None`` or
            an empty sequence adds nothing.
        excluir: Optional container of symbols to remove. ``None`` or an
            empty container removes nothing.

    Returns:
        A new list of punctuation symbols; the arguments are not modified.
    """
    from string import punctuation as punct

    # Defaults are None rather than [] so that no mutable object is shared
    # between calls.
    symbols = list(punct)
    if incluir:
        symbols.extend(incluir)
    if excluir:
        symbols = [p for p in symbols if p not in excluir]
    return symbols
# Example #2
# 0
# Training data.
# NOTE: read_excel has no `encoding` parameter (a workbook carries its own
# encoding); newer pandas versions reject the unknown keyword with a
# TypeError, so it is not passed here.
tweets_df = pd.read_excel('/Users/oscar/test.xlsx', header=0)

# Tweets to predict.
tweets = pd.read_excel(
    '/Users/oscar/Desktop/Sentiment/conneutros/Test_En.xlsx',
    header=0)

# Stop words plus punctuation, including the Spanish inverted marks.
# `punctuation` is presumably string.punctuation imported earlier in the
# file (TODO confirm); it already contains '!', so the extra symbols needed
# are the inverted '¿' and '¡'.
spa_stop = stopwords.words('spanish')
punctuation = list(punctuation)
punctuation.extend(['¿', '¡'])
# Extending with `punctuation` already brings in the inverted marks, so no
# second extend is needed.
spa_stop.extend(punctuation)

# Spanish stemmer.
stemmer = SnowballStemmer('spanish')
# reduce_len=True: "waaaaayyyy" -> "waaayyy"
tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)


def token_stemmer(token, stemmer):
    """Stem every item of *token* and return the results as a list.

    Args:
        token: Iterable of tokens to stem.
        stemmer: Object exposing a ``stem(item)`` method, called once per
            item in iteration order.

    Returns:
        A new list holding ``stemmer.stem(item)`` for each item, in order.
    """
    return [stemmer.stem(word) for word in token]