def DefinePontuacao(incluir=None, excluir=None):
    """Return Python's predefined punctuation marks as a customised list.

    The result starts from ``string.punctuation`` (one list element per
    character), then every element of ``incluir`` is appended, and finally
    every element found in ``excluir`` is dropped.

    Args:
        incluir: Optional iterable of extra symbols to append. ``None`` or
            an empty iterable adds nothing.
        excluir: Optional container of symbols to remove. It is applied
            after ``incluir``, so it can also remove freshly added symbols.

    Returns:
        A new list of punctuation symbols; a fresh list is built on every
        call.
    """
    from string import punctuation

    # The defaults are None rather than [] so that no mutable default object
    # is shared between calls.
    punct = list(punctuation)
    if incluir:
        punct.extend(incluir)
    if excluir:
        punct = [p for p in punct if p not in excluir]
    return punct
# Train:
# NOTE(review): recent pandas releases may reject the `encoding` keyword in
# read_excel -- confirm against the installed pandas version.
tweets_df = pd.read_excel('/Users/oscar/test.xlsx', header=0,
                          encoding='iso8859_15')

# Tweets to predict:
tweets = pd.read_excel(
    '/Users/oscar/Desktop/Sentiment/conneutros/Test_En.xlsx',
    header=0, encoding='iso8859_15')

# Stop words + Spanish "special" punctuation.
spa_stop = stopwords.words('spanish')
punctuation = list(punctuation)
# Add the inverted Spanish marks. The original list was ['¿', '!'];
# assuming `punctuation` is string.punctuation (imported earlier in the
# file -- TODO confirm), '!' was already present and '¡' was never added.
punctuation.extend(['¿', '¡'])
# `punctuation` already carries the extra marks, so a single extend is
# enough; the original added the same two symbols to spa_stop a second time.
spa_stop.extend(punctuation)

# Spanish stemmer:
stemmer = SnowballStemmer('spanish')

# reduce_len=True: "waaaaayyyy" -> "waaayyy"
tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)


def token_stemmer(token, stemmer):
    """Return the stem of every item in ``token``, preserving order.

    Args:
        token: Iterable of items to stem (presumably the token strings of
            one tweet -- verify against the caller).
        stemmer: Object exposing a ``stem(item)`` method; it is called once
            per item.

    Returns:
        A new list with ``stemmer.stem(item)`` for each item of ``token``.
    """
    return [stemmer.stem(item) for item in token]