Example #1
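Both methods below share the same self.social_network / self.search_word attributes and the same DataHandler, so they presumably belong to one analysis class. A minimal sketch of the imports the snippet relies on; the module paths for DataHandler and Processing are assumptions, since the example omits its import section:

# Imports this snippet relies on. The module paths for DataHandler and
# Processing are assumptions; SentimentIntensityAnalyzer could equally come
# from the standalone vaderSentiment package.
import nltk
import pandas as pd
import spacy
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer  # needs nltk.download('vader_lexicon')
from spacy.lang.pt.stop_words import STOP_WORDS  # Portuguese list, matching pt_core_news_sm
from unidecode import unidecode

from data_handler import DataHandler  # assumed module path
from processing import Processing     # assumed module path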
    def vader_sentiment(self):

        handler = DataHandler(self.social_network, self.search_word)
        df_network = handler.read_network_dataset()
        df = df_network[df_network.tweet != '']  # keep only rows with a non-empty tweet

        prepross = Processing(self.social_network, self.search_word)
        analyzer = SentimentIntensityAnalyzer()

        # Empty frame to be filled row by row with the scored tweets
        predict_df = pd.DataFrame(
            None,
            columns=['date', 'hashtag', 'tweet', 'clean_tweet', 'sentiment'])

        for i, row in df.iterrows():

            # Normalize the raw tweet, then take VADER's compound score
            # (-1 = most negative, +1 = most positive) as the sentiment
            clean_tweet = prepross.clean_text(row['tweet'])
            sentiment = analyzer.polarity_scores(clean_tweet)['compound']
            predict_df.loc[i] = [
                row['created_at'], row['hashtag'], row['tweet'], clean_tweet,
                sentiment
            ]

        # Persist the scored tweets as a semicolon-separated CSV
        predict_df.to_csv(r'data/output/dataset_predict.csv',
                          sep=';',
                          index=False)

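The compound value used above is VADER's normalized summary polarity, ranging from -1 (most negative) to +1 (most positive). As a standalone illustration, independent of the class, the cutoffs suggested in the vaderSentiment documentation turn it into a coarse label (VADER's lexicon is English, so an English sentence is used here):

# Standalone sketch; reuses the imports shown at the top
score = SentimentIntensityAnalyzer().polarity_scores(
    'The results were great, I loved it!')['compound']
if score >= 0.05:
    label = 'positive'
elif score <= -0.05:
    label = 'negative'
else:
    label = 'neutral'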
    def pre_processing(self):

        handler = DataHandler(self.social_network, self.search_word)
        df_network = handler.read_network_dataset()
        df = df_network[df_network.tweets != '']  # column is 'tweets' here but 'tweet' in vader_sentiment; one of the two is likely a typo

        # Portuguese spaCy model; swap in 'en_core_web_sm' for English corpora
        nlp = spacy.load('pt_core_news_sm')

        # NLTK resources: stop-word lists, the Punkt tokenizer, and the RSLP
        # Portuguese stemmer (presumably used inside Processing)
        nltk.download('stopwords')
        nltk.download('punkt')
        nltk.download('rslp')

        # Merge spaCy's Portuguese stop words with NLTK's English list, then
        # strip accents and lowercase them for uniform matching
        stop_words_ = STOP_WORDS.union(stopwords.words('english'))
        stop_words = [unidecode(stop).lower() for stop in stop_words_]

        # Vocabulary of the whole corpus, for unigrams and n-grams alike
        all_words, all_words_n_gram = Processing.words_dataset(
            df['tweets'], stop_words, nlp)

        bag_words = []
        bag_words_n_gram = []
        n_gram = []
        clean_tweets = []

        for sentence in df['tweets']:
            # Clean, lemmatize, and rejoin each post
            clean = Processing.clean_text(sentence, stop_words)
            token = Processing.lemma(clean.split(), nlp)
            concat = ' '.join(token)

            # Compute the n-grams once and reuse them for both outputs
            ngram = Processing.n_gram(concat)
            n_gram.append(ngram)
            bag_words_n_gram.append(
                Processing.bag_of_words(ngram, all_words_n_gram))
            bag_words.append(Processing.bag_of_words(concat.split(),
                                                     all_words))
            clean_tweets.append(concat)

        Processing.word_cloud(clean_tweets)  # render a word cloud of the cleaned corpus

        # Assemble the processed features and hand them to the data layer
        dataset = pd.DataFrame({
            "Posts": clean_tweets,
            "BOW": bag_words,
            "N-gram": n_gram,
            "BOW-N": bag_words_n_gram
        })
        handler.store_processed_dataset(dataset)
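A hypothetical driver tying the two methods together; the enclosing class name (Analysis) and the constructor arguments are assumptions, since the example shows only the method bodies:

class Analysis:
    """Assumed container for the two methods shown above."""

    def __init__(self, social_network, search_word):
        self.social_network = social_network
        self.search_word = search_word

    # vader_sentiment and pre_processing as defined above


analysis = Analysis('twitter', 'vaccine')  # hypothetical arguments
analysis.pre_processing()   # build BOW / n-gram features and the word cloud
analysis.vader_sentiment()  # write data/output/dataset_predict.csv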