Example #1
    def progress(self, chunk):
        text = str(self.combo.currentText())
        if text == 'German':
            from textblob_de import TextBlobDE as TextBlob
            logging.debug("Progress: %s" % text)
            logging.debug("Progress: %s" % chunk)

            blob = TextBlob(chunk)
            sent = blob.sentiment
            sentiment = ""
            if sent[0] < -0.20:
                sentiment = "Nicht Glücklich"
            elif sent[0] > 0.20:
                sentiment = "Glücklich"
            else:
                sentiment = "Neutral"

            print(chunk, ' , ', sentiment, ',', sent[0])
            self.textboxTranscript.insertPlainText("Transcription === " +
                                                   chunk)
            self.textboxTranscript.insertPlainText("\n--------------------")

            self.textboxTranscript.insertPlainText("\nSentiment === " +
                                                   sentiment)
            self.show()
        else:
            from textblob import TextBlob
            blob = TextBlob(chunk)
            sent = blob.sentiment

            sentiment = ""
            if sent[0] < -0.20:
                sentiment = "Not happy"
            elif sent[0] > 0.20:
                sentiment = "Happy"
            else:
                sentiment = "Neutral"

            print(chunk, ' , ', sentiment, sent[0])
            self.textboxTranscript.insertPlainText("Transcription === " +
                                                   chunk)
            self.textboxTranscript.insertPlainText("\n--------------------")

            self.textboxTranscript.insertPlainText("\nSentiment === " +
                                                   sentiment)
            self.show()
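Stripped of the GUI plumbing, the core pattern here is: pick the TextBlob implementation that matches the language, score the chunk, and bucket the polarity. A minimal sketch under that reading (function name and thresholds are illustrative, not from the original project):

def score_chunk(chunk, language='English'):
    # choose the implementation that matches the input language
    if language == 'German':
        from textblob_de import TextBlobDE as TextBlob
    else:
        from textblob import TextBlob
    polarity = TextBlob(chunk).sentiment[0]
    if polarity < -0.20:
        return 'negative'
    elif polarity > 0.20:
        return 'positive'
    return 'neutral'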
Example #2
    def progress(self, chunk):
        text = str(self.combo.currentText())
        if text == 'German':
            from textblob_de import TextBlobDE as TextBlob
            logging.debug("Progress: %s" % text)
            logging.debug("Progress: %s" % chunk)

            blob = TextBlob(chunk)
            sent = blob.sentiment
            sentiment = ""
            # polarity lies in [-1.0, 1.0]
            if sent[0] <= -0.80:
                sentiment = "Nicht Gut"
            elif sent[0] < 0.20:
                sentiment = "Neutral"
            else:
                sentiment = "Gut"

            print(chunk, ' , ', sentiment)
            self.textboxTranscript.insertPlainText(chunk)
            self.textboxTranscript.insertPlainText("\n*********************")

            self.textboxTranscript.insertPlainText(
                "\nUnd das Sentiment dieses Satzes ist ===== " + sentiment)
            self.show()
        else:
            from textblob import TextBlob
            blob = TextBlob(chunk)
            sent = blob.sentiment

            sentiment = ""
            # polarity lies in [-1.0, 1.0]
            if sent[0] <= -0.80:
                sentiment = "Not Happy"
            elif sent[0] < 0.20:
                sentiment = "Neutral"
            else:
                sentiment = "Happy"

            print(chunk, ' , ', sentiment)
            self.textboxTranscript.insertPlainText(chunk)
            self.textboxTranscript.insertPlainText("\n*********************")

            self.textboxTranscript.insertPlainText(
                "\nAnd the sentiment of that sentence is ===== " + sentiment)
            self.show()
Example #3
    def scrape(self, response):
        articleItem = response.meta['item']
        articleItem['headline'] = response.css('h3::text').extract()
        articleItem['date_publish'] = response.css(
            "time::attr('title')").extract()
        articleItem['article_text'] = response.css(
            '.css-1jftgse p::text').extract()
        article_text = ''.join(articleItem['article_text'])
        articleItem['author'] = response.css(
            ".css-134vnn1 section:nth-child(3) li a span::text").extract()
        articleItem['author'] = ','.join(articleItem['author'])
        txt_blob = TextBlob(article_text)
        articleItem['sentiment'] = txt_blob.sentiment
        key_words = self.get_hotwords(article_text)
        top_key_words = [(kw[0] + ', ')
                         for kw in Counter(key_words).most_common(7)]
        articleItem['keywords'] = ''.join(top_key_words)
        articleItem['summary'] = summarize(article_text, ratio=0.2)
        articleItem['link'] = response.url

        if 'Sekundäre Navigation' in articleItem['headline']:
            articleItem['headline'].remove('Sekundäre Navigation')

        # match dates like 24.12.2021; escape the dots so they are literal
        pattern = re.compile(r'\d{2}\.\d{2}\.\d{4}')
        for i in articleItem['date_publish']:
            result = pattern.search(i)
            if result:
                articleItem['date_publish'] = result.group()

        yield articleItem
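The escaped dots in the date pattern matter: with unescaped dots the regex would also match strings like "24-12-2021". A quick check with a made-up title string:

import re

pattern = re.compile(r'\d{2}\.\d{2}\.\d{4}')
m = pattern.search('Veröffentlicht am 24.12.2021 um 10:00')
print(m.group())  # 24.12.2021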
Example #4
 def process_tweet(self, tweet):
     raw_text = tweet["text"]
     text = re.sub(self.cleanup_regex, "", raw_text, 0)
     print("[Tweet] %s" % text)
     tb = TextBlob(" ".join(self.tweet_buffer))
     print(tb.sentiment.polarity)
     self.tweet_buffer.append(text)
     self.tweet_buffer = self.tweet_buffer[-1 * self.buffer_len:]
     self.tweet_count += 1
     if self.tweet_count % 100 == 0:
         print("[Status] Tweet count: {}".format(self.tweet_count))
         print("[Status] Tweets per second: {:.2}".format(
             self.tweet_count / (time.time() - self.time_start)))
         tb = TextBlob(" ".join(self.tweet_buffer))
         print("Sentiment last {} tweets: {:.5}".format(
             self.buffer_len, tb.sentiment.polarity))
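The rolling-buffer idea from Example #4, isolated from the streaming client as a small sketch (buffer size and function name are illustrative):

from textblob import TextBlob

buffer_len = 50
tweet_buffer = []

def push_and_score(text):
    # keep only the newest buffer_len texts and score them as one blob
    tweet_buffer.append(text)
    del tweet_buffer[:-buffer_len]
    return TextBlob(" ".join(tweet_buffer)).sentiment.polarity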
Example #5
def lemmatize(text):
    cleaned = cleantext(text)
    blob = TextBlob(str(cleaned))
    return blob.words.lemmatize()
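The underlying WordList.lemmatize() call can be tried directly; the output assumes the NLTK WordNet corpus is installed and, by default, treats every token as a noun:

from textblob import TextBlob

print(TextBlob("The cats were running").words.lemmatize())
# e.g. WordList(['The', 'cat', 'were', 'running'])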
Example #6
def get_most_significant_words(corpus):
    words = []

    for text in corpus:
        blob = TextBlob(text)
        words += list(blob.words)  # avoid the private _collection attribute

    return Counter(words)
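Usage sketch for get_most_significant_words on a toy corpus (counts depend only on tokenization):

corpus = ["good good movie", "bad movie"]
counts = get_most_significant_words(corpus)
print(counts.most_common(2))  # e.g. [('good', 2), ('movie', 2)]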
Example #7
 def test_tag_blob_pattern_tok_include_punc(self):
     blob = TextBlob(self.text, tokenizer=PatternTokenizer(),
                     pos_tagger=PatternTagger(include_punc=True))
     tags = blob.tags
     logging.debug("tags: {0}".format(tags))
     words = ["Das", "ist", "ein", "schönes", "Auto", "."]
     for i, word_tag in enumerate(tags):
         assert_equal(word_tag[0], words[i])
Example #8
 def test_tag_blob_defaults(self):
     blob = TextBlob(self.text)
     tags = blob.tags
     logging.debug("tags: {0}".format(tags))
     words = ["Das", "ist", "ein", "schönes", "Auto"]
     for i, word_tag in enumerate(tags):
         assert_equal(word_tag[0], words[i])
     assert_equal(tags[-1][0], "Auto")
Example #9
 def analize_sentiment(tweet):
     analysis = TextBlob(clean_tweet(tweet))
     if analysis.sentiment.polarity > 0:
         return 1
     elif analysis.sentiment.polarity == 0:
         return 0
     else:
         return -1
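The same sign-bucketing without the tweet cleaning, for a quick smoke test (clean_tweet is project-specific, so it is dropped here):

from textblob import TextBlob

def polarity_sign(text):
    # collapse continuous polarity into a three-way label
    p = TextBlob(text).sentiment.polarity
    return (p > 0) - (p < 0)

print(polarity_sign("I love this"))  # likely 1
print(polarity_sign("I hate this"))  # likely -1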
Example #10
def pipe_features_before_preprocessing(corpus):
    punctuation, caps_count, last_char = [], [], []
    for text in corpus:
        punctuation.append(feature_extraction.get_punctuation_vector(text))
        caps_count.append(feature_extraction.get_caps_words_count(text))
        last_char.append(feature_extraction.get_last_char_vector(text))

    return punctuation, caps_count, last_char
Example #11
def get_wv_vec_sequence(texts):
    texts_wv = []
    for text in texts:
        text = pipe_preprocessing(text)
        blob = TextBlob(text)
        wv = feature_extraction.get_word_vec_repr(blob.words)
        texts_wv.append(wv.reshape((1, wv.shape[0], wv.shape[1])))

    return texts_wv
Example #12
def trending(channel_name):
    # rank the words in the channel's messages by frequency
    cm = channel_messages(rocket, channel_name)['messages']
    messages = " ".join(x["msg"] for x in cm)
    blob = TextBlob(messages)
    return {
        "trending topics":
        sorted(blob.word_counts.items(), key=lambda t: t[1], reverse=True)
    }
Example #13
def sentiment_analysis():
    """
    Forms a sentiment from given sentence
    :return: sentiment ranging from -1 (bad) to 1 (good) while 0 equals neutral.
    """
    data = request.get_json(force=True)
    print(data)
    sentence = data["sentence"]
    print(sentence)
    sentiment = TextBlob(sentence).sentiment
    return jsonify(sentiment)
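Calling the endpoint, assuming it is registered as a POST route at /sentiment (route and port are illustrative):

import requests

resp = requests.post("http://localhost:5000/sentiment",
                     json={"sentence": "great stuff"})
# jsonify renders the Sentiment namedtuple as a JSON array: [polarity, subjectivity]
print(resp.json())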
Example #14
    def get_sentiment(self, tweet):
        """ get a sentiment score for a specific tweet

        Args:
            tweet (string): Text of tweet

        Returns:
            float: Sentiment score
        """
        analysis = TextBlob(tweet)

        return analysis.sentiment.polarity
Example #15
    def lemmatize_words(self, text):
        """ Lemmatize a string of words

        Args:
            text (string): Text which should be lemmatized

        Returns:
            list: List of lemmatized words
        """
        analyse = TextBlob(text)
        wl = analyse.words.lemmatize()

        return wl
Example #16
    def predict(self, textarray):
        prediction = list()
        for text in tqdm(textarray):
            blob = TextBlob(text)
            sentiment = blob.sentiment.polarity

            if sentiment > 0:
                prediction.append("positive")
            elif sentiment < 0:
                prediction.append("negative")
            else:
                prediction.append("neutral")
        return prediction
Example #17
def sentiment_analyse(hits):
    words = prep.get_words(hits)
    lemmalist = prep.get_lemmalist(words, "all")

    senti_dict = {}

    for wort in gleichhäufige_worte:
        indices = [i for i, x in enumerate(lemmalist) if x == wort]
        senti_list = []

        # score a window of five lemmas on each side of the aspect word
        for i in indices:
            beginn = max(i - 5, 0)
            end = i + 6

            aspect = " ".join(lemmalist[beginn:end]).lower()
            sentiment = 0
            counter = 0
            doc = nlp(aspect)
            for token in doc:
                if token._.sentiws is not None:
                    counter += 1
                    sentiment += token._.sentiws

            if counter != 0:
                sentiment = sentiment / counter

            senti_list.append(sentiment)

            # optional cross-check against TextBlob's sentiment
            # blob3 = TextBlob(aspect)
            # print("TextBlob: ", blob3.sentiment)

        if senti_list:  # guard against words that never occur
            print(f"Average sentiment value of {wort} is: {sum(senti_list)/len(senti_list)}")
            senti_dict[wort] = sum(senti_list) / len(senti_list)

    return senti_dict
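The five-lemma window around the aspect word, isolated (the lemma list here is made up):

lemmalist = ["das", "essen", "war", "wirklich", "sehr", "gut", "heute"]
i = lemmalist.index("gut")
beginn = max(i - 5, 0)
aspect = " ".join(lemmalist[beginn:i + 6]).lower()
print(aspect)  # das essen war wirklich sehr gut heute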
Example #18
def get_embedding_indices(texts, maxlen=None):
    texts_indices = []
    for text in texts:
        text = pipe_preprocessing(text)
        blob = TextBlob(text)
        indices = feature_extraction.get_embedding_indices(blob.words)
        texts_indices.append(indices)

    if maxlen:
        return pad_sequences(texts_indices,
                             maxlen=maxlen,
                             padding='post',
                             truncating='post')
    return texts_indices
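The padding step on its own, assuming the Keras utility (the import path varies across Keras/TensorFlow versions):

from tensorflow.keras.preprocessing.sequence import pad_sequences

print(pad_sequences([[1, 2], [3]], maxlen=3, padding='post', truncating='post'))
# [[1 2 0]
#  [3 0 0]]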
Example #19
 def on_data(self, data):

     try:
         # decode json
         dict_data = json.loads(data)

         # only index German tweets
         if dict_data["lang"] != "de":
             return

         # pass tweet into TextBlob
         tweet = TextBlob(dict_data["text"])
         print(tweet)

         # determine if sentiment is positive, negative, or neutral
         if tweet.sentiment.polarity < 0:
             sentiment = "negative"
         elif tweet.sentiment.polarity == 0:
             sentiment = "neutral"
         else:
             sentiment = "positive"

         createTimestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.strptime(dict_data['created_at'], '%a %b %d %H:%M:%S +0000 %Y'))

         # add text and sentiment info to elasticsearch
         es.index(index="tweets",
                  doc_type="tweet_sentiment",
                  body={"date": createTimestamp,
                        "user": dict_data["user"]["id_str"],
                        "message": dict_data["text"],
                        "language": dict_data["lang"],
                        "hashtags": " ".join(str(h["text"]) for h in dict_data["entities"]["hashtags"]),
                        "location": dict_data["coordinates"]["coordinates"] if dict_data["coordinates"] is not None else None,
                        "polarity": tweet.sentiment.polarity,
                        "subjectivity": tweet.sentiment.subjectivity,
                        "sentiment": sentiment})

         self.tweet_count += 1
         if self.tweet_count % 100 == 0:
             print('Indexed {0} tweets'.format(self.tweet_count))

     except Exception:
         print("error in listener:", sys.exc_info()[0])

     return True
Example #20
 def test_tag_blob_nltk_tok_include_punc_stts(self):
     blob = TextBlob(
         self.text,
         tokenizer=NLTKPunktTokenizer(),
         pos_tagger=PatternTagger(
             include_punc=True,
             tagset='stts'))
     tags = blob.tags
     logging.debug("tags: {0}".format(tags))
     words = ["Das", "ist", "ein", "schönes", "Auto", "."]
     stts_tags = ["PDS", "VVFIN", "ARTIND", "ADJA", "NN", "S"]
     for i, word_tag in enumerate(tags):
         assert_equal(word_tag[0], words[i])
         assert_equal(word_tag[1], stts_tags[i])
     assert_equal(tags[-1][0], ".")
Example #21
    def sentiment_tweet_analysis(df: pd.DataFrame) -> pd.DataFrame:
        """
        Processes full tweets in 'text' column to sentiment values, based on TextBlobDE.
        :param df: pd.DataFrame, containing preprocessed tweets to analyze
        :return: pd.DataFrame, containing sentiment values
        """
        # TODO: Make language compatible sentiment analysis (Problems might arise when using TextBlobDE on 'eng' tweets)
        tweet_sentiments = [TextBlob(tweet) for tweet in df['text']]
        sentiment_vals = [
            objects.sentiment.polarity for objects in tweet_sentiments
        ]

        return pd.DataFrame(list(
            zip(sentiment_vals, [tweet for tweet in df['text']])),
                            columns=["polarity", "tweet"])
Example #22
    def sentiment_word_analysis(df: pd.DataFrame) -> pd.DataFrame:
        """
        Processes words in 'text' column to sentiment values, based on TextBlobDE.
        :param df: pd.DataFrame, containing preprocessed tweets to analyze
        :return: pd.DataFrame, containing sentiment values
        """
        # TODO: Make language compatible sentiment analysis (Problems might arise when using TextBlobDE on 'eng' tweets)
        word_sentiments = [[TextBlob(word) for word in tweet]
                           for tweet in df['text'].apply(lambda x: x.split())]
        sentiment_vals = [[[word.sentiment.polarity,
                            str(word)] for word in tweet]
                          for tweet in word_sentiments]

        return pd.DataFrame(list(itertools.chain(*sentiment_vals)),
                            columns=["polarity", "word"])
Example #23
    def sentiment(self, text):
        # share of sentences scored positive vs. non-positive; without
        # labelled data these are rates, not accuracies
        pos_count = 0
        pos_correct = 0
        neg_count = 0
        neg_correct = 0

        for line in text.split('.'):
            analysis = TextBlob(line)
            if analysis.sentiment.polarity > 0:
                pos_correct += 1
            pos_count += 1
            if analysis.sentiment.polarity <= 0:
                neg_correct += 1
            neg_count += 1
        return "Positive accuracy = {:0.2f}%".format(pos_correct/pos_count * 100.0) + \
               " Negative accuracy = {:0.2f}%".format(neg_correct/neg_count * 100.0)
Example #24
def get_wv_vec(texts, max_dimension=6000):
    texts_wv = []
    for text in texts:
        text = pipe_preprocessing(text)
        blob = TextBlob(text)
        wv = feature_extraction.get_word_vec_repr(blob.words)
        wv = wv.reshape([1, wv.shape[1] * wv.shape[0]])
        if wv.shape[1] > max_dimension:
            wv = np.reshape(wv[0, :max_dimension], (1, max_dimension))
        elif wv.shape[1] < max_dimension:
            wv = np.pad(wv, ((0, 0), (0, max_dimension - wv.shape[1])),
                        'constant',
                        constant_values=0)

        texts_wv.append(wv)

    return np.reshape(np.array(texts_wv), [len(texts_wv), max_dimension])
Example #25
def get_sentiment(number):
    site = util.get_session_value(session.attributes, "siteName")

    if site is not None:
        obj = get_site_obj(site)
    else:
        session.attributes["lastCall"] = "senti"
        return question("Wonach wollen Sie suchen?")

    if obj is None:  # should never be called
        return question("Error. Wonach wollen Sie suchen?")

    links = util.get_session_value(session.attributes, "lastSearch")
    # if the site uses relative links, make absolute ones
    if str(links).count("http") < len(links):
        newLinks = []
        for link in links:
            if "http" not in link:
                newLinks.append(obj.baseURL + link)
            else:
                newLinks.append(link)
        links = newLinks

    if int(number) > len(links):
        return question(
            "Dieser Artikel existiert leider nicht, versuchen Sie eine andere Nummer."
        )

    url = links[int(number) - 1]
    NewsText = obj.read_article(url)

    newText = ""
    for text in NewsText:
        newText += text

    newText = TextBlob(newText)
    sent = newText.sentiment[0]

    if sent < 0:
        good = "eher negativ"
    else:
        good = "positiv"

    return question("Das Sentiment ist " + good)
Example #26
def metricate(tweetdf):
    """ takes a dataframe with tweets from
     one user and calculates some metrics for it.
     returns these values as dict
    """
    if len(tweetdf) == 0:
        return None

    user = tweetdf.name_id[0]
    # share of retweets and replies among all tweets
    retweet_rate = tweetdf.t_isrt.mean()
    replie_rate = tweetdf.t_isrpl.mean()

    # tweets per day over the covered time span
    ix = len(tweetdf.index) - 1
    last_tweet = tweetdf.t_date[ix].to_pydatetime()
    td = datetime.datetime.now() - last_tweet

    days = td.total_seconds() / (3600 * 24)
    ts_perday = len(tweetdf) / days

    # collect all hashtags into one comma-separated string
    hashtags = tweetdf.t_hashtags.explode().dropna()
    hs = ", ".join(hashtags)

    #sentiment analysis (experimental)
    sentiments = []
    for text in tweetdf.t_text.values:
        blob = TextBlob(text)
        sent = blob.polarity
        sentiments.append(sent)
    avg_sent = np.mean(sentiments)

    return {
        "name_id": user,
        "retweet_rate": retweet_rate,
        "replie_rate": replie_rate,
        "ts_perday": ts_perday,
        "hashtags": hs,
        "avg_sent": avg_sent
    }
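A minimal frame to exercise metricate; the column names follow what the function reads, the values are made up:

import pandas as pd

df = pd.DataFrame({
    "name_id": ["user1", "user1"],
    "t_isrt": [0, 1],
    "t_isrpl": [0, 0],
    "t_date": pd.to_datetime(["2021-01-02", "2021-01-01"]),
    "t_hashtags": [["nlp"], []],
    "t_text": ["good day", "bad day"],
})
print(metricate(df))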
Example #27
 def lemmatized_NLTK(self):
     seen = []
     blob = self.blob
     for word, lemma, tag in zip(blob.words, blob.words.lemmatize(),
                                 blob.tags):
         if tag[1] == 'JJ' or tag[1] == 'VB':
             if (word, lemma) in seen:
                 continue
             else:
                 seen.append((word, lemma))
                 self._lemmatized_NLTK.append((word, lemma))
     tmp = []
     for chunk in self.blob.noun_phrases:
         # blob the chunk and lemmatize it
         lemmatized_chunk = ' '.join(TextBlob(chunk).words.lemmatize())
         tmp.append((chunk, lemmatized_chunk))
     # self._lemmatized_NLTK.append(self.noun_chunks_NLTK)
     self._lemmatized_NLTK += tmp
     return self._lemmatized_NLTK
Example #28
def get_sentiment(row):

    # last column of each row is the text

    # polarity and subjectivity
    blob = TextBlob(row[-1])
    mood = blob.sentiment

    pol_score = mood.polarity
    sub_score = mood.subjectivity

    # polarity is a float within the range [-1.0, 1.0]
    # subjectivity is a float within the range [0.0, 1.0] where 0.0 is very objective and 1.0 is very subjective.

    # readability
    rb = readability.getmeasures(row[-1], lang='de')
    rb_score = rb['readability grades']['FleschReadingEase']

    return [row, pol_score, sub_score, rb_score]
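Call shape for get_sentiment, with the text in the last column as the comment says (scores depend on the installed TextBlob corpora and the readability package):

row = ["42", "2021-01-01", "Das war ein wirklich schöner Tag."]
result = get_sentiment(row)
print(result[1:])  # [polarity, subjectivity, Flesch reading ease]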
Example #29
def pipe_features_after_preprocessing(corpus):
    sepl, word_vec, sum_vec = [], [], []
    pos_similarity, neg_similarity = [], []
    polarity_score, subjectivity_score = [], []
    for text in corpus:
        if len(text) > 0:  # after preprocessing the text could have 0 words left
            blob = TextBlob(text)
            sepl.append(feature_extraction.get_sentiment_phrase_score(text))
            word_vec.append(feature_extraction.get_word_vec_repr(blob.words))
            sum_vec.append(feature_extraction.get_sum_vec(blob.words))
            pos_similarity.append(
                feature_extraction.get_positive_word_vec_similarity(
                    blob.words))
            neg_similarity.append(
                feature_extraction.get_negative_word_vec_similarity(
                    blob.words))
            polarity_score.append(feature_extraction.get_polarity_score(text))
            subjectivity_score.append(
                feature_extraction.get_subjectivity_score(text))
    msw = get_most_significant_words(corpus)
    return sepl, pos_similarity, neg_similarity, polarity_score, word_vec, sum_vec, subjectivity_score, msw
Example #30
def pipe_features_after_preprocessing(text):
    if len(text) > 0:  # after preprocessing the text could have 0 words left
        blob = TextBlob(text)
        feature_vec = feature_extraction.get_sentiment_phrase_score(text)
        feature_vec = np.concatenate(
            (feature_vec,
             feature_extraction.get_positive_word_vec_similarity(blob.words)),
            axis=1)
        feature_vec = np.concatenate(
            (feature_vec,
             feature_extraction.get_negative_word_vec_similarity(blob.words)),
            axis=1)
        feature_vec = np.concatenate(
            (feature_vec, feature_extraction.get_polarity_score(text)), axis=1)
        feature_vec = np.concatenate(
            (feature_vec, feature_extraction.get_subjectivity_score(text)),
            axis=1)

        return feature_vec

    return np.zeros([1, 10])