Example #1
def handler(context, event):
    body = event.body.decode('utf-8')
    context.logger.debug_with('Analyzing ', 'sentence', body)

    analyzer = SentimentIntensityAnalyzer()

    score = analyzer.polarity_scores(body)

    return str(score)
Example #2
def onTrigger(context, session):
  flow_file = session.get()
  if flow_file is not None:
    sentiment = VaderSentiment()
    session.read(flow_file,sentiment)
    analyzer = SentimentIntensityAnalyzer()
    vs = analyzer.polarity_scores(sentiment.content)
    flow_file.addAttribute("positive",str(vs['pos']))
    flow_file.addAttribute("negative",str(vs['neg']))
    flow_file.addAttribute("neutral",str(vs['neu']))
    session.transfer(flow_file, REL_SUCCESS)
Example #3
class Sentiment(object):

    def __init__(self):
        self._analyser = SentimentIntensityAnalyzer()
        self._return = {}

    def sentiment_analyzer_scores(self, sentence):
        score = self._analyser.polarity_scores(sentence)
        self._return["sentence"] = sentence
        self._return["score"] = score
        # print("{:-<40} {}\n".format(sentence, str(score)))
        return self._return

    def similarity(self, obj1, obj2, fuzzy_match=False, match_threshold=0.8):
        return textacy.similarity.jaccard(obj1, obj2, fuzzy_match=fuzzy_match, match_threshold=match_threshold)

    def hamming(self, str1, str2):
        return textacy.similarity.hamming(str1, str2)

    """
    Returns the sentiment with maximum score
    
    pos, neg or neu
    
    """

    def sentiment(self):
        self._return["score"].pop("compound", None)
        return max(self._return["score"].items(), key=operator.itemgetter(1))[0]
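
A minimal usage sketch for the class above (assuming the vaderSentiment, textacy and operator imports the class relies on are in place):

s = Sentiment()
result = s.sentiment_analyzer_scores("The plot was good, but the dialog is not great.")
print(result["score"])   # full VADER score dict for the sentence
print(s.sentiment())     # label with the highest score: 'pos', 'neg' or 'neu'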
Example #4
def fetch_tweets(api, name):
    """
    Given a tweepy API object and the screen name of the Twitter user,
    create a list of tweets where each tweet is a dictionary with the
    following keys:

       id: tweet ID
       created: tweet creation date
       retweeted: number of retweets
       text: text of the tweet
       hashtags: list of hashtags mentioned in the tweet
       urls: list of URLs mentioned in the tweet
       mentions: list of screen names mentioned in the tweet
       score: the "compound" polarity score from vader's polarity_scores()

    Return a dictionary containing keys-value pairs:

       user: user's screen name
       count: number of tweets
       tweets: list of tweets, each tweet is a dictionary

    For efficiency, create a single Vader SentimentIntensityAnalyzer()
    per call to this function, not per tweet.
    """

    ret_user_info = dict()
    tweets = []
    user = api.get_user(name)
    ret_user_info['user'] = user.screen_name
    ret_user_info['count'] = user.statuses_count
    analyzer = SentimentIntensityAnalyzer()
    raw_tweets = api.user_timeline(screen_name = name,count=100)
    for raw_tweet in raw_tweets:
        tweet = dict()
        tweet['id'] = raw_tweet.id_str
        tweet['created'] = raw_tweet.created_at.date()
        tweet['retweeted'] = raw_tweet.retweet_count
        tweet['text'] = raw_tweet.text
        tweet['hashtags'] = [ raw_tweet.entities[u'hashtags'][i][u'text'] for i in range(len(raw_tweet.entities[u'hashtags'])) ]
        tweet['urls']     = [ raw_tweet.entities[u'urls'][i][u'expanded_url'] for i in range(len(raw_tweet.entities[u'urls'])) ]
        tweet['mentions'] = [ raw_tweet.entities[u'user_mentions'][i][u'screen_name'] for i in range(len(raw_tweet.entities[u'user_mentions'])) ]
        tweet['score']    = analyzer.polarity_scores(raw_tweet.text)['compound']
        tweets.append(tweet)
    ret_user_info['tweets'] = tweets

    return ret_user_info
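
A hedged usage sketch for fetch_tweets, assuming tweepy v3-style calls (matching api.get_user(name) above) and that consumer_key/consumer_secret/access_token/access_token_secret are already defined; "nasa" is an arbitrary screen name:

import tweepy

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

info = fetch_tweets(api, "nasa")
print(info["user"], info["count"])
print(info["tweets"][0]["score"] if info["tweets"] else "no tweets returned")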
Example #5
def artistanalyzer_result():
    # user_input = "jay z"
    user_input = request.form['user_input']
    user_input = user_input.replace(' ', '-')

    ### start timer ###
    start = datetime.now()

    ### scraping song links ###
    print('--- scraping song links ---')
    source = requests.get(
        f'https://www.songlyrics.com/{user_input}-lyrics/').text
    soup = BeautifulSoup(source, 'lxml')
    songlist = soup.find('div', class_='listbox')
    tracklist = songlist.find('table', class_='tracklist').tbody
    song_links = []
    artist_details = []
    for song in tracklist.find_all('tr', itemprop="itemListElement"):
        if song.td.text in [str(x) for x in range(50 + 1)]:
            link = song.find('a')['href']
            if link not in song_links:
                song_links.append(link)
    ### collecting song details ###
    print('--- scraping song details text ---')
    for val in song_links:
        song_title = val[27:-1].split('/', 1)[1]
        song_title = song_title[:-6].replace('-', ' ').capitalize()
        artist_name = user_input.replace('-', ' ').title()
        ### scraping song text ###
        songsource = requests.get(val).text
        soup2 = BeautifulSoup(songsource, 'lxml')
        block = soup2.find('div', id='songLyricsContainer')
        if block.find('p') is not None and block.find('p').text:
            text = block.find('p').text
            if 'feat.' not in text:
                permitted = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "
                songtext = text.lower()
                songtext = ' '.join(word for word in songtext.split()
                                    if word[0] != '[')
                songtext = songtext.replace("\n", " ").strip()
                songtext = "".join(c for c in songtext if c in permitted)
                songtext = songtext.replace("  ", " ").capitalize()
                artist_details.append([artist_name, song_title, songtext])
    ### create a data frame ###
    print('--- Data Frame ---')
    df = pd.DataFrame(artist_details,
                      columns=['Artist Name', 'Song Title', 'Song Text'])
    ### analysing text ###
    # def sent_to_words(sentence):
    #         yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))
    top_words = []
    num_words = []
    pol_sent = []
    analyzer = SentimentIntensityAnalyzer()
    for val in df['Song Text']:
        res = len(val.split())
        num_words.append(res)
        val = remove_stopwords(val.lower())

        polar = analyzer.polarity_scores(val)
        es = list(polar.items())
        pol = list(es[-1])
        song_1 = ' '.join(word for word in val.split() if len(word) > 3)
        split_val = song_1.split()
        count = Counter(split_val)
        comm = count.most_common(5)
        top_words.append(comm)
        pol_sent.append(pol[1])
    df['Num Words'] = num_words
    df['Top Words'] = top_words
    df['Pol Scores'] = pol_sent

    length_song = df['Num Words'].max()
    long_song = list(df.loc[df['Num Words'] == length_song, 'Song Title'])
    df = df[[
        'Artist Name', 'Song Title', 'Song Text', 'Num Words', 'Top Words',
        'Pol Scores'
    ]]
    list_2 = list(df['Pol Scores'])
    pol_list = []
    for val in list_2:
        if val > .50:
            pol_list.append('Positive')
        elif val < .50:
            pol_list.append('Negative')
        else:
            pol_list.append('Neutral')
    df['+/-'] = pol_list
    df = df[df['Song Text'] != 'We do not have the lyrics for soon come yet']
    table = HTML(df.to_html(classes='table table-striped'))
    pos_out = df['+/-'].mode()
    total_pol = pos_out[0]
    artist_name = df["Artist Name"][0]
    num_songs = len(df)
    num_words = df["Num Words"].sum()
    longest_song = long_song[0]
    longestword_count = df['Num Words'].max()
    dict_data = {
        'Artist Name': artist_name,
        'Number of Songs': num_songs,
        'Number of Words': num_words,
        'Longest Song': longest_song,
        'Longest Song Word Count': longestword_count,
        "Artist's Overall Polarity": total_pol
    }
    ### data frame ###
    # print(df.shape)
    # print(df.head())
    print(f'Artist Name: {artist_name}')
    print(f'Total number of songs: {num_songs}')
    print(f'Total Number of Words: {num_words}')
    print(
        f'Song with most words: {longest_song}, word count: {longestword_count}'
    )
    print(f'Overall polarity: {total_pol}')
    print('-----')
    #### finish timer ###
    print('--- runtime ---')
    break1 = datetime.now()
    print("Elapsed time: {0}".format(break1 - start))  # show timer

    return render_template('artistanalyzer.html',
                           tables=[table],
                           data=dict_data.items())
Example #6
def sentiment_analysis(content):

    senti_analyzer = SentimentIntensityAnalyzer()
    return senti_analyzer.polarity_scores(content)
Example #7
import time

import pandas as pd
from sklearn.metrics import confusion_matrix
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

data_source_url = "https://raw.githubusercontent.com/javaidnabi31/Word-Embeddding-Sentiment-Classification/master/movie_data.csv"
df = pd.read_csv(data_source_url)
#print(df.head(3))
sentimentValues = []
x_train = df.loc[:24999, 'review'].values
y_train = df.loc[:24999, 'sentiment'].values
x_test = df.loc[25000: 50000, 'review'].values
y_test = df.loc[25000: 50000, 'sentiment'].values
db = pd.DataFrame.from_dict(y_test)
db.columns = ['yb']

analyzer = SentimentIntensityAnalyzer()
start_time = time.time()
for x in x_test:
    score = analyzer.polarity_scores(x)
    if score["compound"] < 0.0:
        result = 0
    elif score['compound'] > 0.0:
        result = 1
    sentimentValues.append(result)

dt = pd.DataFrame.from_dict(sentimentValues)
dt.columns = ['sentimentValues']

results = confusion_matrix(db['yb'], dt['sentimentValues'])
print('Confusion Matrix')
print(results)
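
As a small follow-up, overall accuracy can be read straight off the confusion matrix computed above (a sketch; results is the 2x2 numpy array returned by confusion_matrix):

accuracy = results.trace() / results.sum()
print("Accuracy: {:.3f}".format(accuracy))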
Example #8
def analyze(text):
    analyser = SentimentIntensityAnalyzer()
    score = analyser.polarity_scores(text)
    return score['compound']
Example #9
def sentiment_scores_pos(sentence):
    # Create a SentimentIntensityAnalyzer object.
    sid_obj = SentimentIntensityAnalyzer()
    sentiment_dict = sid_obj.polarity_scores(sentence)
    print("Overall sentiment dictionary is : ", sentiment_dict)
    return (sentiment_dict['pos'] * 100)
Example #10
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer  # Adapted from https://github.com/cjhutto/vaderSentiment
import csv
import numpy as np
import pandas as pd

inputFile = open('Clean_file.csv')
fileObject = csv.reader(inputFile)

sentimentIntensityAnalyzer = SentimentIntensityAnalyzer()
scores = []
sentiments = []
tweets = []
# map the arg-max position to a sentiment label
labels = {0: 'positive', 1: 'negative', 2: 'neutral'}
for row in fileObject:
    readTweets = sentimentIntensityAnalyzer.polarity_scores(row[3])
    positive = readTweets['pos']
    negative = readTweets['neg']
    neutral = readTweets['neu']
    npArray = np.array([positive, negative, neutral])
    index = np.argmax(npArray)
    sentiments.append(labels[index])
    scores.append(np.max(npArray))
    tweets.append(row[3])

dataFrame = pd.DataFrame({'tweet': tweets,
                          'sentiment': sentiments,
                          'sentiment_score': scores})
dataFrame.to_csv('File_sentimental.csv', index=False)
Example #11
def get_analyzer(lexicon=None):
    if lexicon is None:
        return SentimentIntensityAnalyzer()
    lexicon_file_contents = "\n".join(get_lexicon_file_lines(lexicon))
    return CustomSentimentIntensityAnalyzer(lexicon_file_contents)
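
get_lexicon_file_lines and CustomSentimentIntensityAnalyzer are project-specific helpers not shown in this example. A minimal, hypothetical sketch of what they might look like, assuming lexicon is a mapping of token to valence and that the custom analyzer simply swaps in the parsed lexicon:

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def get_lexicon_file_lines(lexicon):
    # hypothetical: render {token: valence} pairs as tab-separated lexicon lines
    return ["{}\t{}".format(token, valence) for token, valence in lexicon.items()]

class CustomSentimentIntensityAnalyzer(SentimentIntensityAnalyzer):
    # hypothetical: replace the stock lexicon with one parsed from the supplied text
    def __init__(self, lexicon_file_contents):
        super().__init__()
        self.lexicon = {}
        for line in lexicon_file_contents.splitlines():
            if not line.strip():
                continue
            token, valence = line.split("\t")[:2]
            self.lexicon[token] = float(valence)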
Example #12
#!/usr/bin/env python3
import pandas as pnd
dataoutput = pnd.read_csv("tweets.csv")
SentiFinal = pnd.DataFrame()

import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag,pos_tag_sents

tweets = dataoutput["text"].map(lambda x: x.lower())

dataoutput["tweetinput"]=tweets
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
output = SentimentIntensityAnalyzer()

negative=[]
positive=[]
neutral=[]
compound=[]
sentiAnalysis=[]
sentiScore = []

for tweet in tweets:
    sentiValues = output.polarity_scores(tweet)
    negative.append(sentiValues["neg"])
    positive.append(sentiValues["pos"])
    neutral.append(sentiValues["neu"])
    compound.append(sentiValues["compound"])

    if sentiValues["neg"]>sentiValues["pos"]:
        sentiAnalysis.append("Negative")
Example #13
class TextAnalyzer:
    def __init__(
            self,
            # Model names: joeddav/xlm-roberta-large-xnli, facebook/bart-large-mnli
            model_name_or_path: str = None,
            initialize_model: bool = False,
            analyzer_config: AnalyzerConfig = None,
    ):
        self.classifier_model_name = model_name_or_path

        self.analyzer_config = analyzer_config or AnalyzerConfig(use_sentiment_model=False)

        if initialize_model is True or self.classifier_model_name is not None:
            from transformers import pipeline
            self.classifier_model = pipeline("zero-shot-classification", model=model_name_or_path)
            self.vader_sentiment_analyzer = None
        else:
            from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
            self.vader_sentiment_analyzer = SentimentIntensityAnalyzer()
            self.classifier_model = None

    def _init_classifier_model(self, classifier_model_name: str):
        if self.classifier_model is not None:
            raise AttributeError("Classifier already initialized")

        from transformers import pipeline
        self.classifier_model = pipeline("zero-shot-classification", model=classifier_model_name)

    def init_vader_sentiment_analyzer(self):
        if self.vader_sentiment_analyzer is not None:
            raise AttributeError("Classifier already initialized")

        from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
        self.vader_sentiment_analyzer = SentimentIntensityAnalyzer()

    def _get_sentiment_score_from_vader(self, text: str) -> float:
        if self.vader_sentiment_analyzer is None:
            self.init_vader_sentiment_analyzer()

        scores = self.vader_sentiment_analyzer.polarity_scores(text)
        return scores["compound"]

    def _classify_text_from_model(
            self, text: str,
            labels: List[str],
            multi_class_classification: bool = True
    ) -> Dict[str, float]:
        if self.classifier_model is None:
            self._init_classifier_model(self.classifier_model_name)

        scores_data = self.classifier_model(text, labels, multi_class=multi_class_classification)

        score_dict = {label: score for label, score in zip(scores_data["labels"], scores_data["scores"])}
        return dict(sorted(score_dict.items(), key=lambda x: x[1], reverse=True))

    def analyze_input(
        self,
        source_response_list: List[AnalyzerRequest],
        analyzer_config: AnalyzerConfig = None,
        **kwargs
    ) -> List[AnalyzerResponse]:
        analyzer_config = analyzer_config or self.analyzer_config
        analyzer_output: List[AnalyzerResponse] = []

        labels = analyzer_config.labels or []
        if "positive" not in labels:
            labels.append("positive")
        if "negative" not in labels:
            labels.append("negative")

        for source_response in source_response_list:
            classification_map = {}
            if not analyzer_config.use_sentiment_model:
                sentiment_value = self._get_sentiment_score_from_vader(source_response.processed_text)
                if sentiment_value < 0.0:
                    classification_map["negative"] = -sentiment_value
                    classification_map["positive"] = 1.0 - classification_map["negative"]
                else:
                    classification_map["positive"] = sentiment_value
                    classification_map["negative"] = 1.0 - classification_map["positive"]
            else:
                classification_map = self._classify_text_from_model(
                    source_response.processed_text,
                    labels,
                    analyzer_config.multi_class_classification
                )

            analyzer_output.append(
                AnalyzerResponse(
                    processed_text=source_response.processed_text,
                    meta=source_response.meta,
                    classification=classification_map,
                    source_name=source_response.source_name,
                )
            )

        return analyzer_output
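
A hypothetical usage sketch for TextAnalyzer; the AnalyzerRequest constructor fields are assumptions based on the attributes accessed above, not confirmed by this snippet:

analyzer = TextAnalyzer()  # no model name, so analyze_input falls back to the VADER path
requests_ = [
    AnalyzerRequest(processed_text="I love this product", meta={}, source_name="demo")
]
responses = analyzer.analyze_input(requests_)
print(responses[0].classification)  # e.g. {'positive': ..., 'negative': ...}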
Example #14
    def init_vader_sentiment_analyzer(self):
        if self.vader_sentiment_analyzer is not None:
            raise AttributeError("Classifier already initialized")

        from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
        self.vader_sentiment_analyzer = SentimentIntensityAnalyzer()
Example #15
    def __init__(self):
        self.sentiment_analyzer = SentimentIntensityAnalyzer()
Example #16
    def vader_sentiment_raw(text):
        analyzer = SentimentIntensityAnalyzer()
        sent = analyzer.polarity_scores(text)['compound']
        return str(sent)
Example #17
import pandas as pd

df = pd.read_csv(r'G:\python projects\FinallyMajorProject\Book1.csv')

# method 1 with simple analysis
import string
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

vs = SentimentIntensityAnalyzer()

# converting to lower case
df['review'] = df['review'].apply(lambda x: x.lower())


#  removing punctuation
def removerpunct(text):
    for punctuation in string.punctuation:
        text = text.replace(punctuation, '')
    return text


df['review'] = df['review'].apply(lambda x: removerpunct(x))

# predicting values of vader

df['compound_pred'] = df['review'].apply(
    lambda x: vs.polarity_scores(x)['compound'])
#  separating positive and negative reviews by the compound score

df['pred'] = df['compound_pred'].apply(lambda x: 'positive'
                                       if x > 0 else 'negative')
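
A quick, hypothetical sanity check, assuming Book1.csv also carries a ground-truth column named 'sentiment' with 'positive'/'negative' labels (that column name is an assumption, not shown above):

if 'sentiment' in df.columns:
    accuracy = (df['pred'] == df['sentiment']).mean()
    print('VADER agreement with labels:', round(accuracy, 3))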
Example #18
def feature_encoder(dataobjects):
    """
    Features included in the code are:
    1. sentiment scores: pos, neg and neu
    easiness to read scales:
        2. flesch reading,
        3. dale_chall reading,
        4. gunning_foc score,
        5. smog_index and
        6. text standard scores.
        all these scores are included in the entire feature set
    7. perspective api scores (toxicity scores for the entire text)
    8. politeness score
    9. impolite-ness score
    10. politeness strategies
    11. POS tags

    :param dataobjects: reads the data objects (data frame) which incorporate the text
    :return: a feature encoded matrix of numeric entities for the entire data set
    """

    nlp = spacy.load('en_core_web_sm')
    feature_dict = {}
    feature_set = {}

    cnt = 0
    for line in dataobjects:
        if cnt == 0:
            cnt = 1
            continue
        feature_dict[cnt] = {}
        text = line[2]
        #sentiment scores: scores with pos, neg and neutral scores:
        analyzer = SentimentIntensityAnalyzer()
        vs = analyzer.polarity_scores(text)
        feature_dict[cnt]['pos'] = vs['pos']
        feature_dict[cnt]['neg'] = vs['neg']
        feature_dict[cnt]['neu'] = vs['neu']
        feature_set['pos'] = 1
        feature_set['neg'] = 1
        feature_set['neu'] = 1

        #easiness to read scores: flesch reading:
        sc = textstat.flesch_reading_ease(text)
        feature_dict[cnt]['easiness'] = sc
        feature_set['easiness'] = 1

        #easiness to read scores: dale chall reading:
        sc = textstat.dale_chall_readability_score(text)
        feature_dict[cnt]['easiness_dale'] = sc
        feature_set['easiness_dale'] = 1

        #easiness to read scores: gunning fog reading:
        sc = textstat.gunning_fog(text)
        feature_dict[cnt]['easiness_fog'] = sc
        feature_set['easiness_fog'] = 1

        #easiness to read scores: smog index reading:
        sc = textstat.smog_index(text)
        feature_dict[cnt]['easiness_smog'] = sc
        feature_set['easiness_smog'] = 1

        #easiness to read scores: text standard reading:
        sc = textstat.text_standard(text, float_output=False)
        feature_dict[cnt]['easiness_standard'] = sc
        feature_set['easiness_standard'] = 1

        #preprocessing text to make readable for perspective api scores:
        stry = str(text)
        sent = ''
        for a in stry:
            if a == ' ' or (a <= 'Z'
                            and a >= 'A') or (a <= 'z' and a >= 'a') or (
                                a <= '9' and a >= '0') or a == '?' or a == '.':
                sent += a

        #perspective api scores call (the request body must be valid JSON):
        data = json.dumps({
            'comment': {'text': sent},
            'languages': ['en'],
            'requestedAttributes': {'TOXICITY': {}},
        })
        response = requests.post(
            'https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze',
            headers=headers,
            params=params,
            data=data)
        j = json.loads(response.text)
        feature_dict[cnt]['toxicity'] = 0.0
        try:
            feature_dict[cnt]['toxicity'] = j['attributeScores']['TOXICITY'][
                'summaryScore']['value']
        except (KeyError, TypeError):
            # keep the default 0.0 when the API response carries no toxicity score
            pass
        feature_set['toxicity'] = 1

        #politeness strategies and politeness scores features:
        sc = get_scores_strategies_token_indices(text)
        feature_dict[cnt]['score_polite'] = sc['score_polite']
        feature_dict[cnt]['score_impolite'] = sc['score_impolite']
        feature_set['score_polite'] = 1
        feature_set['score_impolite'] = 1
        #print(feature_dict[cnt]['score_polite'])
        for a in sc['strategies']:
            feature_dict[cnt][a] = 1
            feature_set[a] = 1

        #POS tags in the text:
        doc = nlp(text)
        for token in doc:
            if (str(token.pos_) not in feature_set):
                feature_set[str(token.pos_)] = 1

            if not (str(token.pos_) in feature_dict[cnt]):
                feature_dict[cnt][str(token.pos_)] = 1
            else:
                feature_dict[cnt][str(token.pos_)] += 1
        cnt += 1

    #creating a systematic feature matrix from feature set
    feature_matrix = []
    for i in range(1, cnt):
        feature_list = []
        for key in feature_set.keys():
            if key in feature_dict[i]:
                feature_list.append(feature_dict[i][key])
            else:
                feature_list.append(0.0)
        feature_matrix.append(feature_list)

    return feature_matrix
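
A hypothetical invocation sketch for feature_encoder, assuming the input is a CSV whose third column (index 2) holds the text, with a header row (the function skips the first row), and that the module-level headers/params for the Perspective API call are already configured:

import csv

with open('comments.csv', newline='', encoding='utf-8') as f:  # hypothetical input file
    rows = list(csv.reader(f))

feature_matrix = feature_encoder(rows)
print(len(feature_matrix), 'rows encoded,',
      len(feature_matrix[0]) if feature_matrix else 0, 'features each')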
Example #19
import pandas as pd
import numpy as np
import nltk

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

df1 = pd.read_csv('./interview1.csv')

app1 = df1['app'].tolist()
mental_health1 = df1['mental_health'].tolist()

app1_sum = 0
for sentence in app1:
	app1_sum += analyzer.polarity_scores(sentence)['compound']


mhealth1_sum = 0
for sentence in mental_health1:
	mhealth1_sum += analyzer.polarity_scores(sentence)['compound']
	
print("Interview 1")
print("app_avg = {}".format(app1_sum/len(app1)))
print("mental_health_avg = {}\n".format(mhealth1_sum/len(mental_health1)))


df2 = pd.read_csv('./interview2.csv')

app2 = df2['app'].tolist()
mental_health2 = df2['mental_health'].tolist()
Example #20
def analyze_vader_sentiment(text):
    vader_analyzer = SentimentIntensityAnalyzer()
    sentiment_dict = vader_analyzer.polarity_scores(text)
    return sentiment_dict['compound']
Example #21
# Sentiment Analysis using TextBlob
FinalResults = pd.DataFrame()

for i in range(0, twt.shape[0]):

    blob = TextBlob(twt.iloc[i,5])

    temp = pd.DataFrame({'Tweets': twt.iloc[i,5], 'Polarity': blob.sentiment.polarity}, index = [0])

    FinalResults = FinalResults.append(temp)


FinalResults['Sentiment'] = FinalResults['Polarity'].apply(lambda x: 'Positive' if x>0 else 'Negative' if x<0 else 'Neutral')

FinalResults['Sentiment'].describe()

#Results: Most of the tweets are Neutral

# Sentiment Analysis using Vader
FinalResults_Vader = pd.DataFrame()

# Creating engine
analyzer = SentimentIntensityAnalyzer()

# Run Engine
for i in range(0, twt.shape[0]):
    
    snt = analyzer.polarity_scores(twt.iloc[i,5])
    
    temp = pd.DataFrame({'Tweets': twt.iloc[i,5], 'Polarity': list(snt.items())[3][1]}, index = [0])

    FinalResults_Vader = FinalResults_Vader.append(temp)

FinalResults_Vader['Sentiment'] = FinalResults_Vader['Polarity'].apply(lambda x: 'Positive' if x>0 else 'Negative' if x<0 else 'Neutral')

FinalResults_Vader['Sentiment'].describe()

#Results: Most of the tweets are Negative
Example #22
class AnalyzerConfig(AppConfig):
    name = 'sentimentmonitor.apps.analyzer'
    label = 'analyzer'
    predictor = SentimentIntensityAnalyzer()  # Using VADER
Example #23
def vaderLexicon():
    # wrapper for getting the lexicon from vaderSentiment
    sa = SentimentIntensityAnalyzer()
    return sa.lexicon
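
A minimal usage sketch: the returned dict maps tokens to valence ratings, so individual words can be looked up directly (tokens absent from the lexicon come back as None):

lex = vaderLexicon()
for word in ("good", "bad", "table"):
    print(word, lex.get(word))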
Example #24
    def __init__(self):
        print("Vader")
        self.analyzer = SentimentIntensityAnalyzer()
Example #25
def sentiment(s):
    # wrapper for the vaderSentiment polarity scores.
    # note - this seems to work for strings without tokenization
    sa = SentimentIntensityAnalyzer()
    return sa.polarity_scores(text=s)
Example #26
def gen_weight(date, comp, dfstk):
    #inputs to the function(convert it as arguments)
    datenow = datetime.now().date()
    # date=input("enter the date:(2019-05-30)")
    #date = "2019-06-04"
    company = ""
    # date=input("enter the date:(2019-04-17)")
    if comp == "CSCO":
        company = "cisco"
    elif comp == "MSFT":
        company = "microsoft"
    print(company, dfstk)
    # company=input("enter the name of the company:(cisco)")
    #company="cisco"
    header = ['Id', 'Text', 'price_date', 'Time', 'Followers count']

    #changing to datetime format and fetching first data
    print("\ncollecting tweets from each date...\n")
    date = datetime.strptime(date, "%Y-%m-%d").date()
    url = "https://raw.githubusercontent.com/d4datas/twitterdata/master/" + str(
        company) + "-final-" + str(date) + ".csv"
    df = pd.read_csv(url,
                     sep='\^',
                     error_bad_lines=False,
                     names=header,
                     usecols=['Id', 'Text', 'price_date', 'Followers count'],
                     skiprows=1,
                     engine='python')

    #iterately collect data till yesterday and concatenate
    datenow = datenow - timedelta(days=1)
    while date < datenow:
        print(date)
        date = date + timedelta(days=1)
        url = "https://raw.githubusercontent.com/d4datas/twitterdata/master/" + str(
            company) + "-final-" + str(date) + ".csv"
        df1 = pd.read_csv(
            url,
            sep='\^',
            error_bad_lines=False,
            names=header,
            usecols=['Id', 'Text', 'price_date', 'Followers count'],
            skiprows=1,
            engine='python')
        df = pd.concat([df, df1], ignore_index=True)

    #delete dulpicates in terms of id and text
    df.drop_duplicates(subset='Id', keep=False, inplace=True)
    df.drop_duplicates(subset='Text', keep=False, inplace=True)

    #changing date to common format(optional)
    #df['Date'] = pd.to_datetime(df['Date'])
    #df['Date'] = df['Date'].dt.date

    #preprocessing and replacing text from each row using re and preprocessor
    print("starting preprocessing of each row...\n")
    # time.sleep(2)
    for j, tweet_text in df.iterrows():
        print(j)
        tweet_text = df.at[j, 'Text']
        tweet_text = re.sub(',', ' ', tweet_text)
        sentence = tweet_text.lower()  #convert to lower case
        fsen = re.sub(
            " #| &amp; |\n|\t", " ",
            sentence)  #removing hastags line breaks and tabs with space
        fsen = re.sub(
            "#| \n| \t|\.", "",
            fsen)  #removing hastags line breaks and tabs without space
        fsen = re.sub(r'\d+', '', fsen)  #removing numbers
        #removing links from text
        sen = fsen.split(" ")
        lsen = []
        for i in sen:
            if "https://" not in str(i):
                #lsen.append(i)
                if "http://" not in str(i):
                    lsen.append(i)
        tweet_text = " ".join(lsen)
        tweet_text = p.clean(tweet_text)
        sen = tweet_text.split(" ")
        lsen = []
        for i in sen:
            if "/" not in str(i):
                lsen.append(i)

        tweet_text = " ".join(lsen)
        tweet_text = tweet_text.lstrip()
        df.at[j, 'Text'] = tweet_text
    df.drop_duplicates(subset='Text', keep=False, inplace=True)
    #print(df)
    #df.to_csv("csco_filtered.csv")

    #sentiment analysis starts
    analyser = SentimentIntensityAnalyzer()
    print("\nanalysing sentiment for each date data...\n")
    #extracting the dates to iterate
    dates = df['price_date'].unique().tolist()
    #print("list created")
    f = open('sentiment_data.csv', 'a')
    f.write(
        "price_date,negative,positive,neutral,compound,subjectivity,polarity\n"
    )
    f.close()

    def calculate_senti(data, df):
        date = data
        mask = (df['price_date'] == str(date))
        df = df.loc[mask]
        total_rows = 1

        sum_pos = sum_neu = sum_pol = sum_sub = sum_comp = sum_neg = 0
        f = open('sentiment_data.csv', 'a')
        sen_writer = csv.writer(f)
        for text in df['Text']:
            total_rows = total_rows + 1
            vad = analyser.polarity_scores(text)
            analysis = TextBlob(text)
            pol = round(analysis.polarity, 4)
            sub = round(analysis.subjectivity, 4)
            sum_neg = sum_neg + vad['neg']
            sum_pos = sum_pos + vad['pos']
            sum_neu = sum_neu + vad['neu']
            sum_comp = sum_comp + vad['compound']
            sum_pol = sum_pol + pol
            sum_sub = sum_sub + sub

        #finding weighted values
        #print(sum_neg)
        neg_val = sum_neg / total_rows
        pos_val = sum_pos / total_rows
        neu_val = sum_neu / total_rows
        sub_val = sum_sub / total_rows
        pol_val = sum_pol / total_rows
        comp_val = sum_comp / total_rows
        sum_neg = sum_pos = sum_neu = sum_pol = sum_sub = sum_comp = 0

        #appending to csv
        f.write(','.join([
            str(date) + "," + str(neg_val) + "," + str(pos_val) + "," +
            str(neu_val) + "," + str(comp_val) + "," + str(sub_val) + "," +
            str(pol_val)
        ]) + '\n')
        f.close()
        #print(str(date)+","+str(neg_val)+","+str(pos_val)+","+str(neu_val)+","+str(comp_val)+","+str(sub_val)+","+str(pol_val))

    for date in dates:
        print(".")
        calculate_senti(date, df)

    #dropping unwanted raws
    df = pd.read_csv("sentiment_data.csv", error_bad_lines=False)
    df['price_date'] = pd.to_datetime(df['price_date'],
                                      format='%Y-%m-%d',
                                      errors='coerce')
    df = df[pd.notnull(df['price_date'])]
    #print(df)

    #removing temporary file
    os.remove("sentiment_data.csv")

    #scaling the data using minmax method to 0.8-1.2 range
    print("\nscaling data...\n")
    # time.sleep(2)
    #OldRange = (OldMax - OldMin)
    oldrange = 1.5
    oldmin = -.5
    newmin = .8
    #NewRange = (NewMax - NewMin)
    newrange = 0.4
    l = []
    for data in df['compound']:
        #NewValue = (((OldValue - OldMin) * NewRange) / OldRange) + NewMin
        newvalue = (((float(data) - oldmin) * newrange) / oldrange) + newmin
        print(str(data) + '====>' + str(newvalue))
        l.append(newvalue)
    df['sentiment'] = l
    #print(df)
    #df1.to_csv('scaledsentiment.csv')
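    # Sanity-check sketch of the min-max scaling above (added for illustration, not part of
    # the original flow): with oldmin=-0.5, oldrange=1.5, newmin=0.8 and newrange=0.4,
    # a compound score of -0.5 maps to 0.8 and 1.0 maps to 1.2.
    assert abs((((-0.5 - oldmin) * newrange) / oldrange) + newmin - 0.8) < 1e-9
    assert abs((((1.0 - oldmin) * newrange) / oldrange) + newmin - 1.2) < 1e-9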

    # dfstk = pd.read_csv("CSCO.csv",error_bad_lines=False)
    #print(dfstk)

    #merging sentiment data and stock data
    print("merging data...\n")
    # time.sleep(2)
    df['price_date'] = pd.to_datetime(df['price_date'])
    dfstk['price_date'] = pd.to_datetime(dfstk['price_date'])
    dfmrg = pd.merge(df, dfstk, on="price_date")
    #print(dfmrg)
    #dfmrg.to_csv("sentistock.csv")
    #creating a copy with needed columns (adjust for needed columns)
    dfnl = dfmrg[[
        'price_date', 'open_price', 'close_price', 'low_price', 'high_price',
        'adj_close_price', 'volume', 'sentiment'
    ]].copy()

    #adding weighted value to the dataframe
    print("adding weightage...\n")
    # time.sleep(2)
    dfnl['weighted_close_price'] = dfnl['close_price'] * dfnl['sentiment']
    dfnl[
        'weight_adj_close_price'] = dfnl['adj_close_price'] * dfnl['sentiment']
    return dfnl


#dfnl.to_csv("final_data.csv")
Example #27
df.loc[df['score'] == 0, 'pred'] = 1
df.loc[df['score'] > 0, 'pred'] = 2

len(df[df['pred'] == df['label_binary']])

# confusion matrix with afinn
len(df)
confusion_matrix(df['pred'], df['label_binary'])

df111 = pd.DataFrame()
df111['aaa'] = Y_test1
len(df111[df111['aaa'] == 1])
len(df111)

# vader
analyser = SentimentIntensityAnalyzer()
score = []
for i in range(len(df.index)):
    my_dict = analyser.polarity_scores(
        (df.iloc[[i]]['tweet_text'].tolist()[0]))
    if 'compound' in my_dict: del my_dict['compound']
    score.append(max(my_dict.items(), key=operator.itemgetter(1))[0])
    print(i)

df['score'] = score
df['pred'] = 0
df.loc[df['score'] == 'neg', 'pred'] = 0
df.loc[df['score'] == 'neu', 'pred'] = 1
df.loc[df['score'] == 'pos', 'pred'] = 2
len(df[df['pred'] == df['label_binary']])
Example #28
def tw_sent(text):
    sid = SentimentIntensityAnalyzer()
    ss = sid.polarity_scores(text)
    #print(type(ss))
    #    print(ss)
    return (ss)
Example #29
    def __init__(self):
        self._analyser = SentimentIntensityAnalyzer()
        self._return = {}
    "VADER is not smart, handsome, nor funny.",  # negation sentence example
    "VADER is smart, handsome, and funny!",  # punctuation emphasis handled correctly (sentiment intensity adjusted)
    "VADER is very smart, handsome, and funny.",  # booster words handled correctly (sentiment intensity adjusted)
    "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
    "VADER is VERY SMART, handsome, and FUNNY!!!",  # combination of signals - VADER appropriately adjusts intensity
    "VADER is VERY SMART, uber handsome, and FRIGGIN FUNNY!!!",  # booster words & punctuation make this close to ceiling for score
    "The book was good.",  # positive sentence
    "The book was kind of good.",  # qualified positive sentence is handled correctly (intensity adjusted)
    "The plot was good, but the characters are uncompelling and the dialog is not great.",  # mixed negation sentence
    "At least it isn't a horrible book.",  # negated negative sentence with contraction
    "Make sure you :) or :D today!",  # emoticons handled
    "Today SUX!",  # negative slang with capitalization emphasis
    "Today only kinda sux! But I'll get by, lol"  # mixed sentiment example with slang and constrastive conjunction "but"
]

analyzer = SentimentIntensityAnalyzer()
for sentence in sentences:
    vs = analyzer.polarity_scores(sentence)
    print("{:-<65} {}".format(sentence, str(vs)))

    #note: depending on how you installed (e.g., using source code download versus pip install), you may need to import like this:
    #from vaderSentiment import SentimentIntensityAnalyzer

Example #31
# --- examples -------
sentences = [
    "I love Sitoluama.",
    "I LOVE Sitoluama.",
    "I love Sitoluama!",
    "I love Sitoluama!!!",
    "I love Sitoluama :)",
    "I don't love Sitoluama.",
def tw_sent(text):
    sid = SentimentIntensityAnalyzer()
    ss = sid.polarity_scores(text)
    #print(type(ss))
#    print(ss)
    return(ss)
Example #32
File: test.py Project: lik/queso
import time
import sqlite3
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyser = SentimentIntensityAnalyzer()
db = sqlite3.connect('queso.db')
cursor = db.cursor()

for row in cursor.execute(
        '''SELECT nick, compound FROM "poo" group by nick ORDER BY compound ASC LIMIT 3'''
):
    print(row[0])
Example #33
    def __init__(self):
        self.analyzer = SentimentIntensityAnalyzer()
Example #34
import jsonlines, csv
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
d={"Jan":"01","Feb":"02","Mar":"03","Apr":"04","May":"05"}
reader=jsonlines.open("data/tweet_idno4.jsonl")
count=0
analyzer=SentimentIntensityAnalyzer()
writer=csv.writer(open('tweet_sentiment5.csv', 'w', encoding='utf-8', newline=''))
writer.writerow(["Tweet_VS","Date","go"])
for obj in reader:
    # obj=reader.read()
    vs=analyzer.polarity_scores(obj['full_text'])
    time=obj['created_at'].split()
    writer.writerow([vs['compound'],"2020-"+d[time[1]]+"-"+time[2],"go"])
    # x=float(vs['compound'])
    #
    # print(time)
    # if x>0:
    #     print(x,"positive")
    # elif x==0:
    #     print(x,"neutral")
    # else:
    #     print(x,"negative")

    count+=1
    if count%10000==0:
        print(count)
Example #35
def reviews(biz_id):
    """Sentiment score for restaurant"""

    googlemaps_api = os.environ['GOOGLE_API_KEY']

    # we stored lat and lng as parameters/ form way dictionary values
    # in the HREF of restaurant name in html that loops over each restaurant div
    # we are extracting that information here and passing to reviews.html in jinja
    # and then passing that info to JS through data attributes
    latitude = request.args.get('lat')
    longitude = request.args.get('lng')
    restaurant_name = request.args.get('name')

    review = db.session.query(
        Review.review).filter(Review.biz_id == biz_id).all()

    final_list_d = []
    for tup in review:
        final_list_d.append(tup[0])

    analyser = SentimentIntensityAnalyzer()

    sum_compound_score = 0
    analyzed_reviews = []
    pos_word_list = set()
    neu_word_list = set()
    neg_word_list = set()

    word_cloud_dict = {}

    for sentence in final_list_d:

        snt = analyser.polarity_scores(sentence)
        sum_compound_score += snt['compound']

        tokenized_sent = word_tokenize(sentence)
        for word in tokenized_sent:
            word = word.lower()
            word_compound_score = (analyser.polarity_scores(word))['compound']

            if (word_compound_score) > 0:
                pos_word_list.add(word)
                # word_cloud_dict[word] = (word_compound_score*1000)

            elif (word_compound_score) < 0:
                neg_word_list.add(word)
                # word_cloud_dict[word] = (word_compound_score*1000)

            else:
                neu_word_list.add(word)
                # word_cloud_dict[word] = (word_compound_score*1000)

        analyzed_reviews.append(sentence + str(snt))

    for word in pos_word_list:
        pos_score = analyser.polarity_scores(word)
        word_cloud_dict[word] = (pos_score['compound'] * 1000)

        # print(word,pos_score)

    for word in neg_word_list:
        neg_score = analyser.polarity_scores(word)
        word_cloud_dict[word] = (neg_score['compound'] * 1000)

        # print(word,neg_score)

    # print(word_cloud_dict)

    avg_compound_score = sum_compound_score / len(analyzed_reviews)
    avg_compound_score = ("%.3f" % avg_compound_score)

    return render_template("reviews.html",
                           restaurant_name=restaurant_name,
                           biz_id=biz_id,
                           avg_score_for_restaurant=avg_compound_score,
                           data=analyzed_reviews,
                           api_key=googlemaps_api,
                           latitude=latitude,
                           longitude=longitude)
Example #36
def semantic_summary(features, all_sentences):
    dir = os.path.dirname(__file__)
    sid = SentimentIntensityAnalyzer(dir + "/stoplists/vader_lexicon.txt")

    options = []

    for feature in features:
        option = {"feature": feature[0], "neg": 0, "pos": 0, "frq": feature[1]}

        support_sentences = find_support_sentences(feature[0], all_sentences)

        sentences_with_orientation = {"pos": [], "neg": []}

        for ss in support_sentences:
            s = ss[0]
            stars = ss[1]
            helpful = ss[2]

            # feats = dict([(word, True) for word in nltk.wordpunct_tokenize(s)] )
            # if word not in english_stopwords

            opt = ""
            scores = sid.polarity_scores(s)

            if scores["compound"] + 0.7 * (stars - 3) / 2 < -0.2:
                opt = "neg"
            elif scores["compound"] + 0.7 * (stars - 3) / 2 > 0.2:
                opt = "pos"

            # manual correction
            # positives
            if any(mw in nltk.word_tokenize(s)
                   for mw in ["positives", "pros"]):
                opt = "pos"
            elif any(mw in nltk.word_tokenize(s) for mw in ["negatives"]):
                opt = "neg"

            if opt != "":
                option[opt] = int(option[opt] + 1 + helpful * 0.5)
                sentences_with_orientation[opt].append([
                    s, (scores["compound"] + 0.7 * (float(stars) - 3) / 2) *
                    float(helpful * 0.3 + 1)
                ])

        if (len(sentences_with_orientation["pos"]) +
                len(sentences_with_orientation["neg"])) < 5:
            # options.pop(feature[0],None)
            continue

        if len(sentences_with_orientation["pos"]) > 0:
            sorted_sentences_with_orientation_pos = sorted(
                sentences_with_orientation["pos"],
                key=lambda v: v[1],
                reverse=True)
            # option["pos_summary"] = sorted_sentences_with_orientation_pos[0]

            option["pos_summary"] = top_summary(
                "\n ".join([t[0] for t in sentences_with_orientation["pos"]]),
                1, rank_summarizer)
        else:
            option["pos_summary"] = []

        if len(sentences_with_orientation["neg"]) > 0:

            sorted_sentences_with_orientation_neg = sorted(
                sentences_with_orientation["neg"], key=lambda v: v[1])
            # option["neg_summary"] = sorted_sentences_with_orientation_neg[0]

            # option["neg_summary"].append(top_summary("\n ".join([t[0] for t in sentences_with_orientation["neg"]]),1))
            option["neg_summary"] = top_summary(
                "\n ".join([t[0] for t in sentences_with_orientation["neg"]]),
                1, rank_summarizer)
        else:
            option["neg_summary"] = []

        options.append(option)

    options = sorted(options, key=lambda v: v["neg"] + v["pos"], reverse=True)
    return options
Example #37
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

sentence = input('Enter the sentence for analysis: ')

analyzer = SentimentIntensityAnalyzer()
sentiment = analyzer.polarity_scores(sentence)

if sentiment['compound'] >= 0.05:
    sentence_category = 'Positive'
elif -0.05 <= sentiment['compound'] < 0.05:
    sentence_category = 'Neutral'
else:
    sentence_category = 'Negative'

sentence_data = dict()

print('This sentence is:', sentence_category)
print(dict(sentiment))
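
The same +/-0.05 cut-offs wrapped as a small reusable helper (a sketch that follows the thresholds used above):

def classify_compound(compound):
    if compound >= 0.05:
        return 'Positive'
    if compound < -0.05:
        return 'Negative'
    return 'Neutral'

print(classify_compound(sentiment['compound']))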
Example #38
import sys
from decimal import Decimal
sys.path.append("../")
import numpy as np
import tweepy
from config import (consumer_key, 
                    consumer_secret, 
                    access_token, 
                    access_token_secret)


# In[114]:


# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()


# In[115]:


# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())


# In[116]:


# Target Search Term
Example #39
class TextAnalysis(AbstractUtil):
    def __init__(self):
        self.analyzer = SentimentIntensityAnalyzer()
        # self.test()

    def analyse(self,  text):
        # The compound score is computed by summing the valence scores of each word in the lexicon, adjusted according
        # to the rules, and then normalized to be between -1 (most extreme negative) and +1 (most extreme positive).
        # https://github.com/cjhutto/vaderSentiment
        return self.analyzer.polarity_scores(text)["compound"]

    # returns the article object and the analysis result
    def analyse_web_page_article(self, url):
        article = Article(url)
        article.download()
        article.parse()
        return article, self.analyse(article.text)

    # return a list of high influential value websites
    @staticmethod
    def get_high_value_websites():
        return [
            "https://www.youtube.com"
                ]

    @staticmethod
    def is_analysable_url(url):
        url_ending = str(url).split(".")[-1]
        return url_ending.lower() not in IMAGE_ENDINGS

    # official account tweets that can be used for testing purposes
    def test(self):
        texts = [
            "So excited at what I am working on for the future.  I don’t get to talk about what I am actively doing on a daily basis because it’s far ahead of our messaging but I am beyond excited about it! #substratum $sub",
            "Have you read about VeChain and INPI ASIA's integration to bring nanotechnology for digital identity to the VeChainThor blockchain? NDCodes resist high temperature, last over 100 years, are incredibly durable and invisible to the naked eye",
            "Crypto market update: BTC holds near $9K, ETH rising over $640, BCH grows 85% on the week",
            "Extremely excited & proud to announce that #Substratum Node is NOW Open Source! https://github.com/SubstratumNetwork/SubstratumNode …#NetNeutrality $SUB #cryptocurrency #bitcoin #blockchain #technology #SubSavesTheInternet",
            "A scientific hypothesis about how cats, infected with toxoplasmosis, are making humans buy Bitcoin was presented at last night's BAHFest at MIT.",
            "Net Neutrality Ends! Substratum Update 4.23.18",
            "One more test from @SubstratumNet for today. :)",
            "Goldman Sachs hires crypto trader as head of digital assets markets",
            "Big news coming! Scheduled to be 27th/28th April... Have a guess...😎",
            "A great step to safer #exchanges: @WandXDapp Joins REMME’s 2018 Pilot Program for testing functionality of certificate-based signup and login for end users. https://medium.com/remme/wandx-joins-remmes-2018-pilot-program-588379aaea4d … #nomorepasswords #blockchain #crypto $REM"
            "omeone transferred $99 million in litecoin — and it only cost them $0.40 in fees. My bank charges me a hell of a lot more to transfer a hell of a lot less. Can we hurry up with this crypto/blockchain revolution I'm tired of paying fees out of my ass to a bunch of fat cats",
            "This week's Theta Surge on http://SLIVER.tv  isn't just for virtual items... five PlayStation 4s will be given out to viewers that use Theta Tokens to reward the featured #Fortnite streamer! Tune in this Friday at 1pm PST to win!",
            "The European Parliament has voted for regulations to prevent the use of cryptocurrencies in money laundering and terrorism financing. As long as they have good intention i don' t care.. but how much can we trust them??!?!"
            "By partnering with INPI ASIA, the VeChainThor Platform incorporates nanotechnology with digital identification to provide solutions to some of the worlds most complex IoT problems.",
            "Thanks to the China Academy of Information and Communication Technology, IPRdaily and Nashwork for organizing the event.",
            "Delivered a two hour open course last week in Beijing. You can tell the awareness of blockchain is drastically increasing by the questions asked by the audience. But people need hand holding and business friendly features to adopt the tech.",
            "Introducing the first Oracle Enabler tool of the VeChainThor Platform: Multi-Party Payment Protocol (MPP).",
            "An open letter from Sunny Lu (CEO) on VeChainThor Platform.",
            "VeChain has finished the production of digital intellectual property services with partner iTaotaoke. This solution provides a competitive advantage for an industry in need of trust-free reporting and content protections.#GoVeChain",
            "Special thanks to @GaboritMickael to have invited @vechainofficial to present our solution and make a little demo to @AccentureFrance",
            "VeChain’s COO, @kfeng027, is invited to ‘Crypto Media Collection Vo.1’ held at DeNA’s campus by Coinjinja in Tokyo, one of the largest cryptocurrency information platforms. Kevin’s speech begins at 16:35 UTC+9, livestreamed via https://ssl.twitcasting.tv/coinjinja ",
            "VeChain will pitch their solutions potentially landing a co-development product with LVMH.  In attendance will be CEOs Bill McDermott (SAP), Chuck Robbins (CISCO), Ginni Rometty (IBM), and Stephane Richard (Orange) as speakers -",
            "As the only blockchain company selected, VeChain is among 30 of 800+ hand-picked startups to compete for the second edition of the LVMH Innovation Award. As a result, VeChain has been invited to join the Luxury Lab LVMH at Viva Technology in Paris from May 24-26, 2018.",
            "VeChain to further its partnership with RFID leader Xiamen Innov and newly announced top enterprise solution provider CoreLink by deploying a VeChainThor enterprise level decentralized application - AssetLink.",
            "Today, a group of senior leaders from TCL's Eagle Talent program visited the VeChain SH office. @VeChain_GU demonstrated our advanced enterprise solutions and it's relation to TCL's market. As a result, we're exploring new developments within TCL related to blockchain technology.",
            "VeChain announces a partnership with eGrid, a leading publicly listed ERP, SCM and CRM solution provider to synergistically provide comprehensive blockchain technology backing for a significant portion of China’s automobile industry.",
            "We are glad to be recognized as Top 10 blockchain technology solution providers in 2018. outprovides a platform for CIOs and decision makers to share their experiences, wisdom and advice. Read the full version article via",
            "Talked about TOTO at the blockchain seminar in R University of Science and Technology business school last Saturday. It covered 3000 MBA students across business schools in China."
        ]
        for text in texts:
            print(str(self.analyse(text)) + " => "+str(DecoderEncoder.encode_into_bytes(text)))