def positiveSentimentDetector(descriptions):
    # Create a SentimentIntensityAnalyzer object.
    sid_obj = SIA()

    # The polarity_scores method of SentimentIntensityAnalyzer
    # returns a sentiment dictionary,
    # which contains pos, neg, neu, and compound scores.
    sentiment_dict = sid_obj.polarity_scores(descriptions)

    positiveScore = sentiment_dict["pos"] * 100
    return positiveScore
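# Usage sketch for positiveSentimentDetector. Assumption (not in the original
# snippet): SIA is VADER's SentimentIntensityAnalyzer; NLTK's bundled copy is
# used below and needs nltk.download('vader_lexicon') to have been run once.
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

# Prints the 'pos' proportion of the text scaled to a 0-100 percentage.
print(positiveSentimentDetector("The product is fantastic and easy to use"))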
def get_sentiment(df, series: str):
    # initialize sentiment classifier
    sia = SIA()
    # get sentiment
    sentiment = df[series].apply(sia.polarity_scores)
    # create sentiment df
    sentiment = pd.DataFrame(sentiment.tolist())
    # merge sentiment with your df
    return df.merge(sentiment, how='left', left_index=True, right_index=True)
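# Usage sketch for get_sentiment (assumptions, not from the original snippet:
# pandas is imported as pd and SIA is bound to VADER's SentimentIntensityAnalyzer).
headlines = pd.DataFrame({"headline": ["Stocks rally on strong earnings",
                                       "Factory fire injures several workers"]})
scored = get_sentiment(headlines, series="headline")
print(scored[["headline", "neg", "neu", "pos", "compound"]])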
def searchHeadlines(a1):
    t = Article(a1)
    try:
        t.download()
        t.parse()
        t.nlp()
        s1 = SIA()
        ps = s1.polarity_scores(t.summary)
        a1 = a1.replace('https://www.google.com/url?q=https://', '')
        while (a1.find('https://') != -1):
            a1 = a1.replace('https://', '')
        while (a1.find('www') != -1):
            a1 = a1.replace('www', '')
        d = a1.split('.')
        if (d[0] == 'www' or d[0] == ''):
            n = 1
        else:
            n = 0
        string = ''
        if (len(d[n]) > 1):
            string = d[n]
        n = n + 1
        while ('co' not in d[n] and 'com' not in d[n] and 'in' not in d[n]):
            if len(d[n]) > 2 and '%' not in d[n]:
                string = string + ' ' + d[n]
            n = n + 1
        return string, ps['compound']
    except:
        return '', 0
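# Usage sketch (assumptions, not from the original snippet: Article is
# newspaper3k's Article, SIA is VADER's SentimentIntensityAnalyzer, and the
# call needs network access, so it is left commented out).
# source_words, compound = searchHeadlines("https://www.bbc.com/news/technology")
# print(source_words, compound)   # cleaned domain words and a score in [-1, 1]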
def get_sentiment(df, series: str):
    # initialize sentiment classifier
    sia = SIA()
    # get sentiment
    sentiment = df[series].apply(sia.polarity_scores)
    # create sentiment df
    sentiment = pd.DataFrame(sentiment.tolist())
    # merge sentiment with your df
    df = df.merge(sentiment, how='left', left_index=True, right_index=True)
    df['sentiment'] = df['compound'].apply(categorize_sentiment)
    df['sentiment'] = pd.Categorical(df['sentiment'])
    binary_sentiment = df['sentiment'].str.get_dummies()
    df = df.merge(binary_sentiment, how='left', left_index=True, right_index=True)
    return df
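# categorize_sentiment is called above but not defined in this excerpt.
# A minimal sketch, assuming the conventional VADER compound-score cut-offs of
# +/-0.05; the labels and thresholds in the original code may differ.
def categorize_sentiment(compound):
    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'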
# import logging
# logging.basicConfig(level=logging.DEBUG)

# Initialise the PCA9685 using the default address (0x40).
pwm = Adafruit_PCA9685.PCA9685()

# Alternatively specify a different address and/or bus:
# pwm = Adafruit_PCA9685.PCA9685(address=0x41, busnum=2)

# Configure min and max servo pulse lengths
servo_min = 600           # Min pulse length out of 4096
servo_immigration = 1000  # Pulse length out of 4096
servo_women = 2000        # Pulse length out of 4096
servo_tech = 2500         # Pulse length out of 4096

sid = SIA()


# Helper function to make setting a servo pulse width simpler.
def set_servo_pulse(channel, pulse):
    pulse_length = 1000000    # 1,000,000 us per second
    pulse_length //= 60       # 60 Hz
    print('{0}us per period'.format(pulse_length))
    pulse_length //= 4096     # 12 bits of resolution
    print('{0}us per bit'.format(pulse_length))
    pulse *= 1000
    pulse //= pulse_length
    pwm.set_pwm(channel, 0, pulse)


# Set frequency to 60hz, good for servos.
pwm.set_pwm_freq(60)
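# Usage sketch (assumption, not part of the original script): set_servo_pulse
# takes a pulse width in milliseconds, mirroring the stock Adafruit_PCA9685
# example, so this would drive the servo on channel 0 with a ~1 ms pulse.
# It is left commented out because it needs the PCA9685 hardware attached.
# set_servo_pulse(0, 1)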
def generate_sentiment_features(top_words, labels, X_train_ids, X_holdout_ids,
                                corpus, i2s):
    """Create sentiment features

    Arguments
    - top_words: list of most informative or polarizing bias-words in corpus
    - labels: a dict mapping domain names to bias and credibility label information
    - X_train_ids: the set of row ids that are in the training set
    - X_holdout_ids: the set of row ids that are in the holdout/testing set
    - corpus: the set of news articles (documents)
    - i2s: a dict mapping integer vertex labels to string representations (domain names)

    Returns:
    - training_vector: a numpy ndarray containing sentiment scores for each word
      in top_words and each article in the training set
    - holdout_vector: a numpy ndarray containing sentiment scores for each word
      in top_words and each article in the holdout set
    - sentiment_stats_per_article: a dict mapping the context word to
      bias/credibility label information, article ID, and sentiment score
    - word2context: a dict mapping the context word to sentences in the corpus
      which include it
    """
    sa = SIA()
    print('generating sentiment features...')

    # Get context
    word2context = {}
    sentiment_stats_per_article = {}
    training_feats = []
    holdout_feats = []

    # for each word in top_words, grab sentences that contain the word
    for w_idx, w in enumerate(top_words):
        training_feats.append([0] * len(X_train_ids))
        holdout_feats.append([0] * len(X_holdout_ids))
        sentiment_stats_per_article[w] = []
        word2context[w] = create_context(w, corpus, i2s)

        # loop through each article
        first_pass = True
        first = True
        for line in word2context[w]:
            sdom = line['sdom']
            label = labels[sdom]['bias']
            article_id = line['article_ID']

            # concatenate all text by label
            if labels[sdom]['bias'] in ["R"]:
                temp = ' '.join(line['sentences'])
                if first_pass:
                    conservative_terms = [temp]
                    first_pass = False
                else:
                    conservative_terms = [
                        ' '.join([conservative_terms[0], temp])
                    ]
            if labels[sdom]['bias'] in ["L"]:
                temp = ' '.join(line['sentences'])
                if first:
                    liberal_terms = [temp]
                    first = False
                else:
                    liberal_terms = [' '.join([liberal_terms[0], temp])]

        # remove shared terms from sentences and then compute sentiment score per article
        shared_terms = list(
            set(liberal_terms[0].split()) & set(conservative_terms[0].split()))

        for art in word2context[w]:
            sdom = art['sdom']  # use this article's own domain for its label info
            article_id = art['article_ID']
            temp = ' '.join(art['sentences'])
            art_leftovers = ' '.join(
                [word for word in temp.split() if word not in shared_terms])
            score = sa.polarity_scores(art_leftovers)['compound']
            sentiment_stats_per_article[w].append({
                'article_id': article_id,
                'bias': labels[sdom]['bias'],
                'cred': labels[sdom]['cred'],
                'sentiment_score': score
            })

            # check that article is in labeled training dataset
            if article_id in X_train_ids:
                idx = X_train_ids.index(article_id)
                training_feats[w_idx][idx] = score

            # check that article is in labeled holdout dataset
            if article_id in X_holdout_ids:
                idx = X_holdout_ids.index(article_id)
                holdout_feats[w_idx][idx] = score

    #     # replace articles without context word with label average
    #     avg_lib_score = np.mean([line['sentiment_score'] for line in sentiment_stats_per_article[w] if line['bias'] in ["L","LC"]])
    #     avg_conserv_score = np.mean([line['sentiment_score'] for line in sentiment_stats_per_article[w] if line['bias'] in ["R","RC"]])
    #
    #     zero_idx = np.where(training_feats==0)[0]
    #     lib_idx = np.where()
    #     training_feats = np.array(training_feats)
    #     training_feats[zero_idx]
    #     for idx in zero_idx:
    #         if
    #             training_feats[idx] = avg_lib_score

    # transform list of lists into numpy arrays so that each column is feature for word 'w'
    training_vector = np.transpose(np.array(training_feats))
    holdout_vector = np.transpose(np.array(holdout_feats))

    return training_vector, holdout_vector, sentiment_stats_per_article, word2context
## Input Chaperone 'results' data into dataframe
## Output the same Chaperone data as a series of floating point numbers

NUMER_DATA = ['D', 'E', 'F', 'G', 'AA', 'AB', 'AF', 'AR', 'AS']
CAT_ORD_DATA = [
    'H', 'I', 'O', 'AC', 'AD', 'AT', 'AU', 'AV', 'AW', 'AX', 'AY', 'AZ'
]
BIN_POS = ['J', 'K', 'L', 'M', 'R', 'S', 'T', 'U', 'AI']
BIN_PRES = ['W', 'X']
COMM_DATA = ['AE', 'BA']
ROLES = ['mentor', 'expert', 'tech', 'judge']
NLO, NHI = 1, 4
wlo, wmed, whi = 'low', 'med', 'high'
chp_sheet = 'Startup-O Chaperone Evaluation Sheet- 1 MAR 2018.csv'
res, rnk = 'results', 'Ranking test'
sia_ana = SIA()
qs = [
    'Does the management believe in building a strong in-house tech team ?',
    'Relevant Industry/market background of team',
    'How well can team execute their presented plans ?',
    'What is the level of understanding and clarity of the financial projections by the team ?',
    'Additional Comments',
    'Potential Execution Risks',
    'Are the hiring plans relevant to the roadmap and business growth ?',
    'Does he understand the business space clearly or is he just a techie ?',
    'Go to market plan strength',
    'What is the level of uniqueness in the proposition for the venture?',
    'Is it built using a relevant tech stack ? Are they using relevant technologies to build the product ?',
    'Do the tech team (and biz team) understand and follow a form of agile / lean SDLC ?',
    'How high is the business momentum and pipeline of business ?',
    'Is the team sufficiently staffed ?',
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 19 17:09:45 2020

@author: acibi
"""
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA

analyser = SIA()


def readdata():
    return pd.read_csv("reuters_data1.csv")


# Sentiment Analyzer Scores
def sas(sentence, sentiment):
    score = analyser.polarity_scores(sentence)
    return score[sentiment]


def sentiment_lister(sentiment, df):
    trlist = list()
    for sentence in df['processed_header']:
        trlist.append(sas(sentence, sentiment))
    return trlist


def add_sentiments(df):
    df['neg'] = sentiment_lister('neg', df)
    df['neu'] = sentiment_lister('neu', df)
    df['pos'] = sentiment_lister('pos', df)
    df['compound'] = sentiment_lister('compound', df)
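# Usage sketch (assumption, not part of the original module): reuters_data1.csv
# is present locally and contains the 'processed_header' column that
# sentiment_lister iterates over.
if __name__ == '__main__':
    headlines_df = readdata()
    add_sentiments(headlines_df)
    print(headlines_df[['processed_header', 'neg', 'neu', 'pos', 'compound']].head())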
import re

# base query url
# maximum size is 1000, increasing will not change number of comments returned
# score threshold set to greater than 1 to avoid problems with normalizing sentiment
base_url = "https://api.pushshift.io/reddit/search/comment/" + \
           "?q={}&" + \
           "{}" + \
           "after={}&" + \
           "before={}&" + \
           "score=>1&" + \
           "sort_type=score&" + \
           "sort=desc&" + \
           "size=1000"

analyzer = SIA()
KEYWORD = "musk"
SUBREDDITS = ["all", "politics", "the_donald", "space"]


# formats the desired subreddits to match REST query
def format_subreddit(sub):
    if sub == "all":
        return ""
    return "subreddit={}&".format(sub)


# converts unix time to string
def utc_to_str(utc_time):
    utc_time = int(utc_time)
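# Illustration of how base_url, format_subreddit, and the analyzer fit together.
# Assumptions, not from the original snippet: the timestamps, the requests call,
# and scoring each returned comment's 'body' field are illustrative only, so the
# lines are left commented out.
# import requests
# url = base_url.format(KEYWORD, format_subreddit("politics"), 1577836800, 1580515200)
# comments = requests.get(url).json().get("data", [])
# compounds = [analyzer.polarity_scores(c["body"])["compound"] for c in comments]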
# Removing Irrelevant words
tk_list = []
for item in good_list:
    grd_list = []
    for ele in item:
        if ele not in useless_words:
            grd_list.append(ele)
    tk_list.append(grd_list)

# Raw headlines to enter into our dataframe and perform sentiment analysis
res = [' '.join(ele) for ele in new_list]

# Sentiment Intensity Analyzer
sia = SIA()
data = []
for f in res:
    # Get polarity score
    result = sia.polarity_scores(f)
    # Create a new column headline
    result['headline'] = f
    data.append(result)

# Make it into pandas dataframe
df = pd.DataFrame.from_records(data)
# Order the columns
df = df[['headline', 'pos', 'neu', 'neg', 'compound']]
# Keep one similar headline and drop others
df.drop_duplicates(subset="headline", keep='first', inplace=True)
# Sorting in Ascending Order to Get Top 3 Lowest Sentiment
digi_dat = df.sort_values(by=['compound'])
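# Illustrative follow-up (not in the original snippet): since the sort above is
# ascending by compound score, the three lowest-sentiment headlines are simply
# the first three rows.
print(digi_dat.head(3))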
print(vsize)
v2i = dict([(key, i) for i, key in enumerate(vocab)])
site_raw_tc = {}
site_raw_ts = {}

bvsize = len(bivocab)
print(bvsize)
bv2i = dict([(key, i) for i, key in enumerate(bivocab)])
site_raw_bc = {}
site_raw_bs = {}

# Build arrays for every site, containing counts of the terms and the average sentiment
# Sentiment is collected for each term by adding the article's sentiment every
# time the term is detected, then dividing by the term count to get the mean
sa = SIA()
for sdom in tqdm(arts.keys()):
    # for (sdom, doc) in tqdm(arts):
    doc = arts[sdom]
    mycat = s2l[sdom]
    if mycat in cats:
        if sdom not in site_raw_tc:
            site_raw_tc[sdom] = np.zeros(vsize)
            site_raw_ts[sdom] = np.zeros(vsize)
            site_raw_bc[sdom] = np.zeros(bvsize)
            site_raw_bs[sdom] = np.zeros(bvsize)
        c = sa.polarity_scores(doc.text)['compound']
        for word in doc[:-1]:
Purpose = []
for i in result:
    Name.append(i[0][5:])
    Purpose.append(i[1][9:])
df9 = pd.DataFrame({"Name": Name, "Purpose": Purpose})

# -----------file = Company Details.txt---------#
f6 = open("Company Details.txt", "r").readlines()
Name = []
Purpose = []
for i in f6[1:]:
    if i[:5] == "Name:":
        Name.append(i[5:-1])
    elif i[:8] == "Purpose:":
        Purpose.append((i[8:]))
df10 = pd.DataFrame({"Name": Name, "Purpose": Purpose})

frames = [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10]
df = pd.concat(frames)
df = df.reset_index()
df.drop("index", axis=1, inplace=True)
# keep only the first occurrence of each company (drop_duplicates returns a copy)
df = df.drop_duplicates(subset="Name").reset_index(drop=True)

result = []
for row in df["Purpose"]:
    pol_score = SIA().polarity_scores(row)
    pol_score["Purpose"] = row
    result.append(pol_score)

df["Score"] = pd.DataFrame(result)["compound"]
df1 = df.sort_values(by="Score", ascending=False)