def analyze_personality(twitter_name):
    '''
    twitter_name :: str -> str
    Determine the dominant Big Five personality trait of a user, based on a
    CSV file containing their most recent tweets.
    '''
    global personality_dict
    data = []
    csv_name = twitter_name + '_tweets.csv'
    if not os.path.isfile(csv_name):
        get_all_tweets(twitter_name)
    with open(csv_name) as file:
        reader = csv.reader(file)
        for row in reader:
            if len(row) > 0:
                # Skip tweets whose content is a link to an article/image
                text = row[2][2:-1]
                if "https" in text:
                    continue
                data.append(text)
    coae = ["conscientiousness", "openness", "agreeableness", "extraversion"]
    for i in data:
        mini_dic = indicoio.personality(i)
        result = max(mini_dic.keys(), key=(lambda k: mini_dic[k]))
        if result in coae:
            increment_personality(result)
    personality = max(personality_dict.keys(),
                      key=(lambda k: personality_dict[k]))
    clear(coae)
    return personality
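# Usage sketch for analyze_personality (an illustrative example, not part of
# the original source): it assumes the helpers it calls (get_all_tweets,
# increment_personality, clear) and the module-level personality_dict are
# defined elsewhere, and that an indico API key is configured. The handle
# below is only an example.
import indicoio
indicoio.config.api_key = 'YOUR_API_KEY'  # hypothetical placeholder

dominant_trait = analyze_personality('jack')  # e.g. returns "openness"
print(dominant_trait)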
def main():
    if len(sys.argv) != 3:
        return
    inname = sys.argv[1]
    outname = sys.argv[2]
    with open(inname, mode='r') as inFile:
        tweets = json.load(inFile)
    count = 0
    for tweet in tweets:
        result = indicoio.personality(tweet['text'])
        tweet['extraversion'] = result['extraversion']
        tweet['openness'] = result['openness']
        tweet['agreeableness'] = result['agreeableness']
        tweet['conscientiousness'] = result['conscientiousness']
        count += 1
        if count % 100 == 0:
            print(count)
    with open(outname, 'w') as outfile:
        json.dump(tweets, outfile)
def getResult(strArray):
    sent = indicoio.sentiment(strArray)
    pers = indicoio.personality(strArray)
    poli = indicoio.political(strArray)
    keyw = indicoio.keywords(strArray)
    result = dict([("sentiment", sent), ("personality", pers),
                   ("political", poli), ("keywords", keyw)])
    return result
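# Illustrative shape of getResult's return value (scores below are made up,
# not real API output): for a single input string each indicoio endpoint
# used here returns a float or a dict of label -> confidence.
example = getResult("I love my friends!")
# {
#     "sentiment": 0.97,                           # float in [0, 1]
#     "personality": {"extraversion": 0.62, ...},  # four Big Five traits
#     "political": {"Liberal": 0.41, ...},         # four alignments
#     "keywords": {"friends": 0.83, ...},          # keyword -> relevance
# }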
def test_personalities(self):
    test_string = "I love my friends!"
    response = personality(test_string)
    categories = ['extraversion', 'openness', 'agreeableness',
                  'conscientiousness']
    self.assertTrue(isinstance(response, dict))
    self.assertIsInstance(response['extraversion'], float)
    for category in categories:
        assert category in response.keys()
def test_batch_personality(self):
    test_string = "I love my friends!"
    response = personality([test_string, test_string])
    categories = ['extraversion', 'openness', 'agreeableness',
                  'conscientiousness']
    self.assertTrue(isinstance(response, list))
    self.assertIsInstance(response[0]["extraversion"], float)
    for category in categories:
        assert category in response[0].keys()
    self.assertEqual(response[0]["extraversion"], response[1]["extraversion"])
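# As the two tests above demonstrate, personality() is polymorphic over its
# input: a single string yields one dict of trait scores, while a list of
# strings yields one dict per element, in order. A quick sketch of that
# contract (input strings are illustrative):
single = personality("I love my friends!")          # dict: trait -> float
batch = personality(["first text", "second text"])  # list of such dicts
assert set(single) == {'extraversion', 'openness',
                       'agreeableness', 'conscientiousness'}
assert len(batch) == 2 and isinstance(batch[0], dict)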
def analyze_tweets_personality(self):
    try:
        self.personality_stats = Factor(
            indicoio.personality(
                self.person.all_text_as_one().content).items(),
            'Personality stats')
        self.plotter.add_factor(self.personality_stats)
    except IndicoError:
        raise PersonAnalyzerException(
            'Error while fetching data from indicoio')
def get_personality(self):
    personality_scores = [0, 0, 0, 0]
    personality_dict = indicoio.personality(self.tweet_text)
    # dict.iteritems() is Python 2 only; items() works on Python 3
    for key, value in personality_dict.items():
        if key == 'extraversion':
            personality_scores[0] += value
        elif key == 'openness':
            personality_scores[1] += value
        elif key == 'agreeableness':
            personality_scores[2] += value
        elif key == 'conscientiousness':
            personality_scores[3] += value
    return personality_scores
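# Because get_personality returns positional scores, callers must remember
# the trait order. A minimal sketch of recovering named scores; `analyzer`
# is a hypothetical instance with tweet_text already set.
TRAITS = ['extraversion', 'openness', 'agreeableness', 'conscientiousness']
scores = analyzer.get_personality()
named = dict(zip(TRAITS, scores))  # e.g. {'extraversion': 0.61, ...}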
def analyze_text(text_tweets):
    personality_scores_list = []
    emotion_scores_list = []
    personality = indicoio.personality(text_tweets)
    emotion = indicoio.emotion(text_tweets)
    for x in personality:
        personality_scores_list.append(x)
    for y in emotion:
        emotion_scores_list.append(y)
    return personality_scores_list, emotion_scores_list
def q1():
    user_input = input("My idea of a fun friday night is ___")
    print("Your input: " + str(user_input))
    emotion = indicoio.emotion(user_input)
    personality = indicoio.personality(user_input)
    personas = indicoio.personas(user_input)
    pprint(emotion)
    e_max = max(emotion, key=emotion.get)
    personas_max = max(personas, key=personas.get)
    personality_max = max(personality, key=personality.get)
    print("Congratulations, your emotion is " + str(e_max) +
          ", your personality is " + str(personality_max) +
          ", and your persona is " + str(personas_max))
def spam_filter(msg=input("Enter message = ")):
    # Note: the default argument prompts once, at definition time
    msg = TextBlob(msg)
    current_lang = msg.detect_language()
    print("Language of this message is = ", current_lang)
    # translate()/correct() return new TextBlobs; keep the results
    if current_lang != 'en':
        msg = msg.translate(to='en')
    else:
        msg = msg.correct()
    X_dtm = vect.fit_transform(X)
    test_dtm = vect.transform([str(msg)])
    model.fit(X_dtm, y)
    result = model.predict(test_dtm)
    prob = model.predict_proba(test_dtm)
    if result == [1]:
        print("SPAM ALERT!")
    else:
        print("HAM")
    predsa = clf.predict(vectsa.transform([str(msg)]))
    if predsa == [1]:
        print("Positive Feeling")
    elif predsa == [0]:
        print("Negative Feeling")
    else:
        print("Can't analyze your feeling... Try API? ...")
    senti = indicoio.sentiment_hq(str(msg))
    print("Online Help, Positivity of Incoming Message = ", senti)
    p = indicoio.personality(str(msg))
    d = []
    d.append([
        p['agreeableness'], p['conscientiousness'], p['extraversion'],
        p['openness'], msg.sentiment.polarity, msg.sentiment.subjectivity
    ])
    traits = pd.DataFrame(d, columns=[
        'agreeableness', 'conscientiousness', 'extraversion', 'openness',
        'polarity', 'subjectivity'
    ])
    print(profanity.contains_profanity(str(msg)), " Profanity")
    print(profanity.censor(str(msg)))
    print("Summarizing this message =", msg.noun_phrases)
    percent = pd.DataFrame(prob, columns=["% HAM", "% SPAM"])
    print(traits)
    print(percent)
def getPersonality(self, strArray):
    result = indicoio.personality(strArray)
    extraversion = []
    openness = []
    agreeableness = []
    conscientiousness = []
    for things in result:
        extraversion.append(things["extraversion"])
        openness.append(things["openness"])
        agreeableness.append(things["agreeableness"])
        conscientiousness.append(things["conscientiousness"])
    return [extraversion, openness, agreeableness, conscientiousness]
def process_response(response):
    print(str(response))
    questionType = response.question_id
    print("processing a " + str(questionType))
    result = {}
    user_input = response.response_text
    if questionType == "Personality":
        result = indicoio.personality(user_input)
    elif questionType == "Emotion":
        result = indicoio.emotion(user_input)
    elif questionType == "Persona":
        result = indicoio.personas(user_input)
    pprint(result)
    max_result = max(result, key=result.get)
    print("max response: " + max_result)
    return max_result
def getOverallResult(self, strArray):
    result = indicoio.personality(strArray)
    extraversion = []
    openness = []
    agreeableness = []
    conscientiousness = []
    for things in result:
        extraversion.append(things["extraversion"])
        openness.append(things["openness"])
        agreeableness.append(things["agreeableness"])
        conscientiousness.append(things["conscientiousness"])
    result = indicoio.political(strArray)
    libertarian = []
    green = []
    liberal = []
    conservative = []
    for things in result:
        libertarian.append(things["Libertarian"])
        green.append(things["Green"])
        liberal.append(things["Liberal"])
        conservative.append(things["Conservative"])
    result = indicoio.sentiment(strArray)
    return [
        result, libertarian, green, liberal, conservative,
        extraversion, openness, agreeableness, conscientiousness,
    ]
def gimme_the_goods(text, tag_count=3, persona_count=3):
    # Consume some of that api for analysis
    sentiment = indicoio.sentiment(text)
    # TODO figure out a better way to handle this bug
    political = indicoio.political(text[0:1100])
    personality = indicoio.personality(text)
    personas = indicoio.personas(text)
    tags = indicoio.text_tags(text, top_n=tag_count)
    # Sort the personas to grab top ones
    top_personas = dict(
        sorted(personas.items(), key=operator.itemgetter(1),
               reverse=True)[:persona_count])
    # Truncate the values to 3 decimals for cleanliness
    roundness = 3
    sentiment = truncate_values(sentiment, roundness)
    political = truncate_values(political, roundness)
    personality = truncate_values(personality, roundness)
    top_personas = truncate_values(top_personas, roundness)
    tags = truncate_values(tags, roundness)
    # Rearrange the personas a bit
    final_personas = []
    for key, value in top_personas.items():
        final_personas.append({
            'type': persona_mapping[key],
            'name': key,
            'value': value,
        })
    return_dict = {
        'sentiment': sentiment,
        'political': political,
        'personality': personality,
        'personas': final_personas,
        'tags': tags
    }
    return return_dict
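# Usage sketch for gimme_the_goods, assuming the helpers it references
# (persona_mapping, truncate_values) are defined elsewhere and an indico API
# key is configured; the file name below is only an example.
with open('speech.txt') as f:
    goods = gimme_the_goods(f.read(), tag_count=5, persona_count=3)
print(goods['sentiment'])                      # single positivity score
print([p['name'] for p in goods['personas']])  # top persona names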
def main_func(name):
    global data, personality
    csv_name = name + '_tweets.csv'
    if not os.path.isfile(csv_name):
        get_all_tweets(name)
    with open(csv_name) as file:
        reader = csv.reader(file)
        count = 0
        for row in reader:
            if len(row) > 0:
                text = row[2][2:-1]
                if "https" in text:
                    continue
                data.append(text)
                count += 1
                if count > 10:
                    break
    options = {
        'conscientiousness': cons,
        'openness': ope,
        'agreeableness': agr,
        'extraversion': extra
    }
    t0 = time()
    for i in data:
        mini_dic = indicoio.personality(i)
        result = max(mini_dic.keys(), key=(lambda k: mini_dic[k]))
        if result in options:
            options[result]()
    the_personality = max(personality.keys(),
                          key=(lambda k: personality[k]))
    clear()
    return the_personality
import csv
import datetime
import io
import threading
import urllib.request

import indicoio


def initialize_events_by_personality():
    '''
    Scrape the most recent events from Waterloo Open Data and use the
    indicoio personality API to assign each event a personality trait
    based on its description.
    '''
    now = datetime.datetime.now()
    event_info, all_events, event_names, processed = [], [], [], []
    # The personality traits the analysis can return:
    coae = ["conscientiousness", "openness", "agreeableness", "extraversion"]
    url = 'http://maps.waterloo.ca/OpenData/events.csv'
    with urllib.request.urlopen(url) as file:
        # urlopen yields bytes; wrap it so csv.reader gets text
        reader = csv.reader(io.TextIOWrapper(file, encoding='utf-8'))
        next(reader)
        for index, row in enumerate(reader):
            # Skip empty rows of data
            if len(row) > 0:
                # Clean up category, event description and name, which
                # contain embedded HTML tags and entities
                category = row[5].replace('<p>', '').replace('</p>', '').replace('&quot;', '')
                description = row[7].replace('<p>', '').replace('</p>', '').replace('&quot;', '')
                name = row[13].replace('<p>', '').replace('</p>', '').replace('&quot;', '')
                # Clean up date information
                dateArr = row[2][:10].split("/")
                month = int(dateArr[0])
                day = int(dateArr[1])
                year = int(dateArr[2])
                date = (day, month, year)
                # If the event has already occurred, skip it
                if datetime.date(year, month, day) < now.date():
                    continue
                try:
                    event_info.extend([name, category, description, date])
                    # If the event is already accounted for, clear
                    # event_info and go to the next row
                    if name in event_names:
                        event_info = []
                        continue
                    else:
                        event_names.append(name)
                        all_events.append(event_info)
                        event_info = []
                except UnicodeEncodeError:
                    pass
            # As this is a CSV file, a stopping row should be specified
            # or the reader will iterate over empty rows
            if index > 600:
                break
    for event in all_events:
        try:
            if event[2] == '':  # skip events with no description
                continue
            # Use the personality API to label the event
            mini_dic = indicoio.personality(str(event[2]))
            result = max(mini_dic.keys(), key=(lambda k: mini_dic[k]))
            # _ -> event name, personality, type of event, description,
            #      date tuple (day, month, year)
            _ = (event[0], result, event[1], event[2], event[3])
            processed.append(_)
        except UnicodeEncodeError:
            pass

    def createCSV():
        # Each thread pops one trait off the shared coae list
        name = coae.pop(-1)
        label = name + ".csv"
        # Create a CSV file with the events for this personality type
        with open(label, 'w', newline='') as f:
            thewriter = csv.writer(f)
            thewriter.writerow([
                "Event Name", "Personality Type", "Type of Event",
                "Description", "Day", "Month", "Year"
            ])
            for row in processed:
                if row[1] == name:
                    thewriter.writerow([
                        row[0], row[1], row[2], row[3],
                        row[4][0], row[4][1], row[4][2]
                    ])

    # Initialize threads to create an events CSV file for each personality
    t1 = threading.Thread(name='con', target=createCSV)
    t2 = threading.Thread(name='open', target=createCSV)
    t3 = threading.Thread(name='agre', target=createCSV)
    t4 = threading.Thread(name='extra', target=createCSV)
    threads = [t1, t2, t3, t4]
    for x in threads:
        x.start()
    for x in threads:
        x.join()
def personality_measure(self, text):
    return indicoio.personality(text)
            count += 1
        except UnicodeEncodeError:
            pass
        if count > 600:
            break

processed = []
for el in bigger_list:
    try:
        if el[2] == '':
            continue
        mini_dic = indicoio.personality(str(el[2]))
        result = max(mini_dic.keys(), key=(lambda k: mini_dic[k]))
        # event name, personality, type of event, description,
        # date tuple (day, month, year) -> index 0, 1, 2, 3, 4[0], 4[1], 4[2]
        _in = (el[0], result, el[1], el[2], el[3])
        processed.append(_in)
        print(_in, '\n')
    except UnicodeEncodeError:
        pass
print(len(processed))

def createCSV():
                st._json['entities']['urls'][i]['expanded_url'])
            urls.append((count - 15.12200866779725) / 11.892945461889907)
        except:
            pass
    pers_values = []
    for elem in statuses:
        txt = elem.text
        words = txt.split(' ')
        # Iterate in reverse so removals don't skip elements
        for word in words[::-1]:
            if '@' in word:
                words.remove(word)
            elif 'http' in word:
                words.remove(word)
        if words:
            try:
                pers = indicoio.personality(' '.join(words))
                pers_values.append(pers)
            except Exception as e:
                print(e)
    op = np.mean([p['openness'] for p in pers_values])
    ag = np.mean([p['agreeableness'] for p in pers_values])
    ex = np.mean([p['extraversion'] for p in pers_values])
    co = np.mean([p['conscientiousness'] for p in pers_values])
    url = np.mean(urls)
    bl_info = listing.get_fake_site_info(website[0])
    if bl_info == 0:
        person = 1
    else:
db="nytimes") cur = db.cursor() sentimentValues = [] politicalValues = [] personalityValues = [] emotionValues = [] start = 3474 itr = start for i in range(start, num + start): print("starting chunk " + str(itr) + " !") itr += 1 curText = allText[i][:1000] sentimentValues = indicoio.sentiment_hq(curText) politicalValues = indicoio.political(curText) personalityValues = indicoio.personality(curText) emotionValues = indicoio.emotion(curText) abst = repr(allText[i]).replace("'", '').replace('"', '') SQLline = 'INSERT INTO `article`(`abst`, `url`, `sent`, `serv`, `gree`, `libe`, `libt`, `agre`, `cons`, `extr`, `open`, `ange`, `fear`, `joyy`, `sadd`, `surp`) VALUES ("' + abst + '" ,"' + repr( articles[i]["url"]) + '",' + str(sentimentValues) + ',' + str( politicalValues["Conservative"] ) + ',' + str(politicalValues["Green"]) + ',' + str( politicalValues["Liberal"] ) + ',' + str(politicalValues["Libertarian"]) + ',' + str( personalityValues["agreeableness"] ) + ',' + str(personalityValues["conscientiousness"]) + ',' + str( personalityValues["extraversion"]) + ',' + str( personalityValues["openness"]) + ',' + str( emotionValues["anger"]) + ',' + str( emotionValues["fear"]) + ',' + str( emotionValues["joy"]) + ',' + str(
def info(yt_url):
    video_info = {}
    url = "https://www.youtube.com/watch?v=" + yt_url
    yt = YouTube(url)
    video_info["timestamped"] = []
    # Get the English captions and parse the SRT blocks
    a = yt.captions.get_by_language_code('en')
    caps = a.generate_srt_captions()
    caps = caps.split("\n\n")
    caps = [i.split("\n") for i in caps]
    text = ""
    for i in caps:
        for j in i[2:]:
            text += j
        line = " ".join(i[2:])
        line = re.sub(r"<[^<]+?>", '', line)
        try:
            video_info["timestamped"].append([
                i[1].split(" --> ")[0], i[1].split(" --> ")[1], line
            ])
        except Exception:
            pass
    text = re.sub(r"<[^>]*>", " ", text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r"<[^<]+?>", '', text)
    text = text.replace("...", ".")
    text = text.replace("…", "")
    text = text.replace(".", ". ")
    text = re.sub(r'\s+', ' ', text)
    sentences = nltk.sent_tokenize(text)
    video_info["full_transcript"] = text
    stopwords = nltk.corpus.stopwords.words('english')
    word_frequencies = {}
    for word in nltk.word_tokenize(text):
        if word not in stopwords:
            if word not in word_frequencies.keys():
                word_frequencies[word] = 1
            else:
                word_frequencies[word] += 1
    maximum_frequency = max(word_frequencies.values())
    for word in word_frequencies.keys():
        word_frequencies[word] = word_frequencies[word] / maximum_frequency
    sentence_scores = {}
    for sent in sentences:
        for word in nltk.word_tokenize(sent.lower()):
            if word in word_frequencies.keys():
                if len(sent.split(' ')) < 30:
                    if sent not in sentence_scores.keys():
                        sentence_scores[sent] = word_frequencies[word]
                    else:
                        sentence_scores[sent] += word_frequencies[word]
    summary_sentences = heapq.nlargest(len(sentences), sentence_scores,
                                       key=sentence_scores.get)
    video_info["summary_variable"] = summary_sentences
    politicalValues = indicoio.political(text)
    personalityValues = indicoio.personality(text)
    emotionValues = indicoio.emotion(text)
    video_info["political"] = politicalValues
    video_info["personality"] = personalityValues
    video_info["emotion"] = emotionValues
    video_info["sentiment"] = indicoio.sentiment(text)
    video_info["url"] = url

    class MyEncoder(json.JSONEncoder):
        # json can't serialize numpy scalars/arrays natively
        def default(self, obj):
            if isinstance(obj, numpy.integer):
                return int(obj)
            elif isinstance(obj, numpy.floating):
                return float(obj)
            elif isinstance(obj, numpy.ndarray):
                return obj.tolist()
            else:
                return super(MyEncoder, self).default(obj)

    return json.dumps(video_info, cls=MyEncoder)
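# Usage sketch for info(), assuming the imports the snippet relies on
# (pytube.YouTube, nltk, heapq, re, json, numpy, indicoio) are in place and
# an indico API key is set; the video id below is only an example.
import json
payload = json.loads(info("dQw4w9WgXcQ"))
print(payload["summary_variable"][:3])  # top three summary sentences
print(payload["personality"])           # Big Five scores for the transcript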
def execute(USERNAME, target, refresh):
    r_data = io_helper.read_raw(USERNAME, target)
    og = sys.stdout
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        global meta_dict
        # Print lines if input is a list of non-dicts;
        # if input is a list of dicts, merge them and resend to analysis
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)
        # If input is a dict, print k, v pairs;
        # optional args set the return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit:
                    break
                if isinstance(raw[v], float):
                    per = r'%' if percent else ''
                    print(" " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # Merge a list of dicts into the master dict
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    # Strip escape sequences and asterisks from the raw text
    rClean = ''
    for i in range(len(r_data)):
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue
        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]
    r_data = rClean
    del rClean
    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {
        'text': "Big 5 personality inventory matches: ",
        'payload': indicoio.personality(r_data)
    }
    # Myers-Briggs
    mbtiLabels = indicoio.personas(r_data)
    mbti_dict = {
        'architect': 'intj', 'logician': 'intp', 'commander': 'entj',
        'debater': 'entp', 'advocate': 'infj', 'mediator': 'infp',
        'protagonist': 'enfj', 'campaigner': 'enfp', 'logistician': 'istj',
        'defender': 'isfj', 'executive': 'estj', 'consul': 'esfj',
        'virtuoso': 'istp', 'adventurer': 'isfp', 'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    k = list(replace_mbti())
    v = map(lambda x: x, mbtiLabels.values())
    payload = dict(zip(k, v))
    mbti = {
        'text': "Most likely personality styles: ",
        'payload': payload,
        'ct': 5,
        'percent': True
    }
    # Political
    pol = {
        'text': "Political alignments: ",
        'payload': indicoio.political(r_data, version=1)
    }
    # Sentiment
    sen = {
        'text': "Sentiment: ",
        'payload': {'Percent positive': indicoio.sentiment(r_data)},
        'ct': 3
    }
    # Emotion
    emo = {
        'text': "Predominant emotions:",
        'payload': indicoio.emotion(r_data),
        'ct': 5
    }
    # Keywords
    kw = {'text': "Keywords: ", 'payload': indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {'text': "Text tags: ", 'payload': indicoio.text_tags(r_data),
          'ct': 10}
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        import praw
        kList = []
        user_agent = "N2ITN"
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100
        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (
                karma_by_subreddit.get(subreddit, 0) + thing.score)
        for w in sorted(karma_by_subreddit, key=karma_by_subreddit.get,
                        reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')
        print("\n\t".join(kList[:10]))

    def show(results):
        # Accepts a bag of dicts, or a single dict
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:
                i = results
                analysis(raw=i.get('payload', ''),
                         limit=i.get('ct', 5),
                         text=i.get('text', ''),
                         percent=i.get('percent', True))

    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        Karma(USERNAME)
    sys.stdout = og
    return
# CALL INDICO.IO PERSONALITY API
import indicoio
indicoio.config.api_key = 'ba420e48e2322e7e99b674c9d1d3a5d2'

extraversion_array = []
openness_array = []
agreeableness_array = []
conscientiousness_array = []
error2 = []
for y in range(0, 3000):
    try:
        result = indicoio.personality(array[y])
        # Round-trip through JSON to get a plain dict
        data_string = json.dumps(result)
        decoded_json = json.loads(data_string)
        extraversion_result = str(decoded_json["extraversion"] * 100)
        openness_result = str(decoded_json["openness"] * 100)
        agreeableness_result = str(decoded_json["agreeableness"] * 100)
        conscientiousness_result = str(decoded_json["conscientiousness"] * 100)
        extraversion_array.append(extraversion_result)
        openness_array.append(openness_result)
        agreeableness_array.append(agreeableness_result)
        conscientiousness_array.append(conscientiousness_result)
        print("FINISH " + str(y))
    except KeyError:
def eventPersonality(text):
    # parameter renamed to avoid shadowing the built-in str
    return indicoio.personality(text)
        if len(row) > 0:
            text = row[2][2:-1]
            data.append(text)
            count += 1
            if count > 8:
                break

personality = {'conscientiousness': 0, 'openness': 0,
               'agreeableness': 0, 'extraversion': 0}
options = {'conscientiousness': cons, 'openness': ope,
           'agreeableness': agr, 'extraversion': extra}
for i in data:
    mini_dic = indicoio.personality(i)
    # Pick the highest-scoring trait, not the alphabetically largest key
    result = max(mini_dic, key=mini_dic.get)
    if result in options:
        options[result]()
    print_count += 1
    if print_count > 5:
        break
print("Trump is a {} person".format(max(personality, key=personality.get)))

def cons():
    global personality
def evaluate(self, data_list):
    gstart = time.time()
    tweets = []
    articles = []
    tweets_text = []
    tweets_username = []
    for _, data in enumerate(data_list):
        if isinstance(data, str):
            tweets.append(
                self.api.get_status(data.split("/")[-1],
                                    tweet_mode='extended'))
        else:
            tweets.append(data)
    for element in tweets:
        articles_urls = []
        for _ in range(len(element._json['entities']['urls'])):
            articles_urls.append(
                element._json['entities']['urls'][_]['expanded_url'])
        tweets_text.append(element._json['full_text'])
        tweets_username.append(element._json['user']['id'])
        articles.extend(articles_urls)
    score = []
    user_eval = []
    article_eval = []
    page_quality = []
    urls = []
    final = []
    start = time.time()
    for us in tweets_username:
        statuses = self.api.user_timeline(us, count=50)
        pers_values = []
        for elem in statuses:
            txt = elem.text
            words = txt.split(' ')
            # Strip mentions and links before scoring personality
            for word in words[::-1]:
                if '@' in word:
                    words.remove(word)
                elif 'http' in word:
                    words.remove(word)
            if words:
                try:
                    pers = indicoio.personality(' '.join(words))
                    pers_values.append(pers)
                except Exception as e:
                    print(e)
        op = np.mean([p['openness'] for p in pers_values])
        ag = np.mean([p['agreeableness'] for p in pers_values])
        ex = np.mean([p['extraversion'] for p in pers_values])
        co = np.mean([p['conscientiousness'] for p in pers_values])
        print([op, ag, ex, co])
        user_eval.append(
            self.personality_clf.predict_proba([[ag, co, ex, op]])[0][0])
    print('personality: {}'.format(time.time() - start))
    for element in articles:
        start = time.time()
        url = unshorten_url(element)
        urls.append(url)
        print("unshortening: {}".format(time.time() - start))
        start = time.time()
        count = (self.banner_counter.iframe_detector(url) +
                 self.banner_counter.count_ads_th(url))
        page_quality.append((count - 15.12200866779725) / 11.892945461889907)
        print("banner counter: {}".format(time.time() - start))
        start = time.time()
        bl_info = listing.get_fake_site_info(url)
        if bl_info == 0:
            user_eval[0] = 1
        print("blacklisting: {}".format(time.time() - start))
        start = time.time()
        txt = scrape(url)
        print(txt)
        print("scraping: {}".format(time.time() - start))
        start = time.time()
        features = ml.extract_features(txt)
        features.append(count)
        features = np.asarray(features).reshape(1, -1)
        print("svm: {}".format(time.time() - start))
        start = time.time()
        res = self.clf.predict_proba(features)
        print("prediction: {}".format(time.time() - start))
        article_eval.append(res[0][0])
    final.append(
        self.log_reg.predict_proba(
            [[user_eval[0], article_eval[0], page_quality[0]]])[0][0])
    print("total time: {}".format(time.time() - gstart))
    print('article eval vs final score: {} vs {}'.format(
        article_eval[0], final[0]))
    score.append({
        # "tweet_url": element,               # string
        "article_url": urls[0],               # string
        "page_quality": page_quality[0],      # float
        "text_evaluation": article_eval[0],   # float
        "source_reliability": user_eval[0],   # float
        "final_score": final[0]               # combined regression output
    })
    return json.dumps(score)