# --- Twitter API credentials (fill in before running) ---
OAUTH_TOKEN = ''
OAUTH_TOKEN_SECRET = ''
TWITTER_HANDLE = ''  # your handle
TWITTER_ID = 0  # your id (int type)

# Handle input arguments: comma-separated language codes and hashtags,
# e.g. --langs en,da --tags foo,bar
argparser = argparse.ArgumentParser()
argparser.add_argument('--langs')
argparser.add_argument('--tags')
args = argparser.parse_args()
langs = args.langs.split(",")
tags = args.tags.split(",")

# One AFINN sentiment model per requested language, used by `score_sentiment`.
sentiment_model = dict((lang, afinn.Afinn(language=lang)) for lang in langs)


def score_sentiment(text, lang):
    """Return the AFINN sentiment of *text*, normalized by word count.

    Parameters
    ----------
    text : str
        Text to score.
    lang : str
        Language code; must be one of the codes passed via --langs
        (raises KeyError otherwise).

    Returns
    -------
    float
        Total AFINN score divided by the number of whitespace-separated
        words, or 0.0 for empty/whitespace-only text (previously this
        raised ZeroDivisionError).
    """
    words = text.split()
    if not words:  # guard: nothing to normalize by
        return 0.0
    return sentiment_model[lang].score(text) / len(words)


def wait(wait_time=900, user=None):
    """Announce that the API rate limit was hit.

    NOTE(review): despite the name, only a warning is printed here — the
    actual sleep presumably happens elsewhere/after this excerpt; confirm
    against the caller.

    Parameters
    ----------
    wait_time : int
        Seconds reported in the warning message (default 900).
    user : str | None
        If given, the user whose request triggered the limit.
    """
    if user is not None:
        print(
            "Warning: Rate limit exceeded (user: %s), saving data and waiting %d seconds"
            % (user, wait_time))
    else:
        print("Warning: Rate limit exceeded, waiting %d seconds" % wait_time)
# Capstone Data Collection Tool import os, json, sys, praw, afinn, spacy # pip install afinn spacy praw scipy && python -m spacy download en from scipy.stats import zscore # Read-only reddit client reddit = praw.Reddit(client_id=os.environ['REDDIT_CLIENT_ID'], client_secret=os.environ['REDDIT_CLIENT_SECRET'], user_agent='dct user agent') # Two NLP processors for now because I'm not sure how good spacy's sentiment scoring is afinn, spacy_nlp = afinn.Afinn(language='en', emoticons=True), spacy.load('en') # Command line arguments subreddit, time, num_posts, count = sys.argv[1], sys.argv[2], int(sys.argv[3]), 0 submissionList, submissionScores = list(), list() submissionTitleSentiments, submissionBodySentiments = list(), list() # Parse requested number of submissions over requested time period for submission in reddit.subreddit(subreddit).top(time): if count == num_posts: break # Terminate if count reached submission.comments.replace_more(limit=None) # No hierarchy for comments, analyze everything with equal importance # Save required data from post itself top = {'id': submission.id, 'created_utc': submission.created_utc, 'title': submission.title, 'body': submission.selftext, 'score': submission.score, 'distinguished': submission.distinguished} # Save nlp metrics from post itself top['title_sentiment'], top['body_sentiment'] = afinn.score(submission.title), afinn.score(submission.selftext) entities = spacy_nlp(submission.title).ents top['title_entities'] = [{item.label_: item.text} for item in entities]
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 25 12:39:16 2018

@author: jeppe

Score Danish Instagram scrape files with AFINN sentiment, normalizing
each file's total score by its character count.
"""
import afinn
import codecs
from sys import argv
import glob
import os

# Danish AFINN sentiment model shared by the whole run.
af = afinn.Afinn(language="da")

path = 'instagram/scrapefiles/'
text_files = glob.glob(path + "*notrash.txt")
results = []
# City names the scrape files cover (currently informational only).
cities = [
    "amager", "indreby", "nørrebro", "torvehallerne", "valby",
    "vesterbro", "østerbro"
]

print(text_files)
for tf in text_files:
    with codecs.open(tf, encoding="latin-1") as f:
        text = f.read()
    # Normalize the file's total AFINN score by its character count.
    # Guard against empty files, which previously raised ZeroDivisionError.
    if text:
        results.append(af.score(text) / (len(text)))
    else:
        results.append(0.0)
print(results)
# Lazily-initialized shared AFINN model: building an Afinn instance loads
# the lexicon from disk, so doing it once instead of per call makes
# element-wise scoring dramatically cheaper.
_AFINN_EN = None


def afinn_elementwise(element):
    """Return the AFINN sentiment score (English, emoticons on) of *element*.

    Parameters
    ----------
    element : str
        Text to score.

    Returns
    -------
    float
        Raw AFINN score of the text.
    """
    global _AFINN_EN
    if _AFINN_EN is None:  # first call: build and cache the model
        _AFINN_EN = afinn.Afinn(language='en', emoticons=True)
    return _AFINN_EN.score(element)
def make_sentiment_scorer():
    """Build and return the Danish AFINN sentiment scorer.

    All code should obtain its scorer through this factory so the
    configuration (language="da") stays consistent everywhere.
    """
    return afinn.Afinn(language="da")