예제 #1
0
# Twitter account settings, all left as blank placeholders in this file.
# NOTE(review): presumably these are filled in locally before running —
# confirm, and avoid committing real token values.
OAUTH_TOKEN = ''
OAUTH_TOKEN_SECRET = ''
TWITTER_HANDLE = ''  # your handle
TWITTER_ID = 0  # your id (int type)

# Handle input arguments.
# Both options are required: each value is split on "," below, so a missing
# option (argparse's default of None) would otherwise die with an
# AttributeError instead of a usage message.
argparser = argparse.ArgumentParser()
argparser.add_argument('--langs', required=True,
                       help='comma-separated language codes, one Afinn model '
                            'is built per code')
argparser.add_argument('--tags', required=True,
                       help='comma-separated list of tags')
args = argparser.parse_args()

langs = args.langs.split(",")
tags = args.tags.split(",")

# Sentiment models for `score_sentiment` function: one Afinn scorer per
# requested language, keyed by the language code.
sentiment_model = {lang: afinn.Afinn(language=lang) for lang in langs}


def score_sentiment(text, lang):
    """Return the sentiment score of ``text`` averaged over its words.

    Args:
        text: String to score; words are counted by splitting on whitespace.
        lang: Key into the module-level ``sentiment_model`` mapping.

    Returns:
        ``sentiment_model[lang].score(text)`` divided by the number of
        whitespace-separated words, or ``0.0`` when ``text`` has no words.

    Raises:
        KeyError: If ``text`` has words and ``lang`` has no entry in
            ``sentiment_model``.
    """
    word_count = len(text.split())
    if word_count == 0:
        # Empty or whitespace-only text would divide by zero; there is
        # nothing to score, so report a neutral value instead.
        return 0.0
    return sentiment_model[lang].score(text) / word_count


def wait(wait_time=900, user=None):
    """Print a rate-limit warning stating the wait time and, if given, the user.

    Args:
        wait_time: Number of seconds reported in the warning (default 900).
        user: Optional user identifier; when not None it is included in the
            message together with a "saving data" notice.

    NOTE(review): the body visible here only prints the warning; it does not
    itself sleep or save anything — confirm whether that is intended.
    """
    if user is None:
        message = "Warning: Rate limit exceeded, waiting %d seconds" % wait_time
    else:
        message = (
            "Warning: Rate limit exceeded (user: %s), saving data and waiting %d seconds"
            % (user, wait_time))
    print(message)
# Capstone Data Collection Tool

import os, json, sys, praw, afinn, spacy # pip install afinn spacy praw scipy && python -m spacy download en
from scipy.stats import zscore

# Read-only reddit client; credentials are taken from the environment
reddit = praw.Reddit(
    client_id=os.environ['REDDIT_CLIENT_ID'],
    client_secret=os.environ['REDDIT_CLIENT_SECRET'],
    user_agent='dct user agent',
)

# Two NLP processors for now because I'm not sure how good spacy's sentiment scoring is.
# NOTE: from here on the name `afinn` refers to the scorer instance, not the imported module.
afinn = afinn.Afinn(language='en', emoticons=True)
spacy_nlp = spacy.load('en')

# Positional command line arguments: subreddit, time period, number of posts
subreddit = sys.argv[1]
time = sys.argv[2]
num_posts = int(sys.argv[3])
count = 0  # compared against num_posts to stop the submission loop

# Accumulators filled while walking the submissions
submissionList = []
submissionScores = []
submissionTitleSentiments = []
submissionBodySentiments = []

# Parse requested number of submissions over requested time period.
# Iterates the subreddit's top listing for the `time` value given on the command line.
# NOTE(review): `count` is not incremented in the part of the loop visible here —
# confirm it is updated further down, otherwise the `num_posts` limit never triggers.
for submission in reddit.subreddit(subreddit).top(time):
    if count == num_posts: break # Terminate if count reached
    submission.comments.replace_more(limit=None) # No hierarchy for comments, analyze everything with equal importance

    # Save required data from post itself
    top = {'id': submission.id, 'created_utc': submission.created_utc, 'title': submission.title,
    'body': submission.selftext, 'score': submission.score, 'distinguished': submission.distinguished}
    # Save nlp metrics from post itself: raw scores from the `afinn` scorer object
    # for the title and for the self-text body
    top['title_sentiment'], top['body_sentiment'] = afinn.score(submission.title), afinn.score(submission.selftext)
    # Named entities spaCy finds in the title, stored as one {label: text} dict per entity
    entities = spacy_nlp(submission.title).ents
    top['title_entities'] = [{item.label_: item.text} for item in entities]
예제 #3
0
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 25 12:39:16 2018

@author: jeppe
"""
import afinn
import codecs
from sys import argv
import glob
import os

# Danish AFINN scorer shared by every file scored below.
af = afinn.Afinn(language="da")
path = 'instagram/scrapefiles/'
path1 = 'instagram/scrapefiles/'
text_files = glob.glob(path + "*notrash.txt")
# One entry per file in `text_files`: AFINN score divided by character count.
results = []
cities = [
    "amager", "indreby", "nørrebro", "torvehallerne", "valby", "vesterbro",
    "østerbro",
]
print(text_files)
for tf in text_files:
    # NOTE(review): this rebinds the module-level `path` (the directory above)
    # to the current file's name without extension; nothing visible here reads
    # it afterwards — confirm whether it is still needed.
    path = os.path.splitext(tf)[0]
    with codecs.open(tf, encoding="latin-1") as f:
        text = f.read()
    score = af.score(text)
    # An empty file has no characters to normalise by; record a neutral 0.0
    # instead of raising ZeroDivisionError and losing all earlier results.
    results.append(score / len(text) if text else 0.0)
print(results)
예제 #4
0
# Lazily created English scorer reused by every `afinn_elementwise` call.
_afinn_en_scorer = None


def afinn_elementwise(element):
    """Return the AFINN score of ``element`` (English, emoticons enabled).

    The scorer is built on the first call and reused afterwards, so applying
    this function to many elements does not construct a new ``afinn.Afinn``
    object for each one.

    Args:
        element: Text passed unchanged to ``Afinn.score``.

    Returns:
        Whatever ``Afinn.score`` returns for ``element``.
    """
    global _afinn_en_scorer
    if _afinn_en_scorer is None:
        _afinn_en_scorer = afinn.Afinn(language='en', emoticons=True)
    return _afinn_en_scorer.score(element)
예제 #5
0
def make_sentiment_scorer():
    """Build and return the shared sentiment scorer.

    Always create scorers through this function so every caller uses the same
    configuration (an ``afinn.Afinn`` instance with ``language="da"``).
    """
    return afinn.Afinn(language="da")