def get_tone_for_user(username, subreddit=None, comments_limit=1):
    """
    Return a list of (comment_body, tone) pairs for the user's most recent
    comments, where each tone is the parsed result of a Watson tone query.
    """
    bodies = [c.body for c in get_comments(username, comments_limit)]
    return [(body, parse(query_watson(body))) for body in bodies]
def fetch_store_comments(conn):
    """
    Download recent videos (last two weeks) for every configured YouTube
    channel, store each video row in sqlite via `conn`, then download and
    store the YouTube and Reddit comments for each video.

    :param conn: open sqlite3 connection; committed once at the end
    :return: nothing
    """
    two_weeks_ago = datetime.now() - timedelta(days=14)
    for channel in youtube.channels:
        print "Channel:", channel
        for video in youtube.get_videos(channel):
            # Video data
            video_id = video['resourceId']['videoId']
            video_url = youtube.video_url(video_id)
            # Rebind `video` to a flat dict; the literal is evaluated
            # against the original API payload before the rebinding.
            video = {
                'id': video_id,
                'channel': video['channelTitle'],
                'date': video['publishedAt'],
                'title': video['title'],
                'url': video_url,
            }
            video_date = datetime.strptime(video['date'], "%Y-%m-%dT%H:%M:%S.%fZ")
            # NOTE(review): `break` (not `continue`) assumes get_videos yields
            # newest-first, so the first too-old video ends the channel — TODO confirm.
            if video_date < two_weeks_ago:
                break
            print "Video:", video['title']
            try:
                conn.execute("""INSERT INTO video VALUES (?, ?, ?, ?, ?) """, (video['id'], video['channel'], video['date'], video['title'], video['url']))
            # Duplicate primary key means the video row already exists; skip silently.
            except sqlite3.IntegrityError, e:
                pass
            # Youtube comment data
            print " Downloading Youtube comments"
            num_comments = 0
            for comment in youtube.get_comments(video_id):
                num_comments += 1
                # Normalize the API payload into the shared comment schema.
                comment = {
                    'id': comment['id'],
                    'site': 'youtube',
                    'video': video_id,
                    'date': comment['updatedAt'],
                    'author': comment['authorDisplayName'],
                    'comment': comment['textDisplay'],
                    'url': comment['authorGoogleplusProfileUrl'],
                    'up_votes': comment['likeCount'],
                    'down_votes': 0,
                }
                # store_comment presumably returns falsy for an already-seen
                # comment; stop as soon as we hit stored history — TODO confirm.
                new_comment = store_comment(comment)
                if not new_comment:
                    break
            print " Finished downloading ({}) Youtube comments".format(num_comments)
            # Reddit comment data
            print " Downloading Reddit comments"
            num_comments = 0
            for thread in reddit.get_threads(video_url):
                for comment in reddit.get_comments(thread['id']):
                    num_comments += 1
                    # Reddit timestamps are epoch seconds (UTC); tolerate a missing field.
                    date = datetime.utcfromtimestamp(comment['created_utc']).isoformat() if 'created_utc' in comment else None
                    # Entries without a 'body' (e.g. "more comments" stubs) are
                    # counted but not stored.
                    if 'body' in comment:
                        comment = {
                            'id': comment['id'],
                            'site': 'reddit',
                            'video': video_id,
                            'date': date,
                            'author': comment['author'],
                            'comment': comment['body'],
                            'url': 'https://reddit.com/r/{}/comments/{}/{}/{}'.format(comment['subreddit'], thread['id'], comment['author'], comment['id']),
                            'up_votes': comment['ups'],
                            'down_votes': comment['downs'],
                        }
                        # Same early-exit-on-known-comment convention as above.
                        # NOTE(review): this `break` only leaves the inner loop;
                        # remaining threads are still scanned.
                        new_comment = store_comment(comment)
                        if not new_comment:
                            break
            print " Finished downloading ({}) Reddit comments".format(num_comments)
    # Single commit after all channels/videos are processed.
    conn.commit()
def grab_data(self, thread_id): """ Gets posts, inserts/updates the database, inserts history entry :return: nothing """ # threads = ['42e77i'] # init skip variable, should be false by default skip = False # empty tblHistory dict history = dict() # get created datetime for tblHistory history['created'] = datetime.utcnow() # make sure oauth tokens are good, since grabbing threads can take a while self.o.refresh() # get post data from reddit retdata = get_post_data(self.r, thread_id) # get post data from database # dbdata = get_post_data_from_db(self.Session, thread) # package the data for skip_logic # package = dict() # package['reddit'] = dict(thread_id=retdata['id'], comments=retdata['comments'], archived=retdata['archived']) # package['database'] = dbdata # if the database doesn't contain a record for the post, it will return false # if that happens then we don't want to run it through the skip logic # get comments for post from reddit data = get_comments(self.r, thread_id) # if data['status'] == 'C' then the retrieval was successful, so proceed if data['status'] == 'C': # query the database to see if the post already exists, will either get ID or None post_id = check_post_table(self.Session, thread_id) # if the post does not exist (no id returned) then insert the post data into db # you can do bulk_comment_insert on everything because this is all new data if not post_id: post_id = insert_post_data(self.Session, retdata) bulk_comment_insert(self.Session, data['comments'], post_id) else: # go through all comments and insert into database for comment in data['comments']: insert_comment_data(self.Session, comment, post_id) # get finished time for tblHistory history['finished'] = datetime.utcnow() # build tblhistory entry history['message'] = 'Fetched post ID {0} with {1} comments'.format(retdata['id'], len(data['comments'])) print(history['message']) # set status for now, until I'm able to implement error handling history['status'] = 'C' elif data['status'] == 'F': 
# data['status'] == 'F' so we build the message and send to insert_history history = dict( status=data['status'], finished=datetime.utcnow(), message=data['thread'] + ' failed due to ' + data['errormsg'] ) # insert message insert_history(self.Session, history) print("\n")