def _scan_posts(self, url, newest):
    """Scan one page of a post listing and record keyword mentions.

    Fetches ``url`` through ``self.spider._get_json`` and walks the
    returned children in order. Scanning stops at the first post whose id
    compares ``<= newest``. For every earlier post, one ``Mention`` is
    created per keyword yielded by ``self._mentioned_keywords`` for the
    lower-cased title and selftext.

    Args:
        url: Listing URL handed to ``self.spider._get_json``.
        newest: Id watermark; a post whose id is ``<=`` this value ends
            the scan.

    Returns:
        A ``(seen, after, next_newest)`` tuple where

        * ``seen`` is the number of entries from the first already-seen
          post to the end of the page (0 if the scan never hit one),
        * ``after`` is the ``data['after']`` value of the response
          (presumably the pagination cursor -- confirm against caller),
        * ``next_newest`` is the id of the first post on the page, or
          ``newest`` unchanged when the page is empty.
    """
    seen = 0
    # Fall back to the current watermark so an empty page no longer raises
    # UnboundLocalError at the return statement.
    next_newest = newest

    data = self.spider._get_json(url)
    posts = data['data']['children']
    after = data['data']['after']

    for index, child in enumerate(posts):
        post = child['data']
        post_id = post['id']

        if index == 0:
            # The first entry of the page becomes the next watermark.
            next_newest = post_id

        # NOTE(review): if ids are base-36 strings this comparison is
        # lexicographic, which differs from numeric order when the ids
        # have different lengths -- confirm.
        if post_id <= newest:
            seen = len(posts) - index
            break

        title = post['title'].lower()
        selftext = post['selftext'].lower()
        for keyword in self._mentioned_keywords(title, text2=selftext):
            # NOTE(review): the Mention is never explicitly added to the
            # session here; presumably the model registers itself on
            # construction -- confirm.
            mention = Mention()
            mention.keyword_uid = keyword.uid
            mention.thread_id = post_id
            mention.author = post['author']
            mention.subreddit = post['subreddit']
            mention.created = unix_string(int(post['created_utc']))

    session.commit()
    return (seen, after, next_newest)
def _scan_comments(self, url, newest):
    """Scan one page of a comment listing and record keyword mentions.

    Fetches ``url`` through ``self.spider._get_json`` and walks the
    returned children in order. Scanning stops at the first comment whose
    id compares ``<= newest``. For every earlier comment, one ``Mention``
    is created per keyword yielded by ``self._mentioned_keywords`` for the
    lower-cased body.

    Args:
        url: Listing URL handed to ``self.spider._get_json``.
        newest: Id watermark; a comment whose id is ``<=`` this value ends
            the scan.

    Returns:
        A ``(seen, after, next_newest)`` tuple where

        * ``seen`` is the number of entries that follow the first
          already-seen comment on the page (0 if the scan never hit one),
        * ``after`` is the ``data['after']`` value of the response
          (presumably the pagination cursor -- confirm against caller),
        * ``next_newest`` is the id of the first comment on the page, or
          ``newest`` unchanged when the page is empty.
    """
    seen = 0
    # Fall back to the current watermark so an empty page no longer raises
    # UnboundLocalError at the return statement.
    next_newest = newest

    data = self.spider._get_json(url)
    comments = data['data']['children']
    after = data['data']['after']

    for index, child in enumerate(comments):
        comment = child['data']
        comment_id = comment['id']

        if index == 0:
            # The first entry of the page becomes the next watermark.
            next_newest = comment_id

        # NOTE(review): if ids are base-36 strings this comparison is
        # lexicographic, which differs from numeric order when the ids
        # have different lengths -- confirm.
        if comment_id <= newest:
            # NOTE(review): this excludes the matching comment itself
            # (len - i - 1) whereas _scan_posts counts it (len - i);
            # confirm which is intended.
            seen = len(comments) - index - 1
            break

        body = comment['body'].lower()
        for keyword in self._mentioned_keywords(body):
            # NOTE(review): the Mention is never explicitly added to the
            # session here; presumably the model registers itself on
            # construction -- confirm.
            mention = Mention()
            mention.keyword_uid = keyword.uid
            # Drop the first three characters of link_id (presumably the
            # "t3_" type prefix) to keep only the thread id.
            mention.thread_id = comment['link_id'][3:]
            mention.comment_id = comment_id
            mention.author = comment['author']
            mention.subreddit = comment['subreddit']
            mention.created = unix_string(int(comment['created_utc']))

    session.commit()
    return (seen, after, next_newest)