def backfill_comments():
    '''
    Refreshes legacy comment rows from reddit.

    Logs in with stored credentials, walks every row in `comments` with
    legacy = 1, re-fetches the comment from reddit, then rewrites the row
    with legacy = 0. Commits once after the loop and closes the cursor.
    '''
    (username, password) = db.get_credentials('reddit')
    reddit.login(username, password)
    cur = db.conn.cursor()
    select_query = '''
        select id, userid, postid, subreddit, text,
               created, legacy, permalink, ups, downs
          from comments
         where legacy = 1
         order by id
    '''
    results = cur.execute(select_query).fetchall()
    for (commentid, userid, postid, subreddit, text,
         created, legacy, permalink, ups, downs) in results:
        # Get comment from reddit
        post = Reddit.get('http://www.reddit.com/comments/%s/_/%s'
                          % (postid, commentid))
        if len(post.comments) > 0:
            # NOTE(review): the fetched comment is never written back to
            # the row — presumably the fresh fields (text/ups/downs) were
            # meant to be used below. TODO confirm intended behavior.
            comment = post.comments[0]
        # Update db. `legacy = 0` is hard-coded in the SQL, so exactly
        # 8 parameters are bound. (Bug fix: the original also passed the
        # `legacy` column value, binding 9 parameters to 8 placeholders,
        # which raises sqlite3.ProgrammingError on every row.)
        update_query = '''
            update comments
               set postid = ?, subreddit = ?, text = ?, created = ?,
                   permalink = ?, legacy = 0, ups = ?, downs = ?
             where id = ?
        '''
        cur.execute(update_query,
                    (postid, subreddit, text, created, permalink,
                     ups, downs, commentid))
    db.commit()
    cur.close()
def is_valid_request(child, db, log):
    '''
    Ensures request is from an account older than MINIMUM_REQUESTER_AGE
    days, and the account's last request was over MINIMUM_REQUEST_DAYS
    days ago. If not, removes the request and comments with the reason
    for removal.

    Args:
        child: item to validate; anything that is not a Post passes.
        db:    database wrapper (select/count/insert/update).
        log:   logging callable taking a single message string.

    Returns:
        True if post is a valid request, False if request is not valid
        and was removed.
    '''
    # Only submissions are rate-limited; comments etc. always pass.
    # (Fix: use isinstance instead of a direct type() comparison.)
    if not isinstance(child, Post):
        return True
    request_is_valid = False
    # Check if last request was < MINIMUM_REQUEST_DAYS days ago
    now = timegm(gmtime())
    for (date, permalink) in db.select('date, permalink', 'amarch_requests',
                                       'username = ?', [child.author]):
        if date + (3600 * 24 * AmArch.MINIMUM_REQUEST_DAYS) > now:
            # Last request was < MINIMUM_REQUEST_DAYS days ago; check if
            # that request was 'removed'.
            post = Reddit.get(permalink)
            # (Fix: compare to None with `is`, not `==`.)
            if post.banned_by is None:
                # Last request was < MINIMUM_REQUEST_DAYS days ago and
                # wasn't removed: reject this one.
                child.remove(mark_as_spam=False)
                log('AmArch.is_valid_request: Request < %d days old: %s'
                    % (AmArch.MINIMUM_REQUEST_DAYS, child.permalink()))
                body = '## Rule: [Requests must be at least %d days apart](/r/AmateurArchives/about/sidebar)\n\n' % AmArch.MINIMUM_REQUEST_DAYS
                body += 'The [**last request**](%s) from your account was submitted %s' % (permalink, Reddit.utc_timestamp_to_hr(post.created))
                response = child.reply(body)
                response.distinguish()
                child.flair('last req < %dd' % AmArch.MINIMUM_REQUEST_DAYS)
                return False
            else:
                # XXX OPTIMIZATION
                # The previous request was removed; therefore the account
                # must already be > MINIMUM_REQUESTER_AGE days old, so the
                # age check below can be skipped.
                request_is_valid = True
    if not request_is_valid:
        # Check if user is < MINIMUM_REQUESTER_AGE days old
        user = Reddit.get_user_info(child.author)
        if user.created > now - (3600 * 24 * AmArch.MINIMUM_REQUESTER_AGE):
            child.remove(mark_as_spam=False)
            log('AmArch.is_valid_request: Requester /u/%s < %d days old: %s'
                % (child.author, AmArch.MINIMUM_REQUESTER_AGE,
                   child.permalink()))
            body = '## Rule: [Requests must be from accounts more than %d days old](/r/AmateurArchives/about/sidebar)\n\n' % AmArch.MINIMUM_REQUESTER_AGE
            body += 'The account (/u/%s) was created %s.' % (child.author, Reddit.utc_timestamp_to_hr(user.created))
            response = child.reply(body)
            response.distinguish()
            child.flair('user < %dd' % AmArch.MINIMUM_REQUESTER_AGE)
            return False
    # Request is valid. Add it to the database for checking in the future
    log('AmArch.is_valid_request: Allowing request from /u/%s' % child.author)
    if db.count('amarch_requests', 'username = ?', [child.author]) == 0:
        db.insert('amarch_requests',
                  (child.author, child.created, child.permalink()))
    else:
        db.update('amarch_requests', 'date = ?, permalink = ?',
                  'username = ?',
                  [child.created, child.permalink(), child.author])
    return True
def backfill_comments():
    '''
    Refreshes legacy comment rows from reddit.

    Logs in with stored credentials, walks every row in `comments` with
    legacy = 1, re-fetches the comment from reddit, then rewrites the row
    with legacy = 0. Commits once after the loop and closes the cursor.
    '''
    (username, password) = db.get_credentials('reddit')
    reddit.login(username, password)
    cur = db.conn.cursor()
    select_query = '''
        select id, userid, postid, subreddit, text,
               created, legacy, permalink, ups, downs
          from comments
         where legacy = 1
         order by id
    '''
    results = cur.execute(select_query).fetchall()
    for (commentid, userid, postid, subreddit, text,
         created, legacy, permalink, ups, downs) in results:
        # Get comment from reddit
        post = Reddit.get('http://www.reddit.com/comments/%s/_/%s'
                          % (postid, commentid))
        if len(post.comments) > 0:
            # NOTE(review): the fetched comment is never written back to
            # the row — presumably the fresh fields (text/ups/downs) were
            # meant to be used below. TODO confirm intended behavior.
            comment = post.comments[0]
        # Update db. `legacy = 0` is hard-coded in the SQL, so exactly
        # 8 parameters are bound. (Bug fix: the original also passed the
        # `legacy` column value, binding 9 parameters to 8 placeholders,
        # which raises sqlite3.ProgrammingError on every row.)
        update_query = '''
            update comments
               set postid = ?, subreddit = ?, text = ?, created = ?,
                   permalink = ?, legacy = 0, ups = ?, downs = ?
             where id = ?
        '''
        cur.execute(update_query,
                    (postid, subreddit, text, created, permalink,
                     ups, downs, commentid))
    db.commit()
    cur.close()
oldpost['id'] = post.id.rjust(6, '0') oldpost['ups'] = post.ups oldpost['downs'] = post.downs Reddit.debug('updating post %s by %s' % (post.id, post.author)) update_post(oldpost) db.conn.commit() ids_to_fetch = list() print 'running total: %d' % total if len(ids_to_fetch) > 0: total += len(ids_to_fetch) ids_to_fetch.append('1234') url = 'http://www.reddit.com/by_id/t3_%s.json' % ',t3_'.join( ids_to_fetch) try: posts = reddit.get(url) except HTTPError, e: print 'HTTPError: %s' % str(e) posts = [] for post in posts: oldpost = {} oldpost['title'] = post.title oldpost['url'] = post.url oldpost['selftext'] = post.selftext oldpost['subreddit'] = post.subreddit oldpost['created'] = int(post.created) oldpost['permalink'] = post.permalink() oldpost['over_18'] = int(post.over_18) oldpost['legacy'] = 0 oldpost['id'] = post.id.rjust(6, '0') oldpost['ups'] = post.ups
oldpost['legacy'] = 0 oldpost['id'] = post.id.rjust(6, '0') oldpost['ups'] = post.ups oldpost['downs'] = post.downs Reddit.debug('updating post %s by %s' % (post.id, post.author)) update_post(oldpost) db.conn.commit() ids_to_fetch = list() print 'running total: %d' % total if len(ids_to_fetch) > 0: total += len(ids_to_fetch) ids_to_fetch.append('1234') url = 'http://www.reddit.com/by_id/t3_%s.json' % ',t3_'.join(ids_to_fetch) try: posts = reddit.get(url) except HTTPError, e: print 'HTTPError: %s' % str(e) posts = [] for post in posts: oldpost = {} oldpost['title'] = post.title oldpost['url'] = post.url oldpost['selftext'] = post.selftext oldpost['subreddit'] = post.subreddit oldpost['created'] = int(post.created) oldpost['permalink'] = post.permalink() oldpost['over_18'] = int(post.over_18) oldpost['legacy'] = 0 oldpost['id'] = post.id.rjust(6, '0') oldpost['ups'] = post.ups
asstastic ''' SUBS = [x.strip() for x in SUBS_TEXT.strip().split('\n')] while '' in SUBS: SUBS.remove('') db = DB() last_post = db.get_config('last_post') reddit_url = 'http://www.reddit.com/r/%s/new.json' % '+'.join(SUBS) print 'firehose from %s' % reddit_url while True: sleep(2) try: posts = Reddit.get(reddit_url) except Exception, e: #print 'error when querying %s: %s' % (reddit_url, str(e)) continue for post in posts: if last_post != None and post.id == last_post: break if post.selftext != None: # TODO self-text, skip it continue url = post.url shorturl = 'http://redd.it/%s' % post.id subreddit = post.subreddit author = post.author title = post.title print ','.join( [shorturl, url, subreddit, author, title] )