Example #1
def backfill_comments():
	(username, password) = db.get_credentials('reddit')
	reddit.login(username, password)

	cur = db.conn.cursor()
	query = '''
		select
				id,
				userid,
				postid,
				subreddit,
				text,
				created,
				legacy,
				permalink,
				ups,
				downs
		from comments
		where legacy = 1
		order by id
	'''
	execur = cur.execute(query)
	results = execur.fetchall()

	for (commentid,
	     userid,
	     postid,
	     subreddit,
	     text,
	     created,
	     legacy,
	     permalink,
	     ups,
	     downs) in results:
		# Get comment from reddit
		post = Reddit.get('http://www.reddit.com/comments/%s/_/%s' % (postid, commentid))
		if len(post.comments) > 0:
			comment = post.comments[0]
			# Update db
			query = '''
				update comments
					set
						postid    = ?,
						subreddit = ?,
						text      = ?,
						created   = ?,
						permalink = ?,
						legacy    = 0,
						ups       = ?,
						downs     = ?
					where
						id = ?
			'''
			# legacy is hard-coded to 0 in the query above, so it has no placeholder and is not bound here
			cur.execute(query, (postid, subreddit, text, created, permalink, ups, downs, commentid))
			db.commit()
	cur.close()
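
The snippet above leans on a small db wrapper (get_credentials, conn, commit) that is not shown here. A minimal sketch of what such a wrapper might look like over sqlite3 follows; the DB class, database file name, and credentials table layout are assumptions for illustration, not the original helper.

import sqlite3

# Hypothetical stand-in for the db helper used above.
class DB(object):
	def __init__(self, path='reddit.db'):
		self.conn = sqlite3.connect(path)

	def get_credentials(self, site):
		# Assumes a 'credentials' table with (site, username, password) columns.
		cur = self.conn.cursor()
		cur.execute('select username, password from credentials where site = ?', (site,))
		row = cur.fetchone()
		cur.close()
		return row

	def commit(self):
		self.conn.commit()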
Example #2
	def is_valid_request(child, db, log):
		'''
			Ensures the request is from an account older than MINIMUM_REQUESTER_AGE days,
			and that the account's last request was over MINIMUM_REQUEST_DAYS days ago.
			If not, removes the request and comments with the reason for removal.

			Returns:
				True if post is valid request,
				False if request is not valid and was removed.
		'''
		if type(child) != Post: return True

		request_is_valid = False

		# Check if last request was < MINIMUM_REQUEST_DAYS days ago
		now = timegm(gmtime())
		for (date, permalink) in db.select('date, permalink', 'amarch_requests', 'username = ?', [child.author]):
			if date + (3600 * 24 * AmArch.MINIMUM_REQUEST_DAYS) > now:
				# Last request was < MINIMUM_REQUEST_DAYS days ago, check if the request was 'removed'
				post = Reddit.get(permalink)
				if post.banned_by is None:
					# Last request was < MINIMUM_REQUEST_DAYS days ago, wasn't removed
					child.remove(mark_as_spam=False)
					log('AmArch.is_valid_request: Request < %d days old: %s' % (AmArch.MINIMUM_REQUEST_DAYS, child.permalink()))
					body  = '## Rule: [Requests must be at least %d days apart](/r/AmateurArchives/about/sidebar)\n\n' % AmArch.MINIMUM_REQUEST_DAYS
					body += 'The [**last request**](%s) from your account was submitted %s' % (permalink, Reddit.utc_timestamp_to_hr(post.created))
					response = child.reply(body)
					response.distinguish()
					child.flair('last req < %dd' % AmArch.MINIMUM_REQUEST_DAYS)
					return False
				else:
					# XXX OPTIMIZATION
					# Last request was < MINIMUM_REQUEST_DAYS days ago but was removed,
					# so it does not count against the limit. A prior request in the DB also
					# means the account already passed the MINIMUM_REQUESTER_AGE check.
					request_is_valid = True

		if not request_is_valid:
			# Check if user is < MINIMUM_REQUESTER_AGE days old
			user = Reddit.get_user_info(child.author)
			if user.created > now - (3600 * 24 * AmArch.MINIMUM_REQUESTER_AGE):
				child.remove(mark_as_spam=False)
				log('AmArch.is_valid_request: Requester /u/%s < %d days old: %s' % (child.author, AmArch.MINIMUM_REQUESTER_AGE, child.permalink()))
				body  = '## Rule: [Requests must be from accounts more than %d days old](/r/AmateurArchives/about/sidebar)\n\n' % AmArch.MINIMUM_REQUESTER_AGE
				body += 'The account (/u/%s) was created %s.' % (child.author, Reddit.utc_timestamp_to_hr(user.created))
				response = child.reply(body)
				response.distinguish()
				child.flair('user < %dd' % AmArch.MINIMUM_REQUESTER_AGE)
				return False

		# Request is valid. Add it to the database for checking in the future
		log('AmArch.is_valid_request: Allowing request from /u/%s' % child.author)
		if db.count('amarch_requests', 'username = ?', [child.author]) == 0:
			db.insert('amarch_requests', (child.author, child.created, child.permalink()))
		else:
			db.update('amarch_requests', 'date = ?, permalink = ?', 'username = ?', [child.created, child.permalink(), child.author])
		return True
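
For context, a minimal sketch of how this check might be wired into a moderation pass over new posts. The driver function and subreddit listing URL are assumptions for illustration; Reddit.get, db, and log are taken to be the same helpers the method relies on, and is_valid_request is assumed to be callable as a static method on AmArch (it takes no self argument).

# Hypothetical driver; not part of the original class.
def check_new_requests(db, log):
	children = Reddit.get('http://www.reddit.com/r/AmateurArchives/new.json')
	for child in children:
		# is_valid_request removes and replies to invalid requests itself,
		# and records valid ones in the amarch_requests table.
		if AmArch.is_valid_request(child, db, log):
			log('Request allowed: %s' % child.permalink())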
Example #3
                oldpost['id'] = post.id.rjust(6, '0')
                oldpost['ups'] = post.ups
                oldpost['downs'] = post.downs
                Reddit.debug('updating post %s by %s' % (post.id, post.author))
                update_post(oldpost)
            db.conn.commit()
            ids_to_fetch = list()
            print 'running total: %d' % total

    if len(ids_to_fetch) > 0:
        total += len(ids_to_fetch)
        ids_to_fetch.append('1234')
        url = 'http://www.reddit.com/by_id/t3_%s.json' % ',t3_'.join(
            ids_to_fetch)
        try:
            posts = reddit.get(url)
        except HTTPError, e:
            print 'HTTPError: %s' % str(e)
            posts = []
        for post in posts:
            oldpost = {}
            oldpost['title'] = post.title
            oldpost['url'] = post.url
            oldpost['selftext'] = post.selftext
            oldpost['subreddit'] = post.subreddit
            oldpost['created'] = int(post.created)
            oldpost['permalink'] = post.permalink()
            oldpost['over_18'] = int(post.over_18)
            oldpost['legacy'] = 0
            oldpost['id'] = post.id.rjust(6, '0')
            oldpost['ups'] = post.ups
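
The fragment above batches plain post ids into a single by_id request by prefixing each id with the t3_ fullname type and joining them into one URL. A standalone sketch of that URL construction; the batch size of 100 is an assumption, not a limit enforced by the code above.

# Sketch of the by_id URL batching shown above; batch_size is an assumed cap.
def by_id_urls(post_ids, batch_size=100):
	urls = []
	for i in range(0, len(post_ids), batch_size):
		batch = post_ids[i:i + batch_size]
		fullnames = ','.join('t3_%s' % pid for pid in batch)
		urls.append('http://www.reddit.com/by_id/%s.json' % fullnames)
	return urls

# by_id_urls(['abc123', 'def456'])
# -> ['http://www.reddit.com/by_id/t3_abc123,t3_def456.json']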
Example #4
asstastic
'''

SUBS = [x.strip() for x in SUBS_TEXT.strip().split('\n')]
while '' in SUBS: SUBS.remove('')

db = DB()

last_post = db.get_config('last_post')

reddit_url = 'http://www.reddit.com/r/%s/new.json' % '+'.join(SUBS)
print 'firehose from %s' % reddit_url
while True:
	sleep(2)
	try:
		posts = Reddit.get(reddit_url)
	except Exception, e:
		#print 'error when querying %s: %s' % (reddit_url, str(e))
		continue
	for post in posts:
		if last_post is not None and post.id == last_post:
			break
		if post.selftext is not None:
			# TODO: handle self-text posts; skip them for now
			continue
		url = post.url
		shorturl  = 'http://redd.it/%s' % post.id
		subreddit = post.subreddit
		author    = post.author
		title     = post.title
		print ','.join( [shorturl, url, subreddit, author, title] )
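
One caveat with the ','.join output above: a title or author that itself contains a comma will break the column layout. A small sketch of the same row written through the csv module, which quotes such fields; the helper function is an addition for illustration, not part of the original script.

import csv
import sys

_csv_out = csv.writer(sys.stdout)

def print_post_row(shorturl, url, subreddit, author, title):
	# Quotes any field containing commas, unlike the plain ','.join above.
	_csv_out.writerow([shorturl, url, subreddit, author, title])

# Inside the loop, in place of the print:
#   print_post_row(shorturl, url, subreddit, author, title)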