import logging
import time
from datetime import datetime

import praw

# Assumed to come from the application's own modules: db (the SQLAlchemy
# session holder), r (a shared praw.Reddit instance), the models
# (TwitchStream, Submission, Streamer, Comment, User) and the helpers
# (get_or_create, get_submission_urls, get_stream_from_url,
# get_reddit_username).


def move_submissions():
    # Copy each stream's single submission_id link into its submissions
    # collection, skipping links that are already present.
    for stream in TwitchStream.query:
        if stream.submission_id:
            submission = get_or_create(Submission, submission_id=stream.submission_id)
            if submission not in stream.submissions:
                stream.submissions.append(submission)

    db.session.commit()
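# get_or_create is used throughout but not defined in this excerpt. A minimal
# sketch of the usual Flask-SQLAlchemy idiom it likely follows (an assumption,
# not the project's actual implementation):
def get_or_create(model, **kwargs):
    # Return the first row matching the given columns, creating (and adding
    # to the session) a new instance if none exists yet.
    instance = model.query.filter_by(**kwargs).first()
    if instance is None:
        instance = model(**kwargs)
        db.session.add(instance)
    return instance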
def add_submissions():
    # Link each new /r/watchpeoplecode submission to the stream(s) whose URLs
    # it contains.
    submissions = r.get_subreddit('watchpeoplecode').get_new(limit=None)
    for s in submissions:
        for url in get_submission_urls(s):
            stream = get_stream_from_url(url, s.id)
            if stream:
                submission = get_or_create(Submission, submission_id=s.id)
                if submission not in stream.submissions:
                    stream.submissions.append(submission)

    db.session.commit()
def add_streamers():
    # For streams that still lack an owner, infer the streamer's reddit
    # username from the submission and attach a Streamer record.
    submissions = r.get_subreddit('watchpeoplecode').get_new(limit=None)
    for s in submissions:
        for url in get_submission_urls(s):
            stream = get_stream_from_url(url, s.id)
            if stream and stream.streamer is None:
                reddit_username = get_reddit_username(s, url)
                if reddit_username:
                    stream.streamer = get_or_create(Streamer, reddit_username=reddit_username)
                    stream._update_status()
                    db.session.add(stream)

    db.session.commit()
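# The three functions above read like periodic sync tasks. A hedged sketch of
# one way to drive them in sequence (hypothetical; the project may use cron,
# Celery, or another scheduler instead):
def run_sync_tasks():
    for task in (move_submissions, add_submissions, add_streamers):
        try:
            task()
        except Exception as e:
            logging.exception(e)
            db.session.rollback()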
def download_thebutton(ts_interval, largest_timestamp):
    # Crawl /r/thebutton by sliding a timestamp window backwards through the
    # subreddit's history, storing every submission and comment found.
    r = praw.Reddit(user_agent='filthypresser project 0.1 by /u/godlikesme')
    if largest_timestamp is None:
        # Start the window slightly in the future so the newest posts are not missed.
        largest_timestamp = int(time.time()) + 12 * 3600
    cts2 = largest_timestamp
    cts1 = largest_timestamp - ts_interval
    current_ts_interval = ts_interval
    processed_submissions = 0
    while True:
        try:
            search_results = list(r.search('timestamp:{}..{}'.format(cts1, cts2),
                                           subreddit='thebutton', syntax='cloudsearch'))
        except Exception as e:
            logging.exception(e)
            continue

        logging.info("Got {} submissions in interval {}..{}".format(len(search_results), cts1, cts2))
        if len(search_results) == 25:
            # 25 is reddit's search page limit, so the window probably
            # overflowed: halve it and retry the same range.
            current_ts_interval /= 2
            cts1 = cts2 - current_ts_interval
            logging.debug("Reducing ts interval to {}".format(current_ts_interval))
            continue

        for s in search_results:
            # FIXME check url length etc
            try:
                # merge() acts as an upsert keyed on reddit_id.
                dbs = db.session.merge(Submission(reddit_id=s.id, score=s.score, permalink=s.permalink,
                                                  created_utc=datetime.utcfromtimestamp(s.created_utc),
                                                  url=s.url, title=s.title,
                                                  author_username=s.author.name if s.author else None,
                                                  author_flair_text=s.author_flair_text if s.author else None,
                                                  selftext=s.selftext))
                if s.author:  # author is None for deleted accounts
                    dbs.user = get_or_create(User, username=s.author.name)
                db.session.add(dbs)
                processed_submissions += 1

                # submission.replace_more_comments(limit=None)
                for c in filter(lambda c: isinstance(c, praw.objects.Comment),
                                praw.helpers.flatten_tree(s.comments)):
                    dbc = db.session.merge(Comment(reddit_id=c.id, score=c.score, permalink=c.permalink,
                                                   created_utc=datetime.utcfromtimestamp(c.created_utc),
                                                   author_username=c.author.name if c.author else None,
                                                   author_flair_text=c.author_flair_text if c.author else None,
                                                   body=c.body, parent_reddit_id=c.parent_id))
                    db.session.add(dbc)
                    dbc.submission = dbs
                    if c.author:
                        dbc.user = get_or_create(User, username=c.author.name)
            except Exception as e:
                logging.exception(e)
                db.session.rollback()

        # Slide the window one interval further back in time.
        cts2 = cts1
        cts1 = cts2 - current_ts_interval
        logging.info("Processed {} submissions so far".format(processed_submissions))
        try:
            db.session.commit()
        except Exception as e:
            logging.exception(e)
            db.session.rollback()
        if cts1 < 0:
            # The window has walked back past the Unix epoch; we are done.
            break
        if len(search_results) <= 7:
            # Sparse window: widen it so fewer API calls cover the same span.
            current_ts_interval *= 2
            logging.debug("Increasing ts interval to {}".format(current_ts_interval))
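# Hypothetical entry point: start from "now" (largest_timestamp=None) and walk
# backwards in 15-minute windows. The 900-second interval is an assumption for
# illustration, not a value from the original code.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    download_thebutton(ts_interval=900, largest_timestamp=None)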