def run(self):
    """Main loop: consume the /r/all submission stream forever.

    Each pass opens a fresh stream and hands every submission to
    self._do_post.  Known transient network failures are logged and the
    loop backs off for RETRY_MINUTES before reconnecting; a stream that
    ends without raising is also logged (and retried) since it should
    never terminate on its own.
    """
    while True:
        posts = submission_stream(self.r, 'all', verbosity=0)
        try:
            for submission in posts:
                self._do_post(submission)
        except (HTTPException, requests.ReadTimeout, requests.ConnectionError) as err:
            # Transient network trouble: log it and fall through to the back-off.
            LOG.error('{}: {}'.format(type(err), err))
        else:
            # The generator is expected to be endless; reaching here is abnormal.
            LOG.error('Stream ended.')
        LOG.info('Sleeping for {} minutes.'.format(RETRY_MINUTES))
        sleep(60 * RETRY_MINUTES)
def stream_strategy():
    """Run comment and submission streams on daemon threads, funnelling
    every item through a shared queue into post_receiver.

    If post_receiver raises and any stream thread has died, the exception
    is escalated as KeyboardInterrupt (with the original cause chained) so
    the caller can shut the bot down; otherwise the error is printed and
    receiving continues.
    """
    from queue import Queue
    from threading import Thread
    from praw.helpers import submission_stream, comment_stream
    post_queue = Queue()
    subreddits = "+".join(SUBREDDIT_LIST)
    threads = [
        Thread(target=lambda: stream_handler(
            post_queue,
            comment_stream(r, subreddits, limit=100, verbosity=0),
            handle_comment
        )),
        Thread(target=lambda: stream_handler(
            post_queue,
            submission_stream(r, subreddits, limit=100, verbosity=0),
            handle_submission
        )),
    ]
    for thread in threads:
        # Daemon threads so the process can exit without joining them.
        thread.daemon = True
        thread.start()
    while True:
        try:
            post_receiver(post_queue)
        except Exception as e:
            for thread in threads:
                # BUGFIX: Thread.isAlive() was removed in Python 3.9;
                # is_alive() is the supported spelling on all versions.
                if not thread.is_alive():
                    # A dead stream thread means no more items will arrive:
                    # escalate so the supervisor can restart the bot.
                    raise KeyboardInterrupt from e
            bot_tools.print_exception(e)
already_processed = loadProcessed() print("%s - Starting with already processed: %s\n==========\n\n" % (datetime.now(), already_processed)) count = 0 count_actual = 0 running = True # Start up Tensorflow CNN with trained model predictor = tensorflow_chessbot.ChessboardPredictor() while running: # get submission stream try: submissions = submission_stream(r, subreddit, limit=submission_read_limit) # for each submission for submission in submissions: count += 1 # print out some debug info logInfoPerSubmission(submission, count, count_actual) # Skip if already processed if submission.id in already_processed: continue # check if submission title is a question if isPotentialChessboardTopic(submission): # Use CNN to make a prediction
# Load list of already processed comment ids already_processed = loadProcessed() print("%s - Starting with %d already processed\n==========\n\n" % (datetime.now(), len(already_processed))) count = 0 count_actual = 0 running = True # Start up Tensorflow CNN with trained model predictor = tensorflow_chessbot.ChessboardPredictor() while running: # get submission stream try: submissions = submission_stream(r, subreddit, limit=submission_read_limit) # for each submission for submission in submissions: count += 1 # print out some debug info is_processed = submission.id in already_processed logInfoPerSubmission(submission, count, count_actual, is_processed) # Skip if already processed if is_processed: continue # check if submission title is a question if isPotentialChessboardTopic(submission): # Use CNN to make a prediction
def crawl_continuously(storage_dir):
    """Poll new /r/learnprogramming submissions forever, saving each to storage_dir."""
    reddit = praw.Reddit(user_agent='SearchingReddit project 0.2 by /u/godlikesme')
    for submission in submission_stream(reddit, "learnprogramming"):
        save_submission(submission, storage_dir)
def crawl_continuously(storage_dir):
    """Poll new /r/learnprogramming submissions forever and persist each one.

    Fully expands every submission's comment tree (replace_more_comments
    with no limit) before saving, so the stored copy is complete.
    """
    reddit = praw.Reddit(user_agent='SearchingReddit project 0.2 by /u/godlikesme')
    for submission in submission_stream(reddit, "learnprogramming"):
        # Resolve all "MoreComments" stubs so the full tree is stored.
        submission.replace_more_comments(limit=None)
        save_submission(submission, storage_dir)
except: pass def has_subredditurl_in_link(submission, SUBREDDITURL): try: if (SUBREDDITURL in submission.url.lower()): print("Found url in link") return True except: pass while True: try: for submission in submission_stream(r, SUBREDDIT, 1000, 1): #for submission in submissions_between(r,SUBREDDIT,None,None,False,None,1): #for submission in r.get_subreddit(SUBREDDIT).get_new(limit=1000): #submission= r.get_submission(submission_id="4w9kzn") subredditurls = [] urlstocomment = "" #print (submission.title) if submission.over_18 != True: subredditurls = get_subredditurls(submission) for url in subredditurls: SUBREDDITURL = str(r.get_subreddit( url[3:]).url[1:-1]).lower() if (str(submission.subreddit.url[1:-1]).lower() == SUBREDDITURL): print("same sub as parent - skipped") elif (has_subredditurl_in_submissiontext(