Example #1
    def run(self):
        # Reconnect loop: if the stream raises or ends, wait and retry.
        while True:
            stream = submission_stream(self.r, 'all', verbosity=0)

            try:
                for post in stream:
                    self._do_post(post)
            except (HTTPException, requests.ReadTimeout,
                    requests.ConnectionError) as e:
                LOG.error('{}: {}'.format(type(e), e))
            else:
                # The stream generator exhausted without raising.
                LOG.error('Stream ended.')

            LOG.info('Sleeping for {} minutes.'.format(RETRY_MINUTES))
            sleep(60 * RETRY_MINUTES)
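For reference, a minimal sketch of the context this method assumes. The class, logger, and RETRY_MINUTES are not shown in the example, so every name here is an assumption; submission_stream comes from the PRAW 3.x praw.helpers module (removed in PRAW 4).

import logging
from time import sleep
from http.client import HTTPException  # httplib on Python 2

import praw
import requests
from praw.helpers import submission_stream

LOG = logging.getLogger(__name__)
RETRY_MINUTES = 5  # assumed back-off interval


class StreamBot(object):
    def __init__(self):
        # PRAW 3.x session object; run() passes it to submission_stream.
        self.r = praw.Reddit(user_agent='example-bot/0.1')

    def _do_post(self, post):
        # Per-submission processing would go here.
        pass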
Example #2
def stream_strategy():
    from queue import Queue
    from threading import Thread
    from praw.helpers import submission_stream, comment_stream

    post_queue = Queue()

    threads = []
    threads.append(Thread(target=lambda: stream_handler(
        post_queue,
        comment_stream(
            r,
            "+".join(SUBREDDIT_LIST),
            limit=100,
            verbosity=0
        ),
        handle_comment
    )))
    threads.append(Thread(target=lambda: stream_handler(
        post_queue,
        submission_stream(
            r,
            "+".join(SUBREDDIT_LIST),
            limit=100,
            verbosity=0
        ),
        handle_submission
    )))

    for thread in threads:
        thread.daemon = True
        thread.start()

    while True:
        try:
            post_receiver(post_queue)
        except Exception as e:
            for thread in threads:
                if not thread.is_alive():
                    raise KeyboardInterrupt from e
            bot_tools.print_exception(e)
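stream_strategy() leans on stream_handler and post_receiver, which this excerpt never defines. A minimal sketch of what they might look like, assuming the producer threads push (post, handler) pairs onto the queue and the main loop drains one pair at a time:

def stream_handler(queue, stream, handler):
    # Producer: forward every item from a PRAW stream into the shared
    # queue, tagged with the callback that should process it.
    for post in stream:
        queue.put((post, handler))


def post_receiver(queue):
    # Consumer: block until the next (post, handler) pair arrives,
    # then dispatch it.
    post, handler = queue.get()
    handler(post)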
Example #3
already_processed = loadProcessed()
print("%s - Starting with already processed: %s\n==========\n\n" %
      (datetime.now(), already_processed))

count = 0
count_actual = 0
running = True

# Start up Tensorflow CNN with trained model
predictor = tensorflow_chessbot.ChessboardPredictor()

while running:
    # get submission stream
    try:
        submissions = submission_stream(r,
                                        subreddit,
                                        limit=submission_read_limit)
        # for each submission
        for submission in submissions:
            count += 1
            # print out some debug info
            logInfoPerSubmission(submission, count, count_actual)

            # Skip if already processed
            if submission.id in already_processed:
                continue

            # check if submission title is a question
            if isPotentialChessboardTopic(submission):

                # Use CNN to make a prediction
Example #4
# Load list of already processed comment ids
already_processed = loadProcessed()
print("%s - Starting with %d already processed\n==========\n\n" % (datetime.now(), len(already_processed)))

count = 0
count_actual = 0
running = True

# Start up Tensorflow CNN with trained model
predictor = tensorflow_chessbot.ChessboardPredictor()

while running:
  # get submission stream
  try:
    submissions = submission_stream(r, subreddit, limit=submission_read_limit)
    # for each submission
    for submission in submissions:
      count += 1
      # print out some debug info
      is_processed = submission.id in already_processed
      logInfoPerSubmission(submission, count, count_actual, is_processed)

      # Skip if already processed
      if is_processed:
        continue
      
      # check if submission title is a question
      if isPotentialChessboardTopic(submission):
        
        # Use CNN to make a prediction
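loadProcessed() is not shown in either of the two excerpts above. A minimal sketch, assuming processed submission ids are kept one per line in a local file (the filename is an assumption):

def loadProcessed(path='processed.txt'):
    # Return the set of already-handled submission ids, or an empty
    # set on the first run when the file does not exist yet.
    try:
        with open(path) as f:
            return set(line.strip() for line in f if line.strip())
    except IOError:
        return set()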
Example #5
def crawl_continuously(storage_dir):
    r = praw.Reddit(user_agent='SearchingReddit project 0.2 by /u/godlikesme')
    for s in submission_stream(r, "learnprogramming"):
        save_submission(s, storage_dir)
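save_submission() is not defined in this example or the next. A minimal sketch, assuming each submission is dumped as a small JSON file named after its id:

import json
import os

def save_submission(submission, storage_dir):
    # Persist a few fields of the submission under storage_dir.
    path = os.path.join(storage_dir, submission.id + '.json')
    with open(path, 'w') as f:
        json.dump({'id': submission.id,
                   'title': submission.title,
                   'selftext': submission.selftext,
                   'url': submission.url}, f)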
Example #6
def crawl_continuously(storage_dir):
    r = praw.Reddit(user_agent='SearchingReddit project 0.2 by /u/godlikesme')
    for s in submission_stream(r, "learnprogramming"):
        s.replace_more_comments(limit=None)
        save_submission(s, storage_dir)
    except:
        pass


Example #7
def has_subredditurl_in_link(submission, SUBREDDITURL):
    try:
        if (SUBREDDITURL in submission.url.lower()):
            print("Found url in link")
            return True
    except:
        pass
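get_subredditurls(), used in the loop below, is also missing from this excerpt. Judging by the url[3:] slice at the call site, it returns strings shaped like "/r/<name>"; a sketch under that assumption:

import re

def get_subredditurls(submission):
    # Collect "/r/<name>" mentions from the submission's title and body.
    text = submission.title + ' ' + (submission.selftext or '')
    return ['/r/' + name for name in re.findall(r'/r/(\w+)', text)]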


while True:
    try:
        for submission in submission_stream(r, SUBREDDIT, 1000, 1):
            #for submission in submissions_between(r,SUBREDDIT,None,None,False,None,1):
            #for submission in r.get_subreddit(SUBREDDIT).get_new(limit=1000):
            #submission= r.get_submission(submission_id="4w9kzn")
            subredditurls = []
            urlstocomment = ""
            #print (submission.title)
            if not submission.over_18:
                subredditurls = get_subredditurls(submission)
                for url in subredditurls:
                    SUBREDDITURL = str(r.get_subreddit(
                        url[3:]).url[1:-1]).lower()
                    if (str(submission.subreddit.url[1:-1]).lower() ==
                            SUBREDDITURL):
                        print("same sub as parent - skipped")
                    elif (has_subredditurl_in_submissiontext(