Beispiel #1
0
def update_stored_posts(db, posts):
    """Re-process stored posts with the parameters of their subreddits.

    For every distinct subreddit named by ``posts`` the ``"params"`` mapping
    is read once through ``db.get_subreddit``. The collected fullnames are
    passed to ``update_posts`` and every post it returns whose subreddit has
    params is handed to ``Retriever.process_post``. A truthy result is stored
    with ``db.update_post(to_save(...))``; a falsy result removes the post via
    ``db.delete_post(fullname, video_id)``.

    :param db: storage object providing ``get_subreddit``, ``update_post``
        and ``delete_post``.
    :param posts: iterable of dict-like posts; the keys ``"subreddit"`` and
        ``"fullname"`` are read from each of them.
    """
    posts_fullnames = []
    # subreddit name -> keyword arguments for process_post, or None when the
    # subreddit is missing or has no params. Negative results are cached as
    # well, so each subreddit is looked up in the db only once.
    subreddits = {}
    for post in posts:
        subreddit = post.get("subreddit")
        if subreddit not in subreddits:
            sbrdt = db.get_subreddit(subreddit)
            params = sbrdt.get("params") if sbrdt else None
            if params:
                # Work on a copy without 'lrtime': deleting the key from the
                # shared dict (as was done per post before) raised KeyError on
                # the second post of the same subreddit and also modified the
                # object handed out by the db.
                subreddits[subreddit] = dict(
                    (key, value) for key, value in params.items()
                    if key != "lrtime")
            else:
                subreddits[subreddit] = None
        posts_fullnames.append(post.get("fullname"))

    posts = update_posts(posts_fullnames)

    for post in posts:
        process_kwargs = subreddits.get(post.get("subreddit"))
        if process_kwargs is None:
            # Unknown subreddit or subreddit without params: nothing to do.
            continue
        retriever = Retriever()
        processed_post = retriever.process_post(post, **process_kwargs)
        if processed_post:
            db.update_post(to_save(processed_post))
        else:
            db.delete_post(post.get("fullname"), post.get("video_id"))
Beispiel #2
0
def update_stored_posts(db, posts):
    """Run already stored posts through the retriever again.

    The ``"params"`` of each subreddit referenced by ``posts`` are fetched
    with ``db.get_subreddit``. The fullnames of ``posts`` are given to
    ``update_posts``; each post it returns that belongs to a subreddit with
    params is processed by ``Retriever.process_post``. When processing yields
    a truthy value it is saved through ``db.update_post(to_save(...))``,
    otherwise ``db.delete_post`` is called with the post's fullname and
    video_id.

    :param db: storage object providing ``get_subreddit``, ``update_post``
        and ``delete_post``.
    :param posts: iterable of dict-like posts; ``"subreddit"`` and
        ``"fullname"`` are read from each one.
    """
    posts_fullnames = []
    # Maps subreddit name to the kwargs for process_post. None marks a
    # subreddit that was looked up but has no params, so it is not queried
    # again for every further post.
    subreddits = {}
    for post in posts:
        subreddit = post.get("subreddit")
        if subreddit not in subreddits:
            sbrdt = db.get_subreddit(subreddit)
            sbrdt_params = sbrdt.get("params") if sbrdt else None
            if sbrdt_params:
                # Copy before dropping 'lrtime'. The previous in-place `del`
                # ran once per post on the same dict, so the second post of a
                # subreddit failed with KeyError; pop() with a default also
                # tolerates params that never contained 'lrtime'.
                process_kwargs = dict(sbrdt_params)
                process_kwargs.pop("lrtime", None)
                subreddits[subreddit] = process_kwargs
            else:
                subreddits[subreddit] = None
        posts_fullnames.append(post.get("fullname"))

    posts = update_posts(posts_fullnames)

    for post in posts:
        process_kwargs = subreddits.get(post.get("subreddit"))
        if process_kwargs is not None:
            retriever = Retriever()
            processed_post = retriever.process_post(post, **process_kwargs)
            if processed_post:
                db.update_post(to_save(processed_post))
            else:
                db.delete_post(post.get("fullname"), post.get("video_id"))
Beispiel #3
0
    def run(self):
        """Process subreddit tasks from ``self.tq`` in an endless loop.

        For every task the subreddit record is loaded from ``self.db``, its
        new posts are fetched with ``reddit_get_new`` and stored raw, the
        posts created within ``lrtime`` of the last list element are selected,
        the ones whose video id is not stored yet are run through
        ``Retriever.process_subreddit`` and saved, and finally the subreddit
        statistics and the next time step are written back.

        Any failure is logged and recorded in the subreddit's ``"error"``
        field; the loop then continues with the next task. The method never
        returns.
        """
        log.info("SPW will start...")
        while True:
            # presumably blocks until a task is available — confirm against
            # the type of self.tq
            task = self.tq.get()
            name = task.get("name")
            try:
                # Reset the status first; every failure path below overwrites
                # it with the actual error.
                self.db.update_subreddit_info(name, {"error": "All ok"})
                subreddit = self.db.get_subreddit(name)
                if not subreddit:
                    log.error("not subreddit of this name %s", name)
                    raise Exception("not subreddit o name:%s" % name)

                try:
                    posts = reddit_get_new(name)
                    self.db.add_raw_posts(name, posts)
                    if not posts:
                        raise Exception("no posts :( ")
                except Exception as e:
                    self.db.update_subreddit_info(name, {"error": str(e)})
                    log.error("can not find any posts for %s", name)
                    continue

                interested_posts = []
                interested_posts_ids = []

                params = subreddit.get("params")
                lrtime = params.get("lrtime") if params else None
                if lrtime is None:
                    # Fail with a readable message instead of the
                    # AttributeError/TypeError/IndexError this caused before.
                    raise Exception("lrtime is not configured for subreddit: %s" % name)

                first_post_created = posts[-1].get("created_utc")
                # Filter the posts: stop at the first one created lrtime or
                # more after the reference post, and skip those whose
                # video_id is already stored.
                # NOTE(review): assumes posts are ordered newest first, so
                # posts[-1] is the oldest one — confirm.
                for post in reversed(posts):
                    created_time = post.get("created_utc")
                    if (created_time - first_post_created) < lrtime:
                        if not self.db.is_post_video_id_present(post.get("video_id")):
                            interested_posts.append(post)

                        interested_posts_ids.append(post.get("fullname"))
                    else:
                        break

                count = 0
                retriever = Retriever()
                for post in retriever.process_subreddit(interested_posts, params):
                    count += 1
                    self.db.save_post(post, "observation")

                log.info("SPW for %s retrieved: %s posts \n interested posts: (%s) \n added: %s",
                         name, len(posts), len(interested_posts), count)

                if not interested_posts_ids:
                    # Happens when lrtime is not positive; previously this
                    # surfaced as a bare IndexError on [0] below.
                    raise Exception("no posts inside lrtime window for subreddit: %s" % name)

                time_window = get_current_step(posts)
                self.db.update_subreddit_info(name, {"time_window": time_window,
                                                     "count_all_posts": len(posts),
                                                     "count_interested_posts": len(interested_posts),
                                                     "statistic": retriever.statistic,
                                                     "head_post_id": interested_posts_ids[0]})

                # subreddit was loaded before the update above, so this is
                # still the head post id of the previous run.
                next_time_step = ensure_time_step(subreddit.get("head_post_id"),
                                                  first_post_created,
                                                  lrtime,
                                                  interested_posts_ids,
                                                  posts)

                log.info("SPW for %s next time step will be: %s", name, next_time_step)
                self.db.toggle_subreddit(name, next_time_step)

            except Exception as e:
                log.exception("exception with task for subreddit: {%s}\n%s", name, e)
                try:
                    # Same as the inner handler: without this the record kept
                    # the "All ok" written at the start of the iteration.
                    self.db.update_subreddit_info(name, {"error": str(e)})
                except Exception:
                    # The db itself may be the reason for the failure; the
                    # worker loop must survive that.
                    log.exception("can not store error for subreddit: %s", name)
                sleep(1)
Beispiel #4
0
    def run(self):
        """Consume subreddit tasks from ``self.tq`` forever.

        Each iteration takes one task, loads the subreddit named by it from
        ``self.db``, fetches its new posts with ``reddit_get_new`` and stores
        them raw. Posts created within ``lrtime`` of the last list element
        are selected; those whose video id is not stored yet go through
        ``Retriever.process_subreddit`` and are saved. Afterwards the
        subreddit statistics are updated and the subreddit is toggled with
        the time step computed by ``ensure_time_step``.

        Failures are logged and written to the subreddit's ``"error"`` field,
        after which the next task is taken. The method never returns.
        """
        log.info("SPW will start...")
        while True:
            # presumably a blocking queue read — confirm against self.tq
            task = self.tq.get()
            name = task.get("name")
            try:
                # Optimistic status; both exception handlers replace it with
                # the real error text when something goes wrong.
                self.db.update_subreddit_info(name, {"error": "All ok"})
                subreddit = self.db.get_subreddit(name)
                if not subreddit:
                    log.error("not subreddit of this name %s", name)
                    raise Exception("not subreddit o name:%s" % name)

                try:
                    posts = reddit_get_new(name)
                    self.db.add_raw_posts(name, posts)
                    if not posts:
                        raise Exception("no posts :( ")
                except Exception as e:
                    self.db.update_subreddit_info(name, {"error": str(e)})
                    log.error("can not find any posts for %s", name)
                    continue

                interested_posts = []
                interested_posts_ids = []

                params = subreddit.get("params")
                lrtime = params.get("lrtime") if params else None
                if lrtime is None:
                    # An explicit error replaces the AttributeError /
                    # TypeError / IndexError that a missing setting used to
                    # trigger further down.
                    raise Exception(
                        "lrtime is not configured for subreddit: %s" % name)

                first_post_created = posts[-1].get("created_utc")
                # Filter the posts: stop at the first one created lrtime or
                # more after the reference post, and skip those whose
                # video_id is already stored.
                # NOTE(review): assumes posts are ordered newest first, so
                # posts[-1] is the oldest one — confirm.
                for post in reversed(posts):
                    created_time = post.get("created_utc")
                    if (created_time - first_post_created) < lrtime:
                        if not self.db.is_post_video_id_present(
                                post.get("video_id")):
                            interested_posts.append(post)

                        interested_posts_ids.append(post.get("fullname"))
                    else:
                        break

                count = 0
                retriever = Retriever()
                for post in retriever.process_subreddit(
                        interested_posts, params):
                    count += 1
                    self.db.save_post(post, "observation")

                log.info(
                    "SPW for %s retrieved: %s posts \n interested posts: (%s) \n added: %s",
                    name, len(posts), len(interested_posts), count)

                if not interested_posts_ids:
                    # Only possible with a non-positive lrtime; used to end
                    # in a bare IndexError when taking element [0] below.
                    raise Exception(
                        "no posts inside lrtime window for subreddit: %s" %
                        name)

                time_window = get_current_step(posts)
                self.db.update_subreddit_info(
                    name, {
                        "time_window": time_window,
                        "count_all_posts": len(posts),
                        "count_interested_posts": len(interested_posts),
                        "statistic": retriever.statistic,
                        "head_post_id": interested_posts_ids[0]
                    })

                # `subreddit` predates the update above, so it still carries
                # the head post id of the previous run.
                next_time_step = ensure_time_step(
                    subreddit.get("head_post_id"), first_post_created, lrtime,
                    interested_posts_ids, posts)

                log.info("SPW for %s next time step will be: %s", name,
                         next_time_step)
                self.db.toggle_subreddit(name, next_time_step)

            except Exception as e:
                log.exception("exception with task for subreddit: {%s}\n%s",
                              name, e)
                try:
                    # Record the failure like the inner handler does;
                    # otherwise the record keeps the "All ok" set at the
                    # start of this iteration.
                    self.db.update_subreddit_info(name, {"error": str(e)})
                except Exception:
                    # The db may be what failed in the first place; keep the
                    # worker loop alive.
                    log.exception("can not store error for subreddit: %s",
                                  name)
                sleep(1)