def update_stored_posts(db, posts):
    """Re-process already stored posts and update or delete them in ``db``.

    For every post in ``posts`` the owning subreddit's ``params`` are looked
    up through ``db.get_subreddit``.  The posts' fullnames are then handed to
    ``update_posts`` and each post it returns is run through
    ``Retriever.process_post`` with the subreddit's params (minus
    ``lrtime``) as keyword arguments.

    * A truthy result is converted with ``to_save`` and written through
      ``db.update_post``.
    * A falsy result removes the post through ``db.delete_post``.
    * Posts whose subreddit is unknown or has no params are left untouched.

    Args:
        db: storage object providing ``get_subreddit``, ``update_post`` and
            ``delete_post``.
        posts: iterable of dict-like posts exposing ``"subreddit"`` and
            ``"fullname"`` keys.
    """
    posts_fullnames = []
    # subreddit name -> keyword arguments for ``Retriever.process_post``.
    subreddits = {}
    for post in posts:
        subreddit = post.get("subreddit")
        if subreddit not in subreddits:
            sbrdt = db.get_subreddit(subreddit)
            sbrdt_params = sbrdt.get("params") if sbrdt else None
            if sbrdt_params:
                # Build a filtered copy instead of ``del params['lrtime']``:
                # the params dict is shared by every post of the subreddit
                # (and owned by ``db``), so deleting in place raised KeyError
                # on the second post of the same subreddit.
                subreddits[subreddit] = {
                    key: value
                    for key, value in sbrdt_params.items()
                    if key != "lrtime"
                }
        posts_fullnames.append(post.get("fullname"))

    # NOTE(review): ``update_posts`` presumably re-fetches fresh post data by
    # fullname -- confirm against its definition.
    for post in update_posts(posts_fullnames):
        process_kwargs = subreddits.get(post.get("subreddit"))
        if process_kwargs is None:
            # Subreddit unknown or without params: nothing to process with.
            continue

        retriever = Retriever()
        processed_post = retriever.process_post(post, **process_kwargs)
        if processed_post:
            db.update_post(to_save(processed_post))
        else:
            db.delete_post(post.get("fullname"), post.get("video_id"))
def run(self):
    """Consume subreddit tasks from ``self.tq`` forever and store new posts.

    For each task (a dict-like object with a ``"name"`` key) the loop:

    1. resets the subreddit's ``error`` field and loads the subreddit;
    2. fetches posts with ``reddit_get_new`` and stores them raw;
    3. walks the posts from last to first, keeping those created less than
       ``lrtime`` after the last post in the list and whose ``video_id`` is
       not yet stored;
    4. passes the kept posts to ``Retriever.process_subreddit`` and saves
       every post it yields with the ``"observation"`` marker;
    5. writes statistics to the subreddit record and schedules the next
       run through ``self.db.toggle_subreddit``.

    Any exception raised while handling a task is logged and the loop
    moves on to the next task after a one second pause.  This method does
    not return.
    """
    log.info("SPW will start...")
    while True:
        task = self.tq.get()
        name = task.get("name")
        try:
            self.db.update_subreddit_info(name, {"error": "All ok"})
            subreddit = self.db.get_subreddit(name)
            if not subreddit:
                log.error("not subreddit of this name %s", name)
                raise Exception("not subreddit o name:%s" % name)

            try:
                posts = reddit_get_new(name)
                self.db.add_raw_posts(name, posts)
                if not posts:
                    raise Exception("no posts :( ")
            except Exception as e:
                # Record the failure on the subreddit and skip this task.
                self.db.update_subreddit_info(name, {"error": str(e)})
                log.error("can not find any posts for %s", name)
                continue

            interested_posts = []
            interested_posts_ids = []
            params = subreddit.get("params")
            lrtime = params.get("lrtime")
            first_post_created = posts[-1].get("created_utc")

            # Filter out posts that are too new and those whose video_id is
            # already stored.
            for post in reversed(posts):
                created_time = post.get("created_utc")
                if (created_time - first_post_created) >= lrtime:
                    # Outside the time window: stop scanning.
                    break
                if not self.db.is_post_video_id_present(post.get("video_id")):
                    interested_posts.append(post)
                    interested_posts_ids.append(post.get("fullname"))

            count = 0
            retriever = Retriever()
            for post in retriever.process_subreddit(interested_posts, params):
                count += 1
                self.db.save_post(post, "observation")

            log.info(
                "SPW for %s retrieved: %s posts \n interested posts: (%s) \n added: %s",
                name, len(posts), len(interested_posts), count)

            time_window = get_current_step(posts)
            subreddit_info = {
                "time_window": time_window,
                "count_all_posts": len(posts),
                "count_interested_posts": len(interested_posts),
                "statistic": retriever.statistic,
            }
            # Only overwrite the stored head post when a new one exists.
            # Indexing ``interested_posts_ids[0]`` unconditionally raised
            # IndexError whenever no post passed the filter, which skipped
            # both this update and the rescheduling below.
            if interested_posts_ids:
                subreddit_info["head_post_id"] = interested_posts_ids[0]
            self.db.update_subreddit_info(name, subreddit_info)

            next_time_step = ensure_time_step(
                subreddit.get("head_post_id"),
                first_post_created,
                lrtime,
                interested_posts_ids,
                posts)
            log.info("SPW for %s next time step will be: %s",
                     name, next_time_step)
            self.db.toggle_subreddit(name, next_time_step)
        except Exception as e:
            log.exception("exception with task for subreddit: {%s}\n%s", name, e)
            sleep(1)
            continue