def __init__(self, pq=None, ps=None, ms=None):
    """Wire up the storages, the channels handler and the process director.

    :param pq: accepted but not used by this constructor.
    :param ps: posts storage to use; when falsy a ``PostsStorage`` built
        from ``self.name`` is created instead.
    :param ms: main storage to use; when falsy a ``HumanStorage`` built
        from ``self.name`` is created instead.
    """
    super(ImportantYoutubePostSupplier, self).__init__()

    # Fall back to freshly created storages when none were injected.
    if not ps:
        ps = PostsStorage(self.name)
    self.posts_storage = ps

    if not ms:
        ms = HumanStorage(self.name)
    self.main_storage = ms

    self.posts_supplier = YoutubeChannelsHandler(self.posts_storage)
    self.pd = ProcessDirector("im po su")
    log.info("important post supplier inited")
def __init__(self):
    """Create the handler and storages, then restart generation where needed.

    Every sub whose stored generator state contains ``S_WORK`` is passed to
    ``start_generate_posts``.
    """
    self.states_handler = StatesHandler(name="post generator")
    self.generators_storage = PostsGeneratorsStorage(name="pg gens")
    self.posts_storage = PostsStorage(name="pg posts")

    self.sub_gens = {}
    self.sub_process = {}

    stored_states = self.states_handler.get_posts_generator_states()
    for sub_name, sub_state in stored_states.iteritems():
        if S_WORK not in sub_state:
            continue
        self.start_generate_posts(sub_name)
def fill_generated_posts_by_humans():
    """Assign a random human to every ready post that has none yet.

    Finds posts in state ``PS_READY`` that lack a ``human`` field and, for
    each one, stores a human picked at random from
    ``HumanStorage.get_humans_of_sub`` for the post's ``sub``. The result of
    every update is printed. Posts whose sub has no humans are reported and
    left untouched.
    """
    hs = HumanStorage(name="script")
    ps = PostsStorage(hs=hs)
    query = {"human": {"$exists": False}, "state": PS_READY}
    for post in ps.posts.find(query):
        sub = post.get("sub")
        humans = hs.get_humans_of_sub(sub)
        if not humans:
            # random.choice() raises IndexError on an empty sequence, which
            # used to abort the whole run on the first sub without humans.
            print("no humans for sub: %s" % sub)
            continue
        result = ps.posts.update_one(
            post, {"$set": {"human": random.choice(humans)}})
        print(result)
def __init__(self):
    """Initialise the parent classes, then attach the stores and user agent."""
    super(CopyPostGenerator, self).__init__()

    relations_store_name = "copy_pg for sub relations store"
    posts_store_name = "copy_pg for posts store"

    self.sub_store = SubredditsRelationsStore(name=relations_store_name)
    self.user_agent = DEFAULT_USER_AGENT
    self.post_storage = PostsStorage(name=posts_store_name)
class CopyPostGenerator(RedditHandler, Generator):
    """Generate posts by copying links from a subreddit's hot and new posts.

    For every candidate post a title is taken from the linked page or,
    failing that, from the post's comments. The result is stored as a
    generated post addressed to one of the related subreddits.
    """

    def __init__(self):
        super(CopyPostGenerator, self).__init__()
        self.sub_store = SubredditsRelationsStore(
            name="copy_pg for sub relations store")
        self.user_agent = DEFAULT_USER_AGENT
        self.post_storage = PostsStorage(name="copy_pg for posts store")

    def found_copy_in_sub(self):
        """Placeholder; does nothing."""
        pass

    def get_title(self, url):
        """Fetch ``url`` and return a title for the page, or None.

        The content of the first ``<meta>`` tag whose ``name`` contains
        "title" is preferred; the ``<title>`` element is the fallback. A
        title that shares any normalized token with the URL is rejected.
        This is best effort: any failure while fetching or parsing is logged
        and None is returned.
        """
        import logging

        def check_title(title):
            # Accept the title only if it has no token in common with the URL.
            url_tokens = normalize(url, lambda x: x)
            title_tokens = normalize(title, lambda x: x)
            return not set(url_tokens).intersection(set(title_tokens))

        try:
            res = get(url, headers={"User-Agent": self.user_agent})
            if res.status_code != 200:
                return None

            title = None
            soup = BeautifulSoup(res.content, 'html.parser')
            for meta in soup.findAll("meta"):
                meta_name = meta.attrs.get("name")
                if meta_name and "title" in meta_name:
                    title = meta.attrs.get("content")
                    break

            if not title and soup.title:
                title = soup.title.string

            if title and check_title(title):
                return title
        except Exception:
            # Stay best effort, but do not hide the failure completely.
            logging.getLogger(__name__).warning(
                "can not get title for url: %s", url, exc_info=True)
        return None

    def get_title_from_comments(self, post, title):
        """Return the body of a comment whose tokens equal the title's.

        Returns None when the time guard below triggers, when the post has
        fewer than 10 comments, when it has reports, or when no comment
        matches.
        """
        # NOTE(review): for a post created in the past this difference is
        # negative, so with a positive MIN_COMMENT_CANDIDATE_DELAY the guard
        # always returns. The operands look swapped - confirm the intent.
        if post.created_utc - time.time() < MIN_COMMENT_CANDIDATE_DELAY:
            return None
        if post.num_comments < 10:
            return None
        if post.num_reports:
            return None

        title_tokens = normalize(title, lambda x: x)
        for comment in self.comments_sequence(post.comments):
            if isinstance(comment, MoreComments):
                continue
            # NOTE(review): this only accepts comments created before the
            # post by more than the delay (given a positive delay) - confirm
            # the intended direction of the comparison.
            if comment.created_utc + MIN_COMMENT_CANDIDATE_DELAY < post.created_utc:
                comment_tokens = normalize(comment.body, lambda x: x)
                if tokens_equals(title_tokens, comment_tokens):
                    return comment.body
        return None

    def generate_data(self, subreddit, key_words):
        """Yield newly stored ``PostSource`` objects copied from ``subreddit``.

        :param subreddit: subreddit whose hot and new posts are scanned.
        :param key_words: not used by this generator.

        Posts whose URL hash is already stored, or whose ``ups`` is not
        strictly between ``MIN_RATING`` and ``MAX_RATING``, are skipped.
        Nothing is yielded when the subreddit has no related subs.
        """
        import logging

        related_subs = self.sub_store.get_related_subs(subreddit)
        if not related_subs:
            # random.choice() below raises on an empty sequence; stop before
            # doing any network work instead of failing mid-iteration.
            logging.getLogger(__name__).warning(
                "no related subs for: %s", subreddit)
            return

        hot_and_new = self.get_hot_and_new(subreddit, sort=cmp_by_created_utc)
        for post in hot_and_new:
            url_hash = URL_HASH(post.url)
            if self.post_storage.check_post_hash_exists(url_hash):
                continue
            if not (post.ups > MIN_RATING and post.ups < MAX_RATING):
                continue

            title = self.get_title(prepare_url(post.url))
            post_title = post.title
            # No page title, or one of the same length as the post's own
            # title: fall back to a title taken from the comments.
            if not title or len(title.strip()) == len(post_title.strip()):
                title = self.get_title_from_comments(post, post_title)
                if not title:
                    continue

            if title and is_valid_title(title):
                # Separate name so the loop variable is not rebound.
                generated = PostSource(post.url, title.strip(),
                                       for_sub=random.choice(related_subs))
                if self.post_storage.add_generated_post(generated, subreddit,
                                                        important=False):
                    yield generated
def __init__(self, ps=None):
    """Build the YouTube API client and bind the posts storage.

    :param ps: posts storage to use; when falsy a ``PostsStorage`` named
        "youtube posts supplier" is created instead.
    """
    config = ConfigManager()
    service_name = config.get('YOUTUBE_API_SERVICE_NAME')
    developer_key = config.get('YOUTUBE_DEVELOPER_KEY')
    self.youtube = build(service_name,
                         YOUTUBE_API_VERSION,
                         developerKey=developer_key)

    if ps:
        self.posts_storage = ps
    else:
        self.posts_storage = PostsStorage(name="youtube posts supplier")
def clear_important_posts(): ps = PostsStorage() for post in ps.posts.find({"important": True}): ps.posts.delete_one(post) print "delete: ", post
def clear_posts():
    """Delete all posts whose ``important`` field is False."""
    storage = PostsStorage()
    not_important = {"important": False}
    storage.posts.delete_many(not_important)
def __init__(self):
    """Run the parent initialisers and set up stores and the user agent."""
    super(CopyPostGenerator, self).__init__()

    # Store names are kept exactly as the storages expect them.
    sub_store_name = "copy_pg for sub relations store"
    post_storage_name = "copy_pg for posts store"

    self.sub_store = SubredditsRelationsStore(name=sub_store_name)
    self.user_agent = DEFAULT_USER_AGENT
    self.post_storage = PostsStorage(name=post_storage_name)
class CopyPostGenerator(RedditHandler, Generator):
    """Generate posts by copying links from a subreddit's hot and new posts.

    For every candidate post a title is taken from the linked page or,
    failing that, from the post's comments. The result is stored as a
    generated post addressed to one of the related subreddits.
    """

    def __init__(self):
        super(CopyPostGenerator, self).__init__()
        self.sub_store = SubredditsRelationsStore(
            name="copy_pg for sub relations store")
        self.user_agent = DEFAULT_USER_AGENT
        self.post_storage = PostsStorage(name="copy_pg for posts store")

    def found_copy_in_sub(self):
        """Placeholder; does nothing."""
        pass

    def get_title(self, url):
        """Fetch ``url`` and return a title for the page, or None.

        The content of the first ``<meta>`` tag whose ``name`` contains
        "title" is preferred; the ``<title>`` element is the fallback. A
        title that shares any normalized token with the URL is rejected.
        This is best effort: any failure while fetching or parsing is logged
        and None is returned.
        """
        import logging

        def check_title(title):
            # Accept the title only if it has no token in common with the URL.
            url_tokens = normalize(url, lambda x: x)
            title_tokens = normalize(title, lambda x: x)
            return not set(url_tokens).intersection(set(title_tokens))

        try:
            res = get(url, headers={"User-Agent": self.user_agent})
            if res.status_code != 200:
                return None

            title = None
            soup = BeautifulSoup(res.content, 'html.parser')
            for meta in soup.findAll("meta"):
                meta_name = meta.attrs.get("name")
                if meta_name and "title" in meta_name:
                    title = meta.attrs.get("content")
                    break

            if not title and soup.title:
                title = soup.title.string

            if title and check_title(title):
                return title
        except Exception:
            # Stay best effort, but do not hide the failure completely.
            logging.getLogger(__name__).warning(
                "can not get title for url: %s", url, exc_info=True)
        return None

    def get_title_from_comments(self, post, title):
        """Return the body of a comment whose tokens equal the title's.

        Returns None when the time guard below triggers, when the post has
        fewer than 10 comments, when it has reports, or when no comment
        matches.
        """
        # NOTE(review): for a post created in the past this difference is
        # negative, so with a positive MIN_COMMENT_CANDIDATE_DELAY the guard
        # always returns. The operands look swapped - confirm the intent.
        if post.created_utc - time.time() < MIN_COMMENT_CANDIDATE_DELAY:
            return None
        if post.num_comments < 10:
            return None
        if post.num_reports:
            return None

        title_tokens = normalize(title, lambda x: x)
        for comment in self.comments_sequence(post.comments):
            if isinstance(comment, MoreComments):
                continue
            # NOTE(review): this only accepts comments created before the
            # post by more than the delay (given a positive delay) - confirm
            # the intended direction of the comparison.
            if comment.created_utc + MIN_COMMENT_CANDIDATE_DELAY < post.created_utc:
                comment_tokens = normalize(comment.body, lambda x: x)
                if tokens_equals(title_tokens, comment_tokens):
                    return comment.body
        return None

    def generate_data(self, subreddit, key_words):
        """Yield newly stored ``PostSource`` objects copied from ``subreddit``.

        :param subreddit: subreddit whose hot and new posts are scanned.
        :param key_words: not used by this generator.

        Posts whose URL hash is already stored, or whose ``ups`` is not
        strictly between ``MIN_RATING`` and ``MAX_RATING``, are skipped.
        Nothing is yielded when the subreddit has no related subs.
        """
        import logging

        related_subs = self.sub_store.get_related_subs(subreddit)
        if not related_subs:
            # random.choice() below raises on an empty sequence; stop before
            # doing any network work instead of failing mid-iteration.
            logging.getLogger(__name__).warning(
                "no related subs for: %s", subreddit)
            return

        hot_and_new = self.get_hot_and_new(subreddit, sort=cmp_by_created_utc)
        for post in hot_and_new:
            url_hash = URL_HASH(post.url)
            if self.post_storage.check_post_hash_exists(url_hash):
                continue
            if not (post.ups > MIN_RATING and post.ups < MAX_RATING):
                continue

            title = self.get_title(prepare_url(post.url))
            post_title = post.title
            # No page title, or one of the same length as the post's own
            # title: fall back to a title taken from the comments.
            if not title or len(title.strip()) == len(post_title.strip()):
                title = self.get_title_from_comments(post, post_title)
                if not title:
                    continue

            if title and is_valid_title(title):
                # Separate name so the loop variable is not rebound.
                generated = PostSource(post.url, title.strip(),
                                       for_sub=random.choice(related_subs))
                if self.post_storage.add_generated_post(generated, subreddit,
                                                        important=False):
                    yield generated
user = g.user return render_template("main.html", **{"username": user.name}) log = logging.getLogger("web") db = HumanStorage(name="hs server") REDIRECT_URI = "http://rr-alexeyp.rhcloud.com/authorize_callback" C_ID = None C_SECRET = None splitter = re.compile('[^\w\d_-]*') srs = SubredditsRelationsStore("server") posts_storage = PostsStorage("server", hs=db) posts_generator = PostsGenerator() @app.route("/posts") @login_required def posts(): subs = db.get_subs_of_all_humans() qp_s = {} subs_states = {} for sub in subs: qp_s[sub] = posts_storage.get_posts_for_sub_with_state( sub, state=PS_PREPARED) subs_states[ sub] = posts_generator.states_handler.get_posts_generator_state( sub) or S_STOP