Ejemplo n.º 1
0
    def __init__(self, pq=None, ps=None, ms=None):
        """Set up the storages, the YouTube channels handler and the process director.

        :param pq: accepted for callers but not used by this constructor.
        :param ps: posts storage to use; when falsy a ``PostsStorage`` named
            after ``self.name`` is created instead.
        :param ms: main storage to use; when falsy a ``HumanStorage`` named
            after ``self.name`` is created instead.
        """
        super(ImportantYoutubePostSupplier, self).__init__()

        # NOTE(review): self.name is presumably provided by the base class -- confirm.
        if ps:
            self.posts_storage = ps
        else:
            self.posts_storage = PostsStorage(self.name)

        if ms:
            self.main_storage = ms
        else:
            self.main_storage = HumanStorage(self.name)

        # The channels handler works on the same posts storage chosen above.
        self.posts_supplier = YoutubeChannelsHandler(self.posts_storage)

        self.pd = ProcessDirector("im po su")

        log.info("important post supplier inited")
Ejemplo n.º 2
0
    def __init__(self):
        """Create the handlers/storages and restart generation for working subs.

        Every sub whose stored generator state contains ``S_WORK`` is passed
        to ``start_generate_posts``.
        """
        self.states_handler = StatesHandler(name="post generator")
        self.generators_storage = PostsGeneratorsStorage(name="pg gens")
        self.posts_storage = PostsStorage(name="pg posts")
        self.sub_gens = {}
        self.sub_process = {}

        saved_states = self.states_handler.get_posts_generator_states()
        for sub_name, sub_state in saved_states.iteritems():
            if S_WORK not in sub_state:
                continue
            self.start_generate_posts(sub_name)
Ejemplo n.º 3
0
def fill_generated_posts_by_humans():
    """Assign a randomly chosen human to every ready post that has none.

    Looks up posts in state ``PS_READY`` without a ``human`` field, picks one
    of the humans returned by ``HumanStorage.get_humans_of_sub`` for the
    post's ``sub`` and stores it on the post. The result of each update is
    printed. Posts whose sub has no humans are left untouched.
    """
    hs = HumanStorage(name="script")
    ps = PostsStorage(hs=hs)
    for post in ps.posts.find({"human": {"$exists": False}, "state": PS_READY}):
        humans = hs.get_humans_of_sub(post.get("sub"))
        if not humans:
            # random.choice raises IndexError on an empty sequence; skip the
            # post instead of aborting the whole run.
            continue
        # Address the document by its _id: using the whole fetched document
        # as the filter requires every field (including embedded documents,
        # whose equality is field-order sensitive) to match exactly.
        result = ps.posts.update_one({"_id": post["_id"]},
                                     {"$set": {"human": random.choice(humans)}})
        print(result)
Ejemplo n.º 4
0
 def __init__(self):
     """Initialise the base classes, then the stores and the user agent."""
     super(CopyPostGenerator, self).__init__()

     relations_store_name = "copy_pg for sub relations store"
     self.sub_store = SubredditsRelationsStore(name=relations_store_name)

     self.user_agent = DEFAULT_USER_AGENT

     posts_store_name = "copy_pg for posts store"
     self.post_storage = PostsStorage(name=posts_store_name)
Ejemplo n.º 5
0
class CopyPostGenerator(RedditHandler, Generator):
    """Generate posts for related subreddits from a subreddit's hot/new posts.

    For each candidate post a title is taken from the linked page or, failing
    that, from a matching comment; accepted candidates are stored through
    ``PostsStorage.add_generated_post`` and yielded as ``PostSource`` objects.
    """

    def __init__(self):
        super(CopyPostGenerator, self).__init__()
        self.sub_store = SubredditsRelationsStore(name="copy_pg for sub relations store")
        self.user_agent = DEFAULT_USER_AGENT
        self.post_storage = PostsStorage(name="copy_pg for posts store")

    def found_copy_in_sub(self):
        """Placeholder; not implemented."""
        pass

    def get_title(self, url):
        """Fetch ``url`` and return a usable page title, or None.

        The title is the ``content`` of the first ``<meta>`` tag whose
        ``name`` contains "title", falling back to the ``<title>`` element.
        A title that shares any normalized token with the URL is rejected.
        Any failure while fetching or parsing is logged and yields None.
        """
        def check_title(title):
            # Accept the title only when it has no token in common with the URL.
            url_tokens = normalize(url, lambda x: x)
            title_tokens = normalize(title, lambda x: x)
            return not set(url_tokens).intersection(title_tokens)

        try:
            res = get(url, headers={"User-Agent": self.user_agent})
            if res.status_code != 200:
                return None

            title = None
            soup = BeautifulSoup(res.content, 'html.parser')

            for meta in soup.findAll("meta"):
                meta_name = meta.attrs.get("name")
                if meta_name and "title" in meta_name:
                    title = meta.attrs.get("content")
                    break

            if not title and soup.title:
                title = soup.title.string

            if title and check_title(title):
                return title

        except Exception as e:
            # Title extraction stays best effort (arbitrary external pages),
            # but the failure is reported instead of being dropped silently.
            import logging
            logging.getLogger(__name__).warning(
                "can not get title for %s: %s", url, e)

        return None

    def get_title_from_comments(self, post, title):
        """Return the body of a comment whose tokens equal the title's, or None.

        Returns None early when the post fails the age check, has fewer than
        10 comments, or has reports.
        """
        # NOTE(review): created_utc - time.time() is negative for any existing
        # post, so whether this guard fires depends on the sign of
        # MIN_COMMENT_CANDIDATE_DELAY -- confirm the intended direction.
        if post.created_utc - time.time() < MIN_COMMENT_CANDIDATE_DELAY:
            return None
        if post.num_comments < 10:
            return None
        if post.num_reports:
            return None

        title_tokens = normalize(title, lambda x: x)
        for comment in self.comments_sequence(post.comments):
            if isinstance(comment, MoreComments):
                continue
            # NOTE(review): same sign question as above applies to this check.
            if comment.created_utc + MIN_COMMENT_CANDIDATE_DELAY < post.created_utc:
                comment_tokens = normalize(comment.body, lambda x: x)
                if tokens_equals(title_tokens, comment_tokens):
                    return comment.body
        return None

    def generate_data(self, subreddit, key_words):
        """Yield stored ``PostSource`` objects built from ``subreddit`` posts.

        :param subreddit: subreddit whose hot and new posts are scanned.
        :param key_words: not used by this generator.

        Nothing is yielded when the subreddit has no related subs, because
        there is no target sub to pick for a generated post.
        """
        related_subs = self.sub_store.get_related_subs(subreddit)
        if not related_subs:
            # random.choice would raise IndexError on the first candidate.
            return

        hot_and_new = self.get_hot_and_new(subreddit, sort=cmp_by_created_utc)
        for post in hot_and_new:
            url_hash = URL_HASH(post.url)
            if self.post_storage.check_post_hash_exists(url_hash):
                continue
            if not MIN_RATING < post.ups < MAX_RATING:
                continue

            title = self.get_title(prepare_url(post.url))
            post_title = post.title
            # No page title, or one of the same length as the post's own
            # title: try to take a title from the comments instead.
            if not title or len(title.strip()) == len(post_title.strip()):
                title = self.get_title_from_comments(post, post_title)
            if not title or not is_valid_title(title):
                continue

            # Separate name so the reddit post is not shadowed.
            post_source = PostSource(post.url, title.strip(), for_sub=random.choice(related_subs))
            if self.post_storage.add_generated_post(post_source, subreddit, important=False):
                yield post_source
Ejemplo n.º 6
0
 def __init__(self, ps=None):
     """Build the YouTube API client and choose the posts storage.

     :param ps: posts storage to use; when falsy a ``PostsStorage`` named
         "youtube posts supplier" is created.
     """
     config = ConfigManager()
     service_name = config.get('YOUTUBE_API_SERVICE_NAME')
     developer_key = config.get('YOUTUBE_DEVELOPER_KEY')
     self.youtube = build(service_name, YOUTUBE_API_VERSION, developerKey=developer_key)

     if ps:
         self.posts_storage = ps
     else:
         self.posts_storage = PostsStorage(name="youtube posts supplier")
Ejemplo n.º 7
0
def clear_important_posts():
    ps = PostsStorage()

    for post in ps.posts.find({"important": True}):
        ps.posts.delete_one(post)
        print "delete: ", post
Ejemplo n.º 8
0
def clear_posts():
    """Delete all posts whose ``important`` field is False."""
    not_important = {"important": False}
    storage = PostsStorage()
    storage.posts.delete_many(not_important)
Ejemplo n.º 9
0
 def __init__(self):
     """Initialise the base classes, then the stores and the user agent."""
     super(CopyPostGenerator, self).__init__()

     relations_store_name = "copy_pg for sub relations store"
     self.sub_store = SubredditsRelationsStore(name=relations_store_name)

     self.user_agent = DEFAULT_USER_AGENT

     posts_store_name = "copy_pg for posts store"
     self.post_storage = PostsStorage(name=posts_store_name)
Ejemplo n.º 10
0
class CopyPostGenerator(RedditHandler, Generator):
    """Generate posts for related subreddits from a subreddit's hot/new posts.

    For each candidate post a title is taken from the linked page or, failing
    that, from a matching comment; accepted candidates are stored through
    ``PostsStorage.add_generated_post`` and yielded as ``PostSource`` objects.
    """

    def __init__(self):
        super(CopyPostGenerator, self).__init__()
        self.sub_store = SubredditsRelationsStore(
            name="copy_pg for sub relations store")
        self.user_agent = DEFAULT_USER_AGENT
        self.post_storage = PostsStorage(name="copy_pg for posts store")

    def found_copy_in_sub(self):
        """Placeholder; not implemented."""
        pass

    def get_title(self, url):
        """Fetch ``url`` and return a usable page title, or None.

        The title is the ``content`` of the first ``<meta>`` tag whose
        ``name`` contains "title", falling back to the ``<title>`` element.
        A title that shares any normalized token with the URL is rejected.
        Any failure while fetching or parsing is logged and yields None.
        """
        def check_title(title):
            # Accept the title only when it has no token in common with the URL.
            url_tokens = normalize(url, lambda x: x)
            title_tokens = normalize(title, lambda x: x)
            return not set(url_tokens).intersection(title_tokens)

        try:
            res = get(url, headers={"User-Agent": self.user_agent})
            if res.status_code != 200:
                return None

            title = None
            soup = BeautifulSoup(res.content, 'html.parser')

            for meta in soup.findAll("meta"):
                meta_name = meta.attrs.get("name")
                if meta_name and "title" in meta_name:
                    title = meta.attrs.get("content")
                    break

            if not title and soup.title:
                title = soup.title.string

            if title and check_title(title):
                return title

        except Exception as e:
            # Title extraction stays best effort (arbitrary external pages),
            # but the failure is reported instead of being dropped silently.
            import logging
            logging.getLogger(__name__).warning(
                "can not get title for %s: %s", url, e)

        return None

    def get_title_from_comments(self, post, title):
        """Return the body of a comment whose tokens equal the title's, or None.

        Returns None early when the post fails the age check, has fewer than
        10 comments, or has reports.
        """
        # NOTE(review): created_utc - time.time() is negative for any existing
        # post, so whether this guard fires depends on the sign of
        # MIN_COMMENT_CANDIDATE_DELAY -- confirm the intended direction.
        if post.created_utc - time.time() < MIN_COMMENT_CANDIDATE_DELAY:
            return None
        if post.num_comments < 10:
            return None
        if post.num_reports:
            return None

        title_tokens = normalize(title, lambda x: x)
        for comment in self.comments_sequence(post.comments):
            if isinstance(comment, MoreComments):
                continue
            # NOTE(review): same sign question as above applies to this check.
            if comment.created_utc + MIN_COMMENT_CANDIDATE_DELAY < post.created_utc:
                comment_tokens = normalize(comment.body, lambda x: x)
                if tokens_equals(title_tokens, comment_tokens):
                    return comment.body
        return None

    def generate_data(self, subreddit, key_words):
        """Yield stored ``PostSource`` objects built from ``subreddit`` posts.

        :param subreddit: subreddit whose hot and new posts are scanned.
        :param key_words: not used by this generator.

        Nothing is yielded when the subreddit has no related subs, because
        there is no target sub to pick for a generated post.
        """
        related_subs = self.sub_store.get_related_subs(subreddit)
        if not related_subs:
            # random.choice would raise IndexError on the first candidate.
            return

        hot_and_new = self.get_hot_and_new(subreddit, sort=cmp_by_created_utc)
        for post in hot_and_new:
            url_hash = URL_HASH(post.url)
            if self.post_storage.check_post_hash_exists(url_hash):
                continue
            if not MIN_RATING < post.ups < MAX_RATING:
                continue

            title = self.get_title(prepare_url(post.url))
            post_title = post.title
            # No page title, or one of the same length as the post's own
            # title: try to take a title from the comments instead.
            if not title or len(title.strip()) == len(post_title.strip()):
                title = self.get_title_from_comments(post, post_title)
            if not title or not is_valid_title(title):
                continue

            # Separate name so the reddit post is not shadowed.
            post_source = PostSource(post.url,
                                     title.strip(),
                                     for_sub=random.choice(related_subs))
            if self.post_storage.add_generated_post(post_source,
                                                    subreddit,
                                                    important=False):
                yield post_source
Ejemplo n.º 11
0
    user = g.user
    return render_template("main.html", **{"username": user.name})


# Module-level logger registered under the name "web".
log = logging.getLogger("web")

# Shared HumanStorage instance; also handed to PostsStorage below as ``hs``.
db = HumanStorage(name="hs server")

# NOTE(review): presumably the OAuth callback URL and client credentials;
# C_ID / C_SECRET start as None here -- confirm where they are filled in.
REDIRECT_URI = "http://rr-alexeyp.rhcloud.com/authorize_callback"
C_ID = None
C_SECRET = None

# Matches a (possibly empty) run of characters that are not word characters,
# digits, "_" or "-".
# NOTE(review): the pattern is not a raw string and "*" permits empty
# matches -- confirm both are intended.
splitter = re.compile('[^\w\d_-]*')

# Created once at import time; posts_storage and posts_generator are used by
# the posts view below.
srs = SubredditsRelationsStore("server")
posts_storage = PostsStorage("server", hs=db)
posts_generator = PostsGenerator()


@app.route("/posts")
@login_required
def posts():
    subs = db.get_subs_of_all_humans()
    qp_s = {}
    subs_states = {}
    for sub in subs:
        qp_s[sub] = posts_storage.get_posts_for_sub_with_state(
            sub, state=PS_PREPARED)
        subs_states[
            sub] = posts_generator.states_handler.get_posts_generator_state(
                sub) or S_STOP