コード例 #1
0
ファイル: crawling.py プロジェクト: Kungreye/InfoCatcher
def fetch(url):
    """Parse the feed at ``url`` and store every entry as a post.

    Each entry is passed to ``Post.create_or_update``; posts for which that
    call reports success are collected in a local list.

    NOTE(review): the collected ``posts`` list is not returned anywhere in
    the visible code -- confirm whether a trailing ``return posts`` is
    missing.

    Raises:
        ValueError: if ``entry.published`` matches neither supported
            timestamp layout.
    """
    # Parse a feed from a URL, file, stream or string; returns a FeedParserDict.
    feed = feedparser.parse(url)

    posts = []

    for entry in feed.entries:  # entry-level data
        # Prefer the full content, then the summary, then the bare title.
        try:
            content = entry.content and entry.content[0].value
        except AttributeError:
            # An entry may lack a summary as well; a plain ``entry.summary``
            # would raise AttributeError from inside this handler.
            content = getattr(entry, 'summary', None) or entry.title

        try:
            created_at = datetime.strptime(entry.published,
                                           '%Y-%m-%dT%H:%M:%S.%fZ')
        except ValueError:
            created_at = datetime.strptime(
                entry.published,
                '%a, %d %b %Y %H:%M:%S %z')  # e.g.  Fri, 12 Oct ...

        try:
            tags = entry.tags
        except AttributeError:
            tags = []

        # Note: `author_id` should correspond to a registered user_id.
        ok, post = Post.create_or_update(author_id=1,
                                         title=entry.title,
                                         orig_url=entry.link,
                                         content=strip_tags(content),
                                         created_at=created_at,
                                         tags=[tag.term for tag in tags])
        if ok:
            posts.append(post)
コード例 #2
0
def fetch(url):
    """Parse the feed at ``url`` and store every entry as a post.

    Entries that carry no tags get between 0 and 5 terms drawn at random
    from a fixed vocabulary. Every entry is handed to
    ``Post.create_or_update`` with ``author_id=6``.

    Raises:
        ValueError: if ``entry.published`` matches neither supported
            timestamp layout.
    """
    # Only the term strings are ever used, so sample them directly instead of
    # re-declaring a throwaway ``Tag`` dataclass for every tag-less entry.
    fallback_terms = ['python', 'amazon', 'golang', 'php', 'docker', 'wechat',
                      'android', 'wiki', 'c++', 'k8s', 'mac', 'linux',
                      'HDFS']

    for entry in feedparser.parse(url).entries:
        # Prefer the full content, then the summary, then the bare title.
        try:
            content = entry.content and entry.content[0].value
        except AttributeError:
            # An entry may lack a summary as well; a plain ``entry.summary``
            # would raise AttributeError from inside this handler.
            content = getattr(entry, 'summary', None) or entry.title

        try:
            created_at = datetime.strptime(entry.published, '%Y-%m-%dT%H:%M:%S.%fZ')
        except ValueError:
            created_at = datetime.strptime(entry.published, '%a, %d %b %Y %H:%M:%S %z')

        try:
            tags = entry.tags
        except AttributeError:
            terms = random.sample(fallback_terms, k=random.randint(0, 5))
        else:
            terms = [tag.term for tag in tags]

        Post.create_or_update(
            author_id=6, title=entry.title, orig_url=entry.link,
            content=strip_tags(content), created_at=created_at,
            tags=terms)
コード例 #3
0
 def test_create_post(self):
     """A created post exposes the expected URL and content, also after a re-query."""
     expected_url = '/post/1/'
     body = "好高兴呀"
     _, created = Post.create_or_update(author_id=2, title="帖子", content=body)
     assert created.url() == expected_url
     assert created.content == body

     # Load the same row again through the query interface.
     fetched = Post.query.filter(Post.id == created.id).first()
     assert fetched.url() == expected_url
     # Post inherits from db.Model, and db.Model is set to BaseModel in ext.py;
     # BaseModel correctly handles the fields that are stored in redis.
     assert fetched.content == body
コード例 #4
0
def get_user_feed(from_id, page):
    """Return one page of posts from the user's `FEED_KEY` sorted set.

    When the per-user `ACTIVITY_UPDATED_KEY` flag is absent, everything from
    ``ActivityFeed.get_all()`` is first merged into the feed and the flag is
    set with a five-minute expiry.
    """
    feed_key = FEED_KEY.format(from_id)
    refreshed_flag = ACTIVITY_UPDATED_KEY.format(from_id)

    if not rdb.get(refreshed_flag):
        activity = ActivityFeed.get_all()
        if activity:
            rdb.zadd(feed_key, dict(activity))
        rdb.set(refreshed_flag, 1, ex=ONE_MINUTE * 5)

    # Rank window for the requested page.
    first = (page - 1) * PER_PAGE
    last = first + PER_PAGE - 1
    raw_ids = rdb.zrange(feed_key, first, last)
    posts = Post.get_multi([int(raw) for raw in raw_ids])
    return Pagination(None, page, PER_PAGE, rdb.zcard(feed_key), posts)
コード例 #5
0
def get_user_feed(user_id, page):
    """Return one page of posts from the user's `FEED_KEY` sorted set.

    When the per-user `ACTIVITY_UPDATED_KEY` flag is absent, the
    ``(id, time)`` pairs from ``ActivityFeed.get_all()`` are merged into the
    feed and the flag is set with a five-minute expiry, so the merge runs at
    most once per expiry window.

    Args:
        user_id: value formatted into the feed and flag key templates.
        page: 1-based page number.

    Returns:
        A ``Pagination`` holding at most ``PER_PAGE`` posts plus the total
        number of members in the feed.
    """
    feed_key = FEED_KEY.format(user_id)
    update_key = ACTIVITY_UPDATED_KEY.format(user_id)
    # Refresh only while the flag is missing. The previous `if rdb.get(...)`
    # test was inverted: the flag is set only inside this branch, so the
    # merge could never run on a fresh key.
    if not rdb.get(update_key):
        items = ActivityFeed.get_all()
        if items:
            # Flatten to score1, member1, score2, member2, ... in linear time
            # (summing tuples is quadratic).
            score_member_args = []
            for member, score in items:
                score_member_args.extend((int(score), member))
            rdb.zadd(feed_key, *score_member_args)
        rdb.set(update_key, 1, ex=ONE_MINUTE * 5)
    start = (page - 1) * PER_PAGE
    # ZRANGE treats the stop index as inclusive, so subtract one to return
    # exactly PER_PAGE members.
    end = start + PER_PAGE - 1
    post_ids = rdb.zrange(feed_key, start, end)
    items = Post.get_multi([int(post_id) for post_id in post_ids])
    total = rdb.zcard(feed_key)
    return Pagination(None, page, PER_PAGE, total, items)
コード例 #6
0
def tag(ident):
    """Render ``tag.html`` for the tag identified by ``ident``.

    The tag is looked up by name first and then via ``Tag.get``; the request
    is aborted with 404 when both lookups come back empty. The ``type``
    query argument (``hot`` by default, or ``latest``) selects the listing.
    """
    ident = ident.lower()
    tag_obj = Tag.get_by_name(ident) or Tag.get(ident)
    if not tag_obj:
        abort(404)

    page = request.args.get('page', default=1, type=int)
    listing = request.args.get('type', default='hot')  # hot/latest

    if listing == 'hot':
        # Looked up from Elasticsearch (per the original comment).  # noqa
        posts = Item.get_post_ids_by_tag(ident, page, listing)
        posts.items = Post.get_multi(posts.items)
    elif listing == 'latest':
        posts = PostTag.get_posts_by_tag(ident, page)
    else:
        # Unknown type
        posts = []

    # Per the original comment, the template tolerates a wrongly typed
    # `posts`, even when an empty list is passed.
    return render_template('tag.html', tag=tag_obj, ident=ident, posts=posts,
                           type=listing)
コード例 #7
0
def tag(identifier):
    """Render ``tag.html`` for the tag identified by ``identifier``.

    The tag is looked up by name first and then via ``Tag.get_or_404``
    (presumably aborting with 404 when missing -- confirm against the
    model). The ``type`` query argument (``latest`` by default, or ``hot``)
    selects the listing; any other value renders an empty list.
    """
    identifier = identifier.lower()
    tag_obj = Tag.get_by_name(identifier) or Tag.get_or_404(identifier)

    page = request.args.get("page", type=int, default=1)
    listing = request.args.get("type", default="latest")

    posts = []
    if listing == "latest":
        posts = PostTag.get_post_by_tag(identifier, page)
    elif listing == "hot":
        posts = Item.get_post_ids_by_tag(tag_obj, page, order_by="hot")
        # Replace the ids carried by the result with the posts themselves.
        posts.items = Post.get_multi(posts.items)

    return render_template("tag.html", tag=tag_obj, identifier=identifier,
                           posts=posts, type=listing)
コード例 #8
0
ファイル: post_api.py プロジェクト: VanLiuZhi/flask_starlight
 def getPostList(self):
     """Return a page of posts selected by the ``type`` field of the JSON request body.

     ``normal`` lists posts, ``search`` delegates to ``self.searchPost`` and
     ``tag`` lists posts for the lower-cased ``query_str``. Any other type
     returns ``None``.
     """
     logging.info('getPostList')
     payload = request.json
     wanted = ['type', 'page', 'limit', 'query_str', 'orderby']
     list_type, page, limit, query_str, order_by = get_params_to_dict(
         payload, wanted)

     if list_type == 'search':
         return self.searchPost(query_str, page, limit, True)

     if list_type == 'normal':
         res = Post.get_posts_list(page, limit, order_by)
     elif list_type == 'tag':
         res = PostTag.get_posts_by_tag(query_str.lower(), page, limit)
         if not res:
             return {'items': [], 'total': 0}
     else:
         return None

     # `normal` and `tag` share the same response shape.
     return {
         'items': marshal(res.items, PostListSchema()),
         'total': res.total
     }
コード例 #9
0
ファイル: index.py プロジェクト: Kungreye/InfoCatcher
def tag(ident):
    """Render ``tag.html`` for the tag identified by ``ident``.

    Lookup goes by name first and then through ``Tag.get``; when neither
    finds a tag the request is aborted with 404. The ``type`` query argument
    (``hot`` by default, or ``latest``) chooses how posts are listed.
    """
    ident = ident.lower()
    found = Tag.get_by_name(ident) or Tag.get(ident)
    if not found:
        abort(404)

    page = request.args.get('page', default=1, type=int)
    kind = request.args.get('type', default='hot')  # hot/latest

    posts = []  # stays empty for an unknown type
    if kind == 'latest':
        posts = PostTag.get_posts_by_tag(ident, page)  # paginate
    elif kind == 'hot':
        posts = Item.get_post_ids_by_tag(ident, page, kind)  # via Elasticsearch
        posts.items = Post.get_multi(posts.items)

    # `posts` here is a pagination object (per the original comment).
    return render_template('tag.html', tag=found, ident=ident, posts=posts,
                           type=kind)
コード例 #10
0
ファイル: crawling.py プロジェクト: VanLiuZhi/flask_starlight
# Timestamp layouts tried in order for ``entry.published``.
# The second layout used to be '%Y-%m-%dT%H:%M:%S%fZ'. Because ``%f`` needs
# at least one digit, it stole the last digit of the seconds field, so
# '...:30Z' was parsed as 3 seconds. '%SZ' is the layout without fractional
# seconds.
_PUBLISHED_FORMATS = (
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%SZ',
    '%a, %d %b %Y %H:%M:%S %z',
)


def _parse_published(published):
    """Parse ``published`` with the first matching layout; the last layout's ValueError propagates."""
    *earlier, last = _PUBLISHED_FORMATS
    for fmt in earlier:
        try:
            return datetime.strptime(published, fmt)
        except ValueError:
            continue
    return datetime.strptime(published, last)


def fetch(url):
    """Parse the feed at ``url`` and store every entry as a post.

    Each entry is passed to ``Post.create_or_update`` with ``author_id=2``;
    posts for which that call reports success are collected in a local list.

    NOTE(review): the collected ``posts`` list is not returned anywhere in
    the visible code -- confirm whether a trailing ``return posts`` is
    missing.

    Raises:
        ValueError: if ``entry.published`` matches none of the supported
            timestamp layouts.
    """
    feed = feedparser.parse(url)

    posts = []

    for entry in feed.entries:
        # Prefer the full content, then the summary, then the bare title.
        try:
            content = entry.content and entry.content[0].value
        except AttributeError:
            try:
                content = entry.summary
            except AttributeError:
                content = entry.title

        created_at = _parse_published(entry.published)

        # Build the list of term strings directly. The old fallback put the
        # plain string 'other' into `tags` and then read `.term` from it,
        # which raised AttributeError for every entry without tags.
        try:
            tags = entry.tags
        except AttributeError:
            terms = ['other']
        else:
            terms = [tag.term for tag in tags if tag]

        ok, post = Post.create_or_update(
            author_id=2,
            title=entry.title or 'other',
            orig_url=entry.link,
            content=strip_tags(content),
            created_at=created_at,
            tags=terms)
        if ok:
            posts.append(post)
コード例 #11
0
ファイル: tasks.py プロジェクト: VanLiuZhi/flask_starlight
def remove_post_from_feed(post_id, author_id):
    """Look up the post for ``post_id``, pass it to ``_remove_post_from_feed`` and log the call."""
    target = Post.get(post_id)
    _remove_post_from_feed(target, author_id)
    message = f'Remove_post_from_feed post_id:{post_id}, author_id: {author_id}'
    logger.info(message)
コード例 #12
0
def add_to_activity_feed(post_id):
    """Add a hot post to the `ACTIVITY_KEY` stream.

    The post's creation time, as a whole-second Unix timestamp, is passed to
    ``ActivityFeed.add`` together with ``post_id``.
    """
    created_at = Post.get(post_id).created_at
    score = int(created_at.timestamp())
    ActivityFeed.add(score, post_id)
コード例 #13
0
def post(identifier):
    """Render ``post.html`` for the post looked up by ``identifier``.

    NOTE(review): the result of ``Post.get`` is handed to the template
    unchecked -- confirm how a missing post should be handled.
    """
    return render_template("post.html", post=Post.get(identifier))
コード例 #14
0
def remove_post_from_feed(post_id, author_id):
    """Look up the post for ``post_id``, pass it to ``remove_post_from_feed_`` and log the call."""
    target = Post.get(post_id)
    remove_post_from_feed_(target, author_id)
    message = f"remove_post_from_feed post_id={post_id}, author_id={author_id}"
    logger.info(message)
コード例 #15
0
def feed_post(id):
    """Look up the post for ``id``, hand it to ``_feed_post`` and log the call."""
    target = Post.get(id)
    _feed_post(target)
    message = f'Feed_post {id}'
    logger.info(message)
コード例 #16
0
def post(id):
    """Render ``post.html`` for the post returned by ``Post.get_or_404(id)``."""
    return render_template('post.html', post=Post.get_or_404(id))
コード例 #17
0
def add_to_activity_feed(post_id):
    """Add the post to the activity feed, scored by its creation time.

    The score passed to ``ActivityFeed.add`` is the post's ``created_at`` as
    a whole-second Unix timestamp.

    Args:
        post_id: id handed to ``Post.get`` and stored as the feed member.
    """
    post = Post.get(post_id)

    # `strftime('%s')` is a platform-specific extension: it is not among the
    # documented format codes, is unavailable on some platforms and ignores
    # tzinfo. `datetime.timestamp()` is the portable equivalent.
    ActivityFeed.add(int(post.created_at.timestamp()), post_id)
    logger.info(f'Add_to_activity_feed post_id:{post_id}')
コード例 #18
0
def feed_post(post_id):
    """Look up the post for ``post_id``, hand it to ``feed_post_`` and log the call."""
    target = Post.get(post_id)
    feed_post_(target)
    message = f"feed post post_id={post_id}"
    logger.info(message)
コード例 #19
0
ファイル: api_app.py プロジェクト: hjlarry/flask-toutiao
 def _prepare(self, post_id):
     """Return the post for ``post_id``; raise ``ApiException`` when the lookup result is falsy."""
     found = Post.get(post_id)
     if found:
         return found
     raise ApiException(httperrors.post_not_found.value)
コード例 #20
0
def add_to_activity_feed(post_id):
    """Add the post to the activity feed, scored by its creation time.

    The score passed to ``ActivityFeed.add`` is the post's ``created_at`` as
    a whole-second Unix timestamp.

    Args:
        post_id: id handed to ``Post.get`` and stored as the feed member.
    """
    post = Post.get(post_id)
    # `strftime("%s")` is a platform-specific extension: it is not among the
    # documented format codes, is unavailable on some platforms and ignores
    # tzinfo. `datetime.timestamp()` is the portable equivalent.
    ActivityFeed.add(int(post.created_at.timestamp()), post_id)
    logger.info(f"add_to_activity_feed post_id={post_id}")
コード例 #21
0
ファイル: index.py プロジェクト: Kungreye/InfoCatcher
def post(id):
    """Render ``post.html`` for the post returned by ``Post.get_or_404(id)``."""
    # `get_or_404` is defined in BaseModel (per the original comment).
    found = Post.get_or_404(id)
    return render_template('post.html', post=found)