Example #1
def grab_by_spider(spider_class):
    new_posts_count = 0
    blog = Blog.query.filter(Blog.url == spider_class.url).first()

    # Create the blog if it does not exist yet
    if not blog:
        blog = Blog(url=spider_class.url, title=spider_class.title, is_approved=True,
                    subtitle=spider_class.subtitle, author=spider_class.author, has_spider=True)
        db.session.add(blog)
        db.session.commit()

    for p in spider_class.get_posts_():
        url = p['url']
        title = p['title']
        published_at = p['published_at']

        post = Post.query.filter(Post.url == url).first()

        # New post
        if not post:
            new_posts_count += 1
            content = spider_class.get_post_(url)
            post = Post(url=url, title=title, published_at=published_at, content=content)
            blog.posts.append(post)
            print(" new - %s" % title)
        elif published_at != post.published_at:  # Update the existing post
            post.title = title
            post.published_at = published_at
            post.content = spider_class.get_post_(url)
            db.session.add(post)
            print(" update - %s" % title)
    db.session.add(blog)
    db.session.commit()
    return new_posts_count
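
For reference, grab_by_spider only touches a small surface of spider_class: the class attributes url, title, subtitle and author, plus the get_posts_() and get_post_(url) methods (the later examples also read for_special_purpose). A minimal sketch of a spider satisfying this implied interface, using placeholder values that are not from the source, might look like:

import datetime

class ExampleSpider:
    # Placeholder metadata; grab_by_spider copies these onto the Blog row.
    url = 'https://example.com/blog'
    title = 'Example Blog'
    subtitle = 'An example feed'
    author = 'example'
    for_special_purpose = False  # only read by Examples #2 and #3

    @classmethod
    def get_posts_(cls):
        # One dict per post, with exactly the keys the loop above reads.
        return [{'url': 'https://example.com/blog/hello',
                 'title': 'Hello',
                 'published_at': datetime.datetime(2024, 1, 1)}]

    @classmethod
    def get_post_(cls, url):
        # Fetch and return the full content of a single post.
        return '<p>post body</p>'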
Example #2
def grab_by_spider(spider_class):
    new_posts_count = 0
    blog = Blog.query.filter(Blog.url == spider_class.url).first()

    # Create the blog if it does not exist yet
    if not blog:
        blog = Blog(url=spider_class.url, title=spider_class.title, is_approved=True,
                    subtitle=spider_class.subtitle, author=spider_class.author, has_spider=True)
        if spider_class.for_special_purpose:  # special-purpose blog
            blog.is_approved = False
            blog.for_special_purpose = True
        db.session.add(blog)
        db.session.commit()

    # logging.debug(blog.title)
    print(blog.title)

    # Check whether the blog is still online
    blog.offline = check_offline(blog.url)

    # Used to compute the blog's last update time
    last_updated_at = datetime.datetime.min

    for p in spider_class.get_posts_():
        url = p['url']
        title = p['title']
        published_at = p['published_at']

        post = Post.query.filter(Post.url == url).first()

        # New post
        if not post:
            new_posts_count += 1
            content = spider_class.get_post_(url)
            post = Post(url=url, title=title, published_at=published_at, content=content)
            blog.posts.append(post)
            # logging.debug(" new - %s" % title)
            print(" new - %s" % title)

            # Insert the post into each subscriber's reading list
            for blog_user in blog.blog_users:
                user_read_post = UserReadPost(user_id=blog_user.user_id)
                post.readers.append(user_read_post)
        else:  # Update the existing post
            post.title = title
            post.published_at = published_at
            post.content = spider_class.get_post_(url)

        db.session.add(post)

        if published_at > last_updated_at:
            last_updated_at = published_at

    blog.updated_at = last_updated_at
    db.session.add(blog)
    db.session.commit()
    return new_posts_count
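
Example #2 also stores the result of check_offline(blog.url) on the blog, but that helper is not shown in any of the examples. A plausible sketch, assuming it simply probes the URL and treats connection errors or bad status codes as offline (the requests usage and timeout value are assumptions, not from the source):

import requests

def check_offline(url):
    # Hypothetical reconstruction: return True when the blog looks unreachable.
    try:
        return requests.get(url, timeout=10).status_code >= 400
    except requests.RequestException:
        return True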
Example #3
def grab_by_spider(spider_class):
    new_posts_count = 0
    blog = Blog.query.filter(Blog.url == spider_class.url).first()

    # Create the blog if it does not exist yet
    if not blog:
        blog = Blog(url=spider_class.url, title=spider_class.title, is_approved=True,
                    subtitle=spider_class.subtitle, author=spider_class.author, has_spider=True)
        if spider_class.for_special_purpose:  # special-purpose blog
            blog.is_approved = False
            blog.for_special_purpose = True
        db.session.add(blog)
        db.session.commit()

    # Check whether the blog is still online
    blog.offline = check_offline(blog.url)

    for p in spider_class.get_posts_():
        url = p['url']
        title = p['title']
        published_at = p['published_at']

        post = Post.query.filter(Post.url == url).first()

        # New post
        if not post:
            new_posts_count += 1
            content = spider_class.get_post_(url)
            post = Post(url=url, title=title, published_at=published_at, content=content)
            blog.posts.append(post)
            print(" new - %s" % title)
        elif published_at != post.published_at:  # Update the existing post
            post.title = title
            post.published_at = published_at
            post.content = spider_class.get_post_(url)
            db.session.add(post)
            print(" update - %s" % title)
    db.session.add(blog)
    db.session.commit()
    return new_posts_count
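
All of the variants share the same calling convention: pass a spider class, get back the number of newly grabbed posts. A hedged usage sketch, reusing the hypothetical ExampleSpider from the earlier sketch in place of the spider registry the examples do not show:

# Hypothetical driver; a real project would iterate a registry of spiders.
for spider_class in [ExampleSpider]:
    count = grab_by_spider(spider_class)
    print("%s: %d new posts" % (spider_class.title, count))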