def grab():
    """Grab posts for every blog, first through feeds, then through spiders.

    Debug-level logging is written to ``grab.log``. An exception raised while
    processing a single blog or a single spider is stored as a ``GrabLog`` row
    (message, formatted traceback, blog id) and the run continues with the
    next item.
    """
    import logging
    from spiders import grab_by_spider, spiders

    logging.basicConfig(filename='grab.log', level=logging.DEBUG)

    with app.app_context():
        # Running total of the values returned by the grabbers. It is only
        # accumulated here; nothing in this function reports it.
        new_posts_count = 0

        # Grab blogs through their feeds.
        for blog in Blog.query:
            if blog.feed and blog.is_approved:
                try:
                    new_posts_count += grab_by_feed(blog)
                except Exception as e:
                    log = GrabLog(message=e, details=traceback.format_exc(),
                                  blog_id=blog.id)
                    db.session.add(log)
                    db.session.commit()

        # Grab blogs through spiders.
        for spider in spiders:
            try:
                new_posts_count += grab_by_spider(spider)
            except Exception as e:
                # Capture the traceback before running any further queries.
                details = traceback.format_exc()
                # Use first() rather than first_or_404(): a missing blog must
                # not raise an HTTP 404 from inside this handler, which would
                # abort the remaining spiders and hide the original error.
                blog = Blog.query.filter(Blog.url == spider.url).first()
                # NOTE(review): assumes GrabLog.blog_id accepts None - confirm.
                log = GrabLog(message=e, details=details,
                              blog_id=blog.id if blog is not None else None)
                db.session.add(log)
                db.session.commit()
def grab():
    """Fetch the latest data for every blog, through feeds and then spiders.

    A fresh application is built with ``create_app()`` and all work happens
    inside its application context. An exception raised while processing a
    single blog or a single spider is stored as a ``GrabLog`` row (message,
    formatted traceback, blog id) and the run continues with the next item.
    """
    # Running total of the values returned by the grabbers. It is only
    # accumulated here; nothing in this function reports it.
    new_posts_count = 0
    flask_app = create_app()

    with flask_app.app_context():
        # Grab blogs through their feeds.
        for blog in Blog.query:
            if blog.feed and blog.is_approved:
                try:
                    new_posts_count += grab_by_feed(blog)
                except Exception as e:
                    log = GrabLog(message=e, details=traceback.format_exc(),
                                  blog_id=blog.id)
                    db.session.add(log)
                    db.session.commit()

        # Grab blogs through spiders.
        for spider in spiders:
            try:
                new_posts_count += grab_by_spider(spider)
            except Exception as e:
                # Capture the traceback before running any further queries.
                details = traceback.format_exc()
                # Use first() rather than first_or_404(): a missing blog must
                # not raise an HTTP 404 from inside this handler, which would
                # abort the remaining spiders and hide the original error.
                blog = Blog.query.filter(Blog.url == spider.url).first()
                # NOTE(review): assumes GrabLog.blog_id accepts None - confirm.
                log = GrabLog(message=e, details=details,
                              blog_id=blog.id if blog is not None else None)
                db.session.add(log)
                db.session.commit()
def grab_feed():
    """Run the feed grabber over every blog returned by ``Blog.query``.

    No filtering and no error handling is applied here: the first exception
    raised by ``grab_by_feed`` propagates to the caller.
    """
    with app.app_context():
        all_blogs = Blog.query
        for current_blog in all_blogs:
            grab_by_feed(current_blog)
def test_grab(id):
    """Run the feed grabber for the single blog with primary key ``id``.

    The lookup uses ``get_or_404``, so an unknown ``id`` raises the 404 error
    produced by that call instead of reaching ``grab_by_feed``.
    """
    # The parameter name shadows the builtin ``id`` but is kept because it is
    # part of this function's interface.
    grab_by_feed(Blog.query.get_or_404(id))