def test_api():
    """Exercise an ``Api`` wired to Hacker News listing pages.

    Two items, ``Post`` (declared with ``api.list``) and ``Page``, are
    registered on the same pair of routes. The test then expects
    ``api.run(port=-1)`` to raise ``SystemExit`` and requests the same paged
    route twice through the ``App`` wrapper around ``api.app``.
    """
    api = Api()

    # Literals shared by both item declarations below.
    source_site = 'https://news.ycombinator.com'
    paged_route = ('/posts?page={page}', '/news?p={page}')
    first_page_route = ('/posts', '/news?p=1')

    @api.site(source_site)
    @api.list('.athing')
    @api.route(*paged_route)
    @api.route(*first_page_route)
    class Post(Item):
        """Link and title read from the ``.storylink`` element."""

        url = Attr('.storylink', 'href')
        title = Text('.storylink')

    @api.site(source_site)
    @api.route(*paged_route)
    @api.route(*first_page_route)
    class Page(Item):
        """The ``href`` of the ``.morelink`` element."""

        next_page = Attr('.morelink', 'href')

        def clean_next_page(self, value):
            # Presumably maps the scraped source link onto this API's own
            # paged route -- confirm against ``Api.convert_string``.
            source_path = '/' + value
            local_pattern = request.host_url.strip('/') + '/posts?page={page}'
            return api.convert_string(
                source_path, '/news?p={page}', local_pattern
            )

    client = App(api.app)
    with pytest.raises(SystemExit):
        api.run(port=-1)

    # The same URL is requested twice in a row.
    for _ in range(2):
        client.get('/posts?page=1')
    print(cli.__dict__)
def test_api():
    """Register two Hacker News items on an ``Api`` and hit one route twice.

    ``api.run(port=-1)`` is expected to raise ``SystemExit``; afterwards the
    paged route is requested two times through ``App(api.app)`` and the
    ``cli`` module's ``__dict__`` is printed.
    """
    api = Api()

    @api.site("https://news.ycombinator.com")
    @api.list(".athing")
    @api.route("/posts?page={page}", "/news?p={page}")
    @api.route("/posts", "/news?p=1")
    class Post(Item):
        """Story link (``href``) and story title, both from ``.storylink``."""

        url = Attr(".storylink", "href")
        title = Text(".storylink")

    @api.site("https://news.ycombinator.com")
    @api.route("/posts?page={page}", "/news?p={page}")
    @api.route("/posts", "/news?p=1")
    class Page(Item):
        """Carries the ``href`` of the ``.morelink`` element."""

        next_page = Attr(".morelink", "href")

        def clean_next_page(self, value):
            # The scraped value has no leading slash, so one is prepended
            # before handing it to ``convert_string``.
            scraped_path = "/" + value
            host_root = request.host_url.strip("/")
            target_pattern = host_root + "/posts?page={page}"
            return api.convert_string(
                scraped_path, "/news?p={page}", target_pattern
            )

    test_client = App(api.app)

    expect_exit = pytest.raises(SystemExit)
    with expect_exit:
        api.run(port=-1)

    paged_url = "/posts?page=1"
    test_client.get(paged_url)
    test_client.get(paged_url)

    print(cli.__dict__)
from htmlparsing import Attr, Text
from toapi import Api, Item

# Site whose root page ("/") is exposed under the local "/api" route.
_SOURCE_SITE = 'https://minwook-shin.github.io'

# Keyword arguments handed to ``api.run`` at the bottom of the script.
_RUN_OPTIONS = dict(debug=True, host='0.0.0.0', port=5000)

api = Api()


@api.site(_SOURCE_SITE)
@api.list('.post')
@api.route('/api', '/')
class Post(Item):
    """Item declared with the ``.post`` list selector.

    ``url`` is the ``href`` of ``.read-more``; ``title`` is the text of
    ``h1 > a``.
    """

    url = Attr('.read-more', 'href')
    title = Text('h1 > a')


# Runs unconditionally: this script has no ``__main__`` guard.
api.run(**_RUN_OPTIONS)
# ``request``, ``Attr`` and ``Text`` are used below, so they must be imported;
# without these two imports the script fails with ``NameError`` while the
# first class body is evaluated.
from flask import request
from htmlparsing import Attr, Text
from toapi import Api, Item

# NOTE(review): the geckodriver path is specific to one machine; adjust it
# before running elsewhere. Presumably ``browser`` makes toapi fetch pages
# through that driver -- confirm against the toapi documentation.
api = Api(browser="/home/bug/桌面/geckodriver")


@api.site("https://news.ycombinator.com")
@api.list(".athing")
@api.route("/posts?page={page}", "/news?p={page}")
@api.route("/posts", "/news?p=1")
class Post(Item):
    """Item declared with the ``.athing`` list selector.

    ``url`` is the ``href`` of ``.storylink``; ``title`` is its text.
    """

    url = Attr(".storylink", "href")
    title = Text(".storylink")


@api.site("https://news.ycombinator.com")
@api.route("/posts?page={page}", "/news?p={page}")
@api.route("/posts", "/news?p=1")
class Page(Item):
    """Item exposing the ``href`` of the ``.morelink`` element."""

    next_page = Attr(".morelink", "href")

    def clean_next_page(self, value):
        """Pass the scraped link through ``api.convert_string``.

        ``value`` is prefixed with ``/`` and converted from the source pattern
        ``/news?p={page}`` to ``<request host>/posts?page={page}``.
        """
        return api.convert_string(
            "/" + value,
            "/news?p={page}",
            request.host_url.strip("/") + "/posts?page={page}",
        )


api.run(debug=True, host="0.0.0.0", port=5000)
# encoding:utf-8
"""
Toapi is a library that can turn any website into an API service.

It is a Python library that lets every website provide an API. Previously we
crawled the data, stored it, and then built an API service so that other
people could access it; on top of that we had to refresh the data regularly.

This library makes all of that easy. All you have to do is define your data,
and the data automatically becomes an accessible API.
"""
from flask import request
from htmlparsing import Attr, Text
from toapi import Api, Item

_SOURCE_SITE = 'http://www.itest.info'

# Both local routes below are mapped onto this one source path.
_SOURCE_PATH = '/courses'

api = Api()


@api.site(_SOURCE_SITE)
@api.list('.col-md-3')
@api.route('/courses?page={page}', _SOURCE_PATH)
@api.route('/courses', _SOURCE_PATH)
class Course(Item):
    """Item declared with the ``.col-md-3`` list selector.

    ``url`` is the ``href`` of the ``a`` element; ``title`` is the text of
    the ``h4`` element.
    """

    url = Attr('a', 'href')
    title = Text('h4')


# Runs unconditionally: this script has no ``__main__`` guard.
api.run(debug=True, host='0.0.0.0', port=12306)
img = Attr('a.cook-img', 'style') url = Attr('a.cook-img', 'href') title = Text('div.cook-info > a.cookname') major = Text('div.cook-info > p.major') def clean_img(self, img): re_img = re.compile( 'background: url[(](.*)[)] no-repeat center center;background-size: cover;position: relative;' ) return re_img.match(img).groups()[0] @api.site('https://www.douguo.com') @api.route('/search/{keyword}/{page}', '/search/recipe/{keyword}/0/{page}') @api.route('/search/{keyword}', '/search/recipe/{keyword}') class Page(Item): current = Attr('div.mt20 > div.pages > a.anext', 'href') total = Attr('div.mt20 > div.pages > a.alast', 'href') def clean_total(self, total): return int(total.rsplit('/', 1)[1]) def clean_current(self, current): print(current) return int(current.rsplit('/', 1)[1]) - 20 if __name__ == '__main__': api.run('0.0.0.0', debug=True)