Beispiel #1
0
def test_api():
    """Smoke-test an ``Api`` with two items registered on the same routes.

    ``Post`` is registered as a list item and ``Page`` as a plain item, both
    against news.ycombinator.com. The test then checks that ``api.run`` with
    port ``-1`` raises ``SystemExit`` and requests ``/posts?page=1`` twice
    through the ``App`` wrapper around ``api.app``.
    """
    api = Api()
    site = 'https://news.ycombinator.com'
    # (local path, source-site path) pairs shared by both items.
    paged = ('/posts?page={page}', '/news?p={page}')
    first = ('/posts', '/news?p=1')

    @api.site(site)
    @api.list('.athing')
    @api.route(*paged)
    @api.route(*first)
    class Post(Item):
        url = Attr('.storylink', 'href')
        title = Text('.storylink')

    @api.site(site)
    @api.route(*paged)
    @api.route(*first)
    class Page(Item):
        next_page = Attr('.morelink', 'href')

        def clean_next_page(self, value):
            # Map the scraped link onto the matching URL of this server.
            source_link = '/' + value
            local_pattern = request.host_url.strip('/') + '/posts?page={page}'
            return api.convert_string(source_link, '/news?p={page}', local_pattern)

    app = App(api.app)
    with pytest.raises(SystemExit):
        api.run(port=-1)
    # Same URL requested twice -- presumably so the second hit exercises
    # a cached response; TODO confirm.
    for _ in range(2):
        app.get('/posts?page=1')
    print(cli.__dict__)
Beispiel #2
0
def test_api():
    """Exercise an ``Api`` that serves two items from news.ycombinator.com.

    Defines ``Post`` (list item) and ``Page`` (plain item) on identical
    routes, asserts that ``api.run(port=-1)`` raises ``SystemExit``, and then
    issues two GET requests for ``/posts?page=1`` via ``App(api.app)``.
    """
    api = Api()
    hn_site = "https://news.ycombinator.com"
    local_paged, remote_paged = "/posts?page={page}", "/news?p={page}"
    local_first, remote_first = "/posts", "/news?p=1"

    @api.site(hn_site)
    @api.list(".athing")
    @api.route(local_paged, remote_paged)
    @api.route(local_first, remote_first)
    class Post(Item):
        url = Attr(".storylink", "href")
        title = Text(".storylink")

    @api.site(hn_site)
    @api.route(local_paged, remote_paged)
    @api.route(local_first, remote_first)
    class Page(Item):
        next_page = Attr(".morelink", "href")

        def clean_next_page(self, value):
            # Translate the source-site link into a URL on the current host.
            remote_link = "/" + value
            host = request.host_url.strip("/")
            return api.convert_string(remote_link, remote_paged, host + local_paged)

    app = App(api.app)
    with pytest.raises(SystemExit):
        api.run(port=-1)
    first_hit = app.get("/posts?page=1")  # noqa: F841 - response is not inspected
    second_hit = app.get("/posts?page=1")  # noqa: F841 - response is not inspected
    print(cli.__dict__)
Beispiel #3
0
from htmlparsing import Attr, Text
from toapi import Api, Item

# Api instance used by the decorators on ``Post`` and by the ``api.run`` call.
api = Api()


@api.site('https://minwook-shin.github.io')
@api.list('.post')
@api.route('/api', '/')
class Post(Item):
    """Item with a ``url`` and a ``title`` field, registered on ``api``.

    NOTE(review): presumably one item is produced per ``.post`` element and
    the site's ``/`` page is exposed under the local ``/api`` path -- confirm
    against toapi's ``Api.list`` / ``Api.route`` documentation.
    """

    # Attr selector: ``.read-more`` element, ``href`` attribute.
    url = Attr('.read-more', 'href')
    # Text selector: ``h1 > a`` element.
    title = Text('h1 > a')


# Runs at import time (there is no ``__main__`` guard) with debug enabled on
# host 0.0.0.0, port 5000.
# NOTE(review): debug=True with a 0.0.0.0 host is presumably meant for local
# development only -- confirm before deploying.
api.run(debug=True, host='0.0.0.0', port=5000)
Beispiel #4
0
from flask import request
from htmlparsing import Attr, Text
from toapi import Api, Item

# Api instance created with an explicit ``browser`` argument.
# NOTE(review): the value is a hard-coded absolute path to a geckodriver
# binary in one user's home directory -- it will need adjusting per machine.
api = Api(browser="/home/bug/桌面/geckodriver")


@api.site("https://news.ycombinator.com")
@api.list(".athing")
@api.route("/posts?page={page}", "/news?p={page}")
@api.route("/posts", "/news?p=1")
class Post(Item):
    """Item with a ``url`` and a ``title`` field, registered on ``api``.

    NOTE(review): presumably one item is produced per ``.athing`` element and
    each route maps a local path to a path on the source site -- confirm
    against toapi's ``Api.list`` / ``Api.route`` documentation.
    """

    # Attr selector: ``.storylink`` element, ``href`` attribute.
    url = Attr(".storylink", "href")
    # Text selector: ``.storylink`` element.
    title = Text(".storylink")


@api.site("https://news.ycombinator.com")
@api.route("/posts?page={page}", "/news?p={page}")
@api.route("/posts", "/news?p=1")
class Page(Item):
    """Item whose single field is the ``href`` of the ``.morelink`` element."""

    next_page = Attr(".morelink", "href")

    def clean_next_page(self, value):
        """Rewrite the scraped link as a ``/posts?page=...`` URL on this host.

        ``value`` gets a leading ``/`` and is then passed to
        ``api.convert_string`` together with the source pattern
        ``/news?p={page}`` and a target pattern built from the current
        request's host URL.
        """
        remote_link = "/" + value
        local_base = request.host_url.strip("/")
        local_pattern = local_base + "/posts?page={page}"
        return api.convert_string(remote_link, "/news?p={page}", local_pattern)


# Runs at import time (there is no ``__main__`` guard) with debug enabled on
# host 0.0.0.0, port 5000.
# NOTE(review): debug=True with a 0.0.0.0 host is presumably meant for local
# development only -- confirm before deploying.
api.run(debug=True, host="0.0.0.0", port=5000)
Beispiel #5
0
# encoding:utf-8
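'''
Toapi is a library that can turn any website into an API service.

It is a Python library that lets every website provide an API. Previously, we crawled the data, stored it, and then built an API service so that other people could access it. On top of that, we had to refresh our data regularly.
This library makes all of that easy. All you have to do is define your data, and it automatically becomes an accessible API.
'''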
from flask import request
from htmlparsing import Attr, Text
from toapi import Api, Item

# Api instance used by the decorators on ``Course`` and by the ``api.run`` call.
api = Api()


@api.site('http://www.itest.info')
@api.list('.col-md-3')
@api.route('/courses?page={page}', '/courses')
@api.route('/courses', '/courses')
class Course(Item):
    """Item with a ``url`` and a ``title`` field, registered on ``api``.

    NOTE(review): both routes use ``/courses`` as their second argument, so
    the ``{page}`` placeholder of the first route does not appear in its
    target -- presumably every page value yields the same source page;
    confirm whether the target should carry ``{page}`` as well.
    """

    # Attr selector: ``a`` element, ``href`` attribute.
    url = Attr('a', 'href')
    # Text selector: ``h4`` element.
    title = Text('h4')


# Runs at import time (there is no ``__main__`` guard) with debug enabled on
# host 0.0.0.0, port 12306.
# NOTE(review): debug=True with a 0.0.0.0 host is presumably meant for local
# development only -- confirm before deploying.
api.run(debug=True, host='0.0.0.0', port=12306)
Beispiel #6
0
    img = Attr('a.cook-img', 'style')
    url = Attr('a.cook-img', 'href')
    title = Text('div.cook-info > a.cookname')
    major = Text('div.cook-info > p.major')

    def clean_img(self, img):
        """Extract the image URL from an inline ``style`` value.

        Args:
            img: Raw ``style`` text, expected to have the exact form
                ``background: url(<URL>) no-repeat center center;``
                ``background-size: cover;position: relative;``.

        Returns:
            The text between ``url(`` and ``)``, or ``None`` when ``img`` is
            empty or does not follow the expected layout.
        """
        if not img:
            # Nothing was scraped for this field; there is no URL to extract.
            return None
        found = re.match(
            r'background: url[(](.*)[)] no-repeat center center;background-size: cover;position: relative;',
            img,
        )
        if found is None:
            # An unexpected style layout used to fail with AttributeError on
            # ``None.groups()``; report "no image" instead.
            return None
        return found.group(1)


@api.site('https://www.douguo.com')
@api.route('/search/{keyword}/{page}', '/search/recipe/{keyword}/0/{page}')
@api.route('/search/{keyword}', '/search/recipe/{keyword}')
class Page(Item):
    """Item with two fields read from the ``href`` of the pager links."""

    current = Attr('div.mt20 > div.pages > a.anext', 'href')
    total = Attr('div.mt20 > div.pages > a.alast', 'href')

    def clean_total(self, total):
        """Return the text after the last ``/`` of ``total`` as an int."""
        pieces = total.rsplit('/', 1)
        return int(pieces[1])

    def clean_current(self, current):
        """Return the int after the last ``/`` of ``current``, minus 20.

        The raw value is also printed to stdout.
        """
        print(current)
        pieces = current.rsplit('/', 1)
        next_value = int(pieces[1])
        # NOTE(review): the 20 is presumably the page size, turning the
        # "next" link's value into the current one -- confirm.
        return next_value - 20


if __name__ == '__main__':
    # Only runs when the file is executed as a script; '0.0.0.0' is passed
    # as the first positional argument and debug is enabled.
    api.run('0.0.0.0', debug=True)