Example #1
0
def test_api():
    """Parse one Hacker News listing page with AJAX turned off in settings.

    Builds an ``Api`` from a ``Settings`` subclass whose ``web`` dict sets
    ``with_ajax`` to ``False``, registers the ``Post`` and ``Page`` items,
    and parses ``/news?p=1``.
    """

    class MySettings(Settings):
        web = {"with_ajax": False}

    hn_api = Api('https://news.ycombinator.com/', settings=MySettings)

    class Post(Item):
        url = XPath('//a[@class="storylink"][1]/@href')
        title = XPath('//a[@class="storylink"][1]/text()')

        class Meta:
            source = XPath('//tr[@class="athing"]')
            route = {'/all?page=:page': '/news?p=:page'}

    class Page(Item):
        next_page = XPath('//a[@class="morelink"]/@href')

        class Meta:
            source = None
            route = {'/all?page=:page': '/news?p=:page'}

        def clean_next_page(self, next_page):
            # Prefix the extracted link with the local server address.
            return ''.join(("http://127.0.0.1:5000/", str(next_page)))

    # Same registration order as before: Post first, then Page.
    for item_cls in (Post, Page):
        hn_api.register(item_cls)

    hn_api.parse('/news?p=1')
Example #2
0
def test_api_with_ajax():
    """Parse one Hacker News listing page with ``with_ajax=True``.

    Registers the ``Post`` and ``Page`` items on the same route pattern,
    parses ``/news?p=1`` and prints whatever ``api.parse`` returns.
    """
    from toapi import XPath, Item, Api

    api = Api('https://news.ycombinator.com/', with_ajax=True)

    class Post(Item):
        url = XPath('//a[@class="storylink"][1]/@href')
        title = XPath('//a[@class="storylink"][1]/text()')

        class Meta:
            source = XPath('//tr[@class="athing"]')
            # Raw string: the backslashes belong to the route pattern and
            # must not be read as (invalid) string escape sequences.
            route = r'/news\?p=\d+'

    class Page(Item):
        next_page = XPath('//a[@class="morelink"]/@href')

        class Meta:
            source = None
            # Raw string for the same reason as Post.Meta.route.
            route = r'/news\?p=\d+'

        def clean_next_page(self, next_page):
            # Prefix the extracted link with the local server address.
            return "http://127.0.0.1:5000/" + next_page

    api.register(Post)
    api.register(Page)

    print(api.parse('/news?p=1'))
Example #3
0
def test_api_with_ajax():
    """Call ``api.parse('/news?p=1')`` on an ``Api`` built with ``with_ajax=True``.

    ``Post`` and ``Page`` are declared on the same route pattern.
    """
    from toapi import XPath, Item, Api

    api = Api('https://news.ycombinator.com/', with_ajax=True)

    class Post(Item):
        url = XPath('//a[@class="storylink"][1]/@href')
        title = XPath('//a[@class="storylink"][1]/text()')

        class Meta:
            source = XPath('//tr[@class="athing"]')
            # Raw string: the backslashes belong to the route pattern and
            # must not be read as (invalid) string escape sequences.
            route = r'/news\?p=\d+'

    class Page(Item):
        next_page = XPath('//a[@class="morelink"]/@href')

        class Meta:
            source = None
            # Raw string for the same reason as Post.Meta.route.
            route = r'/news\?p=\d+'

    # NOTE(review): neither Post nor Page is passed to api.register() before
    # parsing -- confirm this is intentional.
    api.parse('/news?p=1')
Example #4
0
def test_api_with_ajax():
    """Parse the Hacker News root path with ``with_ajax=True`` and print it.

    Only the ``Post`` item is registered, on the route ``/``.
    """
    from toapi import XPath, Item, Api

    site_root = 'https://news.ycombinator.com/'
    hn_api = Api(site_root, with_ajax=True)

    class Post(Item):
        url = XPath('//a[@class="storylink"][1]/@href')
        title = XPath('//a[@class="storylink"][1]/text()')

        class Meta:
            source = XPath('//tr[@class="athing"]')
            route = '/'

    hn_api.register(Post)

    parsed = hn_api.parse('/')
    print(parsed)
Example #5
0
        else:
            return ''.join(
                [i.text.strip().replace(u'\xa0', '') for i in title])

    def clean_url(self, value):
        """Return ``value`` exactly as received; no cleaning is applied."""
        unchanged = value
        return unchanged


api.register(Post)

if __name__ == '__main__':
    # Headers forwarded to api.parse(); the User-Agent literal is split
    # across lines only for readability -- its value is unchanged.
    headers = {
        'User-Agent': (
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; "
            "Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; "
            ".NET CLR 2.0.50727; Media Center PC 6.0)"
        ),
    }
    print(api.parse('/250/?start=25', headers=headers))
    api.serve()
    # Pages to visit:
    #   http://127.0.0.1:5000/250/
    #   http://127.0.0.1:5000/250/?start=25
    #   http://127.0.0.1:5000/250/?start=50
    #   ...
"""
{
    "post": [
        {
            "title": "肖申克的救赎/The Shawshank Redemption",
            "url": "https://movie.douban.com/subject/1292052/"
        },
        {
            "title": "霸王别姬",
            "url": "https://movie.douban.com/subject/1291546/"
    def clean_title(self, title):
        """Return ``title`` as a single string without non-breaking spaces.

        ``title`` may be a text string, or an iterable of nodes that expose
        a ``.text`` attribute. For nodes, each text is stripped, has its
        non-breaking spaces removed, and the pieces are concatenated.
        """
        # ``unicode`` exists only on Python 2; fall back to ``str`` so the
        # type check does not raise NameError on Python 3.
        try:
            text_type = unicode
        except NameError:
            text_type = str

        if isinstance(title, text_type):
            return title.replace(u'\xa0', '')
        # A node without text reports ``None``; treat it as empty instead
        # of failing on ``None.strip()``.
        return ''.join(
            (node.text or '').strip().replace(u'\xa0', '') for node in title)


api.register(Post)

if __name__ == '__main__':
    # Headers forwarded to api.parse(); the User-Agent literal is split
    # across lines only for readability -- its value is unchanged.
    headers = {
        'User-Agent': (
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; "
            "Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; "
            ".NET CLR 2.0.50727; Media Center PC 6.0)"
        ),
    }
    pprint(api.parse('/', headers=headers))
    api.serve()
    # Pages to visit:
    #   http://127.0.0.1:5000/
    #   http://127.0.0.1:5000/?start=25
    #   http://127.0.0.1:5000/?start=50
    #   ...
"""
{
    "post": [
        {
            "title": "肖申克的救赎/The Shawshank Redemption",
            "url": "https://movie.douban.com/subject/1292052/"
        },
        {
            "title": "霸王别姬",
            "url": "https://movie.douban.com/subject/1291546/"
Example #7
0
            for node in title[0].itertext():
                text += node
            title = text.strip()
        return title

    class Meta:
        # CSS selector for the container nodes of this item
        # (presumably one result per matched node -- confirm against toapi).
        source = Css('li.b_algo')
        # Path mapping with a ``:wd`` placeholder on both sides; presumably
        # the key is the locally served path and the value the upstream
        # search path -- TODO confirm.
        route = {'/:wd': '/search?q=:wd&ensearch=1'}


# Baidu variant of the Bing item: subclasses Bing and overrides the name,
# base URL, field selectors and Meta configuration below.
class Baidu(Bing):
    __name__ = 'baidu'
    __base_url__ = 'http://www.baidu.com'

    # Both fields select the same ``h3.t a`` anchor: ``url`` reads its
    # ``href`` attribute, ``title`` selects the element itself.
    url = Css('h3.t a', attr='href')
    title = Css('h3.t a')

    class Meta:
        # CSS selector for the container nodes of this item
        # (presumably one result per matched node -- confirm against toapi).
        source = Css('div.result')
        # Path mapping with a ``:wd`` placeholder on both sides; presumably
        # the key is the locally served path and the value the upstream
        # search path -- TODO confirm.
        route = {'/:wd': '/s?wd=:wd&ie=utf-8&vf_bl=1'}


# Register both search items on the shared ``api`` object.
api.register(Baidu)
api.register(Bing)

if __name__ == '__main__':
    # Print the parse result for the path '/python', then start serving.
    print(api.parse('/python'))
    api.serve()

    # Visit http://127.0.0.1:5000/python
Example #8
0
        route = '/'

    def clean_title(self, title):
        """Return ``title`` as a single string without non-breaking spaces.

        ``title`` may be a text string, or an iterable of nodes that expose
        a ``.text`` attribute. For nodes, each text is stripped, has its
        non-breaking spaces removed, and the pieces are concatenated.
        """
        # ``unicode`` exists only on Python 2; fall back to ``str`` so the
        # type check does not raise NameError on Python 3.
        try:
            text_type = unicode
        except NameError:
            text_type = str

        if isinstance(title, text_type):
            return title.replace(u'\xa0', '')
        # A node without text reports ``None``; treat it as empty instead
        # of failing on ``None.strip()``.
        return ''.join(
            (node.text or '').strip().replace(u'\xa0', '') for node in title)


api.register(Post)

if __name__ == '__main__':
    # Headers forwarded to api.parse(); the User-Agent literal is split
    # across lines only for readability -- its value is unchanged.
    headers = {
        'User-Agent': (
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; "
            "Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; "
            ".NET CLR 2.0.50727; Media Center PC 6.0)"
        ),
    }
    api.parse('/', headers=headers)
    api.serve()
    # Pages to visit:
    #   http://127.0.0.1:5000/
    #   http://127.0.0.1:5000/?start=25
    #   http://127.0.0.1:5000/?start=50
    #   ...

"""
{
    "post": [
        {
            "title": "肖申克的救赎/The Shawshank Redemption",
            "url": "https://movie.douban.com/subject/1292052/"
        },
        {
            "title": "霸王别姬",