def test_api():
    """Declare two routed items, check ``api.run`` exits, then GET twice.

    ``Post`` and ``Page`` are both bound to Hacker News and to the same two
    routes.  ``api.run(port=-1)`` is expected to raise ``SystemExit``; the
    listing route is then requested twice through the ``App`` test client and
    ``cli.__dict__`` is printed.
    """
    api = Api()

    @api.site("https://news.ycombinator.com")
    @api.list(".athing")
    @api.route("/posts?page={page}", "/news?p={page}")
    @api.route("/posts", "/news?p=1")
    class Post(Item):
        # One entry per element matched by the ``.athing`` list selector.
        url = Attr(".storylink", "href")
        title = Text(".storylink")

    @api.site("https://news.ycombinator.com")
    @api.route("/posts?page={page}", "/news?p={page}")
    @api.route("/posts", "/news?p=1")
    class Page(Item):
        next_page = Attr(".morelink", "href")

        def clean_next_page(self, value):
            """Rewrite the scraped ``news?p=N`` link into a local URL."""
            source_path = "/" + value
            local_template = request.host_url.strip("/") + "/posts?page={page}"
            return api.convert_string(
                source_path,
                "/news?p={page}",
                local_template,
            )

    app = App(api.app)

    with pytest.raises(SystemExit):
        api.run(port=-1)

    # Same request issued twice, exactly as before.
    # NOTE(review): the repeat presumably exercises a cached path - confirm.
    for _ in range(2):
        app.get("/posts?page=1")

    print(cli.__dict__)
def test_api():
    """Declare two routed items, check ``api.run`` exits, then GET twice.

    Both items target Hacker News and share the ``/posts`` routes.  After
    asserting that ``api.run(port=-1)`` raises ``SystemExit``, the listing
    route is fetched twice via the ``App`` client and ``cli.__dict__`` is
    printed.
    """
    api = Api()

    @api.site('https://news.ycombinator.com')
    @api.list('.athing')
    @api.route('/posts?page={page}', '/news?p={page}')
    @api.route('/posts', '/news?p=1')
    class Post(Item):
        url = Attr('.storylink', 'href')
        title = Text('.storylink')

    @api.site('https://news.ycombinator.com')
    @api.route('/posts?page={page}', '/news?p={page}')
    @api.route('/posts', '/news?p=1')
    class Page(Item):
        next_page = Attr('.morelink', 'href')

        def clean_next_page(self, value):
            """Map the scraped relative link onto the local ``/posts`` route."""
            host = request.host_url.strip('/')
            return api.convert_string(
                '/' + value,
                '/news?p={page}',
                host + '/posts?page={page}',
            )

    app = App(api.app)

    with pytest.raises(SystemExit):
        api.run(port=-1)

    # Two identical requests, in the same order as the original test.
    listing_path = '/posts?page=1'
    app.get(listing_path)
    app.get(listing_path)

    print(cli.__dict__)
def test_api_with_ajax():
    """Register ``Post`` and ``Page`` on an ajax-enabled Api and print a parse.

    Both items are routed with the same regular expression, so the final
    ``api.parse('/news?p=1')`` call is matched against each of them.
    """
    from toapi import XPath, Item, Api

    api = Api('https://news.ycombinator.com/', with_ajax=True)

    class Post(Item):
        url = XPath('//a[@class="storylink"][1]/@href')
        title = XPath('//a[@class="storylink"][1]/text()')

        class Meta:
            source = XPath('//tr[@class="athing"]')
            # Raw string: ``\?`` and ``\d`` are regex escapes, not Python
            # string escapes.  The runtime value is unchanged.
            route = r'/news\?p=\d+'

    class Page(Item):
        next_page = XPath('//a[@class="morelink"]/@href')

        class Meta:
            source = None
            route = r'/news\?p=\d+'

        def clean_next_page(self, next_page):
            """Prefix the scraped link with the local server address."""
            return "http://127.0.0.1:5000/" + next_page

    api.register(Post)
    api.register(Page)

    print(api.parse('/news?p=1'))
def test_alias():
    """Check ``convert_route_to_alias`` on regex, named and literal routes.

    Each assertion passes ``(path, route pattern, alias template)`` and
    compares the result with the expected rewritten path.
    """
    api = Api()

    # Patterns containing ``\d`` are raw strings so the backslash reaches the
    # regex engine without relying on Python's invalid-escape fallback.
    assert api.convert_route_to_alias(
        '/movies/22/?output=utf-8',
        r'/movies/\d{1,2}/?output=:page',
        '/movies/test/:page.html') == '/movies/test/utf-8.html'

    # Non-ASCII query keys and values.
    assert api.convert_route_to_alias(
        '/movies/22/?你好=你好',
        r'/movies/\d{1,2}/?你好=:page',
        '/movies/test/:page.html') == '/movies/test/你好.html'

    assert api.convert_route_to_alias(
        '/movies/22/?page=2',
        r'/movies/\d{1,2}/?page=:page',
        '/movies/test/index_:page.html') == '/movies/test/index_2.html'

    assert api.convert_route_to_alias(
        '/movies/?page=2',
        '/movies/?page=:page',
        '/html/gndy/dyzz/index_:page.html') == '/html/gndy/dyzz/index_2.html'

    # Two named placeholders, one in the path and one in the query string.
    assert api.convert_route_to_alias(
        '/movies/you/?page=2',
        '/movies/:f**k/?page=:page',
        '/html/gndy/:f**k/index_:page.html') == '/html/gndy/you/index_2.html'

    # A single placeholder that captures the whole path and query.
    assert api.convert_route_to_alias(
        '/movies/you/?page=2&a=1',
        '/:path',
        '/:path') == '/movies/you/?page=2&a=1'

    # Root path maps to itself.
    assert api.convert_route_to_alias('/', '/', '/') == '/'
def test_alias():
    """Check ``convert_route_to_alias`` for named-placeholder routes.

    Every case is ``(path, route pattern, alias template, expected)``; the
    cases run in the same order as the original assertions.
    """
    api = Api()

    cases = (
        (
            '/movies/?page=2',
            '/movies/?page=:page',
            '/html/gndy/dyzz/index_:page.html',
            '/html/gndy/dyzz/index_2.html',
        ),
        (
            '/movies/you/?page=2',
            '/movies/:f**k/?page=:page',
            '/html/gndy/:f**k/index_:page.html',
            '/html/gndy/you/index_2.html',
        ),
        (
            '/movies/you/?page=2&a=1',
            '/:path',
            '/:path',
            '/movies/you/?page=2&a=1',
        ),
    )

    for path, pattern, alias, expected in cases:
        assert api.convert_route_to_alias(path, pattern, alias) == expected
def test_api_with_ajax():
    """Register one item on the root route of an ajax-enabled Api and print it."""
    from toapi import XPath, Item, Api

    api = Api('https://news.ycombinator.com/', with_ajax=True)

    class Post(Item):
        # Both fields take the first ``storylink`` anchor inside the source.
        url = XPath('//a[@class="storylink"][1]/@href')
        title = XPath('//a[@class="storylink"][1]/text()')

        class Meta:
            source = XPath('//tr[@class="athing"]')
            route = '/'

    api.register(Post)

    parsed = api.parse('/')
    print(parsed)
def test_api_with_ajax():
    """Define ``Post`` and ``Page`` items and call ``api.parse`` on page 1.

    NOTE(review): unlike the sibling variants of this test, neither item is
    passed to ``api.register`` before ``api.parse`` is called - confirm that
    this is intentional.
    """
    from toapi import XPath, Item, Api

    api = Api('https://news.ycombinator.com/', with_ajax=True)

    class Post(Item):
        url = XPath('//a[@class="storylink"][1]/@href')
        title = XPath('//a[@class="storylink"][1]/text()')

        class Meta:
            source = XPath('//tr[@class="athing"]')
            # Raw string keeps the regex escapes (``\?``, ``\d``) intact
            # without triggering Python's invalid-escape warning.
            route = r'/news\?p=\d+'

    class Page(Item):
        next_page = XPath('//a[@class="morelink"]/@href')

        class Meta:
            source = None
            route = r'/news\?p=\d+'

    api.parse('/news?p=1')
def test_api():
    """Parse page 1 with ajax disabled through a custom ``Settings`` class.

    ``Post`` and ``Page`` both declare a dict route mapping
    ``/all?page=:page`` to ``/news?p=:page``.
    """
    class MySettings(Settings):
        web = {"with_ajax": False}

    api = Api('https://news.ycombinator.com/', settings=MySettings)

    class Post(Item):
        url = XPath('//a[@class="storylink"][1]/@href')
        title = XPath('//a[@class="storylink"][1]/text()')

        class Meta:
            source = XPath('//tr[@class="athing"]')
            route = {'/all?page=:page': '/news?p=:page'}

    class Page(Item):
        next_page = XPath('//a[@class="morelink"]/@href')

        class Meta:
            source = None
            route = {'/all?page=:page': '/news?p=:page'}

        def clean_next_page(self, next_page):
            """Prefix the scraped value (coerced to ``str``) with the local host."""
            suffix = str(next_page)
            return "http://127.0.0.1:5000/" + suffix

    # Same registration order as before: Post first, then Page.
    for item_cls in (Post, Page):
        api.register(item_cls)

    api.parse('/news?p=1')
def test_error():
    """Requesting a route whose item reads a bogus attribute must raise."""
    api = Api()

    @api.site("https://news.ycombinator.com")
    @api.list(".athing")
    @api.route("/posts?page={page}", "/news?p={page}")
    @api.route("/posts", "/news?p=1")
    class Post(Item):
        # The attribute name below is deliberately not a real HTML attribute.
        url = Attr(".storylink", "no this attribute")
        title = Text(".storylink")

    client = App(api.app)
    listing_path = "/posts?page=1"

    with pytest.raises(Exception):
        client.get(listing_path)
def test_error():
    """A GET on the listing route must raise when an item field is invalid."""
    api = Api()

    @api.site('https://news.ycombinator.com')
    @api.list('.athing')
    @api.route('/posts?page={page}', '/news?p={page}')
    @api.route('/posts', '/news?p=1')
    class Post(Item):
        # 'no this attribute' is intentionally not a real attribute name.
        url = Attr('.storylink', 'no this attribute')
        title = Text('.storylink')

    test_client = App(api.app)

    with pytest.raises(Exception):
        test_client.get('/posts?page=1')
"""Build the ``api`` object for sh.lianjia.com and serve it when run directly."""
from toapi import Api
from items.page import Page
from items.post import Post
from items.gio_district import GIO
from items.gio_detail import GIO_DETAIL
from settings import MySettings

api = Api('http://sh.lianjia.com', settings=MySettings)

# Registration order is preserved: Page, Post, GIO, GIO_DETAIL.
for _item in (Page, Post, GIO, GIO_DETAIL):
    api.register(_item)

if __name__ == '__main__':
    api.serve()
"""Single-file example: expose links from gaoqing.la on the ``/`` route."""
from toapi import XPath, Item, Api, Settings


class MySettings(Settings):
    """Settings override that supplies a custom ``web`` mapping."""

    # Same three keys and values as the original dict literal.
    web = dict(
        with_ajax=False,
        request_config={},
        headers=None,
    )


api = Api('http://gaoqing.la/', settings=MySettings)


class Movie(Item):
    """One entry per ``<li>`` under ``#post_container``."""

    url = XPath('//a[@class="zoom"]/@href')
    title = XPath('//a[@class="zoom"]/@title')

    class Meta:
        source = XPath('//*[@id="post_container"]/li')
        route = '/'


api.register(Movie)

# Runs at import time as well as when executed as a script (no main guard).
api.serve()
"""Build the ``api`` object with HotBook and Book items; serve when run directly."""
from toapi import Api
from items.hotbook import HotBook
from items.book import Book
from settings import MySettings

api = Api('https://news.ycombinator.com', settings=MySettings)

# HotBook is registered before Book, as in the original script.
for _item in (HotBook, Book):
    api.register(_item)

if __name__ == '__main__':
    api.serve()
"""Item definitions for two sites, each item carrying its own ``__base_url__``."""
from toapi import XPath, Item, Api

# No base URL is given here; every item sets ``__base_url__`` itself.
api = Api()


class Movie(Item):
    """Movie link and title taken from each ``table.tbspan`` on dy2018.com."""

    __base_url__ = 'http://www.dy2018.com'
    url = XPath('//b//a[@class="ulink"]/@href')
    title = XPath('//b//a[@class="ulink"]/text()')

    class Meta:
        source = XPath('//table[@class="tbspan"]')
        # Raw string so ``\d`` reaches the regex engine unchanged.
        # NOTE(review): the ``.`` before ``html`` is unescaped and therefore
        # matches any character - confirm that is acceptable.
        route = r'/html/gndy/dyzz/index_\d+.html'


class Post(Item):
    """Story link and title taken from each ``tr.athing`` row on Hacker News."""

    __base_url__ = 'https://news.ycombinator.com'
    url = XPath('//a[@class="storylink"]/@href')
    title = XPath('//a[@class="storylink"]/text()')

    class Meta:
        source = XPath('//tr[@class="athing"]')
        # Raw string: ``\?`` and ``\d`` are regex escapes.
        route = r'/news\?p=\d+'


class Page(Item):
    """Holds the ``morelink`` href from a Hacker News page."""

    __base_url__ = 'https://news.ycombinator.com'
    next_page = XPath('//a[@class="morelink"]/@href')
#!/usr/bin/env python
"""Serve the Instagram items (``ImageInfo`` and ``User``) on all interfaces."""
from toapi import Api
from items.image_info import ImageInfo
from items.user import User
from settings import MySettings

api = Api('https://www.instagram.com', settings=MySettings)
api.register(ImageInfo)
api.register(User)

# The port is passed as an integer rather than the string '5000'.
# Runs at import time as well as when executed as a script (no main guard).
api.serve(ip='0.0.0.0', port=5000)
def test_storage():
    """A value written with ``set_storage`` is returned by ``get_storage``."""
    key, value = 'a', '1'
    api = Api()

    api.set_storage(key, value)

    assert api.get_storage(key) == value
def test_cache():
    """``get_cache`` yields ``None`` for a missing key and the value once set."""
    key, value = 'a', '1'
    api = Api()

    # Nothing has been cached under the key yet.
    assert api.get_cache(key) is None

    api.set_cache(key, value)
    assert api.get_cache(key) == value
"""Minimal example: expose Hacker News stories on the ``/`` route."""
from toapi import XPath, Item, Api

api = Api('https://news.ycombinator.com/')


class Post(Item):
    """First ``storylink`` anchor (href and text) of each ``tr.athing`` row."""

    url = XPath('//a[@class="storylink"][1]/@href')
    title = XPath('//a[@class="storylink"][1]/text()')

    class Meta:
        source = XPath('//tr[@class="athing"]')
        route = '/'


api.register(Post)

# Runs at import time as well as when executed as a script (no main guard).
api.serve()

# Visit http://127.0.0.1:5000/
"""Example: expose gaoqing.la links on ``/`` with ``with_ajax`` enabled."""
from toapi import XPath, Item, Api

api = Api('http://gaoqing.la/', with_ajax=True)


class Movie(Item):
    """``href`` and ``title`` of the ``a.zoom`` anchor in each list entry."""

    url = XPath('//a[@class="zoom"]/@href')
    title = XPath('//a[@class="zoom"]/@title')

    class Meta:
        source = XPath('//*[@id="post_container"]/li')
        route = '/'


api.register(Movie)

# Runs at import time as well as when executed as a script (no main guard).
api.serve()
"""Build the ``api`` object for the 91baby search URL; serve when run directly."""
from toapi import Api
from items.hotbook import HotBook
from items.book import Book
from settings import MySettings

api = Api('https://91baby.mama.cn/search.php?searchsubmit=yes',
          settings=MySettings)

# HotBook is registered before Book, as in the original script.
for _item in (HotBook, Book):
    api.register(_item)

if __name__ == '__main__':
    api.serve()
"""Example: map friendly ``/movies/...`` paths onto dy2018.com pages."""
from toapi import XPath, Item, Api

api = Api(base_url='http://www.dy2018.com')


class MovieList(Item):
    """Link and title of each movie in a ``table.tbspan`` listing."""

    url = XPath('//b//a[@class="ulink"]/@href')
    title = XPath('//b//a[@class="ulink"]/text()')

    class Meta:
        source = XPath('//table[@class="tbspan"]')
        # Alias path -> source path; key order is kept from the original.
        route = {
            '/movies/?page=1': '/html/gndy/dyzz/',
            '/movies/?page=:page': '/html/gndy/dyzz/index_:page.html',
            '/movies/': '/html/gndy/dyzz/',
        }

    def clean_url(self, url):
        """Turn ``.../<name>.<ext>`` into the local path ``/movies/<name>/``."""
        last_segment = url.split('/')[-1]
        stem = last_segment.split('.')[0]
        return '/movies/{}/'.format(stem)


class Movie(Item):
    """Text of the links in the first table under ``#Zoom``."""

    download = XPath('//*[@id="Zoom"]/table[1]//a/text()')

    class Meta:
        source = None
        route = {'/movies/:id': '/i/:id.html'}


# MovieList is registered before Movie, as in the original script.
for _item in (MovieList, Movie):
    api.register(_item)
#!/usr/bin/env python
"""Register the index, article, question and one items; serve when run directly."""
from toapi import Api
from items.index import IndexOne, IndexArticle, IndexQuestion
from items.article import Article
from items.question import Question
from items.one import One
from settings import MySettings

api = Api(settings=MySettings)

# Registration order is preserved from the original script.
_ITEMS = (
    IndexOne,
    IndexArticle,
    IndexQuestion,
    Article,
    Question,
    One,
)
for _item in _ITEMS:
    api.register(_item)

if __name__ == '__main__':
    api.serve()
"""Register the Pixabay and Pexels items; serve when run directly."""
from toapi import Api
from items.pexels import Pexels
from items.pixabay import Pixabay
from settings import MySettings

api = Api(settings=MySettings)

# Pixabay is registered before Pexels, as in the original script.
for _item in (Pixabay, Pexels):
    api.register(_item)

if __name__ == '__main__':
    api.serve()
"""Example: expose the Douban Top 250 listing under ``/250/``."""
from toapi import Css, Item, Api

# Python 3 has no ``unicode`` builtin; fall back to ``str`` there.
try:
    bool(type(unicode))
except NameError:
    unicode = str

api = Api('https://movie.douban.com/top250')


class Post(Item):
    """Link and title of each ``div.item`` entry."""

    url = Css('div.hd>a', attr='href')
    title = Css('span.title')

    class Meta:
        source = Css('div.item', attr='target')
        # (alias, source) pairs, in the original order.
        route = (
            ('/250/?start=:start', '/?start=:start'),
            ('/250/', '/'),
        )

    def clean_title(self, title):
        """Strip non-breaking spaces from a title string or a set of nodes.

        A text value is returned with ``\\xa0`` removed.  Anything else is
        iterated, and each element's stripped ``.text`` is joined together.
        """
        if not isinstance(title, unicode):
            pieces = [i.text.strip().replace(u'\xa0', '') for i in title]
            return ''.join(pieces)
        return title.replace(u'\xa0', '')

    def clean_url(self, value):
        """Return the scraped URL unchanged."""
        return value


api.register(Post)
#!/usr/bin/env python
"""Register the ImageInfo and User items; serve when run directly."""
from toapi import Api
from items.image_info import ImageInfo
from items.user import User
from settings import MySettings

api = Api(settings=MySettings)

# ImageInfo is registered before User, as in the original script.
for _item in (ImageInfo, User):
    api.register(_item)

if __name__ == '__main__':
    api.serve()
"""Example: expose Hacker News listing pages with ``with_ajax`` enabled."""
from toapi import XPath, Item, Api

api = Api('https://news.ycombinator.com/', with_ajax=True)


class Post(Item):
    """First ``storylink`` anchor (href and text) of each ``tr.athing`` row."""

    url = XPath('//a[@class="storylink"][1]/@href')
    title = XPath('//a[@class="storylink"][1]/text()')

    class Meta:
        source = XPath('//tr[@class="athing"]')
        # Raw string: ``\?`` and ``\d`` are regex escapes, not string escapes.
        route = r'/news\?p=\d+'


class Page(Item):
    """Holds the ``morelink`` href of the page."""

    next_page = XPath('//a[@class="morelink"]/@href')

    class Meta:
        source = None
        route = r'/news\?p=\d+'


api.register(Post)
api.register(Page)

# Runs at import time as well as when executed as a script (no main guard).
api.serve()

# Visit http://127.0.0.1:5000/news?p=1
#
# Sample response. It was a triple-quoted block that is cut off in this
# excerpt, so it is kept as comments to leave the file parseable:
#
# {
#     "page": {
"""Register the Google, Bing and Baidu items; serve when run directly."""
from toapi import Api
from items.google import Google
from items.bing import Bing
from items.baidu import Baidu
from settings import MySettings

api = Api(settings=MySettings)

# Registration order is preserved: Google, Bing, Baidu.
for _item in (Google, Bing, Baidu):
    api.register(_item)

if __name__ == '__main__':
    api.serve()
"""Example: expose the posts of minwook-shin.github.io under ``/api``."""
from htmlparsing import Attr, Text
from toapi import Api, Item

api = Api()


@api.site('https://minwook-shin.github.io')
@api.list('.post')
@api.route('/api', '/')
class Post(Item):
    """One entry per ``.post`` element: read-more link and heading text."""

    url = Attr('.read-more', 'href')
    title = Text('h1 > a')


# Runs at import time as well as when executed as a script (no main guard).
api.run(
    debug=True,
    host='0.0.0.0',
    port=5000,
)
def test_status():
    """``get_status`` reports 1 for a fresh key and 2 after one update."""
    key = 'a'
    api = Api()

    assert api.get_status(key) == 1

    api.update_status(key)
    assert api.get_status(key) == 2
"""Package entry module: register the Hacker News items and serve them."""
from toapi import Api
from .items.page import Page
from .items.post import Post
from .items.user import User
from .settings import MySettings

api = Api('https://news.ycombinator.com/', settings=MySettings)

# Registration order is preserved: User, Page, Post.
for _item in (User, Page, Post):
    api.register(_item)

# Runs at import time as well as when executed as a script (no main guard).
api.serve(ip='0.0.0.0', port=5000)