# Example #1
import argparse
import asyncio

import aiohttp

import PageParse
from Spider import Request
from Spider import Spider

# Build the command-line interface for the crawler.
ARGS = argparse.ArgumentParser(description="caoliu spider")
ARGS.add_argument("--pages", action='store', type=int, default=1,
                  help='Limit page to spider')
ARGS.add_argument("--max_tries", action='store', type=int, default=30,
                  help='Limit retries on network errors')
ARGS.add_argument("--root_dir", action='store', default='./download',
                  help='directory store picture and torrent')
ARGS.add_argument("--max_tasks", action='store', type=int, default=20,
                  help='Limit concurrent connections')

# NOTE(review): not referenced anywhere in this script — looks like a
# leftover machine-specific path; confirm before removing.
ROOT_DIR = "/media/mosaic/软件/git-myspider/cl_spider/source/"

args = ARGS.parse_args()

# Wire the parsed options into the spider, queue the index pages, and run
# the event loop until the crawl completes.
loop = asyncio.get_event_loop()
spider = Spider(max_tries=args.max_tries, max_tasks=args.max_tasks)
PageParse.start(spider, 1, args.pages + 1, root_dir=args.root_dir)
loop.run_until_complete(spider.spider())
spider.close()
# stop() before run_forever() lets already-scheduled callbacks drain
# in one final pass before the loop is closed.
loop.stop()
loop.run_forever()
loop.close()

# Example #2
    def __init__(self, spider, url, path, params=None):
        """Create a binary-mode request whose response body is saved to *path*."""
        # Remember the destination file before delegating to the base request.
        self.path = path
        Request.__init__(self, spider=spider, url=url,
                         params=params, content_type="binary")

    def handle_func(self, content):
        """Persist the downloaded bytes to ``self.path`` and report completion."""
        with open(self.path, mode="wb") as out:
            out.write(content)
            print("%s============>下载完成!" % (self.path))


def start(spider, start_page=1, end_page=2, root_dir="./"):
    """Move into *root_dir* and queue index-page requests, last page first.

    ``end_page`` is exclusive, matching ``range`` semantics.
    """
    print("enter %s" % root_dir)
    os.chdir(root_dir)
    # Descending page order; each IndexPageRequest presumably registers
    # itself with the spider on construction (side effect, nothing kept).
    for page_no in reversed(range(start_page, end_page)):
        IndexPageRequest(spider, CL_URL % page_no)


if __name__ == "__main__":
    # Manual entry point: crawl a single index page through the spider.
    event_loop = asyncio.get_event_loop()
    spider = Spider(page_num=5, max_tries=30, max_tasks=20, rootDir='./source')
    IndexPageRequest(spider, CL_URL % 1)
    event_loop.run_until_complete(spider.spider())
    spider.close()
    # stop() before run_forever() drains pending callbacks in one final
    # pass before the loop is closed.
    event_loop.stop()
    event_loop.run_forever()
    event_loop.close()