Example #1
def main():
    asyncio.set_event_loop(None)
    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
    else:
        loop = asyncio.new_event_loop()
    sslctx = None
    if args.tls:
        import ssl
        # TODO: take cert/key from args as well.
        here = os.path.join(os.path.dirname(__file__), '..', 'tests')
        sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)  # legacy alias of PROTOCOL_TLS
        sslctx.options |= ssl.OP_NO_SSLv2
        sslctx.load_cert_chain(
            certfile=os.path.join(here, 'ssl_cert.pem'),
            keyfile=os.path.join(here, 'ssl_key.pem'))
    cache = Cache(loop)
    task = asyncio.streams.start_server(cache.handle_client,
                                        args.host, args.port,
                                        ssl=sslctx, loop=loop)
    svr = loop.run_until_complete(task)
    for sock in svr.sockets:
        logging.info('socket %s', sock.getsockname())
    loop.run_forever()
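
The hand-rolled loop selection above predates Python 3.8, where the proactor (IOCP) loop became the Windows default. A minimal sketch of the modern equivalent, using the event loop policy API instead of constructing the loop directly; the serve() coroutine is a placeholder for the server logic above:

import asyncio
import sys

async def serve():
    ...  # start_server()/serve_forever() logic would go here

if sys.platform == 'win32':
    # Explicitly request the IOCP-backed loop (already the default on 3.8+).
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

asyncio.run(serve())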
Example #2
def main():
    asyncio.set_event_loop(None)
    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
    else:
        loop = asyncio.new_event_loop()
    sslctx = None
    if args.tls:
        import ssl
        # TODO: take cert/key from args as well.
        here = os.path.join(os.path.dirname(__file__), '..', 'tests')
        sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        sslctx.options |= ssl.OP_NO_SSLv2
        sslctx.load_cert_chain(certfile=os.path.join(here, 'ssl_cert.pem'),
                               keyfile=os.path.join(here, 'ssl_key.pem'))
    cache = Cache(loop)
    task = asyncio.streams.start_server(cache.handle_client,
                                        args.host,
                                        args.port,
                                        ssl=sslctx,
                                        loop=loop)
    svr = loop.run_until_complete(task)
    for sock in svr.sockets:
        logging.info('socket %s', sock.getsockname())
    loop.run_forever()
Example #3
File: fetch3.py Project: 1st1/asyncio
def main():
    if '--iocp' in sys.argv:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        set_event_loop(loop)
    else:
        loop = get_event_loop()
    try:
        body = loop.run_until_complete(fetch(sys.argv[1], '-v' in sys.argv))
    finally:
        loop.close()
    sys.stdout.buffer.write(body)
Example #5
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels) - 1)])

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    if not args.roots:
        r = redis.StrictRedis(host='localhost', port=6379, db=0)
        data = [r.blpop('queue:urls_to_crawl')]
        # data.append(r.blpop('queue:urls_to_crawl'))
        # data.append(r.blpop('queue:urls_to_crawl'))
        roots, scrape_data = init_data(data)
        s = None  #Scraper(scrape_data)
    else:
        roots = {fix_url(root) for root in args.roots}
        s = None

    crawler = crawling.Crawler(roots,
                               scraper=s,
                               data_handler=None,
                               exclude=args.exclude,
                               strict=args.strict,
                               max_redirect=args.max_redirect,
                               max_tries=args.max_tries,
                               max_tasks=args.max_tasks)
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        reporting.report(crawler)  # report results
        crawler.close()

        # next two lines are required for actual aiohttp resource cleanup
        loop.stop()
        loop.run_forever()

        loop.close()
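
Several of these examples end with the same stop()/run_forever()/close() tail. Spinning the stopped loop once more lets close callbacks that aiohttp schedules with call_soon() during cleanup actually run before the loop is closed. A small sketch of that idiom as a reusable helper (an interpretation of the comment above, not an aiohttp API):

import asyncio

def shutdown(loop: asyncio.AbstractEventLoop) -> None:
    loop.stop()
    loop.run_forever()  # drain callbacks scheduled during resource cleanup
    loop.close()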
Example #6
File: sink.py Project: 1st1/asyncio
def main():
    global args
    args = ARGS.parse_args()
    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        set_event_loop(loop)
    else:
        loop = get_event_loop()
    try:
        loop.run_until_complete(start(loop, args.host, args.port))
    finally:
        loop.close()
Example #7
    def test(self):
        """ Play PyChess-PyChess 1 min variant games """

        if sys.platform == "win32":
            from asyncio.windows_events import ProactorEventLoop
            loop = ProactorEventLoop()
            asyncio.set_event_loop(loop)
        else:
            loop = asyncio.SelectorEventLoop()
            asyncio.set_event_loop(loop)

        loop = asyncio.get_event_loop()
        loop.set_debug(enabled=True)

        for vari in PYCHESS_VARIANTS:
            variant = variants[vari]

            def coro():
                self.p0 = yield from discoverer.initEngine(self.engine, WHITE, False)
                self.p1 = yield from discoverer.initEngine(self.engine, BLACK, False)

            loop.run_until_complete(coro())

            def optionsCallback(engine):
                engine.setOptionVariant(variant)
                engine.setOptionStrength(1, False)
                engine.setOptionTime(60, 0, 0)

            self.p0.connect("readyForOptions", optionsCallback)
            self.p1.connect("readyForOptions", optionsCallback)

            def coro(variant):
                self.game = GameModel(TimeModel(60, 0), variant)
                self.game.setPlayers([self.p0, self.p1])

                def on_game_end(game, state, event):
                    event.set()

                event = asyncio.Event()
                self.game.connect("game_ended", on_game_end, event)

                self.p0.prestart()
                self.p1.prestart()

                if self.game.variant.need_initial_board:
                    for player in self.game.players:
                        player.setOptionInitialBoard(self.game)

                print(variant.name)
                self.game.start()

                yield from event.wait()

                pgn = StringIO()
                print(save(pgn, self.game))

                self.assertIsNone(self.p0.invalid_move)
                self.assertIsNone(self.p1.invalid_move)

            loop.run_until_complete(coro(variant))
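
The nested def coro(): ... yield from ... functions above are pre-Python-3.5 generator-based coroutines, which modern asyncio no longer accepts (legacy generator coroutine support was dropped by Python 3.11). A self-contained sketch of the same construct in async/await form, with asyncio.sleep() standing in for the engine calls:

import asyncio

# Old style, as in the example (no longer supported):
#     def coro():
#         result = yield from something_awaitable()
#
# Modern equivalent:
async def coro():
    result = await asyncio.sleep(0, result='ready')
    return result

print(asyncio.run(coro()))  # prints: ready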
Example #8
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    if not args.roots:
        print('Use --help for command line help')
        return

    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels) - 1)])

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    roots = {fix_url(root) for root in args.roots}

    crawler = crawling.Crawler(
        roots,
        exclude=args.exclude,
        strict=args.strict,
        max_redirect=args.max_redirect,
        max_tries=args.max_tries,
        max_tasks=args.max_tasks,
        proxy=args.proxy,
        loop=loop,
    )
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        now = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
        with open("{}.log".format(now), "w") as f:
            reporting.report(crawler, file=f)

        # next two lines are required for actual aiohttp resource cleanup
        loop.stop()
        loop.run_forever()

        loop.close()
Example #9
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels) - 1)])

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop

        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    if not args.roots:
        r = redis.StrictRedis(host="localhost", port=6379, db=0)
        data = [r.blpop("queue:urls_to_crawl")]
        # data.append(r.blpop('queue:urls_to_crawl'))
        # data.append(r.blpop('queue:urls_to_crawl'))
        roots, scrape_data = init_data(data)
        s = None  # Scraper(scrape_data)
    else:
        roots = {fix_url(root) for root in args.roots}
        s = None

    crawler = crawling.Crawler(
        roots,
        scraper=s,
        data_handler=None,
        exclude=args.exclude,
        strict=args.strict,
        max_redirect=args.max_redirect,
        max_tries=args.max_tries,
        max_tasks=args.max_tasks,
    )
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print("\nInterrupted\n")
    finally:
        reporting.report(crawler)  # report results
        crawler.close()

        # next two lines are required for actual aiohttp resource cleanup
        loop.stop()
        loop.run_forever()

        loop.close()
Example #10
File: crawl.py Project: 83286415/500lines
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    if not args.roots:
        print('Use --help for command line help')
        return

    levels = [logging.ERROR, logging.WARN, logging.INFO,
              logging.DEBUG]  # ERROR=0, WARN=1; a lower index is more severe
    logging.basicConfig(level=levels[min(args.level, len(levels) - 1)])

    # The branches below select the event loop flavor (IOCP, select, ...),
    # which differ in the underlying socket machinery.
    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:  # generally slower
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()  # platform default loop

    roots = {fix_url(root) for root in args.roots}  # args.roots is a list

    crawler = crawling.Crawler(
        roots,
        exclude=args.exclude,
        strict=args.strict,
        max_redirect=args.max_redirect,
        max_tries=args.max_tries,
        max_tasks=args.max_tasks,
    )
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        reporting.report(crawler)  # print the crawl results (or write them to a file)
        crawler.close()  # close the aiohttp session

        # next two lines are required for actual aiohttp resource cleanup
        loop.stop()
        loop.run_forever()  # drain pending cleanup callbacks

        loop.close()  # also removes signal handlers
Example #11
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    print(args)
    if not args.roots:
        print('Use --help for command line help')
        return

    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels)-1)])  # in practice nothing was printed

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    roots = {fix_url(root) for root in args.roots}
    
    #创建crawler实例时,实现了add_url操作
    crawler = Crawler(roots,
                               exclude=args.exclude,
                               strict=args.strict,
                               max_redirect=args.max_redirect,
                               max_tries=args.max_tries,
                               max_tasks=args.max_tasks,
                               )
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        report(crawler)
        crawler.close()

        # next two lines are required for actual aiohttp resource cleanup
        #loop.stop()
        #loop.run_forever()

        loop.close()
Example #12
def main():
    '''
    Main program. Parse arguments, set up event loop, run crawler, print
    report.
    '''
    args = ARGS.parse_args()

    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels) - 1)])

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    if args.out:
        f = open(args.out, 'w')
    else:
        f = None

    roots = {fix_url(root) for root in args.roots}
    try:
        loop.run_until_complete(
            run_crawler(loop=loop,
                        roots=roots,
                        exclude=args.exclude,
                        strict=args.strict,
                        max_redirect=args.max_redirect,
                        max_tries=args.max_tries,
                        max_tasks=args.max_tasks,
                        login_url=args.login_url,
                        login_data=args.login_data,
                        file=f))
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        loop.stop()
        loop.run_forever()
        loop.close()
        if f is not None:
            f.close()
Example #13
def get_loop(select=False):
    if os.name == 'nt':
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
    elif select:
        loop = asyncio.SelectorEventLoop()
    else:
        loop = asyncio.get_event_loop()
    return loop
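
One caveat worth noting: only the final branch of get_loop() returns the loop that asyncio already considers current; the ProactorEventLoop and SelectorEventLoop branches create a fresh loop without installing it. A hedged usage sketch, assuming the get_loop() above is in scope:

import asyncio

loop = get_loop()
asyncio.set_event_loop(loop)  # install it so get_event_loop() agrees
try:
    loop.run_until_complete(asyncio.sleep(0))
finally:
    loop.close()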
Example #14
File: cacheclt.py Project: 1st1/asyncio
def main():
    asyncio.set_event_loop(None)
    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
    else:
        loop = asyncio.new_event_loop()
    sslctx = None
    if args.tls:
        sslctx = test_utils.dummy_ssl_context()
    cache = CacheClient(args.host, args.port, sslctx=sslctx, loop=loop)
    try:
        loop.run_until_complete(
            asyncio.gather(
                *[testing(i, cache, loop) for i in range(args.ntasks)],
                loop=loop))
    finally:
        loop.close()
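
Note that the loop= argument to asyncio.gather() (and most of the explicit loop plumbing above) was deprecated in Python 3.8 and removed in 3.10. A self-contained sketch of the same fan-out on current Python, with a stand-in for the example's testing() coroutine:

import asyncio

async def testing(i):
    # stand-in for the example's testing() coroutine
    await asyncio.sleep(0)
    return i

async def run_all(ntasks):
    # gather() now always uses the running loop; no loop= argument.
    return await asyncio.gather(*(testing(i) for i in range(ntasks)))

print(asyncio.run(run_all(3)))  # [0, 1, 2]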
Example #15
File: crawl.py Project: 1st1/asyncio
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    if not args.roots:
        print('Use --help for command line help')
        return

    log = Logger(args.level)

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    roots = {fix_url(root) for root in args.roots}

    crawler = Crawler(log,
                      roots,
                      exclude=args.exclude,
                      strict=args.strict,
                      max_redirect=args.max_redirect,
                      max_tries=args.max_tries,
                      max_tasks=args.max_tasks,
                      max_pool=args.max_pool,
                      )
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        crawler.report()
        crawler.close()
        loop.close()
Example #16
File: aspider.py Project: timedcy/aspider
def main(loop=None):
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    out_loop = loop is not None
    args = ARGS.parse_args()
    if not args.roots:
        print('Use --help for command line help')
        return
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels)-1)])

    root_path = args.roots[0]
    router = routeing.get_router()
    logging.debug('resume the loop')
    router.resume(loop)
    router.add_root_path(root_path)
    logging.debug(f'set up router with root_path {root_path}')
    if not loop:
        if args.iocp:
            from asyncio.windows_events import ProactorEventLoop
            loop = ProactorEventLoop()
            asyncio.set_event_loop(loop)
        elif args.select:
            loop = asyncio.SelectorEventLoop()
            asyncio.set_event_loop(loop)
        else:
            loop = asyncio.get_event_loop()

    loop.set_exception_handler(exception_handler)

    roots = {fix_url(root) for root in args.roots}

    crawler = crawling.Crawler(roots,
                               exclude=args.exclude,
                               strict=args.strict,
                               max_redirect=args.max_redirect,
                               max_tries=args.max_tries,
                               max_tasks=args.max_tasks,
                               loop=loop
                               )
    if out_loop:
        asyncio.ensure_future(crawler.crawl(), loop=loop)
        asyncio.ensure_future(do_async_report(router, crawler), loop=loop)
    else:
        try:
            loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
        except KeyboardInterrupt:
            # sys.stderr.flush()
            pass
        finally:
            reporting.report(crawler)
            print('ALL DONE NOW')
Example #17
def main():
    global args
    args = ARGS.parse_args()
    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        set_event_loop(loop)
    else:
        loop = get_event_loop()
    try:
        loop.run_until_complete(start(loop, args))
    finally:
        loop.close()
Example #18
def main():
    "Parse arguments, set up event loop, run crawler and print a report."

    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    if args["--quiet"]:
        logging.basicConfig(level=levels[0])
    else:
        logging.basicConfig(level=levels[int(args["--verbose"])])

    # Not sure how to set --strict to True by default with docopt. So this is
    # where we handle strict vs lenient.
    if args["--lenient"]:
        args["--strict"] = False
    else:
        args["--strict"] = True

    if args["--iocp"]:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args["--select"]:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    # Set comprehension to avoid redundancy.
    roots = {fix_url(root) for root in args["<root>"]}

    # Instantiating the crawler with our arguments.
    crawler = crawling.Crawler(roots,
                               exclude=args["--exclude"],
                               strict=args["--strict"],
                               max_redirect=int(args["--max-redirect"]),
                               max_tries=int(args["--max-tries"]),
                               max_tasks=int(args["--max-tasks"]),
                               max_pool=int(args["--max-pool"])
                               )

    # "And this is where the magic happens."
    try:
        loop.run_until_complete(crawler.crawl())
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        reporting.report(crawler)
        crawler.close()
        loop.close()
Example #19
    def setUp(self):
        if sys.platform == "win32":
            from asyncio.windows_events import ProactorEventLoop
            loop = ProactorEventLoop()
            asyncio.set_event_loop(loop)
        else:
            loop = asyncio.SelectorEventLoop()
            asyncio.set_event_loop(loop)

        self.loop = asyncio.get_event_loop()
        self.loop.set_debug(enabled=True)

        widgets = uistuff.GladeWidgets("PyChess.glade")
        gamewidget.setWidgets(widgets)
        perspective_manager.set_widgets(widgets)

        self.welcome_persp = Welcome()
        perspective_manager.add_perspective(self.welcome_persp)

        self.games_persp = Games()
        perspective_manager.add_perspective(self.games_persp)
Example #21
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    if not args.roots:
        print('Use --help for command line help')
        return

    log = Logger(args.level)

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    roots = {fix_url(root) for root in args.roots}

    crawler = Crawler(
        log,
        roots,
        exclude=args.exclude,
        strict=args.strict,
        max_redirect=args.max_redirect,
        max_tries=args.max_tries,
        max_tasks=args.max_tasks,
        max_pool=args.max_pool,
    )
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        crawler.report()
        crawler.close()
        loop.close()
Example #22
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    if not args.roots:
        print('Use --help for command line help')
        return

    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels)-1)])

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    roots = {fix_url(root) for root in args.roots}

    crawler = crawling.Crawler(roots,
                               exclude=args.exclude,
                               strict=args.strict,
                               max_redirect=args.max_redirect,
                               max_tries=args.max_tries,
                               max_tasks=args.max_tasks,
                               )
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        reporting.report(crawler)
        crawler.close()
        loop.close()
Example #23
        # (excerpt from a larger main(loop) coroutine)
        # get and print lines from stdout, stderr
        timeout = None
        while registered:
            done, pending = yield from asyncio.wait(
                registered, timeout=timeout,
                return_when=asyncio.FIRST_COMPLETED)
            if not done:
                break
            for f in done:
                stream = registered.pop(f)
                res = f.result()
                print(name[stream], res.decode('ascii').rstrip())
                if res != b'':
                    registered[asyncio.Task(stream.readline())] = stream
            timeout = 0.0

    stdout_transport.close()
    stderr_transport.close()

if __name__ == '__main__':
    if sys.platform == 'win32':
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main(loop))
    finally:
        loop.close()
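
The excerpt above wires child-process pipes through low-level transports, which is exactly what the proactor loop enables on Windows (the selector loop there cannot do subprocess pipes). For comparison, a minimal self-contained sketch using the high-level subprocess API; the command is illustrative:

import asyncio
import sys

async def run():
    proc = await asyncio.create_subprocess_exec(
        sys.executable, '-c', 'print("hello")',
        stdout=asyncio.subprocess.PIPE)
    line = await proc.stdout.readline()
    print('stdout', line.decode('ascii').rstrip())
    await proc.wait()

asyncio.run(run())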
Example #24
File: main.py Project: JFluo2011/crawler
def run():
    setup_log(logging.INFO,
              os.path.join(os.path.abspath('.'), 'logs', 'look_ua.log'))
    source_urls = [
        ('https://www.look.com.ua/love/page/{}/', 42),
        # ('https://www.look.com.ua/spring/page/{}/', 94),
        # ('https://www.look.com.ua/autumn/page/{}/', 99),
        # ('https://www.look.com.ua/hi-tech/page/{}/', 114),
        # ('https://www.look.com.ua/summer/page/{}/', 119),
        # ('https://www.look.com.ua/newyear/page/{}/', 156),
        # ('https://www.look.com.ua/men/page/{}/', 157),
        # ('https://www.look.com.ua/holidays/page/{}/', 159),
        # ('https://www.look.com.ua/creative/page/{}/', 168),
        # ('https://www.look.com.ua/winter/page/{}/', 172),
        # ('https://www.look.com.ua/situation/page/{}/', 172),
        # ('https://www.look.com.ua/music/page/{}/', 184),
        # ('https://www.look.com.ua/food/page/{}/', 211),
        # ('https://www.look.com.ua/weapon/page/{}/', 217),
        # ('https://www.look.com.ua/aviation/page/{}/', 261),
        # ('https://www.look.com.ua/textures/page/{}/', 267),
        # ('https://www.look.com.ua/minimalism/page/{}/', 278),
        # ('https://www.look.com.ua/movies/page/{}/', 280),
        # ('https://www.look.com.ua/3d/page/{}/', 286),
        # ('https://www.look.com.ua/abstraction/page/{}/', 293),
        # ('https://www.look.com.ua/space/page/{}/', 302),
        # ('https://www.look.com.ua/sport/page/{}/', 307),
        # ('https://www.look.com.ua/mood/page/{}/', 422),
        # ('https://www.look.com.ua/flowers/page/{}/', 595),
        # ('https://www.look.com.ua/macro/page/{}/', 636),
        # ('https://www.look.com.ua/travel/page/{}/', 674),
        # ('https://www.look.com.ua/fantasy/page/{}/', 687),
        # ('https://www.look.com.ua/anime/page/{}/', 694),
        # ('https://www.look.com.ua/games/page/{}/', 720),
        # ('https://www.look.com.ua/other/page/{}/', 778),
        # ('https://www.look.com.ua/animals/page/{}/', 1103),
        # ('https://www.look.com.ua/landscape/page/{}/', 1140),
        # ('https://www.look.com.ua/nature/page/{}/', 1142),
        # ('https://www.look.com.ua/auto/page/{}/', 1559),
        # ('https://www.look.com.ua/girls/page/{}/', 9266),
    ]
    if sys.platform == 'win32':
        loop = ProactorEventLoop()
    else:
        loop = asyncio.get_event_loop()
    asyncio.set_event_loop(loop)
    redis_key = 'look_ua'
    crawler = Crawler(redis_key, max_tasks=1000, store_path='D:\\download\\')
    for info in source_urls:
        for i in range(1, info[1] + 1):
            json_data = {
                'url': info[0].format(i),
                'type_': 'text',
                'operate_func': 'parse_detail_task',
            }
            crawler.insert_task(json.dumps(json_data))

    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        logging.warning('\nInterrupted\n')
    finally:
        loop.run_until_complete(crawler.close())
        loop.stop()
        loop.run_forever()

        loop.close()
Example #25
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    if not args.roots:
        print('Use --help for command line help')
        return

    global config
    global headers

    config = configparser.ConfigParser()
    config.read('client_app.ini')

    headers = {"User-Agent": config['client']['user-agent']}

    # @todo: figure out what to do with these. Currently just for creating the auth URL
    scopes = [
        'publicData', 'characterContactsRead', 'characterFittingsRead',
        'characterLocationRead'
    ]

    if args.auth:
        id = bytes("{}:{}".format(config['client']['Key'],
                                  config['client']['secret']),
                   encoding="utf-8")
        headers.update({
            "Authorization": b"Basic " + base64.b64encode(id),
            "Content-Type": "application/x-www-form-urlencoded"
        })

        if config['client'].get('refresh', None) and not args.invalid:
            print("Using Refresh token to login")
            # do requests here to get auth/refresh code and stick them in config (save maybe?)
            r = requests.post(
                'https://login.eveonline.com/oauth/token',
                data="grant_type=refresh_token&refresh_token={}".format(
                    config['client']['refresh']),
                headers=headers).json()
            headers.update(
                {"Authorization": "Bearer {}".format(r['access_token'])})
        else:

            def handleLogin(httpd, parts):
                # do requests here to get auth/refresh code and stick them in config (save maybe?)
                r = requests.post(
                    'https://login.eveonline.com/oauth/token',
                    data="grant_type=authorization_code&code={}".format(
                        parts['code'][0]),
                    headers=headers).json()

                config["client"]["refresh"] = r['refresh_token']
                with open('client_app.ini', 'w') as configfile:
                    config.write(configfile)

                headers.update(
                    {"Authorization": "Bearer {}".format(r['access_token'])})
                httpd.stop()

            httpd = StoppableHTTPServer(('', 6789), AuthHandler)
            url = "https://login.eveonline.com/oauth/authorize/?response_type=code&scope={}&redirect_uri=http://localhost:6789/&client_id={}".format(
                "+".join(scopes), config['client']['key'])
            print("Please go here to authenticate: \n {}".format(url))
            httpd.serve(handleLogin)

    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels) - 1)])

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    roots = {fix_url(root) for root in args.roots}

    crawler = crawling.Crawler(
        roots,
        exclude=args.exclude,
        strict=args.strict,
        max_redirect=args.max_redirect,
        max_tries=args.max_tries,
        max_tasks=args.max_tasks,
        headers=headers,
        follow_pages=args.follow_pages,
    )
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        reporting.report(crawler)
        crawler.close()

        # next two lines are required for actual aiohttp resource cleanup
        loop.stop()
        loop.run_forever()

        loop.close()
Example #26
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    if not args.roots:
        print('Use --help for command line help')
        return

    global config
    global headers

    config = configparser.ConfigParser()
    config.read('client_app.ini')

    headers = {
        "User-Agent": config['client']['user-agent']
    }

    # @todo: figure out what to do with these. Currently just for creating the auth URL
    scopes = [
        'publicData',
        'characterContactsRead',
        'characterFittingsRead',
        'characterLocationRead'
    ]

    if args.auth:
        id = bytes("{}:{}".format(config['client']['Key'], config['client']['secret']), encoding="utf-8")
        headers.update({
            "Authorization": b"Basic " + base64.b64encode(id),
            "Content-Type": "application/x-www-form-urlencoded"
        })

        if config['client'].get('refresh', None) and not args.invalid:
            print("Using Refresh token to login")
            # do requests here to get auth/refresh code and stick them in config (save maybe?)
            r = requests.post('https://login.eveonline.com/oauth/token',
                              data="grant_type=refresh_token&refresh_token={}".format(config['client']['refresh']),
                              headers=headers).json()
            headers.update({"Authorization": "Bearer {}".format(r['access_token'])})
        else:
            def handleLogin(httpd, parts):
                # do requests here to get auth/refresh code and stick them in config (save maybe?)
                r = requests.post('https://login.eveonline.com/oauth/token',
                                  data="grant_type=authorization_code&code={}".format(parts['code'][0]),
                                  headers=headers).json()

                config["client"]["refresh"] = r['refresh_token']
                with open('client_app.ini', 'w') as configfile:
                    config.write(configfile)

                headers.update({"Authorization": "Bearer {}".format(r['access_token'])})
                httpd.stop()

            httpd = StoppableHTTPServer(('', 6789), AuthHandler)
            url = "https://login.eveonline.com/oauth/authorize/?response_type=code&scope={}&redirect_uri=http://localhost:6789/&client_id={}".format("+".join(scopes), config['client']['key'])
            print("Please go here to authenticate: \n {}".format(url))
            httpd.serve(handleLogin)

    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels)-1)])

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    roots = {fix_url(root) for root in args.roots}

    crawler = crawling.Crawler(roots,
                               exclude=args.exclude,
                               strict=args.strict,
                               max_redirect=args.max_redirect,
                               max_tries=args.max_tries,
                               max_tasks=args.max_tasks,
                               headers=headers,
                               follow_pages=args.follow_pages,
                               )
    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        reporting.report(crawler)
        crawler.close()

        # next two lines are required for actual aiohttp resource cleanup
        loop.stop()
        loop.run_forever()

        loop.close()
Example #27
File: crawl.py Project: chtcvl/crawler
def main():
    """Main program.

    Parse arguments, set up event loop, run crawler, print report.
    """
    args = ARGS.parse_args()
    # if not args.roots:
    #     print('Use --help for command line help')
    #     return

    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(args.level, len(levels) - 1)])

    if args.iocp:
        from asyncio.windows_events import ProactorEventLoop
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    elif args.select:
        loop = asyncio.SelectorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()

    roots = {fix_url(root) for root in args.roots}
    roots = {'https://sauna.ru/'}  # hardcoded override of the parsed roots
    crawler = crawling.Crawler(
        roots,
        exclude='\/(review|news|addFavorite|panorama|comment)',
        strict=args.strict,
        max_redirect=args.max_redirect,
        max_tries=args.max_tries,
        max_tasks=args.max_tasks,
        scrape_nonhtml=False,
    )
    try:
        loop.run_until_complete(crawler.dbconnect())
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        print('\nInterrupted\n')
    finally:
        reporting.report(crawler)
        crawler.close()

        # next two lines are required for actual aiohttp resource cleanup
        loop.stop()
        loop.run_forever()

        loop.close()
Example #28
        # (excerpt from a larger main(loop) coroutine)
        timeout = None
        while registered:
            done, pending = yield from asyncio.wait(
                registered,
                timeout=timeout,
                return_when=asyncio.FIRST_COMPLETED)
            if not done:
                break
            for f in done:
                stream = registered.pop(f)
                res = f.result()
                print(name[stream], res.decode('ascii').rstrip())
                if res != b'':
                    registered[asyncio.Task(stream.readline())] = stream
            timeout = 0.0

    stdout_transport.close()
    stderr_transport.close()


if __name__ == '__main__':
    if sys.platform == 'win32':
        loop = ProactorEventLoop()
        asyncio.set_event_loop(loop)
    else:
        loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main(loop))
    finally:
        loop.close()
Example #29
def main():
    setup_log(logging.INFO,
              os.path.join(os.path.abspath('.'), 'logs', 'look_ua.log'))
    source_urls = [
        # ('https://www.look.com.ua/love/page/{}/', 42),
        # ('https://www.look.com.ua/spring/page/{}/', 94),
        # ('https://www.look.com.ua/autumn/page/{}/', 99),
        # ('https://www.look.com.ua/hi-tech/page/{}/', 114),

        # ('https://www.look.com.ua/summer/page/{}/', 119),
        # ('https://www.look.com.ua/newyear/page/{}/', 156),
        # ('https://www.look.com.ua/men/page/{}/', 157),
        # ('https://www.look.com.ua/holidays/page/{}/', 159),
        # ('https://www.look.com.ua/creative/page/{}/', 168),
        # ('https://www.look.com.ua/winter/page/{}/', 172),
        # ('https://www.look.com.ua/situation/page/{}/', 172),
        # ('https://www.look.com.ua/music/page/{}/', 184),
        # ('https://www.look.com.ua/food/page/{}/', 211),
        # ('https://www.look.com.ua/weapon/page/{}/', 217),
        # ('https://www.look.com.ua/aviation/page/{}/', 261),
        # ('https://www.look.com.ua/textures/page/{}/', 267),
        # ('https://www.look.com.ua/minimalism/page/{}/', 278),
        # ('https://www.look.com.ua/movies/page/{}/', 280),
        # ('https://www.look.com.ua/3d/page/{}/', 286),
        # ('https://www.look.com.ua/abstraction/page/{}/', 293),
        # ('https://www.look.com.ua/space/page/{}/', 302),
        # ('https://www.look.com.ua/sport/page/{}/', 307),
        # ('https://www.look.com.ua/mood/page/{}/', 422),
        # ('https://www.look.com.ua/flowers/page/{}/', 595),
        # ('https://www.look.com.ua/macro/page/{}/', 636),
        # ('https://www.look.com.ua/travel/page/{}/', 674),
        # ('https://www.look.com.ua/fantasy/page/{}/', 687),
        # ('https://www.look.com.ua/anime/page/{}/', 694),
        # ('https://www.look.com.ua/games/page/{}/', 720),

        # ('https://www.look.com.ua/other/page/{}/', 778),
        # ('https://www.look.com.ua/animals/page/{}/', 1103),
        # ('https://www.look.com.ua/landscape/page/{}/', 1140),
        # ('https://www.look.com.ua/nature/page/{}/', 1142),
        # ('https://www.look.com.ua/auto/page/{}/', 1559),
        # ('https://www.look.com.ua/girls/page/{}/', 9266),
    ]
    # loop = asyncio.get_event_loop()
    loop = ProactorEventLoop()  # Windows-only IOCP loop
    asyncio.set_event_loop(loop)
    crawler = Crawler(max_tries=5, max_tasks=30)
    for info in source_urls:
        for i in range(1, info[1] + 1):
            json_data = {
                'url': info[0].format(i),
            }
            crawler.insert_task(crawler.start_page_key, json.dumps(json_data))

    try:
        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
    except KeyboardInterrupt:
        sys.stderr.flush()
        logging.warning('\nInterrupted\n')
    finally:
        loop.run_until_complete(crawler.close())

        # next two lines are required for actual aiohttp resource cleanup
        loop.stop()
        loop.run_forever()

        loop.close()