Beispiel #1
0
def fetcher(ctx,
            xmlrpc,
            xmlrpc_host,
            xmlrpc_port,
            poolsize,
            proxy,
            user_agent,
            timeout,
            Fetcher=Fetcher):
    g = ctx.obj
    fetcher = Fetcher(inqueue=g.scheduler2fetcher,
                      outqueue=g.fetcher2processor,
                      poolsize=poolsize,
                      proxy=proxy)
    fetcher.phantomjs_proxy = g.phantomjs_proxy
    if user_agent:
        fetcher.user_agent = user_agent
    if timeout:
        fetcher.default_options = dict(fetcher.default_options)
        fetcher.default_options['timeout'] = timeout

    g.instances.append(fetcher)
    if g.get('testing_mode'):
        return fetcher

    if xmlrpc:
        run_in_thread(fetcher.xmlrpc_run, port=xmlrpc_port, bind=xmlrpc_host)
    fetcher.run()
Beispiel #2
0
def run_fetcher(g=g):
    from pyspider.fetcher.tornado_fetcher import Fetcher
    fetcher = Fetcher(inqueue=g.scheduler2fetcher,
                      outqueue=g.fetcher2processor)
    g.fetcher = fetcher
    run_in_thread(fetcher.xmlrpc_run)
    fetcher.run()
Beispiel #3
0
def run_fetcher(g=g):
    from pyspider.fetcher.tornado_fetcher import Fetcher
    fetcher = Fetcher(inqueue=g.scheduler2fetcher, outqueue=g.fetcher2processor)
    fetcher.phantomjs_proxy = g.phantomjs_proxy

    run_in_thread(fetcher.xmlrpc_run, port=g.fetcher_xmlrpc_port, bind=g.webui_host)
    fetcher.run()
Beispiel #4
0
def fetcher(ctx, xmlrpc, xmlrpc_host, xmlrpc_port):
    g = ctx.obj
    from pyspider.fetcher.tornado_fetcher import Fetcher
    fetcher = Fetcher(inqueue=g.scheduler2fetcher, outqueue=g.fetcher2processor)
    fetcher.phantomjs_proxy = g.phantomjs_proxy
    g.instances.append(fetcher)

    if xmlrpc:
        run_in_thread(fetcher.xmlrpc_run, port=xmlrpc_port, bind=xmlrpc_host)
    fetcher.run()
Beispiel #5
0
def run_fetcher(g=g):
    from pyspider.fetcher.tornado_fetcher import Fetcher
    fetcher = Fetcher(inqueue=g.scheduler2fetcher,
                      outqueue=g.fetcher2processor)
    fetcher.phantomjs_proxy = g.phantomjs_proxy

    run_in_thread(fetcher.xmlrpc_run,
                  port=g.fetcher_xmlrpc_port,
                  bind=g.webui_host)
    fetcher.run()
Beispiel #6
0
def fetcher(ctx, xmlrpc, xmlrpc_host, xmlrpc_port, poolsize, proxy, user_agent, timeout, Fetcher=Fetcher):
    g = ctx.obj
    fetcher = Fetcher(inqueue=g.scheduler2fetcher, outqueue=g.fetcher2processor,
                      poolsize=poolsize, proxy=proxy)
    fetcher.phantomjs_proxy = g.phantomjs_proxy
    if user_agent:
        fetcher.user_agent = user_agent
    if timeout:
        fetcher.default_options = dict(fetcher.default_options)
        fetcher.default_options['timeout'] = timeout

    g.instances.append(fetcher)
    if g.get('testing_mode'):
        return fetcher

    if xmlrpc:
        run_in_thread(fetcher.xmlrpc_run, port=xmlrpc_port, bind=xmlrpc_host)
    fetcher.run()
Beispiel #7
0
def run_fetcher(g=g):
    from pyspider.fetcher.tornado_fetcher import Fetcher
    fetcher = Fetcher(inqueue=g.scheduler2fetcher, outqueue=g.fetcher2processor)
    g.fetcher = fetcher
    run_in_thread(fetcher.xmlrpc_run)
    fetcher.run()