Example No. 1
def run_crawler(argv=None, settings=None):
    """Run the scrapy crawler bound to the registered spiders.

    This function is suitable for standalone scripts.

    Usage::

        # mimic 'scrapy crawl' command having these two spiders available
        SpiderManager.register(FooSpider)
        SpiderManager.register(BarSpider)

        run_crawler()

    :param argv: full argument vector including the program name
        (defaults to ``sys.argv``)
    :param settings: optional settings passed through ``_build_settings``
    """
    argv = argv or sys.argv
    settings = _build_settings(settings)

    # Point the crawler at this module's SpiderManager so only spiders
    # registered here are available.
    settings.overrides.update({
        'SPIDER_MANAGER_CLASS':
        '%s.%s' % (__name__, SpiderManager.__name__),
    })

    crawler = CrawlerProcess(settings)
    crawler.install()

    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter())
    parser.add_option('-l',
                      '--list',
                      action='store_true',
                      help="List available spiders")

    cmd = CrawlCommand()
    settings.defaults.update(cmd.default_settings)
    cmd.settings = settings
    cmd.add_options(parser)

    parser.usage = "%s %s" % (argv[0], cmd.syntax())
    # Fix: parse the caller-supplied argv (minus the program name) instead of
    # implicitly re-reading sys.argv, so a custom argv= actually takes effect.
    opts, args = parser.parse_args(argv[1:])
    if opts.list:
        # 'scrapy list' behaviour: print available spiders and exit.
        settings.defaults.update(ListCommand.default_settings)
        listcmd = ListCommand()
        listcmd.set_crawler(crawler)
        listcmd.run(args, opts)
        sys.exit(listcmd.exitcode)
    else:
        # 'scrapy crawl' behaviour: validate options, then run the command.
        cmdline._run_print_help(parser, cmd.process_options, args, opts)
        cmd.set_crawler(crawler)
        cmdline._run_print_help(parser, cmdline._run_command, cmd, args, opts)
        sys.exit(cmd.exitcode)
def run_crawler(argv=None, settings=None):
    """Run the scrapy crawler bound to the registered spiders.

    This function is suitable for standalone scripts.

    Usage::

        # mimic 'scrapy crawl' command having these two spiders available
        SpiderManager.register(FooSpider)
        SpiderManager.register(BarSpider)

        run_crawler()

    :param argv: full argument vector, program name first
        (defaults to ``sys.argv``)
    :param settings: optional settings passed through ``_build_settings``
    """
    argv = argv or sys.argv
    settings = _build_settings(settings)

    # Restrict the crawler to spiders registered with this module's
    # SpiderManager.
    settings.overrides.update({
        'SPIDER_MANAGER_CLASS': '%s.%s' % (__name__, SpiderManager.__name__),
    })

    crawler = CrawlerProcess(settings)
    crawler.install()

    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter())
    parser.add_option('-l', '--list', action='store_true',
                      help="List available spiders")

    cmd = CrawlCommand()
    settings.defaults.update(cmd.default_settings)
    cmd.settings = settings
    cmd.add_options(parser)

    parser.usage = "%s %s" % (argv[0], cmd.syntax())
    # Bug fix: honour the argv parameter. Calling parse_args() with no
    # arguments always reads sys.argv[1:], ignoring a caller-supplied argv.
    opts, args = parser.parse_args(argv[1:])
    if opts.list:
        # -l/--list: mimic 'scrapy list' and exit with its exit code.
        settings.defaults.update(ListCommand.default_settings)
        listcmd = ListCommand()
        listcmd.set_crawler(crawler)
        listcmd.run(args, opts)
        sys.exit(listcmd.exitcode)
    else:
        # Default path: mimic 'scrapy crawl' and exit with its exit code.
        cmdline._run_print_help(parser, cmd.process_options, args, opts)
        cmd.set_crawler(crawler)
        cmdline._run_print_help(parser, cmdline._run_command, cmd, args, opts)
        sys.exit(cmd.exitcode)