Example #1
    def run(self, args, opts):
        url = args[0] if args else None
        if url:
            # first argument may be a local file
            url = guess_scheme(url)

        spider_loader = self.crawler_process.spider_loader

        spidercls = DefaultSpider
        if opts.spider:
            spidercls = spider_loader.load(opts.spider)
        elif url:
            spidercls = spidercls_for_request(spider_loader,
                                              Request(url),
                                              spidercls,
                                              log_multiple=True)

        # The crawler is created this way since the Shell manually handles the
        # crawling engine, so the set up in the crawl method won't work
        crawler = self.crawler_process._create_crawler(spidercls)
        # The Shell class needs a persistent engine in the crawler
        crawler.engine = crawler._create_engine()
        crawler.engine.start()

        self._start_crawler_thread()

        shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
        shell.start(url=url, redirect=not opts.no_redirect)
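In this variant, args and opts arrive from Scrapy's command-line parser: args[0] is the URL (or a local file, thanks to guess_scheme), -c fills opts.code, --spider fills opts.spider, and --no-redirect fills opts.no_redirect. As a minimal sketch of driving the same code path from a script, assuming scrapy.cmdline.execute and a hypothetical target URL:

from scrapy.cmdline import execute

# Roughly equivalent to running "scrapy shell https://example.com -c 'response.status'"
# in a terminal: "https://example.com" becomes args[0] and the -c value becomes
# opts.code inside run() above.
execute([
    "scrapy", "shell",
    "https://example.com",   # hypothetical URL; a local file path also works here
    "-c", "response.status",
])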
Example #2
    def run(self, args, opts):
        url = args[0] if args else None
        shell = Shell(self.crawler,
                      update_vars=self.update_vars,
                      code=opts.code)
        # Run the crawling engine in a separate thread so the interactive
        # shell can take over this one
        self._start_crawler_thread()
        shell.start(url=url)
Example #3
    def run(self, args, opts):
        url = args[0] if args else None
        spiders = self.crawler_process.spiders

        spidercls = DefaultSpider
        if opts.spider:
            spidercls = spiders.load(opts.spider)
        elif url:
            spidercls = spidercls_for_request(spiders,
                                              Request(url),
                                              spidercls,
                                              log_multiple=True)

        # The crawler is created this way since the Shell manually handles the
        # crawling engine, so the set up in the crawl method won't work
        crawler = self.crawler_process._create_logged_crawler(spidercls)
        # The Shell class needs a persistent engine in the crawler
        crawler.engine = crawler._create_engine()
        crawler.engine.start()

        self.crawler_process.start(start_reactor=False)
        self._start_crawler_thread()

        shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
        shell.start(url=url)
Example #4
File: shell.py  Project: ymzy/scrapy
    def run(self, args, opts):
        url = args[0] if args else None
        spider = None
        if opts.spider:
            # Instantiate the spider requested with --spider, if any
            spider = self.crawler.spiders.create(opts.spider)
        shell = Shell(self.crawler,
                      update_vars=self.update_vars,
                      code=opts.code)
        self._start_crawler_thread()
        shell.start(url=url, spider=spider)
Example #5
    def run(self, args, opts):
        crawler = self.crawler_process.create_crawler()

        url = args[0] if args else None
        # Instantiate the spider requested with --spider, if any
        spider = crawler.spiders.create(opts.spider) if opts.spider else None

        self.crawler_process.start_crawling()
        self._start_crawler_thread()

        shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
        shell.start(url=url, spider=spider)
Example #6
    def run(self, args, opts):
        url = args[0] if args else None
        shell = Shell(self.crawler, update_vars=self.update_vars, inthread=True,
                      code=opts.code)

        def err(f):
            # Log the failure and flag a non-zero exit code for the command
            log.err(f, "Shell error")
            self.exitcode = 1

        # Open the shell, stop the crawler once the shell exits or errors out
        d = shell.start(url=url)
        d.addErrback(err)
        d.addBoth(lambda _: self.crawler.stop())
        self.crawler.start()
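All six variants ultimately hand control to the same Shell class, which in current Scrapy also backs scrapy.shell.inspect_response for opening the console from inside a running spider. A minimal usage sketch of that entry point, assuming a current Scrapy install; the spider name and start URL are made up for illustration:

import scrapy
from scrapy.shell import inspect_response


class QuotesSpider(scrapy.Spider):
    name = "quotes"                                # hypothetical spider name
    start_urls = ["https://quotes.toscrape.com"]   # hypothetical start URL

    def parse(self, response):
        # Pause the crawl and open the interactive shell on this response;
        # exiting the shell (e.g. Ctrl-D) resumes the crawl
        inspect_response(response, self)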