def run(self, args, opts):
    """Open the interactive shell, fetching the first positional argument
    as a URL when one is given."""
    target_url = None
    if args:
        target_url = args[0]
    shell = Shell(self.crawler, update_vars=self.update_vars, code=opts.code)
    # The crawler runs in a background thread so the shell owns this one.
    self._start_crawler_thread()
    shell.start(url=target_url)
def run(self, args, opts):
    """Resolve a spider class for the (optional) URL argument, wire up a
    crawler with a persistent engine, and drop into the interactive shell."""
    target_url = args[0] if args else None
    spiders = self.crawler_process.spiders

    if opts.spider:
        spidercls = spiders.load(opts.spider)
    elif target_url:
        spidercls = spidercls_for_request(
            spiders, Request(target_url), DefaultSpider, log_multiple=True)
    else:
        spidercls = DefaultSpider

    # The crawler is created this way since the Shell manually handles the
    # crawling engine, so the set up in the crawl method won't work
    crawler = self.crawler_process._create_logged_crawler(spidercls)
    # The Shell class needs a persistent engine in the crawler
    crawler.engine = crawler._create_engine()
    crawler.engine.start()

    self.crawler_process.start(start_reactor=False)
    self._start_crawler_thread()

    shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
    shell.start(url=target_url)
def run(self, args, opts):
    """Start the interactive shell, choosing the most appropriate spider
    class for the request and honouring the --no-redirect option."""
    target_url = args[0] if args else None
    if target_url:
        # first argument may be a local file
        target_url = guess_scheme(target_url)

    loader = self.crawler_process.spider_loader
    if opts.spider:
        spidercls = loader.load(opts.spider)
    elif target_url:
        spidercls = spidercls_for_request(
            loader, Request(target_url), DefaultSpider, log_multiple=True)
    else:
        spidercls = DefaultSpider

    # The crawler is created this way since the Shell manually handles the
    # crawling engine, so the set up in the crawl method won't work
    crawler = self.crawler_process._create_crawler(spidercls)
    # The Shell class needs a persistent engine in the crawler
    crawler.engine = crawler._create_engine()
    crawler.engine.start()

    self._start_crawler_thread()
    shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
    shell.start(url=target_url, redirect=not opts.no_redirect)
def test_inspect_response_text(self):
    """A plain-text (JSON) response must not expose a 'sel' selector."""
    body = '''
    {"hello": "world"}
    '''
    response = TextResponse(url='http://example.com/', body=body)
    shell = Shell(self.crawler, code='None')
    shell.start(response=response, spider=self.spider)
    self.assertNotIn('sel', shell.vars)
def run(self, args, opts):
    """Launch the interactive shell, instantiating the spider named on the
    command line when one was given."""
    target_url = args[0] if args else None
    spider = self.crawler.spiders.create(opts.spider) if opts.spider else None
    shell = Shell(self.crawler, update_vars=self.update_vars, code=opts.code)
    self._start_crawler_thread()
    shell.start(url=target_url, spider=spider)
def test_inspect_response_xml(self):
    """An XML response must expose a 'sel' selector in the shell namespace."""
    body = '''
    <?xml version="1.0" encoding="UTF-8"?>
    <foo>Testing</foo>
    '''
    response = XmlResponse(url='http://example.com/', body=body)
    shell = Shell(self.crawler, code='None')
    shell.start(response=response, spider=self.spider)
    self.assertIn('sel', shell.vars)
def test_inspect_response_binary(self):
    """A raw binary response must not expose a 'sel' selector."""
    body = '''
    '{\xcc\xe8\x92\xe6\xb8\xa21\xb2\xe5O6\xc9\x84\xba8
    \xa3\x877\xa8v\xee9p.UJ\xa1m\x8a"H\xb3\xcc\x08\xff
    \x87d\x00i\xce\xb7a\xff\x8c\xd8NX\xae\xc2'
    '''
    response = Response(url='http://example.com/', body=body)
    shell = Shell(self.crawler, code='None')
    shell.start(response=response, spider=self.spider)
    self.assertNotIn('sel', shell.vars)
def run(self, args, opts):
    """Create a crawler, kick off crawling in a background thread, and open
    the interactive shell on the optional URL argument."""
    crawler = self.crawler_process.create_crawler()
    target_url = args[0] if args else None
    if opts.spider:
        spider = crawler.spiders.create(opts.spider)
    else:
        spider = None
    self.crawler_process.start_crawling()
    self._start_crawler_thread()
    shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
    shell.start(url=target_url, spider=spider)
def test_inspect_response_html(self):
    """An HTML response must expose a 'sel' selector in the shell namespace."""
    body = '''
    <!doctype html>
    <html>
    <p>Testing</p>
    </html>
    '''
    response = HtmlResponse(url='http://example.com/', body=body)
    shell = Shell(self.crawler, code='None')
    shell.start(response=response, spider=self.spider)
    self.assertIn('sel', shell.vars)
def shell(argv):
    """ Open a url in the scrapy shell """
    parser = argparse.ArgumentParser('ozzy shell', description=shell.__doc__)
    parser.add_argument('url', help="URL to open in a shell")
    parsed = parser.parse_args(argv)

    crawler_process = CrawlerProcess(load_settings())
    crawler = crawler_process.create_crawler()
    crawler_process.start_crawling()

    # Run the reactor in a daemon thread so the interactive shell keeps
    # control of the main thread (and the process can exit with it).
    reactor_thread = Thread(target=crawler_process.start_reactor)
    reactor_thread.daemon = True
    reactor_thread.start()

    Shell(crawler).start(url=parsed.url)
def run(self, args, opts):
    """Run the shell in its own thread, stop the crawler once the shell's
    deferred fires, and record a non-zero exit code on failure."""
    target_url = args[0] if args else None
    shell = Shell(self.crawler, update_vars=self.update_vars,
                  inthread=True, code=opts.code)

    def on_error(failure):
        # Surface shell failures in the command's exit status.
        log.err(failure, "Shell error")
        self.exitcode = 1

    deferred = shell.start(url=target_url)
    deferred.addErrback(on_error)
    deferred.addBoth(lambda _: self.crawler.stop())
    self.crawler.start()
def run(self, args, opts):
    """Open the interactive shell on the first positional argument, if any."""
    target = args[0] if args else None
    Shell(self.update_vars).start(target)