Ejemplo n.º 1
0
 def _schedule(self, request, spider):
     if spider is None:
         spider = create_spider_for_request(self.crawler.spiders, request, \
             BaseSpider('default'), log_multiple=True)
     spider.set_crawler(self.crawler)
     self.crawler.engine.open_spider(spider)
     d = self.crawler.engine.schedule(request, spider)
     d.addCallback(lambda x: (x, spider))
     return d
Ejemplo n.º 2
0
 def _schedule(self, request, spider):
     if spider is None:
         spider = create_spider_for_request(self.crawler.spiders, request, \
             BaseSpider('default'), log_multiple=True)
     spider.set_crawler(self.crawler)
     self.crawler.engine.open_spider(spider)
     d = self.crawler.engine.schedule(request, spider)
     d.addCallback(lambda x: (x, spider))
     return d
Ejemplo n.º 3
0
 def _open_spider(self, request, spider):
     if self.spider:
         return self.spider
     if spider is None:
         spider = create_spider_for_request(self.crawler.spiders, request, \
             BaseSpider('default'), log_multiple=True)
     spider.set_crawler(self.crawler)
     self.crawler.engine.open_spider(spider, close_if_idle=False)
     self.spider = spider
     return spider
Ejemplo n.º 4
0
 def _open_spider(self, request, spider):
     if self.spider:
         return self.spider
     if spider is None:
         spider = create_spider_for_request(self.crawler.spiders, request, \
             BaseSpider('default'), log_multiple=True)
     spider.set_crawler(self.crawler)
     self.crawler.engine.open_spider(spider, close_if_idle=False)
     self.spider = spider
     return spider
Ejemplo n.º 5
0
 def set_spider(self, url, opts):
     """Resolve the spider to use and store it on ``self.spider``.

     If ``opts.spider`` is truthy it is passed to
     ``self.crawler.spiders.create``; a ``KeyError`` from that call is
     logged at ``log.ERROR`` level and ``self.spider`` is left untouched.
     Otherwise the spider is looked up with ``create_spider_for_request``
     using ``url``, and an error is logged when the lookup result is falsy.
     """
     if opts.spider:
         try:
             self.spider = self.crawler.spiders.create(opts.spider)
         except KeyError:
             log.msg('Unable to find spider: %s' % opts.spider, log.ERROR)
     else:
         # NOTE(review): other callers hand this helper a Request object
         # rather than a raw URL -- confirm it accepts a URL string here.
         self.spider = create_spider_for_request(self.crawler.spiders, url)
         if not self.spider:
             # Report the ``url`` argument: this method defines no
             # ``request`` name, so formatting the message with it raised
             # NameError instead of logging the failed lookup.
             log.msg('Unable to find spider for: %s' % url, log.ERROR)
Ejemplo n.º 6
0
 def set_spider(self, url, opts):
     """Pick the spider for ``url`` and assign it to ``self.spider``.

     A truthy ``opts.spider`` is passed to ``self.crawler.spiders.create``;
     if that raises ``KeyError`` the failure is logged at ``log.ERROR``
     level and ``self.spider`` is not assigned. With a falsy
     ``opts.spider`` the result of ``create_spider_for_request`` for ``url``
     is assigned instead, and an error is logged when that result is falsy.
     """
     if opts.spider:
         try:
             self.spider = self.crawler.spiders.create(opts.spider)
         except KeyError:
             log.msg('Unable to find spider: %s' % opts.spider, log.ERROR)
     else:
         # NOTE(review): other callers hand this helper a Request object
         # rather than a raw URL -- confirm it accepts a URL string here.
         self.spider = create_spider_for_request(self.crawler.spiders, url)
         if not self.spider:
             # The message must use ``url``; the previous ``request`` name
             # does not exist in this method and raised NameError before
             # anything was logged.
             log.msg('Unable to find spider for: %s' % url, log.ERROR)
Ejemplo n.º 7
0
 def _schedule(self, request, spider):
     if spider is None:
         spider = create_spider_for_request(self.crawler.spiders, request, \
             BaseSpider('default'), log_multiple=True)
     spider.set_crawler(self.crawler)
     self.crawler.engine.open_spider(spider, close_if_idle=False)
     d = request_deferred(request)
     d.addCallback(lambda x: (x, spider))
     self.crawler.engine.crawl(request, spider)
     return d
Ejemplo n.º 8
0
 def get_spider(self, request, opts):
     """Return the spider to use for ``request``, or ``None`` on failure.

     A truthy ``opts.spider`` is passed to ``self.crawler.spiders.create``
     and the created spider is returned; a ``KeyError`` from that call is
     logged at ``log.ERROR`` level. With a falsy ``opts.spider`` the spider
     comes from ``create_spider_for_request``; a falsy lookup result is
     logged at ``log.ERROR`` level. Both failure paths return ``None``.
     """
     name = opts.spider
     if not name:
         found = create_spider_for_request(self.crawler.spiders, request)
         if not found:
             log.msg('Unable to find spider for: %s' % request, log.ERROR)
             return None
         return found

     try:
         return self.crawler.spiders.create(name)
     except KeyError:
         log.msg('Unable to find spider: %s' % name, log.ERROR)
     return None
Ejemplo n.º 9
0
 def get_spider(self, request, opts):
     """Look up a spider by ``opts.spider`` or, failing that, by ``request``.

     With a truthy ``opts.spider`` the result of
     ``self.crawler.spiders.create`` is returned; if that raises
     ``KeyError`` an error is logged and ``None`` is returned. With a falsy
     ``opts.spider`` the result of ``create_spider_for_request`` is returned
     when truthy; otherwise an error is logged and ``None`` is returned.
     """
     requested = opts.spider
     if not requested:
         candidate = create_spider_for_request(self.crawler.spiders, request)
         if not candidate:
             log.msg('Unable to find spider for: %s' % request, log.ERROR)
             return None
         return candidate

     try:
         return self.crawler.spiders.create(requested)
     except KeyError:
         log.msg('Unable to find spider: %s' % requested, log.ERROR)
     return None
Ejemplo n.º 10
0
 def append_url(self, url=None, spider=None, **kwargs):
     """Queue a URL to be crawled by a spider.

     When ``spider`` is ``None`` it is looked up with
     ``create_spider_for_request`` using ``self._spiders``, a ``Request``
     built from ``url`` and any extra keyword arguments. If the resulting
     spider is truthy, ``(spider, requests)`` is appended to
     ``self.spider_requests``, where ``requests`` is
     ``spider.make_requests_from_url(url)`` passed through ``arg_to_iter``.
     A falsy spider appends nothing.

     Raises ``ValueError`` when ``url`` is ``None``.
     """
     if url is None:
         raise ValueError("A url is required")

     target = spider
     if target is None:
         target = create_spider_for_request(
             self._spiders, Request(url), **kwargs)
     if not target:
         return

     made = target.make_requests_from_url(url)
     self.spider_requests.append((target, arg_to_iter(made)))
Ejemplo n.º 11
0
 def set_spider(self, url, opts):
     """Resolve the spider to use and store it on ``self.spider``.

     A truthy ``opts.spider`` is passed to ``self.crawler.spiders.create``;
     a ``KeyError`` from that call is logged at ``log.ERROR`` level and
     ``self.spider`` is left untouched. With a falsy ``opts.spider`` the
     result of ``create_spider_for_request`` for ``Request(url)`` is
     assigned, and an error is logged when that result is falsy.
     """
     name = opts.spider
     if not name:
         spiders = self.crawler.spiders
         self.spider = create_spider_for_request(spiders, Request(url))
         if not self.spider:
             log.msg(format='Unable to find spider for: %(url)s',
                     level=log.ERROR, url=url)
         return

     try:
         self.spider = self.crawler.spiders.create(name)
     except KeyError:
         log.msg(format='Unable to find spider: %(spider)s',
                 level=log.ERROR, spider=name)
Ejemplo n.º 12
0
    def run(self, args, opts):
        """Crawl the single URL given in ``args``.

        ``args`` must hold exactly one item accepted by ``is_url``;
        otherwise ``UsageError`` is raised. A ``Request`` is built for that
        URL with ``dont_filter=True``, a callback that forwards the response
        and ``opts`` to ``self._print_response``, and
        ``meta['handle_httpstatus_all']`` set to ``True``.

        The spider comes from ``self.crawler.spiders.create(opts.spider)``
        when ``opts.spider`` is truthy, else from
        ``create_spider_for_request`` with ``BaseSpider('default')`` as
        ``default_spider``. The request is then passed to
        ``self.crawler.crawl`` and the crawler is started.
        """
        if not (len(args) == 1 and is_url(args[0])):
            raise UsageError()

        def print_response(response):
            return self._print_response(response, opts)

        request = Request(args[0], callback=print_response, dont_filter=True)
        request.meta['handle_httpstatus_all'] = True

        crawler = self.crawler
        if opts.spider:
            spider = crawler.spiders.create(opts.spider)
        else:
            spider = create_spider_for_request(
                crawler.spiders, request,
                default_spider=BaseSpider('default'))

        crawler.crawl(spider, [request])
        crawler.start()
Ejemplo n.º 13
0
    def run(self, args, opts):
        """Fetch one URL through the crawler and print its response.

        Raises ``UsageError`` unless ``args`` contains exactly one item for
        which ``is_url`` is truthy. The URL is wrapped in a ``Request`` with
        ``dont_filter=True`` and ``meta['handle_httpstatus_all'] = True``;
        its callback calls ``self._print_response`` with the response and
        ``opts``.

        With a truthy ``opts.spider`` the spider is created through
        ``self.crawler.spiders.create``; otherwise
        ``create_spider_for_request`` is used with a ``BaseSpider('default')``
        passed as ``default_spider``. Finally ``self.crawler.crawl`` is
        called with the spider and the request, then ``self.crawler.start``.
        """
        if not (len(args) == 1 and is_url(args[0])):
            raise UsageError()

        def on_response(response):
            return self._print_response(response, opts)

        request = Request(args[0], callback=on_response, dont_filter=True)
        request.meta['handle_httpstatus_all'] = True

        crawler = self.crawler
        if opts.spider:
            spider = crawler.spiders.create(opts.spider)
        else:
            spider = create_spider_for_request(
                crawler.spiders, request,
                default_spider=BaseSpider('default'))

        crawler.crawl(spider, [request])
        crawler.start()
Ejemplo n.º 14
0
 def append_request(self, request, spider=None, **kwargs):
     """Queue ``request`` to be crawled by a spider.

     When ``spider`` is ``None`` it is looked up with
     ``create_spider_for_request`` using ``self._spiders``, the request and
     any extra keyword arguments. If the resulting spider is truthy,
     ``(spider, [request])`` is appended to ``self.spider_requests``;
     otherwise nothing is appended.
     """
     target = spider
     if target is None:
         target = create_spider_for_request(self._spiders, request, **kwargs)
     if not target:
         return
     self.spider_requests.append((target, [request]))