def run(self, args, opts):
    """Generate a new spider from a template.

    ``args`` is ``[name, domain]``.  ``opts`` may instead request listing
    the available templates (--list), dumping a template to stdout
    (--dump), or overwriting an existing spider (--force).

    Returns False on bad usage; exits with status 1 if the spider
    already exists and --force was not given.
    """
    if opts.list:
        self._list_templates()
        return
    if opts.dump:
        template_file = self._find_template(opts.template)
        if template_file:
            # Context manager guarantees the file is closed
            # (the previous code leaked the open file handle).
            with open(template_file, 'r') as template:
                print(template.read())
        return
    if len(args) != 2:
        return False
    name = args[0]
    domain = args[1]
    module = sanitize_module_name(name)
    # If the spider already exists and --force was not given, halt.
    try:
        spider = spiders.create(name)
    except KeyError:
        # No such spider yet: safe to generate it.
        pass
    else:
        if not opts.force:
            print("Spider '%s' already exists in module:" % name)
            print("  %s" % spider.__module__)
            sys.exit(1)
    template_file = self._find_template(opts.template)
    if template_file:
        self._genspider(module, name, domain, opts.template, template_file)
def run(self, args, opts):
    """Download one URL and run spider callbacks over the response.

    ``args`` must contain exactly one URL.  The response is processed:
    - with each callback named on the command line, if any were given;
    - else, with the first spider rule matching the URL (--rules);
    - else, with the spider's default ``parse`` callback.

    Returns False on bad usage; returns early (logging an error) when
    the spider cannot be found or no response was downloaded.
    """
    # Same argument check style as the sibling commands
    # (was the less idiomatic ``not len(args) == 1``).
    if len(args) != 1 or not is_url(args[0]):
        return False
    responses = []  # collects the downloaded response(s)
    request = Request(args[0], callback=responses.append)
    if opts.spider:
        try:
            spider = spiders.create(opts.spider)
        except KeyError:
            log.msg("Unable to find spider: %s" % opts.spider, log.ERROR)
            return
    else:
        spider = spiders.create_for_request(request)
    scrapymanager.configure()
    scrapymanager.queue.append_request(request, spider)
    scrapymanager.start()
    if not responses:
        log.msg("No response returned", log.ERROR, spider=spider)
        return
    # Now process the response:
    # - if callbacks are defined then call each one and print results
    # - if --rules option given, search for a matching spider rule
    # - default: print result using the spider's 'parse' callback
    response = responses[0]
    if self.callbacks:
        # Apply each requested callback in turn.
        for callback in self.callbacks:
            items, links = self.run_callback(spider, response, callback, args, opts)
            self.print_results(items, links, callback, opts)
    elif opts.rules:
        # Search the spider's crawl rules for one matching this URL.
        if hasattr(spider, "rules") and spider.rules:
            for rule in spider.rules:
                if rule.link_extractor.matches(response.url) and rule.callback:
                    items, links = self.run_callback(spider, response, rule.callback, args, opts)
                    self.print_results(items, links, rule.callback, opts)
                    # First matching rule wins; stop scanning.
                    break
        else:
            log.msg(
                'No rules found for spider "%s", '
                "please specify a callback for parsing" % spider.name,
                log.ERROR
            )
    else:
        # Default: the spider's standard 'parse' callback.
        items, links = self.run_callback(spider, response, "parse", args, opts)
        self.print_results(items, links, "parse", opts)
def run(self, args, opts):
    """Queue the given spider names and URLs for crawling and start.

    ``args`` is a mix of spider names and URLs.  When --spider is given,
    every URL is assigned to that spider; otherwise URLs are grouped by
    the spider that claims them.
    """
    q = ExecutionQueue()
    urls, names = self._split_urls_and_names(args)
    for name in names:
        q.append_spider_name(name)
    if opts.spider:
        # Keep the try body minimal: only spiders.create() should be
        # guarded here.  Previously the append_url loop was inside the
        # try, so a stray KeyError from it would be misreported as
        # "Unable to find spider".
        try:
            spider = spiders.create(opts.spider)
        except KeyError:
            log.msg('Unable to find spider: %s' % opts.spider, log.ERROR)
        else:
            for url in urls:
                q.append_url(url, spider)
    else:
        # Use a distinct loop variable instead of rebinding ``urls``.
        for name, spider_urls in self._group_urls_by_spider(urls):
            spider = spiders.create(name)
            for url in spider_urls:
                q.append_url(url, spider)
    scrapymanager.queue = q
    scrapymanager.start()
def run(self, args, opts):
    """Fetch a single URL and print the response.

    ``args`` must be exactly one URL; returns False otherwise.  When
    --spider is given, that spider handles the request (an error is
    logged if it cannot be found); a plain default spider is supplied
    as fallback either way.
    """
    if not (len(args) == 1 and is_url(args[0])):
        return False

    url = args[0]

    def on_response(response):
        # Delegate printing to the command helper, honoring the opts.
        return self._print_response(response, opts)

    request = Request(url, callback=on_response, dont_filter=True)

    spider = None
    if opts.spider:
        try:
            spider = spiders.create(opts.spider)
        except KeyError:
            log.msg("Could not find spider: %s" % opts.spider, log.ERROR)

    scrapymanager.configure()
    scrapymanager.queue.append_request(
        request, spider, default_spider=BaseSpider('default'))
    scrapymanager.start()