def run(self, args, opts):
    if len(args) != 1 or not is_url(args[0]):
        return False

    responses = []  # to collect downloaded responses
    request = Request(args[0], callback=responses.append)

    if opts.spider:
        try:
            spider = spiders.create(opts.spider)
        except KeyError:
            log.msg("Unable to find spider: %s" % opts.spider, log.ERROR)
            return
    else:
        spider = spiders.create_for_request(request)

    scrapymanager.configure()
    scrapymanager.queue.append_request(request, spider)
    scrapymanager.start()

    if not responses:
        log.msg("No response returned", log.ERROR, spider=spider)
        return

    # Process the response:
    #   - if callbacks are defined, call each one and print its results
    #   - if the --rules option is given, search for a matching spider rule
    #   - otherwise, print results using the spider's default 'parse' callback
    response = responses[0]

    if self.callbacks:
        # apply each callback
        for callback in self.callbacks:
            items, links = self.run_callback(spider, response, callback,
                                             args, opts)
            self.print_results(items, links, callback, opts)
    elif opts.rules:
        # search for a matching spider rule
        if hasattr(spider, "rules") and spider.rules:
            items, links = [], []
            for rule in spider.rules:
                if rule.link_extractor.matches(response.url) and rule.callback:
                    items, links = self.run_callback(spider, response,
                                                     rule.callback, args, opts)
                    self.print_results(items, links, rule.callback, opts)
                    # the first matching rule breaks the rules loop
                    break
        else:
            log.msg('No rules found for spider "%s", '
                    'please specify a callback for parsing' % spider.name,
                    log.ERROR)
    else:
        # default callback 'parse'
        items, links = self.run_callback(spider, response, "parse", args, opts)
        self.print_results(items, links, "parse", opts)
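# Illustrative only (not from the source): a minimal sketch of a spider that
# the --rules branch above could match against. The spider name, domain, URL
# pattern and parse_item callback are all assumptions for the example; the
# import paths follow the old scrapy.contrib layout this code appears to use.
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor

class ExampleSpider(CrawlSpider):
    name = "example"
    allowed_domains = ["example.com"]
    start_urls = ["http://example.com/"]

    rules = (
        # run() walks these rules in order and uses the callback of the
        # first rule whose link_extractor matches response.url
        Rule(SgmlLinkExtractor(allow=r"/items/"), callback="parse_item"),
    )

    def parse_item(self, response):
        return []  # items would be extracted here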
def fetch(self, request_or_url, print_help=False):
    if isinstance(request_or_url, Request):
        request = request_or_url
        url = request.url
    else:
        url = parse_url(request_or_url)
        request = Request(url)

    spider = spiders.create_for_request(request, BaseSpider('default'),
                                        log_multiple=True)
    print "Fetching %s..." % request
    scrapymanager.engine.open_spider(spider)
    # block the shell's thread until the engine, running in the reactor
    # thread, has downloaded the response
    response = threads.blockingCallFromThread(
        reactor, scrapymanager.engine.schedule, request, spider)
    if response:
        self.populate_vars(url, response, request, spider)
        if print_help:
            self.print_help()
        else:
            print "Done - use shelp() to see available objects"
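# A hedged usage sketch (not from the source): inside an interactive shell
# session, fetch() accepts either a URL string or a prepared Request object.
# The URLs and the exact console output shown are assumptions based on the
# print statements above.
#
#   >>> fetch('http://example.com/page.html')
#   Fetching <GET http://example.com/page.html>...
#   Done - use shelp() to see available objects
#   >>> fetch(Request('http://example.com/form', method='POST'))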