def run(self, args, opts):
    """Fetch the given URLs and print what the matching spider extracts.

    For every response returned by ``fetch(args)`` a spider is looked up
    with ``spiders.fromurl``. The callbacks to run are then chosen in this
    order of precedence:

    1. every entry of ``self.callbacks``, when that is non-empty;
    2. when ``opts.rules`` is set, the callback of the first spider rule
       that has a callback and whose link extractor matches the response
       URL (nothing is run if no rule matches);
    3. otherwise the callback named ``"parse"``.

    Each chosen callback goes through ``self.run_callback`` and its
    ``(items, links)`` result is handed to ``self.print_results``.

    Args:
        args: Passed unchanged to ``fetch``; must be non-empty (the usage
            message indicates these are URLs).
        opts: Options object; ``opts.rules`` is read here and the object
            is forwarded to ``run_callback`` and ``print_results``.
    """
    if not args:
        # Single parenthesised argument: prints the same text whether
        # ``print`` is the statement or the function.
        print("An URL is required")
        return

    for response in fetch(args):
        spider = spiders.fromurl(response.url)
        if not spider:
            log.msg('Cannot find spider for "%s"' % response.url)
            continue

        if self.callbacks:
            # Explicitly requested callbacks take priority over rules.
            selected = self.callbacks
        elif not opts.rules:
            selected = ["parse"]
        else:
            spider_rules = getattr(spider, "rules", None)
            if not spider_rules:
                log.msg(
                    'No rules found for spider "%s", please specify a callback for parsing'
                    % spider.domain_name
                )
                continue
            # Only the first matching rule is used; an empty selection
            # means no rule applied to this URL and nothing is printed.
            selected = []
            for candidate in spider_rules:
                if candidate.callback and candidate.link_extractor.matches(response.url):
                    selected.append(candidate.callback)
                    break

        for callback in selected:
            items, links = self.run_callback(spider, response, callback, args, opts)
            self.print_results(items, links, callback, opts)
def run(self, args, opts):
    """Fetch exactly one URL and dump its headers or its body.

    Prints a usage message and returns when ``args`` does not hold
    exactly one entry. Otherwise ``fetch(args)`` is called; if it yields
    a non-empty result, the first response's ``headers`` are
    pretty-printed when ``opts.headers`` is truthy, else its ``body`` is
    printed. An empty result prints nothing.

    Args:
        args: Must contain exactly one element; passed unchanged to
            ``fetch``.
        opts: Options object; only ``opts.headers`` is read.
    """
    if len(args) != 1:
        # Single parenthesised argument: prints the same text whether
        # ``print`` is the statement or the function.
        print("One URL is required")
        return

    responses = fetch(args)
    if not responses:
        return

    if opts.headers:
        pprint.pprint(responses[0].headers)
        return

    print(responses[0].body)