def execute(self, args, opts):
    """Crawl the domain of a stored task, or the first domain in ``args``.

    A task is looked up from ``opts.task_id`` and then ``opts.task_name``
    (the name lookup wins when both options are set). When a task is found
    its ``domain`` is crawled; otherwise ``args[0]`` is used as the domain.
    If neither a task nor a positional argument is available, an error is
    logged and nothing is started.

    args -- positional command arguments; only ``args[0]`` is read
    opts -- parsed options; ``task_id``, ``task_name`` and ``child`` are read
    """
    task = None
    if opts.task_id:
        task = Task().load(id=opts.task_id)
    if opts.task_name:
        task = Task().next(name=opts.task_name)

    if not (task or len(args)):
        log.msg('You must specify atleast 1 domain', level=log.ERROR)
        return

    domain = task.domain if task else args[0]
    spider = spiders.fromdomain(domain)
    scrapymanager.configure()

    if opts.child:
        # monkeypatching stop command to prevent stopping prematurely in
        # child mode
        def _stop():
            pass
        scrapymanager.stop = _stop

    # The domain may have come from args, in which case there is no task to
    # lock; only touch the lock when a task was actually found.
    if task and not task.locked:
        task.lock()

    self.crawl(spider, task)
    scrapyengine.start()
def run(self, args, opts):
    """Handle the spider-generation command.

    Depending on the options this either lists the available templates
    (``opts.list``), prints the contents of the selected template
    (``opts.dump``), or generates a new spider from the selected template.

    args -- positional arguments: ``args[0]`` is the module name and
            ``args[1]`` the domain of the spider to generate
    opts -- parsed options; ``list``, ``dump``, ``template`` and ``force``
            are read

    Returns ``False`` when fewer than two positional arguments are given,
    ``None`` otherwise. Exits the process with status 1 when a spider for
    the domain already exists and ``opts.force`` is not set.
    """
    if opts.list:
        self._list_templates()
        return

    if opts.dump:
        template_file = self._find_template(opts.template)
        if template_file:
            template = open(template_file, 'r')
            # try/finally guarantees the handle is released even if the
            # read or the print fails.
            try:
                # Single-argument parenthesised print behaves the same as
                # the print statement on Python 2.
                print(template.read())
            finally:
                template.close()
        return

    if len(args) < 2:
        return False

    module = sanitize_module_name(args[0])
    domain = args[1]

    spider = spiders.fromdomain(domain)
    if spider and not opts.force:
        print("Spider '%s' already exists in module:" % domain)
        print(" %s" % spider.__module__)
        sys.exit(1)

    template_file = self._find_template(opts.template)
    if template_file:
        self._genspider(module, domain, opts.template, template_file)
def _get_spider_requests(*args): """Collect requests and spiders from the given arguments. Returns a dict of spider -> list of requests """ spider_requests = defaultdict(list) for arg in args: if isinstance(arg, tuple): request, spider = arg spider_requests[spider] = request elif isinstance(arg, Request): spider = spiders.fromurl(arg.url) or BaseSpider('default') if spider: spider_requests[spider] += [arg] else: log.msg('Could not find spider for request: %s' % arg, log.ERROR) elif isinstance(arg, BaseSpider): spider_requests[arg] += arg.start_requests() elif is_url(arg): spider = spiders.fromurl(arg) or BaseSpider('default') if spider: for req in arg_to_iter(spider.make_requests_from_url(arg)): spider_requests[spider] += [req] else: log.msg('Could not find spider for url: %s' % arg, log.ERROR) elif isinstance(arg, basestring): spider = spiders.fromdomain(arg) if spider: spider_requests[spider] += spider.start_requests() else: log.msg('Could not find spider for domain: %s' % arg, log.ERROR) else: raise TypeError("Unsupported argument: %r" % arg) return spider_requests