def execute(self, args, opts):
    """Resolve what to crawl, then configure the manager and start the engine.

    The crawl target comes from a task (looked up by ``opts.task_id`` or
    ``opts.task_name``) or, when no task is found, from the first entry in
    ``args``, which is used as the domain. A task-name lookup, when
    requested, replaces the result of the id lookup.

    If neither a task nor any positional argument is available, an error is
    logged and nothing is started.
    """
    task = None
    if opts.task_id:
        task = Task().load(id=opts.task_id)
    if opts.task_name:
        task = Task().next(name=opts.task_name)

    # Nothing to crawl: report and bail out before touching the manager.
    if not task and not args:
        log.msg('You must specify atleast 1 domain', level=log.ERROR)
        return

    domain = task.domain if task else args[0]
    spider = spiders.fromdomain(domain)
    scrapymanager.configure()

    if opts.child:
        def _stop():
            pass
        # Monkeypatch the stop command to prevent stopping prematurely
        # in child mode.
        scrapymanager.stop = _stop

    # When the domain was taken from args there is no task object, so
    # only attempt to lock when a task was actually resolved.
    if task and not task.locked:
        task.lock()

    self.crawl(spider, task)
    scrapyengine.start()
def _loop(self, args, opts):
    """Run one scheduling pass: pick a pending task and crawl it in a child.

    Fetches the next task that is neither locked nor completed. If there is
    none, sleeps for 30 seconds and returns. Otherwise the task is locked,
    a ``scrapy-ctl.py run --task-id=... --child`` subprocess is launched from
    the current working directory, and its captured stdout/stderr are stored
    on the task together with start/finish timestamps before it is saved as
    completed.

    When the ``MEMDEBUG_WITH_GUPPY`` setting is enabled and ``guppy`` is
    truthy, a heap snapshot is logged at DEBUG level after each finished task.
    """
    if settings.get('MEMDEBUG_WITH_GUPPY', False) and guppy:
        heapy = guppy.hpy()

    task = Task().next(locked=0, completed=0)
    if not task:
        # Nothing pending right now; wait before the next pass.
        time.sleep(30)
        return

    task.lock()

    # Assemble the child command line.
    script = os.path.join(os.getcwd(), 'scrapy-ctl.py')
    argv = ['python', script, 'run', '--task-id=%s' % task.id]
    if opts.child_logfile:
        argv.append('--logfile=%s' % opts.child_logfile)
    argv.append('--child')

    task.start = datetime.now()
    child = subprocess.Popen(
        argv,
        shell=False,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        close_fds=True,
    )
    # communicate() blocks until the child exits and returns (stdout, stderr).
    captured_out, captured_err = child.communicate()
    task.result = captured_out
    task.errors = captured_err
    task.finish = datetime.now()
    task.completed = 1
    task.save()

    timetext.LANG = 'en'
    elapsed = task.finish - task.start
    summary = 'Finished: %s(%s) in %s' % (
        task.name, task.id, timetext.stringify(elapsed))
    log.msg(summary, level=log.INFO, domain=task.domain)

    if settings.get('MEMDEBUG_WITH_GUPPY', False) and guppy:
        log.msg(heapy.heap(), level=log.DEBUG)
        heapy.setref()