コード例 #1
0
ファイル: run.py プロジェクト: 1060460048/djangoscraper
    def execute(self, args, opts):
        """Crawl the domain of a stored task, or a domain given as an argument.

        A task is looked up from ``opts.task_id`` and then from
        ``opts.task_name``; the name lookup runs last, so when both options
        are set its result replaces the id lookup. When no task is found the
        first positional argument is used as the domain.

        Args:
            args: positional command-line arguments; ``args[0]`` is the
                domain when no task is selected.
            opts: parsed options; ``task_id``, ``task_name`` and ``child``
                are read here.

        Returns:
            None. An error is logged when neither a task nor a domain is
            available.
        """
        task = None

        if opts.task_id:
            task = Task().load(id=opts.task_id)
        if opts.task_name:
            task = Task().next(name=opts.task_name)

        if not task and not args:
            log.msg('You must specify atleast 1 domain', level=log.ERROR)
            return

        domain = task.domain if task else args[0]

        spider = spiders.fromdomain(domain)
        scrapymanager.configure()
        if opts.child:
            def _stop():
                pass
            # Monkeypatch the stop command so the manager cannot stop the
            # crawl prematurely when running in child mode.
            scrapymanager.stop = _stop

        # Only a stored task can be locked. A crawl started from a bare
        # domain argument has no task, and touching ``task.locked`` there
        # used to raise AttributeError before the crawl began.
        if task and not task.locked:
            task.lock()

        # NOTE(review): ``task`` is None for a domain-only run; confirm that
        # ``self.crawl`` accepts that.
        self.crawl(spider, task)
        scrapyengine.start()
コード例 #2
0
ファイル: run.py プロジェクト: 1060460048/djangoscraper
 def _loop(self, args, opts):
     """Run one scheduler pass: execute the next pending task in a child process.

     Asks ``Task().next(locked=0, completed=0)`` for a task (presumably the
     next one that is neither locked nor completed -- confirm against the
     ``Task`` implementation). If one is returned it is locked, run through
     ``scrapy-ctl.py run --task-id=<id>`` in a subprocess, and saved with its
     captured stdout/stderr and start/finish timestamps. If none is
     returned, the call sleeps for 30 seconds.

     Args:
         args: positional command-line arguments (unused here).
         opts: parsed options; ``child_logfile`` is read here.
     """
     if settings.get('MEMDEBUG_WITH_GUPPY', False) and guppy:
         heapy = guppy.hpy()

     task = Task().next(locked=0, completed=0)
     if not task:
         # Nothing to do right now; wait before the caller tries again.
         time.sleep(30)
         return

     task.lock()
     argv = [
         'python',
         os.path.join(os.getcwd(), 'scrapy-ctl.py'),
         'run',
         '--task-id=%s'%task.id,
     ]
     if opts.child_logfile:
         # The child flag is only passed together with a child log file.
         argv.extend(['--logfile=%s'%opts.child_logfile, '--child'])

     task.start = datetime.now()
     child = subprocess.Popen(
         argv,
         shell=False,
         stderr=subprocess.PIPE,
         stdout=subprocess.PIPE,
         close_fds=True,
     )
     # communicate() waits for the child to exit and returns (stdout, stderr).
     child_stdout, child_stderr = child.communicate()
     task.result = child_stdout
     task.errors = child_stderr
     task.finish = datetime.now()
     task.completed = 1
     task.save()

     timetext.LANG = 'en'
     elapsed = task.finish - task.start
     summary = 'Finished: %s(%s) in %s'%(task.name, task.id, timetext.stringify(elapsed))
     log.msg(summary, level=log.INFO, domain=task.domain)

     if settings.get('MEMDEBUG_WITH_GUPPY', False) and guppy:
         # Log the current heap, then set a new reference point on heapy.
         log.msg(heapy.heap(), level=log.DEBUG)
         heapy.setref()