Exemple #1
0
 def _spawn_process(self, message, slot):
     """Launch a crawl subprocess for ``message`` and record it in ``slot``.

     The message is normalised with ``native_stringify_dict``; its
     ``_project``, ``_spider`` and ``_job`` entries identify the run.
     The child is started as ``<python> -m <runner> crawl`` followed by
     the arguments produced by ``get_crawl_args``, using the environment
     returned by the application's ``IEnvironment`` component.
     ``_process_finished`` is attached to the protocol's deferred so it
     runs (with ``slot``) on both success and failure.
     """
     job_msg = native_stringify_dict(message, keys_only=False)
     project_name = job_msg['_project']

     command = [sys.executable, '-m', self.runner, 'crawl']
     command.extend(get_crawl_args(job_msg))

     environment = self.app.getComponent(IEnvironment)
     child_env = native_stringify_dict(
         environment.get_environment(job_msg, slot), keys_only=False)

     protocol = ScrapyProcessProtocol(
         slot, project_name, job_msg['_spider'], job_msg['_job'], child_env)
     protocol.deferred.addBoth(self._process_finished, slot)
     reactor.spawnProcess(
         protocol, sys.executable, args=command, env=child_env)
     self.processes[slot] = protocol
Exemple #2
0
 def render_GET(self, request):
     """Report the pending, running and finished jobs of one project.

     The project is taken from the first ``project`` request argument.
     Running jobs come from the launcher's current processes, pending
     jobs from the poller queue of that project, and finished jobs from
     the launcher's ``finished`` collection. Timestamps are rendered
     with ``isoformat(' ')``.
     """
     params = native_stringify_dict(copy(request.args), keys_only=False)
     project = params['project'][0]
     launcher = self.root.launcher

     running = []
     for proc in launcher.processes.values():
         if proc.project != project:
             continue
         running.append({
             "id": proc.job,
             "spider": proc.spider,
             "pid": proc.pid,
             "start_time": proc.start_time.isoformat(' ')
         })

     pending = []
     for entry in self.root.poller.queues[project].list():
         pending.append({"id": entry["_job"], "spider": entry["name"]})

     finished = []
     for done in launcher.finished:
         if done.project != project:
             continue
         finished.append({
             "id": done.job,
             "spider": done.spider,
             "start_time": done.start_time.isoformat(' '),
             "end_time": done.end_time.isoformat(' ')
         })

     return {
         "node_name": self.root.node_name,
         "status": "ok",
         "pending": pending,
         "running": running,
         "finished": finished
     }
Exemple #3
0
 def render_POST(self, request):
     """Return the error entries found in a job log from a given offset.

     Request arguments (the first value of each is used):
       project, spider, job -- locate ``<logs_dir>/<project>/<spider>/<job>.log``
       offset -- optional position to seek to before reading (default 0)

     On success the result holds ``node_name``, ``status`` = "ok",
     ``whence`` (the file position after reading, suitable as the next
     ``offset``) and ``errors`` (the ``comp_error`` matches, each
     reordered to ``(group 2, group 3, group 1)``).

     Any exception is reported as ``status`` = "error" with the
     exception text in ``message`` instead of being raised.
     """
     try:
         args = native_stringify_dict(copy(request.args), keys_only=False)
         args = {k: v[0] for k, v in args.items()}
         project = args['project']
         spider = args['spider']
         job_id = args['job']
         offset = int(args.get('offset', 0))
         logs_dir = self.root.logs_dir
         log_path = os.path.join(
             logs_dir, project, spider, "{}.log".format(job_id))

         # The path components come straight from the request, so refuse
         # anything (e.g. "..") that would escape the logs directory.
         logs_root = os.path.join(os.path.abspath(logs_dir), '')
         if not os.path.abspath(log_path).startswith(logs_root):
             raise ValueError("invalid log path")

         # The context manager closes the file; no explicit close needed.
         with open(log_path, "r", encoding='UTF-8') as f:
             f.seek(offset)  # resume where the previous call stopped
             lines = f.read()  # everything from the offset to the end
             whence = f.tell()  # position to hand back to the caller

         # Prefix each block match with the delimiter (keeping group 1),
         # then collect the error matches from the rewritten text.
         errors = self.comp_error.findall(
             string=self.comp_block.sub(
                 repl=r'{}\1'.format(self.delimiter), string=lines))
         errors = [(e[1], e[2], e[0]) for e in errors]
         return {
             "node_name": self.root.node_name,
             "status": "ok",
             "whence": whence,
             "errors": errors
         }
     except Exception as e:
         # Deliberate catch-all: this endpoint always answers with a
         # JSON-style status dict rather than propagating the failure.
         return {
             "node_name": self.root.node_name,
             "status": "error",
             "message": str(e)
         }
Exemple #4
0
 def render_POST(self, request):
     """Delete a project and invalidate its cache entry.

     The project name is the first ``project`` request argument. It is
     passed to ``_delete_version`` and then to
     ``UtilsCache.invalid_cache``.
     """
     params = native_stringify_dict(copy(request.args), keys_only=False)
     project_name = params['project'][0]

     self._delete_version(project_name)
     UtilsCache.invalid_cache(project_name)

     response = {"node_name": self.root.node_name, "status": "ok"}
     return response
Exemple #5
0
 def render_GET(self, request):
     """List the versions the egg storage holds for one project.

     The project name is the first ``project`` request argument.
     """
     params = native_stringify_dict(copy(request.args), keys_only=False)
     project_name = params['project'][0]
     available = self.root.egg_storage.list(project_name)
     response = {
         "node_name": self.root.node_name,
         "status": "ok",
         "versions": available,
     }
     return response
Exemple #6
0
 def render_GET(self, request):
     """List the spiders of one project.

     Uses the first ``project`` request argument and the first
     ``_version`` argument (an empty string when absent), and asks
     ``get_spider_list`` for the spiders using the root's runner.
     """
     params = native_stringify_dict(copy(request.args), keys_only=False)
     project_name = params['project'][0]
     version_values = params.get('_version', [''])
     spider_names = get_spider_list(
         project_name,
         runner=self.root.runner,
         version=version_values[0])
     response = {
         "node_name": self.root.node_name,
         "status": "ok",
         "spiders": spider_names,
     }
     return response
Exemple #7
0
 def render_POST(self, request):
     """Cancel a job, whether it is still queued or already running.

     Request arguments (the first value of each is used): ``project``,
     ``job`` and optional ``signal`` (default ``'TERM'``).

     The job is removed from the project's pending queue, and every
     launcher process whose job id matches is sent the signal. The
     response's ``prev_state`` is "running" if a process matched,
     otherwise "pending" if the queue removal reported something,
     otherwise ``None``.
     """
     raw = native_stringify_dict(copy(request.args), keys_only=False)
     params = {key: values[0] for key, values in raw.items()}
     project = params['project']
     job_id = params['job']
     signal = params.get('signal', 'TERM')

     pending_queue = self.root.poller.queues[project]
     removed = pending_queue.remove(lambda entry: entry["_job"] == job_id)
     prev_state = "pending" if removed else None

     for proc in self.root.launcher.processes.values():
         if s_matches := (proc.job == job_id):
             proc.transport.signalProcess(signal)
             prev_state = "running"

     return {
         "node_name": self.root.node_name,
         "status": "ok",
         "prev_state": prev_state
     }
Exemple #8
0
 def render_POST(self, request):
     """Schedule a spider run and return the job id.

     Request arguments:
       setting -- repeatable ``NAME=VALUE`` pairs, collected into a dict
                  and forwarded as ``settings``
       project, spider -- required; the first value of each is used
       _version -- optional project version used to look up spiders
                   (default ``''``); it stays in the forwarded arguments
       job -- optional job id; a ``uuid1`` hex string is used if absent
     Every remaining argument (first value only) is forwarded to the
     scheduler as a keyword argument.

     Returns a dict with ``node_name``, ``status`` = "ok" and ``job``,
     or ``node_name``, ``status`` = "error" and ``message`` when the
     spider is not among those reported by ``get_spider_list``.
     """
     args = native_stringify_dict(copy(request.args), keys_only=False)
     settings = args.pop('setting', [])
     # Split on the first '=' only so values may themselves contain '='.
     settings = dict(x.split('=', 1) for x in settings)
     args = {k: v[0] for k, v in args.items()}
     project = args.pop('project')
     spider = args.pop('spider')
     version = args.get('_version', '')
     spiders = get_spider_list(project, version=version)
     if spider not in spiders:
         # Include node_name so error responses have the same shape as
         # the success response.
         return {
             "node_name": self.root.node_name,
             "status": "error",
             "message": "spider '%s' not found" % spider
         }
     args['settings'] = settings
     job_id = args.pop('job', uuid.uuid1().hex)
     args['_job'] = job_id
     self.root.scheduler.schedule(project, spider, **args)
     return {
         "node_name": self.root.node_name,
         "status": "ok",
         "job": job_id
     }