def _spawn_process(self, message, slot):
    """Launch a crawl subprocess for ``message`` and record it under ``slot``.

    The child is started as ``<sys.executable> -m <self.runner> crawl ...``
    with the extra arguments produced by ``get_crawl_args`` and with the
    environment supplied by the application's ``IEnvironment`` component.
    The resulting process protocol is stored in ``self.processes[slot]``
    and ``self._process_finished`` is attached to its deferred (called with
    ``slot`` on both success and failure).
    """
    job = native_stringify_dict(message, keys_only=False)
    project = job['_project']

    # Command line: interpreter, runner module, the "crawl" command, then
    # whatever per-job arguments get_crawl_args derives from the message.
    command = [sys.executable, '-m', self.runner, 'crawl']
    command.extend(get_crawl_args(job))

    environment_provider = self.app.getComponent(IEnvironment)
    child_env = native_stringify_dict(
        environment_provider.get_environment(job, slot),
        keys_only=False,
    )

    protocol = ScrapyProcessProtocol(
        slot, project, job['_spider'], job['_job'], child_env)
    protocol.deferred.addBoth(self._process_finished, slot)

    reactor.spawnProcess(protocol, sys.executable, args=command, env=child_env)
    self.processes[slot] = protocol
def render_GET(self, request):
    """Report the pending, running and finished jobs of one project.

    The project name is the first value of the ``project`` request argument.
    Running jobs come from ``self.root.launcher.processes``, pending jobs
    from the project's queue in ``self.root.poller.queues`` and finished
    jobs from ``self.root.launcher.finished``.

    Returns a dict with "node_name", "status", "pending", "running" and
    "finished" keys.
    """
    params = native_stringify_dict(copy(request.args), keys_only=False)
    project = params['project'][0]

    # Processes currently tracked by the launcher that belong to the project.
    running = []
    for proc in self.root.launcher.processes.values():
        if proc.project != project:
            continue
        running.append({
            "id": proc.job,
            "spider": proc.spider,
            "pid": proc.pid,
            "start_time": proc.start_time.isoformat(' '),
        })

    # Every entry of the project's queue is reported as pending.
    pending = []
    for entry in self.root.poller.queues[project].list():
        pending.append({"id": entry["_job"], "spider": entry["name"]})

    finished = []
    for proc in self.root.launcher.finished:
        if proc.project != project:
            continue
        finished.append({
            "id": proc.job,
            "spider": proc.spider,
            "start_time": proc.start_time.isoformat(' '),
            "end_time": proc.end_time.isoformat(' '),
        })

    return {
        "node_name": self.root.node_name,
        "status": "ok",
        "pending": pending,
        "running": running,
        "finished": finished,
    }
def render_POST(self, request):
    """Scan a job's log file for error blocks, starting at a given offset.

    Request arguments (only the first value of each is used):
        project, spider, job: select the file
            ``<logs_dir>/<project>/<spider>/<job>.log``.
        offset: optional position handed to ``seek`` before reading
            (defaults to 0).

    Returns:
        On success, a dict with "status": "ok", "whence" (the position
        reported by ``tell()`` after reading the rest of the file) and
        "errors" (a list of 3-tuples built from the regex matches).
        Any exception, including a missing argument, an unreadable file or
        a path that escapes the logs directory, is reported as
        "status": "error" with the exception text in "message".
    """
    try:
        args = native_stringify_dict(copy(request.args), keys_only=False)
        args = {key: values[0] for key, values in args.items()}
        project = args['project']
        spider = args['spider']
        job_id = args['job']
        offset = int(args.get('offset', 0))

        logs_root = os.path.abspath(self.root.logs_dir)
        log_path = os.path.abspath(os.path.join(
            logs_root, project, spider, "{}.log".format(job_id)))
        # project/spider/job come straight from the request: refuse any
        # combination (e.g. containing "..") that resolves outside logs_dir.
        if os.path.commonpath([logs_root, log_path]) != logs_root:
            raise ValueError("log path is outside of the logs directory")

        # The with-statement closes the file; no explicit close() is needed.
        with open(log_path, "r", encoding='UTF-8') as f:
            f.seek(offset)
            content = f.read()  # everything from offset to end of file
            # Position after the read; presumably the caller passes it back
            # as the next "offset" to scan only new output (verify against
            # the client).
            whence = f.tell()

        # comp_block.sub prefixes each match with the delimiter, then
        # comp_error.findall extracts the matches from the rewritten text.
        marked = self.comp_block.sub(
            repl=r'{}\1'.format(self.delimiter),
            string=content)
        matches = self.comp_error.findall(string=marked)
        # Reorder each match from positions (0, 1, 2) to (1, 2, 0).
        errors = [(m[1], m[2], m[0]) for m in matches]

        return {
            "node_name": self.root.node_name,
            "status": "ok",
            "whence": whence,
            "errors": errors,
        }
    except Exception as e:
        # Endpoint boundary: report the failure to the client rather than
        # letting the exception propagate.
        return {
            "node_name": self.root.node_name,
            "status": "error",
            "message": str(e),
        }
def render_POST(self, request):
    """Delete a project and invalidate its cached data.

    The project name is the first value of the ``project`` request
    argument. It is passed to ``self._delete_version`` (without a version)
    and then to ``UtilsCache.invalid_cache``.

    Returns a dict with "node_name" and "status": "ok".
    """
    params = native_stringify_dict(copy(request.args), keys_only=False)
    target_project = params['project'][0]

    self._delete_version(target_project)
    UtilsCache.invalid_cache(target_project)

    response = {"node_name": self.root.node_name, "status": "ok"}
    return response
def render_GET(self, request):
    """List the versions stored for a project.

    The project name is the first value of the ``project`` request
    argument; the versions are whatever ``self.root.egg_storage.list``
    returns for it.

    Returns a dict with "node_name", "status": "ok" and "versions".
    """
    params = native_stringify_dict(copy(request.args), keys_only=False)
    project_name = params['project'][0]
    available = self.root.egg_storage.list(project_name)

    response = {"node_name": self.root.node_name, "status": "ok"}
    response["versions"] = available
    return response
def render_GET(self, request):
    """List the spiders of a project, optionally for a specific version.

    Request arguments (first value of each is used):
        project: project name (required).
        _version: optional version; an empty string is used when absent.

    Returns a dict with "node_name", "status": "ok" and "spiders" (the
    result of ``get_spider_list``).
    """
    params = native_stringify_dict(copy(request.args), keys_only=False)
    project_name = params['project'][0]
    # A missing "_version" falls back to [''] so indexing yields ''.
    requested_version = params.get('_version', [''])[0]

    spider_names = get_spider_list(
        project_name,
        runner=self.root.runner,
        version=requested_version,
    )

    response = {"node_name": self.root.node_name, "status": "ok"}
    response["spiders"] = spider_names
    return response
def render_POST(self, request):
    """Cancel a job: drop it from the queue and/or signal its process.

    Request arguments (first value of each is used):
        project: project whose queue is searched.
        job: id of the job to cancel.
        signal: optional signal name sent to a running process
            (defaults to 'TERM').

    Returns a dict with "node_name", "status": "ok" and "prev_state", which
    is "pending" if the job was removed from the queue, "running" if a
    matching process was signalled (this takes precedence), or None if
    neither happened.
    """
    raw_args = native_stringify_dict(copy(request.args), keys_only=False)
    params = {key: values[0] for key, values in raw_args.items()}
    project = params['project']
    job_id = params['job']
    signal_name = params.get('signal', 'TERM')

    prev_state = None

    # Remove the job from the project's queue if it has not started yet.
    project_queue = self.root.poller.queues[project]
    if project_queue.remove(lambda entry: entry["_job"] == job_id):
        prev_state = "pending"

    # Signal every launcher process that carries this job id.
    for process in self.root.launcher.processes.values():
        if process.job != job_id:
            continue
        process.transport.signalProcess(signal_name)
        prev_state = "running"

    return {
        "node_name": self.root.node_name,
        "status": "ok",
        "prev_state": prev_state,
    }
def render_POST(self, request):
    """Schedule a spider run for a project.

    Request arguments:
        project, spider: required; first value of each is used.
        setting: zero or more ``NAME=VALUE`` strings, collected into a
            dict and forwarded as the ``settings`` keyword argument.
        _version: optional version used when listing the project's spiders.
        job: optional job id; a ``uuid1`` hex string is used when absent.
        Any other argument is forwarded (first value only) to the scheduler.

    Returns a dict with "node_name", "status": "ok" and "job" on success,
    or a dict with "status": "error" and "message" when the spider is not
    in the project's spider list.
    """
    raw_args = native_stringify_dict(copy(request.args), keys_only=False)

    # "setting" may be repeated, so it is handled before the remaining
    # arguments are collapsed to their first value.
    setting_pairs = raw_args.pop('setting', [])
    settings = dict(pair.split('=', 1) for pair in setting_pairs)

    params = {key: values[0] for key, values in raw_args.items()}
    project = params.pop('project')
    spider = params.pop('spider')
    version = params.get('_version', '')

    known_spiders = get_spider_list(project, version=version)
    if spider not in known_spiders:
        return {
            "status": "error",
            "message": "spider '%s' not found" % spider,
        }

    params['settings'] = settings
    job_id = params.pop('job', uuid.uuid1().hex)
    params['_job'] = job_id
    self.root.scheduler.schedule(project, spider, **params)

    return {
        "node_name": self.root.node_name,
        "status": "ok",
        "job": job_id,
    }