def parse_and_build_application(self):
    """Fetch the application, its jobs and its executors from the Spark History Server REST API."""
    application_url = self.spark_history_server_api_url + 'applications/%s/1/' % self.application_id
    application_json = jsonutil.get_json(application_url)
    start_time = timeutil.convert_str_to_timestamp(application_json['startTime'])

    jobs_url = self.spark_history_server_api_url + 'applications/%s/1/jobs' % self.application_id
    jobs_json = jsonutil.get_json(jobs_url)
    jobs = [self.parse_and_build_job(job_json) for job_json in jobs_json]
    jobs.reverse()  # the API lists jobs newest-first; reverse into chronological order

    # Take the input size of the very first stage; fall back to 0 if the
    # application has no jobs or the first job has no stages.
    try:
        input_bytes = jobs[0].stages[0].input_bytes
    except IndexError:
        input_bytes = 0

    executors_url = self.spark_history_server_api_url + 'applications/%s/1/allexecutors' % self.application_id
    executors_json = jsonutil.get_json(executors_url)
    executors = [
        self.parse_and_build_executor(executor_json)
        for executor_json in executors_json
        if executor_json['id'] != 'driver'
    ]

    return sparkmodel.Application(self.application_id, start_time, jobs, executors, input_bytes)
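# The methods in this section delegate all HTTP access to jsonutil.get_json, which
# is not shown here. A minimal sketch of what such a helper could look like,
# assuming it simply issues an HTTP GET and decodes the JSON body (an illustrative
# assumption, not the project's actual implementation):
import json
import urllib.request


def get_json(url):
    """GET the given URL and return the decoded JSON payload (dict or list)."""
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode('utf-8'))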
def build(self, application_id: str):
    """Build a CompletedApplication from the YARN Application History Server REST API."""
    app_json: dict = jsonutil.get_json(
        self.url + 'ws/v1/applicationhistory/apps/%s/' % application_id)
    start_time = app_json['startedTime']
    end_time = app_json['finishedTime']
    elapsed_time = app_json['elapsedTime']
    name = app_json['name']
    return CompletedApplication(start_time, end_time, elapsed_time, name)
def parse_and_build_resources(self) -> List[Resource]:
    """Aggregate total memory and vcores of each node from the YARN ResourceManager REST API."""
    url = self.RM_API_URL + 'ws/v1/cluster/nodes'
    conf = jsonutil.get_json(url)
    nodes = conf['nodes']['node']
    resources = []
    for n in nodes:
        # total = used + available; memory is reported in MB, convert to GB
        memory = (int(n['usedMemoryMB']) + int(n['availMemoryMB'])) / 1024
        vcores = int(n['usedVirtualCores']) + int(n['availableVirtualCores'])
        resources.append(Resource(vcores, int(memory)))
    return resources
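# Resource is consumed positionally as Resource(vcores, memory) above. A minimal
# sketch of such a value object, assuming it only needs to carry the two per-node
# totals (field names here are illustrative; the real model class may differ):
from dataclasses import dataclass


@dataclass
class Resource:
    vcores: int
    memory_gb: int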
def parse_and_build_job(self, j):
    """Build a Job and its Stages from a single job JSON object."""
    job_id = j['jobId']
    name = j['name']
    stage_ids = j['stageIds']
    stages = []
    for stage_id in stage_ids:
        stage_url = self.spark_history_server_api_url + 'applications/%s/1/stages/%s' % (
            self.application_id, stage_id)
        stage_json = jsonutil.get_json(stage_url)
        stage = self.parse_and_build_stage(stage_json)
        stages.append(stage)
    stages.sort(key=lambda s: s.stage_id)
    return sparkmodel.Job(job_id, name, stages)
def parse_and_build_stage(self, j):
    """Build a Stage and its Tasks from the stage endpoint's response."""
    j = j[0]  # the endpoint returns a list of stage attempts; use the first one
    stage_id = j['stageId']
    num_tasks = j['numTasks']
    input_bytes = j['inputBytes']
    name = j['name']
    tasks_url = self.spark_history_server_api_url + 'applications/%s/1/stages/%s/0/taskList' % (
        self.application_id, stage_id)
    tasks_json = jsonutil.get_json(tasks_url)
    tasks = [
        self.parse_and_build_task(task_json) for task_json in tasks_json
    ]
    return sparkmodel.Stage(stage_id, num_tasks, input_bytes, name, tasks)
def get_queue_constraints(self):
    """Read per-queue capacity limits from the YARN ResourceManager scheduler API."""
    url = self.RM_HOST + 'ws/v1/cluster/scheduler'
    conf = jsonutil.get_json(url)
    ret = []
    queues = conf['scheduler']['schedulerInfo']['queues']['queue']
    for q in queues:
        name = q['queueName']
        capacity = q['capacity']
        used_capacity = q['usedCapacity']
        max_capacity = q['maxCapacity']
        ret.append(
            QueueConstraint(name, used_capacity, capacity, max_capacity))
    return ret
def parse_and_build_running_apps(self) -> List[RunningApplication]:
    """Fetch all RUNNING applications from the YARN ResourceManager REST API."""
    url = self.RM_API_URL + 'ws/v1/cluster/apps?states=RUNNING'
    app_json = jsonutil.get_json(url)
    return self.build_running_apps_from_json(app_json)
def parse_and_build_waiting_apps(self) -> List[WaitingApplication]:
    """Fetch applications that are submitted but not yet running (NEW, NEW_SAVING, SUBMITTED, ACCEPTED)."""
    url = self.RM_API_URL + 'ws/v1/cluster/apps?states=NEW,NEW_SAVING,SUBMITTED,ACCEPTED'
    app_json = jsonutil.get_json(url)
    return self.build_waiting_apps_from_json(app_json)
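# The two methods above hand the raw response to builder helpers that are not shown
# in this section. A hedged sketch of what build_running_apps_from_json might do,
# assuming the ResourceManager's usual {"apps": {"app": [...]}} response shape and a
# hypothetical RunningApplication(app_id, name) constructor (both are assumptions):
def build_running_apps_from_json(self, app_json) -> List[RunningApplication]:
    # "apps" is null when no application matches the requested states
    apps = (app_json.get('apps') or {}).get('app') or []
    return [RunningApplication(a['id'], a['name']) for a in apps]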