def cluster_stat_processor(resultset, fields=None, limit=100):
    """Decode ClusterStat records from a trace resultset.

    Each entry in *resultset* carries a ``data_list`` of serialized
    log_pb2.ClusterStat messages; every message is converted to a dict,
    narrowed by data_filter, sorted by "time" and truncated to *limit*.

    fields: optional whitelist forwarded to data_filter (None -> []).
    limit:  maximum number of records returned.
    """
    # BUG FIX: the original used a mutable default (fields=[]); default
    # to None so one list is not shared across calls.
    fields = [] if fields is None else fields
    stats = []
    cluster_stat = log_pb2.ClusterStat()
    for data in resultset:
        for d in data.data_list:
            cluster_stat.ParseFromString(d)
            stats.append(data_filter(util.pb2dict(cluster_stat), fields))
    stats.sort(key=lambda x: x["time"])
    return stats[0:limit]
def pod_stat_processor(resultset, fields=None, limit=100):
    """Decode PodStat records, adding a human-readable "ftime" column.

    fields: optional whitelist forwarded to data_filter (None -> []).
    limit:  maximum number of records returned.
    """
    # BUG FIX: the original used a mutable default (fields=[]); default
    # to None so one list is not shared across calls.
    fields = [] if fields is None else fields
    stats = []
    pod_stat = log_pb2.PodStat()
    for data in resultset:
        for d in data.data_list:
            pod_stat.ParseFromString(d)
            e = util.pb2dict(pod_stat)
            # "time" is in microseconds; render it as a local timestamp.
            e['ftime'] = datetime.datetime.fromtimestamp(
                e['time'] / 1000000).strftime("%Y-%m-%d %H:%M:%S")
            stats.append(data_filter(e, fields))
    # NOTE(review): unlike the sibling processors this one does not sort
    # by "time" before truncating — confirm whether that is intentional.
    return stats[0:limit]
def agent_event_processor(resultset, fields=None, limit=100):
    """Decode AgentEvent records, sorted by "time" and truncated to *limit*.

    A human-readable "ftime" column is added from the microsecond "time".

    fields: optional whitelist forwarded to data_filter (None -> []).
    limit:  maximum number of records returned.
    """
    # BUG FIX: the original used a mutable default (fields=[]); default
    # to None so one list is not shared across calls.
    fields = [] if fields is None else fields
    stats = []
    agent_event = log_pb2.AgentEvent()
    for data in resultset:
        for d in data.data_list:
            agent_event.ParseFromString(d)
            e = util.pb2dict(agent_event)
            # "time" is in microseconds; render it as a local timestamp.
            e['ftime'] = datetime.datetime.fromtimestamp(
                e['time'] / 1000000).strftime("%Y-%m-%d %H:%M:%S")
            stats.append(data_filter(e, fields))
    stats.sort(key=lambda x: x["time"])
    return stats[0:limit]
def job_stat_processor(resultset, fields=None, limit=100):
    """Decode JobStat records, sorted by "time" and truncated to *limit*.

    Returns [] immediately when no *fields* whitelist is supplied
    (preserving the original contract).

    fields: whitelist forwarded to data_filter; falsy -> empty result.
    limit:  maximum number of records returned.
    """
    # NOTE: default changed from the mutable fields=[] to None; the
    # falsy check below behaves identically for both.
    if not fields:
        return []
    stats = []
    job_stat = log_pb2.JobStat()
    for result in resultset:
        for d in result.data_list:
            job_stat.ParseFromString(d)
            data = util.pb2dict(job_stat)
            stats.append(data_filter(data, fields))
    stats.sort(key=lambda x: x["time"])
    # BUG FIX: the original accepted *limit* but never applied it,
    # unlike every sibling processor; honor it here.
    return stats[0:limit]
def get_pod_event_by_jobid(self, jobid, start_time, end_time, limit=1000):
    """Query PodEvent records indexed by jobid in [start_time, end_time].

    start_time/end_time are seconds; the trace store expects microseconds.
    Returns (events, ok) where each event dict has its stage/level/state
    enum values replaced by their symbolic names.
    """
    data_list, status = self.ftrace.index_query(
        "baidu.galaxy", "PodEvent", "jobid", jobid,
        int(start_time * 1000000), int(end_time * 1000000), limit=limit
    )
    # BUG FIX: the query status was ignored; fail fast like the other
    # query helpers instead of silently returning an empty result as ok.
    if not status:
        logger.error("fail to query pod event")
        return [], False
    events = []
    pod_event = log_pb2.PodEvent()
    for data in data_list:
        for d in data.data_list:
            pod_event.ParseFromString(d)
            e = util.pb2dict(pod_event)
            e["stage"] = galaxy_pb2.PodStage.Name(pod_event.stage)
            e["level"] = log_pb2.TraceLevel.Name(pod_event.level)
            e["state"] = galaxy_pb2.PodState.Name(pod_event.state)
            events.append(e)
    return events, True
def get_pod_stat(self, podid, start_time, end_time, limit=100):
    """Query PodStat samples for *podid* in [start_time, end_time].

    start_time/end_time are seconds; the trace store expects microseconds.
    Returns (stats, ok) with each record's "time" downscaled from
    microseconds to milliseconds.
    """
    data_list, status = self.ftrace.simple_query(
        "baidu.galaxy", "PodStat", podid,
        int(start_time * 1000000), int(end_time * 1000000), limit=limit
    )
    if not status:
        # BUG FIX: the message previously said "job stat" in this
        # pod-stat helper (copy-paste error).
        logger.error("fail to query pod stat")
        return [], False
    stats = []
    pod_stat = log_pb2.PodStat()
    for data in data_list:
        for d in data.data_list:
            pod_stat.ParseFromString(d)
            # Floor division: under Python 3 "/" yields a float, which
            # cannot be assigned to an integer protobuf field (identical
            # to "/" on ints under Python 2).
            pod_stat.time = pod_stat.time // 1000
            stats.append(util.pb2dict(pod_stat))
    return stats, True
def task_event_processor(resultset, fields=None, limit=100):
    """Decode TaskEvent records, newest first, truncated to *limit*.

    Enum fields are replaced with their symbolic names, the initd port is
    split out of "initd_addr", and "ftime" is formatted from the
    microsecond "ttime".

    fields: optional whitelist forwarded to data_filter (None -> []).
    limit:  maximum number of records returned.
    """
    # BUG FIX: the original used a mutable default (fields=[]); default
    # to None so one list is not shared across calls.
    fields = [] if fields is None else fields
    events = []
    task_event = log_pb2.TaskEvent()
    for data in resultset:
        for d in data.data_list:
            task_event.ParseFromString(d)
            e = util.pb2dict(task_event)
            e["initd_port"] = e["initd_addr"].split(":")[-1]
            e["stage"] = agent_pb2.TaskStage.Name(task_event.stage)
            e["level"] = log_pb2.TraceLevel.Name(task_event.level)
            e["state"] = galaxy_pb2.TaskState.Name(task_event.state)
            e["main"] = initd_pb2.ProcessStatus.Name(task_event.main)
            e["ftime"] = datetime.datetime.fromtimestamp(
                e['ttime'] / 1000000).strftime("%Y-%m-%d %H:%M:%S")
            e["deploy"] = initd_pb2.ProcessStatus.Name(task_event.deploy)
            events.append(data_filter(e, fields))
    events.sort(key=lambda x: x["ttime"], reverse=True)
    return events[0:limit]
def pod_event_processor(resultset, fields=None, limit=100):
    """Decode PodEvent records, keeping only error/warning-level events.

    Events whose level is not TERROR or TWARNING are dropped; the rest
    are sorted by "time" and truncated to *limit*. Returns [] when no
    *fields* whitelist is supplied (preserving the original contract).

    fields: whitelist forwarded to data_filter; falsy -> empty result.
    limit:  maximum number of records returned.
    """
    # NOTE: default changed from the mutable fields=[] to None; the
    # falsy check below behaves identically for both.
    if not fields:
        return []
    events = []
    pod_event = log_pb2.PodEvent()
    for data in resultset:
        for d in data.data_list:
            pod_event.ParseFromString(d)
            e = util.pb2dict(pod_event)
            e["stage"] = galaxy_pb2.PodStage.Name(pod_event.stage)
            e["level"] = log_pb2.TraceLevel.Name(pod_event.level)
            # Only surface problems; informational events are skipped.
            if e["level"] not in ["TERROR", "TWARNING"]:
                continue
            e["state"] = galaxy_pb2.PodState.Name(pod_event.state)
            events.append(data_filter(e, fields))
    events.sort(key=lambda x: x["time"])
    return events[0:limit]
def get_pods(self, jobid):
    """Fetch all pods of *jobid* from the master via ShowPod RPC.

    Returns (pods, ok): a list of pod dicts with stage/state enum values
    replaced by their symbolic names, and a success flag.
    """
    controller = client.Controller()
    controller.SetTimeout(5)
    master = master_pb2.Master_Stub(self.channel)
    request = master_pb2.ShowPodRequest()
    request.jobid = jobid
    response = master.ShowPod(controller, request)
    if response.status != galaxy_pb2.kOk:
        LOG.error("fail get pods")
        return [], False
    # BUG FIX: "pods" was never initialized, so the append below raised
    # NameError on every successful response.
    pods = []
    for pod in response.pods:
        new_pod = util.pb2dict(pod)
        new_pod["stage"] = galaxy_pb2.PodStage.Name(pod.stage)
        new_pod["state"] = galaxy_pb2.PodState.Name(pod.state)
        pods.append(new_pod)
    return pods, True
def get_pod_event(self, podid, start_time, end_time, limit=100):
    """Return (events, ok) for PodEvent records of *podid* in the window.

    The second-resolution window is scaled to microseconds for the trace
    store; enum fields come back as their symbolic names.
    """
    begin_us = int(start_time * 1000000)
    end_us = int(end_time * 1000000)
    rows, ok = self.ftrace.simple_query(
        "baidu.galaxy", "PodEvent", podid, begin_us, end_us, limit=limit
    )
    if not ok:
        logger.error("fail to query pod event")
        return [], False
    proto = log_pb2.PodEvent()
    decoded = []
    for row in rows:
        for raw in row.data_list:
            proto.ParseFromString(raw)
            record = util.pb2dict(proto)
            record["stage"] = galaxy_pb2.PodStage.Name(proto.stage)
            record["level"] = log_pb2.TraceLevel.Name(proto.level)
            record["state"] = galaxy_pb2.PodState.Name(proto.state)
            decoded.append(record)
    return decoded, True
def get_pod_stat(self, podid, start_time, end_time, limit=100):
    """Query PodStat samples for *podid* in [start_time, end_time].

    start_time/end_time are seconds; the trace store expects microseconds.
    Returns (stats, ok) with each record's "time" downscaled from
    microseconds to milliseconds.
    """
    data_list, status = self.ftrace.simple_query(
        "baidu.galaxy", "PodStat", podid,
        int(start_time * 1000000), int(end_time * 1000000), limit=limit)
    if not status:
        # BUG FIX: the message previously said "job stat" in this
        # pod-stat helper (copy-paste error).
        logger.error("fail to query pod stat")
        return [], False
    stats = []
    pod_stat = log_pb2.PodStat()
    for data in data_list:
        for d in data.data_list:
            pod_stat.ParseFromString(d)
            # Floor division: under Python 3 "/" yields a float, which
            # cannot be assigned to an integer protobuf field (identical
            # to "/" on ints under Python 2).
            pod_stat.time = pod_stat.time // 1000
            stats.append(util.pb2dict(pod_stat))
    return stats, True
def get_pod_event_by_jobid(self, jobid, start_time, end_time, limit=1000):
    """Query PodEvent records indexed by jobid in [start_time, end_time].

    start_time/end_time are seconds; the trace store expects microseconds.
    Returns (events, ok) where each event dict has its stage/level/state
    enum values replaced by their symbolic names.
    """
    data_list, status = self.ftrace.index_query(
        "baidu.galaxy", "PodEvent", "jobid", jobid,
        int(start_time * 1000000), int(end_time * 1000000), limit=limit)
    # BUG FIX: the query status was ignored; fail fast like the other
    # query helpers instead of silently returning an empty result as ok.
    if not status:
        logger.error("fail to query pod event")
        return [], False
    events = []
    pod_event = log_pb2.PodEvent()
    for data in data_list:
        for d in data.data_list:
            pod_event.ParseFromString(d)
            e = util.pb2dict(pod_event)
            e["stage"] = galaxy_pb2.PodStage.Name(pod_event.stage)
            e["level"] = log_pb2.TraceLevel.Name(pod_event.level)
            e["state"] = galaxy_pb2.PodState.Name(pod_event.state)
            events.append(e)
    return events, True
def get_pod_event(self, podid, start_time, end_time, limit=100):
    """Look up PodEvent records for one pod and return (events, ok).

    Seconds are converted to the store's microsecond resolution; each
    event's enum fields are rendered as symbolic names.
    """
    result, ok = self.ftrace.simple_query(
        "baidu.galaxy",
        "PodEvent",
        podid,
        int(start_time * 1000000),
        int(end_time * 1000000),
        limit=limit,
    )
    if not ok:
        logger.error("fail to query pod event")
        return [], False
    message = log_pb2.PodEvent()
    out = []
    for entry in result:
        for blob in entry.data_list:
            message.ParseFromString(blob)
            item = util.pb2dict(message)
            item["stage"] = galaxy_pb2.PodStage.Name(message.stage)
            item["level"] = log_pb2.TraceLevel.Name(message.level)
            item["state"] = galaxy_pb2.PodState.Name(message.state)
            out.append(item)
    return out, True
def get_task_event(self, podid, start_time, end_time, limit=100):
    """Query TaskEvent records indexed by pod_id in [start_time, end_time].

    start_time/end_time are seconds; the trace store expects microseconds.
    Returns (events, ok); each event keeps only the 8-char id suffix and
    splits the initd port out of "initd_addr".
    """
    data_list, status = self.ftrace.index_query(
        "baidu.galaxy", "TaskEvent", "pod_id", podid,
        int(start_time * 1000000), int(end_time * 1000000), limit=limit)
    # BUG FIX: the query status was ignored; fail fast like the other
    # query helpers instead of silently returning an empty result as ok.
    if not status:
        logger.error("fail to query task event")
        return [], False
    events = []
    task_event = log_pb2.TaskEvent()
    for data in data_list:
        for d in data.data_list:
            task_event.ParseFromString(d)
            e = util.pb2dict(task_event)
            # Only the short id suffix is useful for display.
            e["id"] = e["id"][-8:]
            e["initd_port"] = e["initd_addr"].split(":")[-1]
            e["stage"] = agent_pb2.TaskStage.Name(task_event.stage)
            e["level"] = log_pb2.TraceLevel.Name(task_event.level)
            e["state"] = galaxy_pb2.TaskState.Name(task_event.state)
            events.append(e)
    return events, True
def get_job_stat(self, jobid, start_time, end_time, limit=100):
    """Query JobStat samples for *jobid* in [start_time, end_time].

    start_time/end_time are seconds; the trace store expects microseconds.
    Returns (stats, ok) with each record's "time" downscaled from
    microseconds to milliseconds; logs a summary of the query window.
    """
    data_list, status = self.ftrace.simple_query(
        "baidu.galaxy", "JobStat", jobid,
        int(start_time * 1000000), int(end_time * 1000000), limit=limit)
    if not status:
        logger.error("fail to query job stat")
        return [], False
    stats = []
    job_stat = log_pb2.JobStat()
    for data in data_list:
        for d in data.data_list:
            job_stat.ParseFromString(d)
            # Floor division: under Python 3 "/" yields a float, which
            # cannot be assigned to an integer protobuf field (identical
            # to "/" on ints under Python 2).
            job_stat.time = job_stat.time // 1000
            stats.append(util.pb2dict(job_stat))
    logger.info(
        "query job %s stat from %s to %s , count %d" % (
            jobid,
            datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S"),
            datetime.datetime.fromtimestamp(end_time).strftime("%Y-%m-%d %H:%M:%S"),
            len(stats)))
    return stats, True
def get_task_event(self, podid, start_time, end_time, limit=100):
    """Query TaskEvent records indexed by pod_id in [start_time, end_time].

    start_time/end_time are seconds; the trace store expects microseconds.
    Returns (events, ok); each event keeps only the 8-char id suffix and
    splits the initd port out of "initd_addr".
    """
    data_list, status = self.ftrace.index_query(
        "baidu.galaxy", "TaskEvent", "pod_id", podid,
        int(start_time * 1000000), int(end_time * 1000000),
        limit=limit,
    )
    # BUG FIX: the query status was ignored; fail fast like the other
    # query helpers instead of silently returning an empty result as ok.
    if not status:
        logger.error("fail to query task event")
        return [], False
    events = []
    task_event = log_pb2.TaskEvent()
    for data in data_list:
        for d in data.data_list:
            task_event.ParseFromString(d)
            e = util.pb2dict(task_event)
            # Only the short id suffix is useful for display.
            e["id"] = e["id"][-8:]
            e["initd_port"] = e["initd_addr"].split(":")[-1]
            e["stage"] = agent_pb2.TaskStage.Name(task_event.stage)
            e["level"] = log_pb2.TraceLevel.Name(task_event.level)
            e["state"] = galaxy_pb2.TaskState.Name(task_event.state)
            events.append(e)
    return events, True
def get_job_stat(self, jobid, start_time, end_time, limit=100):
    """Query JobStat samples for *jobid* in [start_time, end_time].

    start_time/end_time are seconds; the trace store expects microseconds.
    Returns (stats, ok) with each record's "time" downscaled from
    microseconds to milliseconds; logs a summary of the query window.
    """
    data_list, status = self.ftrace.simple_query(
        "baidu.galaxy", "JobStat", jobid,
        int(start_time * 1000000), int(end_time * 1000000), limit=limit
    )
    if not status:
        logger.error("fail to query job stat")
        return [], False
    stats = []
    job_stat = log_pb2.JobStat()
    for data in data_list:
        for d in data.data_list:
            job_stat.ParseFromString(d)
            # Floor division: under Python 3 "/" yields a float, which
            # cannot be assigned to an integer protobuf field (identical
            # to "/" on ints under Python 2).
            job_stat.time = job_stat.time // 1000
            stats.append(util.pb2dict(job_stat))
    logger.info(
        "query job %s stat from %s to %s , count %d" % (
            jobid,
            datetime.datetime.fromtimestamp(start_time).strftime("%Y-%m-%d %H:%M:%S"),
            datetime.datetime.fromtimestamp(end_time).strftime("%Y-%m-%d %H:%M:%S"),
            len(stats),
        )
    )
    return stats, True