def index_query(self, db, table, index_name, index_value, time_from, time_to, limit=100):
    """Search a secondary index for rows matching *index_value* in [time_from, time_to].

    Returns (result_list, ok); ([], False) when a required identifier is missing.
    """
    LOG.info(
        "db %s table %s index_name %s index_value %s from %d to %d limit %d"
        % (db, table, index_name, index_value, time_from, time_to, limit))
    # Guard clause: db, table and index_name are all mandatory.
    if not (db and table and index_name):
        return [], False
    stub = query_pb2.SearchEngineService_Stub(self.channel)
    rpc_ctrl = client.Controller()
    rpc_ctrl.SetTimeout(5)
    cond = query_pb2.RpcIndexCondition(cmp_key=index_value,
                                       cmp=query_pb2.RpcEqualTo,
                                       index_table_name=index_name)
    req = query_pb2.RpcSearchRequest(condition=[cond],
                                     db_name=db,
                                     table_name=table,
                                     start_timestamp=time_from,
                                     end_timestamp=time_to,
                                     limit=limit)
    resp = stub.Search(rpc_ctrl, req)
    return resp.result_list, True
def make_job(self, name, pkg_type, pkg_src, boot_cmd,
             replicate_num=1, mem_limit=1024, cpu_limit=2,
             deploy_step_size=-1):
    """Send a new-job command to the galaxy master.

    Returns (ok, job_id): (True, id) when the master accepts the job,
    (False, id-or-None) on rejection or RPC failure.
    """
    assert name
    assert pkg_type
    assert pkg_src
    assert boot_cmd
    req = self._build_new_job_req(name, pkg_type, str(pkg_src), boot_cmd,
                                  replicate_num=replicate_num,
                                  mem_limit=mem_limit,
                                  cpu_limit=cpu_limit,
                                  deploy_step_size=deploy_step_size)
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(1.5)
    try:
        response = master.NewJob(controller, req)
        if not response:
            LOG.error("fail to create job")
            return False, None
        # status == 0 is the master's success code.
        if response.status == 0:
            return True, response.job_id
        return False, response.job_id
    except Exception:  # FIX: was a bare except (also swallowed SystemExit/KeyboardInterrupt)
        LOG.exception("fail to create job")
        return False, None
def list_all_job(self):
    """List every job on the master, including its schedule trace counters.

    Returns (ok, jobs): jobs is a list of BaseEntity records each carrying a
    nested .trace BaseEntity; (False, []) on RPC failure or empty response.
    """
    request = master_pb2.ListJobRequest()
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(1.5)
    try:
        response = master.ListJob(controller, request)
        if not response:
            return False, []
        ret = []
        for job in response.jobs:
            base = BaseEntity()
            base.job_id = job.job_id
            base.job_name = job.job_name
            base.running_task_num = job.running_task_num
            base.replica_num = job.replica_num
            # Copy the scheduling trace into a flat entity for the UI layer.
            trace = BaseEntity()
            trace.killed_count = job.trace.killed_count
            trace.overflow_killed_count = job.trace.overflow_killed_count
            trace.start_count = job.trace.start_count
            trace.deploy_failed_count = job.trace.deploy_failed_count
            trace.reschedule_count = job.trace.reschedule_count
            trace.deploy_start_time = job.trace.deploy_start_time
            trace.deploy_end_time = job.trace.deploy_end_time
            trace.state = SCHEDULE_STATE_MAP[job.trace.state]
            base.trace = trace
            ret.append(base)
        return True, ret
    except Exception:  # FIX: was a bare except
        LOG.exception('fail to list jobs')
        return False, []
def make_req(self, req):
    """Execute a pre-built RpcSearchRequest and return (result_list, True)."""
    stub = query_pb2.SearchEngineService_Stub(self.channel)
    ctrl = client.Controller()
    ctrl.SetTimeout(10)
    LOG.info(req)
    resp = stub.Search(ctrl, req)
    return resp.result_list, True
def list_task_by_job_id(self, job_id):
    """List currently-known tasks of *job_id*.

    Returns (ok, tasks): tasks is a list of BaseEntity records; (False, [])
    on RPC failure or empty response.
    """
    req = master_pb2.ListTaskRequest()
    req.job_id = job_id
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(3.5)
    try:
        response = master.ListTask(controller, req)
        if not response:
            LOG.error('fail to list task %s' % job_id)
            return False, []
        ret = []
        for task in response.tasks:
            base = BaseEntity()
            base.id = task.info.task_id
            base.status = STATE_MAP[task.status]
            base.name = task.info.task_name
            base.agent_addr = task.agent_addr
            base.job_id = task.job_id
            base.offset = task.offset
            base.mem_limit = task.info.required_mem
            base.cpu_limit = task.info.required_cpu
            base.mem_used = task.memory_usage
            base.cpu_used = task.cpu_usage
            base.start_time = task.start_time
            ret.append(base)
        return True, ret
    except Exception:  # FIX: was a bare except
        LOG.exception('fail to list task')
        return False, []
def get_scheduled_history(self, job_id):
    """List finished/rescheduled task records of *job_id*.

    Same shape as list_task_by_job_id but iterates response.scheduled_tasks
    and additionally fills gc_path and a human-readable end_time.
    Returns (ok, tasks); (False, []) on RPC failure or empty response.
    """
    req = master_pb2.ListTaskRequest()
    req.job_id = job_id
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(3.5)
    try:
        response = master.ListTask(controller, req)
        if not response:
            LOG.error('fail to list task %s' % job_id)
            return False, []
        ret = []
        for task in response.scheduled_tasks:
            base = BaseEntity()
            base.id = task.info.task_id
            base.status = STATE_MAP[task.status]
            base.name = task.info.task_name
            base.agent_addr = task.agent_addr
            base.job_id = task.job_id
            base.offset = task.offset
            base.mem_limit = task.info.required_mem
            base.cpu_limit = task.info.required_cpu
            base.mem_used = task.memory_usage
            base.cpu_used = task.cpu_usage
            base.start_time = task.start_time
            base.gc_path = task.root_path
            # end_time is a unix timestamp; render in local time for display.
            base.end_time = datetime.datetime.fromtimestamp(
                task.end_time).strftime("%m-%d %H:%M:%S")
            ret.append(base)
        return True, ret
    except Exception:  # FIX: was a bare except
        LOG.exception('fail to list task history')
        return False, []
def list_all_node(self):
    """List all nodes registered with the galaxy master.

    Returns a list of BaseEntity records; [] on RPC failure or empty response.
    """
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(1.5)
    request = master_pb2.ListNodeRequest()
    try:
        response = master.ListNode(controller, request)
        if not response:
            LOG.error('fail to call list node')
            return []
        ret = []
        for node in response.nodes:
            base = BaseEntity()
            base.id = node.node_id
            base.node_id = node.node_id
            base.addr = node.addr
            base.task_num = node.task_num
            base.cpu_share = node.cpu_share
            base.mem_share = node.mem_share
            base.cpu_allocated = node.cpu_allocated
            base.mem_allocated = node.mem_allocated
            base.mem_used = node.mem_used
            base.cpu_used = node.cpu_used
            ret.append(base)
        return ret
    except Exception:  # FIX: was a bare except
        LOG.exception("fail to call list node")
        return []
def get_real_time_status(self):
    """Fetch the master's current status snapshot (raw protobuf response)."""
    stub = master_pb2.Master_Stub(self.channel)
    ctrl = client.Controller()
    ctrl.SetTimeout(5)
    return stub.GetStatus(ctrl, master_pb2.GetMasterStatusRequest())
def get_all_job(self):
    """Return (jobs, True) with the raw job list from master.ListJobs."""
    stub = master_pb2.Master_Stub(self.channel)
    ctrl = client.Controller()
    ctrl.SetTimeout(5)
    resp = stub.ListJobs(ctrl, master_pb2.ListJobsRequest())
    return resp.jobs, True
def kill_job(self, job_id):
    """Ask the master to kill *job_id*.

    Best-effort: RPC errors are logged and swallowed, nothing is returned.
    """
    req = master_pb2.KillJobRequest()
    req.job_id = job_id
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(1.5)
    try:
        master.KillJob(controller, req)
    except Exception:  # FIX: was a bare except
        LOG.exception('fail to kill job')
def get_status(self):
    """Query the lumia controller for cluster health.

    Returns (live_nodes, dead_nodes); ([], []) on RPC failure.
    """
    lumia_ctrl = lumia_pb2.LumiaCtrl_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(2.5)
    request = lumia_pb2.GetStatusRequest()
    try:
        response = lumia_ctrl.GetStatus(controller, request)
        return response.live_nodes, response.dead_nodes
    except Exception:  # FIX: was a bare except
        LOG.exception("fail to call get status")
        return [], []
def report(self, ip):
    """Report the minion at *ip* as dead to the lumia controller.

    Returns the RPC response status on success, or None on RPC failure.
    """
    lumia_ctrl = lumia_pb2.LumiaCtrl_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(2.5)
    request = lumia_pb2.ReportDeadMinionRequest()
    request.ip = ip
    try:
        response = lumia_ctrl.ReportDeadMinion(controller, request)
        return response.status
    except Exception:
        # FIX: was a bare except that logged a copy-pasted "get status"
        # message and returned ([], []), which does not match the success
        # path's scalar status return.
        LOG.exception("fail to report dead minion")
        return None
def get_overview(self):
    """Fetch the minion overview list from the lumia controller.

    Returns response.minions; [] on RPC failure.
    """
    lumia_ctrl = lumia_pb2.LumiaCtrl_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(2.5)
    request = lumia_pb2.GetOverviewRequest()
    try:
        response = lumia_ctrl.GetOverview(controller, request)
        return response.minions
    except Exception:  # FIX: was a bare except with a copy-pasted "list node" message
        LOG.exception("fail to call get overview")
        return []
def show_minion(self, ips=None):
    """Fetch detailed minion records for the given list of *ips*.

    Returns response.minions; [] when ips is empty/None or on RPC failure.
    """
    # FIX: default was a mutable list literal (shared across calls);
    # None is the safe sentinel and keeps the empty-input behavior.
    if not ips:
        return []
    lumia_ctrl = lumia_pb2.LumiaCtrl_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(2.5)
    request = lumia_pb2.GetMinionRequest(ips=ips)
    try:
        response = lumia_ctrl.GetMinion(controller, request)
        return response.minions
    except Exception:  # FIX: was a bare except with a copy-pasted "list node" message
        LOG.exception("fail to call get minion")
        return []
def tag_agent(self, tag, agent_set):
    """Attach *tag* to the agents in *agent_set* via the master.

    Returns True when the master reports status 0, False otherwise.
    """
    entity = master_pb2.TagEntity(tag=tag, agents=agent_set)
    request = master_pb2.TagAgentRequest(tag_entity=entity)
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(1.5)
    try:
        response = master.TagAgent(controller, request)
        # status == 0 is the master's success code.
        return response.status == 0
    except Exception:  # FIX: was a bare except
        LOG.exception("fail to tag agent")
        return False
def simple_query(self, db, table, id, time_from, time_to, limit=100):
    """Primary-key search in [time_from, time_to]; returns (result_list, ok).

    Returns ([], False) when db, table or id is missing.
    """
    # All three identifiers are mandatory.
    if not (db and table and id):
        return [], False
    stub = query_pb2.SearchEngineService_Stub(self.channel)
    ctrl = client.Controller()
    ctrl.SetTimeout(5)
    req = query_pb2.RpcSearchRequest()
    req.db_name = db
    req.table_name = table
    req.primary_key = id
    req.start_timestamp = time_from
    req.end_timestamp = time_to
    req.limit = limit
    results = stub.Search(ctrl, req).result_list
    return results, True
def update_job(self, id, replicate_num):
    """Update the replica count of job *id* on the master.

    Returns True on success, False on rejection or RPC failure.
    NOTE(review): another update_job definition (with deploy_step_size)
    exists in this source; if both live in the same class the later one
    shadows this one — confirm and remove the dead copy.
    """
    req = master_pb2.UpdateJobRequest()
    req.job_id = int(id)
    req.replica_num = int(replicate_num)
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(1.5)
    try:
        response = master.UpdateJob(controller, req)
        if not response or response.status != 0:
            return False
        return True
    except Exception:  # FIX: was a bare except
        LOG.exception('fail to update job')
        return False
def list_tag(self):
    """Return all agent tags as plain dicts: [{'tag': ..., 'agents': [...]}]."""
    request = master_pb2.ListTagRequest()
    stub = master_pb2.Master_Stub(self.channel)
    ctrl = client.Controller()
    ctrl.SetTimeout(1.5)
    try:
        response = stub.ListTag(ctrl, request)
        entries = []
        for tag in response.tags:
            entity = BaseEntity()
            entity.tag = tag.tag
            entity.agents = list(tag.agents)
            entries.append(entity.__dict__)
        return entries
    except Exception as e:
        LOG.exception("fail to list tag %s" % str(e))
        return []
def get_pods(self, jobid):
    """Fetch pod descriptors of *jobid* from the master.

    Returns (pods, ok): pods is a list of dicts (pb2dict output) with the
    numeric stage/state enums replaced by their symbolic names;
    ([], False) when the master reports an error status.
    """
    controller = client.Controller()
    controller.SetTimeout(5)
    master = master_pb2.Master_Stub(self.channel)
    request = master_pb2.ShowPodRequest()
    request.jobid = jobid
    response = master.ShowPod(controller, request)
    if response.status != galaxy_pb2.kOk:
        LOG.error("fail get pods")
        return [], False
    pods = []  # FIX: was appended to without ever being initialized (NameError)
    for pod in response.pods:
        new_pod = util.pb2dict(pod)
        # Human-readable enum names for the UI layer.
        new_pod["stage"] = galaxy_pb2.PodStage.Name(pod.stage)
        new_pod["state"] = galaxy_pb2.PodState.Name(pod.state)
        pods.append(new_pod)
    return pods, True
def update_job(self, id, replicate_num, deploy_step_size=None):
    """Update replica count (and optionally deploy step size) of job *id*.

    Returns True on success, False on rejection, timeout or RPC failure.
    """
    req = master_pb2.UpdateJobRequest()
    req.job_id = int(id)
    req.replica_num = int(replicate_num)
    # FIX: compare against None with `is not`, not `!=`.
    if deploy_step_size is not None:
        req.deploy_step_size = deploy_step_size
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(1.5)
    try:
        response = master.UpdateJob(controller, req)
        if not response or response.status != 0:
            return False
        return True
    except client.TimeoutError:
        LOG.exception('rpc timeout')
    except Exception:  # FIX: was a bare except
        LOG.exception('fail to update job')
    return False
def set_password(self, user, password):
    """Set *password* for *user* on the agent.

    Returns the agent's response status; -1 when the response is empty,
    -10 on RPC failure.
    """
    agent_stub = agent_pb2.Agent_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(1.5)
    request = agent_pb2.SetPasswordRequest()
    request.user_name = user
    request.password = password
    try:
        response = agent_stub.SetPassword(controller, request)
        if not response:
            return -1
        return response.status
    except Exception:  # FIX: was a bare except
        LOG.exception("fail to call set password")
        return -10
def HeartBeat(self):
    """Agent heartbeat loop: report resources and task status to the master.

    Runs forever (intended to own a thread); each iteration builds and sends
    one HeartBeatRequest, then sleeps one second.
    """
    while True:
        # Hold the mutex while reading _task_status and the identity fields,
        # which other threads presumably mutate — confirm against callers.
        with self._mutex:
            master = master_pb2.Master_Stub(self._channel)
            controller = client.Controller()
            controller.SetTimeout(100)
            req = master_pb2.HeartBeatRequest()
            req.cpu_share = self._cpu
            req.mem_share = self._mem
            req.version = self._version
            req.agent_addr = self._my_addr
            status_list = []
            for key in self._task_status:
                print "running task %s " % key
                status_list.append(self._task_status[key])
            req.task_status.extend(status_list)
            response = master.HeartBeat(controller, req)
            # The master refreshes our identity/version on every beat.
            self._agent_id = response.agent_id
            self._version = response.version
            print "heart beat version %s agent %s" % (self._version, self._agent_id)
        time.sleep(1)
def list_all_job(self):
    """List every job on the master (basic fields only, no trace info).

    Returns (ok, jobs) where jobs is a list of BaseEntity records;
    (False, []) on RPC failure or empty response.
    NOTE(review): this source also contains a fuller list_all_job that
    additionally copies job.trace — confirm which copy is live.
    """
    request = master_pb2.ListJobRequest()
    master = master_pb2.Master_Stub(self.channel)
    controller = client.Controller()
    controller.SetTimeout(1.5)
    try:
        response = master.ListJob(controller, request)
        if not response:
            return False, []
        ret = []
        for job in response.jobs:
            base = BaseEntity()
            base.job_id = job.job_id
            base.job_name = job.job_name
            base.running_task_num = job.running_task_num
            base.replica_num = job.replica_num
            ret.append(base)
        return True, ret
    except Exception:  # FIX: was a bare except
        LOG.exception('fail to list jobs')
        return False, []
def AddMonitorRpc(self, request):
    """Build an RpcMonitorRequest from the 'json_param' GET field and send it.

    Always returns True after the RPC is issued.
    """
    params = json.loads(request.GET.get('json_param'))
    pb_request = scheduler_pb2.RpcMonitorRequest()
    pb_request.db_name = params['db_name']
    pb_request.table_name = params['table_name']
    pb_request.moduler_owner.extend(params['mail_list'])
    rule_set = params['rule_set']
    # Convert the aggregate result rule, then each individual rule.
    self.ConvertRule(rule_set['result'], pb_request.rule_set.result)
    for rule in rule_set['rule_list']:
        self.ConvertRule(rule, pb_request.rule_set.rule_list.add())
    channel = client.Channel(self.service_addr)
    stub = scheduler_pb2.LogSchedulerService_Stub(channel)
    ctrl = client.Controller()
    ctrl.SetTimeout(5)
    stub.RpcMonitor(ctrl, pb_request)
    return True
# * The sofa-pbrpc python lib has been installed. # * The server in ../../sample/echo has been started. # # For more, please refer to `./README'. from sofa.pbrpc import client import echo_service_pb2 import sys # Create service stub. channel = client.Channel('127.0.0.1:12321') service = echo_service_pb2.EchoServer_Stub(channel) # Create controller. # We set timeout to 1.5 seconds by controller.SetTimeout() method. controller = client.Controller() controller.SetTimeout(1.5) # Prepare request. request = echo_service_pb2.EchoRequest() request.message = 'Hello World' # Call method. try: response = service.Echo(controller, request) except client.TimeoutError: print "ERROR: RPC timeout" sys.exit(1) except Exception as e: print "ERROR: RPC fail: %s" % e sys.exit(1)