def State(self, request, context):
    """Return the experiment's state, refreshing it from its Kubernetes job.

    While the stored state is STARTING or RUNNING, the job status is fetched
    and mapped onto RUNNING / SUCCEEDED / FAILED. Once the experiment has left
    the STARTING/RUNNING states, the job is cleaned up and the elapsed run
    time is added to ``experiment.uptime``.

    Args:
        request: Request message carrying the experiment ``id``.
        context: RPC context (unused here).

    Returns:
        The ``job_state_serializer`` dump of ``{"state": <experiment state>}``.
    """
    # validate request payload
    data = id_serializer.load(request)
    experiment = documents.Experiment.objects(deleted=False).get(id=data['id'])

    if experiment.state in ["STARTING", "RUNNING"]:
        response = utils.get_kubernetes_job_status(utils.get_k8s_name(experiment))
        if response.get("active"):
            experiment.state = "RUNNING"
        elif response.get("succeeded"):
            experiment.state = "SUCCEEDED"
        elif response.get("failed") and int(response.get("failed")) >= 1:
            experiment.state = "FAILED"

        # The job reached a terminal state: release its resources and
        # account for the time it ran.
        if experiment.state not in ["STARTING", "RUNNING"]:
            utils.clean_kubernetes_job(utils.get_k8s_name(experiment))
            raw_completion = response.get("completion_time")
            if raw_completion:
                completion_time = datetime.datetime.strptime(
                    raw_completion, "%Y-%m-%dT%H:%M:%S"
                )
            else:
                # A job without a completion time (e.g. a failed one) would
                # make strptime raise TypeError; fall back to the current UTC
                # time, the same clock the experiment Stop handler uses.
                # NOTE(review): assumes create_at is stored in UTC - confirm.
                completion_time = datetime.datetime.utcnow()
            delta = completion_time - experiment.create_at
            experiment.uptime += int(delta.total_seconds())

        experiment.last_update = datetime.datetime.now()
        experiment.save()

    return job_state_serializer.dump({"state": experiment.state})
def State(self, request, context):
    """Return the model API's state, refreshing it from its Kubernetes deployment.

    While the stored state is STARTING or RUNNING, the deployment status is
    fetched once: the state becomes RUNNING when ``available_replicas`` is
    truthy and STARTING otherwise. The document is then saved.

    Args:
        request: Request message carrying the model API ``id``.
        context: RPC context (unused here).

    Returns:
        The ``job_state_serializer`` dump of ``{"state": <model API state>}``.
    """
    # validate request payload
    data = id_serializer.load(request)
    mdl = documents.ModelApis.objects(deleted=False).get(id=data['id'])

    if mdl.state in ["STARTING", "RUNNING"]:
        # Query the deployment once. The status response is never stored in
        # ``mdl.state``; only the derived state string is.
        response = utils.get_kubernetes_deployment_status(utils.get_k8s_name(mdl))
        if response.get("available_replicas"):
            mdl.state = "RUNNING"
        else:
            mdl.state = "STARTING"
        mdl.last_update = datetime.datetime.now()
        mdl.save()

    return job_state_serializer.dump({"state": mdl.state})
def Delete(self, request, context):
    """Soft-delete a workspace and clean up its persistent volume claim.

    Only a workspace whose state is STOPPED or CREATED is deleted; for any
    other state a status-400 payload is returned and nothing is changed.

    Args:
        request: Request message carrying the workspace ``id``.
        context: RPC context (unused here).

    Returns:
        The ``status_serializer`` dump of a ``status``/``message`` mapping.
    """
    # validate request payload
    payload = id_serializer.load(request)
    workspace = documents.Workspace.objects(deleted=False).get(id=payload['id'])

    # Guard: anything not stopped/created is refused.
    if workspace.state not in ("STOPPED", "CREATED"):
        return status_serializer.dump(
            {"status": 400, "message": "Cannot delete starting or running workspace."}
        )

    # delete pvc
    pvc_name = "data-{}".format(utils.get_k8s_name(workspace))
    utils.clean_kubernetes_pvc(pvc_name)

    # Mark the document as deleted rather than removing it.
    workspace.deleted = True
    workspace.last_update = datetime.datetime.now()
    workspace.save()

    return status_serializer.dump(
        {"status": 200, "message": "Successfully delete workspace."}
    )
def State(self, request, context):
    """Return the workspace's state, refreshing it from its Kubernetes deployment.

    A workspace stored as STARTING or RUNNING is re-evaluated: it is RUNNING
    when the deployment status reports a truthy ``available_replicas`` and
    STARTING otherwise, and the document is saved. Any other stored state is
    returned untouched.

    Args:
        request: Request message carrying the workspace ``id``.
        context: RPC context (unused here).

    Returns:
        The ``job_state_serializer`` dump of ``{"state": <workspace state>}``.
    """
    # validate request payload
    payload = id_serializer.load(request)
    workspace = documents.Workspace.objects(deleted=False).get(id=payload['id'])

    # Nothing to refresh unless the workspace is starting or running.
    if workspace.state not in ("STARTING", "RUNNING"):
        return job_state_serializer.dump({"state": workspace.state})

    deployment = utils.get_kubernetes_deployment_status(
        utils.get_k8s_name(workspace)
    )
    workspace.state = (
        "RUNNING" if deployment.get("available_replicas") else "STARTING"
    )
    workspace.last_update = datetime.datetime.now()
    workspace.save()

    return job_state_serializer.dump({"state": workspace.state})
def Stop(self, request, context):
    """Abort a starting or running experiment and clean up its Kubernetes job.

    The experiment is marked ABORTED, the seconds elapsed since ``create_at``
    (measured against the current UTC time) are added to ``uptime``, and the
    document is saved. Experiments in any other state get a status-400 payload.

    Args:
        request: Request message carrying the experiment ``id``.
        context: RPC context (unused here).

    Returns:
        The ``status_serializer`` dump of a ``status``/``message`` mapping.
    """
    # validate request payload
    payload = id_serializer.load(request)
    experiment = documents.Experiment.objects(deleted=False).get(id=payload['id'])

    if experiment.state in ("STARTING", "RUNNING"):
        # stop and clean all resources
        job_name = utils.get_k8s_name(experiment)
        utils.clean_kubernetes_job(job_name)
        experiment.state = "ABORTED"

        # Account for the time the experiment has been alive.
        elapsed = datetime.datetime.utcnow() - experiment.create_at
        elapsed_seconds = int(elapsed.total_seconds())
        experiment.uptime += elapsed_seconds

        experiment.last_update = datetime.datetime.now()
        experiment.save()
        return status_serializer.dump(
            {"status": 200, "message": "Successfully stopped Experiment."}
        )

    return status_serializer.dump(
        {"status": 400, "message": "Cannot stop not running or starting experiment."}
    )
def Stop(self, request, context):
    """Stop a starting or running model API and clean up its Kubernetes deployment.

    The model API is marked STOPPED, the seconds elapsed since ``last_start``
    (measured against the current UTC time) are added to ``uptime``, and the
    document is saved. Model APIs in any other state get a status-400 payload.

    Args:
        request: Request message carrying the model API ``id``.
        context: RPC context (unused here).

    Returns:
        The ``status_serializer`` dump of a ``status``/``message`` mapping.
    """
    stoppable_states = ("STARTING", "RUNNING")

    # validate request payload
    payload = id_serializer.load(request)
    mdl = documents.ModelApis.objects(deleted=False).get(id=payload['id'])

    # Guard: only a starting or running model API can be stopped.
    if mdl.state not in stoppable_states:
        rejection = {
            "status": 400,
            "message": "Cannot stop not running or starting modelapis.",
        }
        return status_serializer.dump(rejection)

    # stop and clean all resources
    deployment_name = utils.get_k8s_name(mdl)
    utils.clean_kubernetes_deployment(deployment_name)
    mdl.state = "STOPPED"

    # Add the duration of this run to the accumulated uptime.
    run_duration = datetime.datetime.utcnow() - mdl.last_start
    mdl.uptime += int(run_duration.total_seconds())

    mdl.last_update = datetime.datetime.now()
    mdl.save()

    confirmation = {"status": 200, "message": "Successfully stopped modelapis."}
    return status_serializer.dump(confirmation)