def check_instance_idle(self):
    """Stop the ODPS instance once every graph has been finished for too long.

    Walks the graph infos of every session in ``self._session_refs``. A graph
    whose ``'end_time'`` is ``None`` is treated as still running. Otherwise
    the most recent ``'end_time'`` (or ``self._last_activity_time`` if that is
    later) counts as the last activity.

    When nothing is running and the last activity is older than
    ``self._idle_timeout`` seconds, the instance named by the
    ``MARS_K8S_POD_NAMESPACE`` environment variable is stopped through ODPS.
    In every other case the check re-schedules itself through ``self.ref()``
    with ``_delay=10``.
    """
    newest_activity = self._last_activity_time
    any_running = False

    # Flatten "sessions -> graph infos" into one lazy stream so a single
    # ``break`` is enough to stop at the first running graph.
    all_graph_infos = (
        graph_info
        for session_ref in self._session_refs.values()
        for graph_info in session_ref.get_graph_infos().values()
    )
    for info in all_graph_infos:
        finished_at = info.get('end_time')
        if finished_at is None:
            any_running = True
            break
        newest_activity = max(finished_at, newest_activity)

    timed_out = (not any_running
                 and newest_activity < time.time() - self._idle_timeout)
    if not timed_out:
        # Still busy or only recently idle: look again later.
        self.ref().check_instance_idle(_delay=10, _tell=True, _wait=False)
        return

    # timeout: we need to kill the instance
    from odps import ODPS
    from odps.accounts import BearerTokenAccount
    from cupid.runtime import context

    logger.warning('Timeout met, killing the instance now.')
    odps_entry = ODPS(
        None, None,
        account=BearerTokenAccount(context().get_bearer_token()),
        project=os.environ['ODPS_PROJECT_NAME'],
        endpoint=os.environ['ODPS_RUNTIME_ENDPOINT'],
    )
    odps_entry.stop_instance(os.environ['MARS_K8S_POD_NAMESPACE'])
def check_instance_idle(self):
    """Stop the ODPS instance when it has been idle longer than the timeout.

    Reads ``(has_running, active_time)`` from
    ``self._get_service_activity_info()``. If the reported active time differs
    from the one seen last, it becomes the new ``self._last_active_time``.
    Otherwise, if something is running, the last active time is set to now.

    If the last active time is older than ``self._idle_timeout`` seconds, the
    instance named by the ``MARS_K8S_POD_NAMESPACE`` environment variable is
    stopped through ODPS. Otherwise the last active time is written to the
    Cupid kv store under ``CUPID_LAST_IDLE_TIME_KEY`` and the check
    re-schedules itself through ``self.ref()`` with ``_delay=10``.
    """
    from cupid.runtime import context

    any_running, reported_active_time = self._get_service_activity_info()
    if reported_active_time != self._last_active_time_from_service:
        # The service reported a new timestamp: adopt it and remember it so
        # the next round can tell whether it changed again.
        self._last_active_time = self._last_active_time_from_service = \
            reported_active_time
    elif any_running:
        self._last_active_time = time.time()

    timed_out = self._last_active_time < time.time() - self._idle_timeout
    if not timed_out:
        # Store the current idle reference time, then look again later.
        store = context().kv_store()
        store[CUPID_LAST_IDLE_TIME_KEY] = str(self._last_active_time)
        self.ref().check_instance_idle(_delay=10, _tell=True, _wait=False)
        return

    # timeout: we need to kill the instance
    from odps import ODPS
    from odps.accounts import BearerTokenAccount

    logger.warning('Timeout met, killing the instance now.')
    odps_entry = ODPS(
        None, None,
        account=BearerTokenAccount(context().get_bearer_token()),
        project=os.environ['ODPS_PROJECT_NAME'],
        endpoint=os.environ['ODPS_RUNTIME_ENDPOINT'],
    )
    odps_entry.stop_instance(os.environ['MARS_K8S_POD_NAMESPACE'])
def _recv_exactly(sock, size):
    """Read exactly ``size`` bytes from ``sock``.

    A single ``recv`` call on a stream socket may return fewer bytes than
    requested, so keep reading until the whole message has arrived.

    Raises:
        EOFError: if the peer closes the connection before ``size`` bytes
            have been received.
    """
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            raise EOFError(
                'Socket closed with %d of %d bytes unread' % (remaining, size))
        chunks.append(chunk)
        remaining -= len(chunk)
    return b''.join(chunks)


def _handle_terminate_instance(sock):
    """Handle a "terminate instance" request received on ``sock``.

    The request is a 4-byte little-endian unsigned length followed by a
    pickled dict containing the key ``'instance_id'``. When the Cupid runtime
    context is ready, the named instance is stopped through an ODPS entry
    built from the context's bearer token and the ``ODPS_PROJECT_NAME`` /
    ``ODPS_RUNTIME_ENDPOINT`` environment variables. When the context is not
    ready, only a warning is logged.

    Any error is logged and reported to the peer as a failed result via
    ``_write_request_result``.

    NOTE(review): no result is written back on the success path or when the
    context is not ready; confirm that the peer does not wait for one.
    """
    from cupid.runtime import context, RuntimeContext
    from odps import ODPS
    from odps.accounts import BearerTokenAccount

    try:
        cmd_len, = struct.unpack('<I', _recv_exactly(sock, 4))
        # dict with key 'instance_id'
        # SECURITY: pickle.loads executes arbitrary code from the payload;
        # this is only acceptable if the socket peer is trusted.
        cmd_body = pickle.loads(_recv_exactly(sock, cmd_len))
        instance_id = cmd_body['instance_id']

        if not RuntimeContext.is_context_ready():
            logger.warning('Cupid context not ready')
        else:
            bearer_token = context().get_bearer_token()
            account = BearerTokenAccount(bearer_token)
            project = os.environ['ODPS_PROJECT_NAME']
            endpoint = os.environ['ODPS_RUNTIME_ENDPOINT']
            o = ODPS(None, None, account=account, project=project,
                     endpoint=endpoint)
            o.stop_instance(instance_id)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate.
        logger.exception('Failed to terminate instance')
        _write_request_result(sock, False, exc_info=sys.exc_info())