def post(self):
    """Handle a data volume request for the current session.

    Reads the ``action`` request argument and then either schedules an
    asynchronous attach/detach plugin task, reports the volume state via
    ``self._get_state``, or rejects the action as unknown. The outcome is
    written as a dict with ``code`` (0 on success, -1 on failure) and
    ``data`` keys.

    ``self.send_error()`` is called, and nothing is written, when the session
    id, the user id or the ``action`` argument is missing.
    """
    sessname = self.get_session_id()
    user_id = self.get_user_id()
    if (sessname is None) or (user_id is None):
        self.send_error()
        return

    mode = self.get_argument("action", False)
    if mode is False:
        JBoxEBSVolHandler.log_error("Unknown mode for ebs handler")
        self.send_error()
        return

    try:
        if mode in ("attach", "detach"):
            JBoxAsyncJob.async_plugin_task(
                JBoxEBSVolAsyncTask.__name__,
                {"action": mode, "user_id": user_id, "sessname": sessname}
            )
            response = {"code": 0, "data": ""}
        elif mode == "status":
            response = {"code": 0, "data": self._get_state(sessname, user_id)}
        else:
            response = {"code": -1, "data": "Unknown data volume operation " + mode}
    except Exception as ex:
        JBoxEBSVolHandler.log_error("exception in data volume operation")
        JBoxEBSVolHandler._get_logger().exception("exception in data volume operation")
        # str(ex) rather than ex.message: the ``message`` attribute is
        # deprecated and missing on Python 3, where reading it would raise
        # from inside this error handler.
        response = {"code": -1, "data": str(ex)}

    self.write(response)
def maintain(max_timeout=0, inactive_timeout=0):
    """Run one maintenance pass over all session containers.

    A container is invalidated and scheduled for asynchronous backup and
    cleanup when it started before the ``max_timeout`` cutoff, or when it is
    active and its last recorded ping is older than the ``inactive_timeout``
    cutoff. Active containers without a ping record get one created.

    Afterwards ping records for containers that no longer exist are removed,
    ``SessContainer.VALID_CONTAINERS`` is replaced with the name -> id map
    built during this pass, and disk usage status is refreshed.

    :param max_timeout: maximum age of a session in seconds. When not
        positive, the cutoff is the minimum datetime, so the age check
        effectively never triggers.
    :param inactive_timeout: maximum time since the last ping in seconds.
        When not positive, the cutoff is likewise the minimum datetime.
    """
    SessContainer.log_info("Starting container maintenance...")
    tnow = datetime.datetime.now(pytz.utc)
    tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)

    stop_before = (tnow - datetime.timedelta(seconds=max_timeout)) if (max_timeout > 0) else tmin
    stop_inactive_before = (tnow - datetime.timedelta(seconds=inactive_timeout)) \
        if (inactive_timeout > 0) else tmin

    all_containers = BaseContainer.session_containers(allcontainers=True)
    all_cnames = {}
    container_id_list = []
    for cdesc in all_containers:
        cid = cdesc['Id']
        cont = SessContainer(cid)
        container_id_list.append(cid)
        cname = cont.get_name()

        if cname is None:
            SessContainer.log_debug("Ignoring %s", cont.debug_str())
            continue

        all_cnames[cname] = cid
        c_is_active = cont.is_running() or cont.is_restarting()
        last_ping = SessContainer._get_last_ping(cname)

        # if we don't have a ping record, create one (we must have restarted)
        if (last_ping is None) and c_is_active:
            SessContainer.log_info("Discovered new container %s", cont.debug_str())
            SessContainer.record_ping(cname)

        start_time = cont.time_started()
        # A start time more than a year in the past is treated as "not set yet"
        # (indicates a container that is still starting up) and is skipped.
        start_time_not_zero = (tnow - start_time).total_seconds() < (365 * 24 * 60 * 60)
        if (start_time < stop_before) and start_time_not_zero:
            # don't allow running beyond the limit for long running sessions
            SessContainer.log_warn("Running beyond allowed time %s. Scheduling cleanup.", cont.debug_str())
            SessContainer.invalidate_container(cont.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
        elif (last_ping is not None) and c_is_active and (last_ping < stop_inactive_before):
            # if inactive for too long, stop it
            SessContainer.log_warn("Inactive beyond allowed time %s. Scheduling cleanup.", cont.debug_str())
            SessContainer.invalidate_container(cont.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)

    # Delete ping entries for non-existent containers. Iterate over a snapshot
    # of the keys: deleting from a dict while iterating its live key view
    # raises RuntimeError.
    for cname in list(SessContainer.PINGS.keys()):
        if cname not in all_cnames:
            del SessContainer.PINGS[cname]

    SessContainer.VALID_CONTAINERS = all_cnames
    VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
    SessContainer.log_info("Finished container maintenance.")
def handle_if_logout(self, cont):
    """Process the request as a logout if it carries ``logout=me``.

    On a logout request the container is invalidated, its backup and cleanup
    are scheduled asynchronously, a success response is written and True is
    returned. For any other request nothing is done and False is returned.
    """
    if self.get_argument('logout', False) != 'me':
        return False

    SessContainer.invalidate_container(cont.get_name())
    JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
    self.write({'code': 0, 'data': ''})
    return True
def try_launch_container(cls, user_id, max_hop=False):
    """Try to schedule a session container launch for ``user_id`` here.

    With ``max_hop`` set, the launch is scheduled right away if this
    instance's 'Load' statistic is below 100. Otherwise, when there is no
    running container for the user and ``Compute.should_accept_session``
    declines, any existing container is invalidated and scheduled for backup
    and cleanup, and False is returned. In all remaining cases the launch is
    scheduled and True is returned.
    """
    session_name = unique_sessname(user_id)
    existing = SessContainer.get_by_name(session_name)
    cls.log_debug("have existing container for %s: %r", session_name, None != existing)
    if existing is not None:
        cls.log_debug("container running: %r", existing.is_running())

    if max_hop:
        own_load = Compute.get_instance_stats(Compute.get_instance_id(), 'Load')
        if own_load < 100:
            SessContainer.invalidate_container(session_name)
            JBoxAsyncJob.async_launch_by_name(session_name, user_id, True)
            return True

    leader = is_proposed_cluster_leader()
    needs_start = (existing is None) or (not existing.is_running())
    if needs_start and not Compute.should_accept_session(leader):
        if existing is not None:
            SessContainer.invalidate_container(existing.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(existing.dockid)
        return False

    SessContainer.invalidate_container(session_name)
    JBoxAsyncJob.async_launch_by_name(session_name, user_id, True)
    return True
def get_cluster_api_status():
    """Collect the API status reported by every instance in the cluster.

    :return: dict mapping each instance to the ``data`` field of its status
        reply. Instances that reply with a non-zero ``code``, or whose query
        raises, are logged and left out of the result.
    """
    result = dict()
    for inst in Compute.get_all_instances():
        try:
            api_status = JBoxAsyncJob.sync_api_status(inst)
            if api_status['code'] == 0:
                result[inst] = api_status['data']
            else:
                APIContainer.log_error("error fetching api status from %s", inst)
        except Exception:
            # Best effort per instance: one failing instance must not abort the
            # scan. ``Exception`` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            APIContainer.log_error("exception fetching api status from %s", inst)

    APIContainer.log_debug("api status: %r", result)
    return result
def post(self):
    """Handle a data volume request for the current session.

    The ``action`` request argument selects the operation: ``attach`` and
    ``detach`` schedule an asynchronous plugin task, ``status`` returns the
    result of ``self._get_state``, and anything else is reported as an
    unknown operation. The result is written as a dict with ``code`` (0 on
    success, -1 on failure) and ``data`` keys.

    If the session id, user id or ``action`` argument is missing,
    ``self.send_error()`` is called and no response dict is written.
    """
    sessname = self.get_session_id()
    user_id = self.get_user_id()
    if (sessname is None) or (user_id is None):
        self.send_error()
        return

    mode = self.get_argument('action', False)
    if mode is False:
        JBoxEBSVolHandler.log_error("Unknown mode for ebs handler")
        self.send_error()
        return

    try:
        if mode in ('attach', 'detach'):
            JBoxAsyncJob.async_plugin_task(JBoxEBSVolAsyncTask.__name__, {
                'action': mode,
                'user_id': user_id,
                'sessname': sessname
            })
            response = {'code': 0, 'data': ''}
        elif mode == 'status':
            response = {
                'code': 0,
                'data': self._get_state(sessname, user_id)
            }
        else:
            response = {
                'code': -1,
                'data': 'Unknown data volume operation ' + mode
            }
    except Exception as ex:
        JBoxEBSVolHandler.log_error("exception in data volume operation")
        JBoxEBSVolHandler._get_logger().exception(
            "exception in data volume operation")
        # ``ex.message`` is deprecated and does not exist on Python 3; str(ex)
        # yields the error text without risking an AttributeError here.
        response = {'code': -1, 'data': str(ex)}

    self.write(response)
def find_logged_in_instance(user_id):
    """Find the cluster instance that holds a session for ``user_id``.

    Each instance is asked for its session status; the first one whose
    ``data`` contains the user's container id ("/" + session name) is
    returned. Instances whose query fails are logged and skipped.

    :return: the matching instance, or None when no instance reports the
        session.
    """
    container_id = "/" + unique_sessname(user_id)
    for inst in Compute.get_all_instances():
        try:
            sessions = JBoxAsyncJob.sync_session_status(inst)['data']
            if len(sessions) > 0 and container_id in sessions:
                return inst
        except Exception:
            # Best effort: keep scanning the remaining instances. ``Exception``
            # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
            JBoxHandler.log_error("Error receiving sessions list from %r", inst)
    return None
def get_active_sessions():
    """Return the set of session ids reported by all cluster instances.

    Every instance is asked for its session status and the keys of the
    returned ``data`` mapping are merged into one set. Instances whose query
    fails are logged and contribute nothing.
    """
    active_sessions = set()
    for inst in Compute.get_all_instances():
        try:
            sessions = JBoxAsyncJob.sync_session_status(inst)['data']
            if len(sessions) > 0:
                active_sessions.update(sessions.keys())
        except Exception:
            # Best effort per instance; ``Exception`` rather than a bare except
            # so KeyboardInterrupt/SystemExit are not swallowed.
            SessContainer.log_error("Error receiving sessions list from %r", inst)
    return active_sessions
def get_active_sessions():
    """Gather the ids of all sessions known to any instance in the cluster.

    For each instance, the keys of the ``data`` mapping in its session status
    reply are added to the result. A failure while querying an instance is
    logged and that instance is skipped.

    :return: set of session ids.
    """
    instances = Compute.get_all_instances()
    active_sessions = set()
    for inst in instances:
        try:
            sessions = JBoxAsyncJob.sync_session_status(inst)['data']
            if len(sessions) > 0:
                active_sessions.update(sessions.keys())
        except Exception:
            # Catch ``Exception`` only: a bare except would also trap
            # KeyboardInterrupt and SystemExit.
            SessContainer.log_error(
                "Error receiving sessions list from %r", inst)
    return active_sessions
def handle_if_instance_info(self, is_allowed):
    """Serve an ``instance_info`` statistics request, if this is one.

    Supported values of the ``instance_info`` argument:

    * ``load``     - cluster average load plus the load of each instance
    * ``sessions`` - session status ``data`` of each instance
    * ``apis``     - result of ``APIContainer.get_cluster_api_status()``

    Any other value, or any failure while collecting the data, produces an
    error response with code -1.

    :param is_allowed: whether the requesting user may view these stats; when
        false a permission error response is written instead.
    :return: False (nothing written) when the request has no
        ``instance_info`` argument, True once a response has been written.
    """
    stats = self.get_argument('instance_info', None)
    if stats is None:
        return False

    if not is_allowed:
        AdminHandler.log_error("Show instance info not allowed for user")
        response = {'code': -1, 'data': 'You do not have permissions to view these stats'}
    else:
        try:
            if stats == 'load':
                result = {}
                # get cluster loads
                average_load = Compute.get_cluster_average_stats('Load')
                if average_load is not None:
                    result['Average Load'] = average_load

                machine_loads = Compute.get_cluster_stats('Load')
                if machine_loads is not None:
                    # items() works on both Python 2 and 3 (iteritems() is 2-only)
                    for n, v in machine_loads.items():
                        result['Instance ' + n] = v
            elif stats == 'sessions':
                result = dict()
                for inst in Compute.get_all_instances():
                    try:
                        result[inst] = JBoxAsyncJob.sync_session_status(inst)['data']
                    except Exception:
                        # best effort: skip instances that fail to respond
                        JBoxHandler.log_error("Error receiving sessions list from %r", inst)
            elif stats == 'apis':
                result = APIContainer.get_cluster_api_status()
            else:
                raise Exception("unknown command %s" % (stats,))

            response = {'code': 0, 'data': result}
        except Exception:
            # Also handles the "unknown command" exception raised above.
            AdminHandler.log_error("exception while getting stats")
            AdminHandler._get_logger().exception("exception while getting stats")
            response = {'code': -1, 'data': 'error getting stats'}

    self.write(response)
    return True
def try_launch_container(cls, user_id, max_hop=False):
    """Schedule a session container launch for ``user_id`` if possible.

    Returns True when a launch by name was scheduled. Returns False when the
    user has no running container and ``Compute.should_accept_session``
    refuses the session; in that case an existing (non-running) container is
    invalidated and scheduled for backup and cleanup. With ``max_hop`` set,
    a 'Load' statistic below 100 on this instance schedules the launch
    without consulting ``should_accept_session``.
    """
    name = unique_sessname(user_id)
    container = SessContainer.get_by_name(name)

    def _schedule_launch():
        # Invalidate any cached entry, then launch asynchronously.
        SessContainer.invalidate_container(name)
        JBoxAsyncJob.async_launch_by_name(name, user_id, True)
        return True

    cls.log_debug("have existing container for %s: %r", name, None != container)
    if container is not None:
        cls.log_debug("container running: %r", container.is_running())

    if max_hop:
        load_here = Compute.get_instance_stats(Compute.get_instance_id(), 'Load')
        if load_here < 100:
            return _schedule_launch()

    leader_flag = is_proposed_cluster_leader()
    not_running = (container is None) or (not container.is_running())
    if not_running and (not Compute.should_accept_session(leader_flag)):
        if container is not None:
            SessContainer.invalidate_container(container.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(container.dockid)
        return False

    return _schedule_launch()
def maintain(max_timeout=0, inactive_timeout=0):
    """Run one maintenance pass over all session containers.

    For every named session container, exactly one of the following applies:

    * started before the ``max_timeout`` cutoff: invalidated and scheduled
      for asynchronous backup and cleanup;
    * active, but last pinged before the ``inactive_timeout`` cutoff:
      invalidated and scheduled for asynchronous backup and cleanup;
    * not active and finished more than 10 minutes ago: deleted without
      backup and dropped from the lists built during this pass.

    Active containers without a ping record get one created. Finally, ping
    records of containers that no longer exist are removed,
    ``SessContainer.VALID_CONTAINERS`` is replaced with the name -> id map
    built during this pass, and disk usage status is refreshed.

    :param max_timeout: maximum age of a session in seconds. When not
        positive, the cutoff is the minimum datetime, so the age check
        effectively never triggers.
    :param inactive_timeout: maximum time since the last ping in seconds.
        When not positive, the cutoff is likewise the minimum datetime.
    """
    SessContainer.log_info("Starting container maintenance...")
    tnow = datetime.datetime.now(pytz.utc)
    tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)

    stop_before = (tnow - datetime.timedelta(seconds=max_timeout)) if (
        max_timeout > 0) else tmin
    stop_inactive_before = (tnow - datetime.timedelta(
        seconds=inactive_timeout)) if (inactive_timeout > 0) else tmin

    all_containers = BaseContainer.session_containers(allcontainers=True)
    all_cnames = {}
    container_id_list = []
    for cdesc in all_containers:
        cid = cdesc['Id']
        cont = SessContainer(cid)
        container_id_list.append(cid)
        cname = cont.get_name()

        if cname is None:
            SessContainer.log_debug("Ignoring %s", cont.debug_str())
            continue

        all_cnames[cname] = cid
        c_is_active = cont.is_running() or cont.is_restarting()
        last_ping = SessContainer._get_last_ping(cname)

        # if we don't have a ping record, create one (we must have restarted)
        if (last_ping is None) and c_is_active:
            SessContainer.log_info("Discovered new container %s",
                                   cont.debug_str())
            SessContainer.record_ping(cname)

        start_time = cont.time_started()
        # A start time more than a year in the past is treated as "not set yet"
        # (indicates a container that is still starting up) and is skipped.
        start_time_not_zero = (tnow - start_time).total_seconds() < (
            365 * 24 * 60 * 60)
        if (start_time < stop_before) and start_time_not_zero:
            # don't allow running beyond the limit for long running sessions
            SessContainer.log_warn(
                "Running beyond allowed time %s. Scheduling cleanup.",
                cont.debug_str())
            SessContainer.invalidate_container(cont.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
        elif (last_ping is not None) and c_is_active and (
                last_ping < stop_inactive_before):
            # if inactive for too long, stop it
            SessContainer.log_warn(
                "Inactive beyond allowed time %s. Scheduling cleanup.",
                cont.debug_str())
            SessContainer.invalidate_container(cont.get_name())
            JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
        elif not c_is_active and (
                (tnow - cont.time_finished()).total_seconds() > (10 * 60)):
            # stopped for more than 10 minutes: remove it and forget about it
            SessContainer.log_warn("Dead container %s. Deleting.",
                                   cont.debug_str())
            cont.delete(backup=False)
            del all_cnames[cname]
            container_id_list.remove(cid)

    # Delete ping entries for non-existent containers. Iterate over a snapshot
    # of the keys: deleting from a dict while iterating its live key view
    # raises RuntimeError.
    for cname in list(SessContainer.PINGS.keys()):
        if cname not in all_cnames:
            del SessContainer.PINGS[cname]

    SessContainer.VALID_CONTAINERS = all_cnames
    VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
    SessContainer.log_info("Finished container maintenance.")