Example #1
0
    def post(self):
        """Handle a data-volume request selected by the ``action`` argument.

        ``attach`` and ``detach`` are queued as a ``JBoxEBSVolAsyncTask``
        plugin task; ``status`` replies with
        ``self._get_state(sessname, user_id)``. The reply written back is a
        dict ``{"code": ..., "data": ...}`` with code 0 on success and -1 for
        an unrecognised action or when the operation raises.

        ``send_error()`` is called, and nothing is written, when the session
        id, the user id or the ``action`` argument is missing.
        """
        sessname = self.get_session_id()
        user_id = self.get_user_id()
        if (sessname is None) or (user_id is None):
            self.send_error()
            return

        # False is the "argument not supplied" sentinel passed as the default.
        mode = self.get_argument("action", False)
        if mode is False:
            JBoxEBSVolHandler.log_error("Unknown mode for ebs handler")
            self.send_error()
            return

        try:
            if mode in ("attach", "detach"):
                JBoxAsyncJob.async_plugin_task(
                    JBoxEBSVolAsyncTask.__name__, {"action": mode, "user_id": user_id, "sessname": sessname}
                )
                response = {"code": 0, "data": ""}
            elif mode == "status":
                response = {"code": 0, "data": self._get_state(sessname, user_id)}
            else:
                response = {"code": -1, "data": "Unknown data volume operation " + mode}
        except Exception as ex:
            JBoxEBSVolHandler.log_error("exception in data volume operation")
            JBoxEBSVolHandler._get_logger().exception("exception in data volume operation")
            # BaseException.message is deprecated since Python 2.6 and does not
            # exist on Python 3; str(ex) yields the message on both.
            response = {"code": -1, "data": str(ex)}

        self.write(response)
Example #2
0
    def maintain(max_timeout=0, inactive_timeout=0):
        """Schedule cleanup of session containers that ran or idled too long.

        Every container returned by
        ``BaseContainer.session_containers(allcontainers=True)`` is inspected:

        * started more than ``max_timeout`` seconds ago (but within the last
          year) -> invalidated and queued for backup-and-cleanup;
        * otherwise, active with a last ping older than ``inactive_timeout``
          seconds -> invalidated and queued for backup-and-cleanup.

        A timeout of 0 (or less) disables the corresponding check. Afterwards
        ping records for names that were not seen are removed,
        ``SessContainer.VALID_CONTAINERS`` is replaced by the name -> id
        mapping that was built, and ``VolMgr.refresh_disk_use_status`` is
        called with the ids of all containers seen.

        NOTE(review): comparisons use timezone-aware UTC datetimes, so
        ``time_started()`` and the ping timestamps are presumably aware too -
        confirm.
        """
        SessContainer.log_info("Starting container maintenance...")
        tnow = datetime.datetime.now(pytz.utc)
        tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)
        one_year_secs = 365 * 24 * 60 * 60

        # With a disabled timeout the threshold is the smallest datetime, so
        # no real timestamp can compare as earlier than it.
        stop_before = (tnow - datetime.timedelta(seconds=max_timeout)) if (max_timeout > 0) else tmin
        stop_inactive_before = (tnow - datetime.timedelta(seconds=inactive_timeout)) if (inactive_timeout > 0) else tmin

        all_containers = BaseContainer.session_containers(allcontainers=True)
        all_cnames = {}
        container_id_list = []
        for cdesc in all_containers:
            cid = cdesc['Id']
            cont = SessContainer(cid)
            container_id_list.append(cid)
            cname = cont.get_name()

            if cname is None:
                SessContainer.log_debug("Ignoring %s", cont.debug_str())
                continue

            all_cnames[cname] = cid

            c_is_active = cont.is_running() or cont.is_restarting()
            last_ping = SessContainer._get_last_ping(cname)

            # if we don't have a ping record, create one (we must have restarted)
            if (last_ping is None) and c_is_active:
                SessContainer.log_info("Discovered new container %s", cont.debug_str())
                SessContainer.record_ping(cname)

            start_time = cont.time_started()
            # check that start time is not absurdly small (indicates a container that's starting up)
            start_time_not_zero = (tnow - start_time).total_seconds() < one_year_secs
            if (start_time < stop_before) and start_time_not_zero:
                # don't allow running beyond the limit for long running sessions
                SessContainer.log_warn("Running beyond allowed time %s. Scheduling cleanup.", cont.debug_str())
                SessContainer.invalidate_container(cname)
                JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
            elif (last_ping is not None) and c_is_active and (last_ping < stop_inactive_before):
                # if inactive for too long, stop it
                SessContainer.log_warn("Inactive beyond allowed time %s. Scheduling cleanup.", cont.debug_str())
                SessContainer.invalidate_container(cname)
                JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)

        # delete ping entries for non-existent containers; iterate over a
        # snapshot of the keys because entries are removed inside the loop
        for cname in list(SessContainer.PINGS.keys()):
            if cname not in all_cnames:
                del SessContainer.PINGS[cname]

        SessContainer.VALID_CONTAINERS = all_cnames
        VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
        SessContainer.log_info("Finished container maintenance.")
Example #3
0
 def handle_if_logout(self, cont):
     """Process a ``logout=me`` request for the given container.

     Returns False without side effects unless the ``logout`` argument is
     exactly ``'me'``. Otherwise the container is invalidated, a
     backup-and-cleanup job is queued for ``cont.dockid``, a success reply
     is written and True is returned.
     """
     if self.get_argument('logout', False) != 'me':
         return False

     container_name = cont.get_name()
     SessContainer.invalidate_container(container_name)
     JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
     self.write({'code': 0, 'data': ''})
     return True
Example #4
0
    def try_launch_container(cls, user_id, max_hop=False):
        """Queue a session container launch for ``user_id`` if allowed.

        With ``max_hop`` set, the launch is queued as soon as this instance's
        'Load' statistic is below 100. Otherwise (or when the load is not
        below 100) the request is refused when the user has no running
        container and ``Compute.should_accept_session`` declines; an existing
        non-running container is then invalidated and queued for
        backup-and-cleanup.

        Returns True when a launch job was queued, False when the session
        was refused.
        """
        sessname = unique_sessname(user_id)
        cont = SessContainer.get_by_name(sessname)
        cls.log_debug("have existing container for %s: %r", sessname,
                      cont is not None)
        if cont is not None:
            cls.log_debug("container running: %r", cont.is_running())

        if max_hop:
            self_load = Compute.get_instance_stats(Compute.get_instance_id(),
                                                   'Load')
            # NOTE(review): if get_instance_stats can return None, this
            # comparison is True on Python 2 and a TypeError on Python 3 -
            # confirm the intended behaviour for an unknown load.
            if self_load < 100:
                SessContainer.invalidate_container(sessname)
                JBoxAsyncJob.async_launch_by_name(sessname, user_id, True)
                return True

        is_leader = is_proposed_cluster_leader()
        if ((cont is None) or (not cont.is_running())) and (
                not Compute.should_accept_session(is_leader)):
            # Refusing the session: clean up a leftover container, if any.
            if cont is not None:
                SessContainer.invalidate_container(cont.get_name())
                JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
            return False

        SessContainer.invalidate_container(sessname)
        JBoxAsyncJob.async_launch_by_name(sessname, user_id, True)
        return True
Example #5
0
 def get_cluster_api_status():
     """Collect the API status reported by each instance in the cluster.

     Returns a dict mapping every instance from
     ``Compute.get_all_instances()`` whose status reply has ``code`` 0 to
     that reply's ``data``. Instances that answer with another code, or
     whose query raises, are logged and left out of the result.
     """
     result = dict()
     for inst in Compute.get_all_instances():
         try:
             api_status = JBoxAsyncJob.sync_api_status(inst)
             if api_status['code'] == 0:
                 result[inst] = api_status['data']
             else:
                 APIContainer.log_error("error fetching api status from %s", inst)
         except Exception:
             # Best effort per instance, but let SystemExit and
             # KeyboardInterrupt propagate instead of swallowing them.
             APIContainer.log_error("exception fetching api status from %s", inst)
     APIContainer.log_debug("api status: %r", result)
     return result
Example #6
0
    def post(self):
        """Handle a data-volume request selected by the ``action`` argument.

        ``attach`` and ``detach`` are queued as a ``JBoxEBSVolAsyncTask``
        plugin task; ``status`` replies with
        ``self._get_state(sessname, user_id)``. The reply written back is a
        dict ``{'code': ..., 'data': ...}`` with code 0 on success and -1 for
        an unrecognised action or when the operation raises.

        ``send_error()`` is called, and nothing is written, when the session
        id, the user id or the ``action`` argument is missing.
        """
        sessname = self.get_session_id()
        user_id = self.get_user_id()
        if (sessname is None) or (user_id is None):
            self.send_error()
            return

        # False is the "argument not supplied" sentinel passed as the default.
        mode = self.get_argument('action', False)
        if mode is False:
            JBoxEBSVolHandler.log_error("Unknown mode for ebs handler")
            self.send_error()
            return

        try:
            if mode in ('attach', 'detach'):
                JBoxAsyncJob.async_plugin_task(JBoxEBSVolAsyncTask.__name__, {
                    'action': mode,
                    'user_id': user_id,
                    'sessname': sessname
                })
                response = {'code': 0, 'data': ''}
            elif mode == 'status':
                response = {
                    'code': 0,
                    'data': self._get_state(sessname, user_id)
                }
            else:
                response = {
                    'code': -1,
                    'data': 'Unknown data volume operation ' + mode
                }
        except Exception as ex:
            JBoxEBSVolHandler.log_error("exception in data volume operation")
            JBoxEBSVolHandler._get_logger().exception(
                "exception in data volume operation")
            # BaseException.message is deprecated since Python 2.6 and does not
            # exist on Python 3; str(ex) yields the message on both.
            response = {'code': -1, 'data': str(ex)}

        self.write(response)
Example #7
0
    def find_logged_in_instance(user_id):
        """Return the first instance that lists the user's session container.

        The container is looked up as ``"/" + unique_sessname(user_id)`` in
        the ``data`` of each instance's session status. Instances whose
        query or reply raises are logged and skipped. Returns None when no
        instance reports the container.
        """
        container_id = "/" + unique_sessname(user_id)

        for inst in Compute.get_all_instances():
            try:
                sessions = JBoxAsyncJob.sync_session_status(inst)['data']
                if len(sessions) > 0 and container_id in sessions:
                    return inst
            except Exception:
                # Best effort per instance, but let SystemExit and
                # KeyboardInterrupt propagate instead of swallowing them.
                JBoxHandler.log_error("Error receiving sessions list from %r", inst)
        return None
Example #8
0
    def get_active_sessions():
        """Return the set of session ids reported across the cluster.

        Each instance from ``Compute.get_all_instances()`` is asked for its
        session status and the keys of the reply's ``data`` are merged into
        one set. Instances whose query or reply raises are logged and
        skipped.
        """
        active_sessions = set()
        for inst in Compute.get_all_instances():
            try:
                sessions = JBoxAsyncJob.sync_session_status(inst)['data']
                if len(sessions) > 0:
                    active_sessions.update(sessions.keys())
            except Exception:
                # Best effort per instance, but let SystemExit and
                # KeyboardInterrupt propagate instead of swallowing them.
                SessContainer.log_error("Error receiving sessions list from %r", inst)

        return active_sessions
Example #9
0
    def find_logged_in_instance(user_id):
        """Return the first instance that lists the user's session container.

        The container is looked up as ``"/" + unique_sessname(user_id)`` in
        the ``data`` of each instance's session status. Instances whose
        query or reply raises are logged and skipped. Returns None when no
        instance reports the container.
        """
        container_id = "/" + unique_sessname(user_id)

        for inst in Compute.get_all_instances():
            try:
                sessions = JBoxAsyncJob.sync_session_status(inst)['data']
                if len(sessions) > 0 and container_id in sessions:
                    return inst
            except Exception:
                # Best effort per instance, but let SystemExit and
                # KeyboardInterrupt propagate instead of swallowing them.
                JBoxHandler.log_error("Error receiving sessions list from %r",
                                      inst)
        return None
Example #10
0
 def get_cluster_api_status():
     """Collect the API status reported by each instance in the cluster.

     Returns a dict mapping every instance from
     ``Compute.get_all_instances()`` whose status reply has ``code`` 0 to
     that reply's ``data``. Instances that answer with another code, or
     whose query raises, are logged and left out of the result.
     """
     result = dict()
     for inst in Compute.get_all_instances():
         try:
             api_status = JBoxAsyncJob.sync_api_status(inst)
             if api_status['code'] == 0:
                 result[inst] = api_status['data']
             else:
                 APIContainer.log_error("error fetching api status from %s",
                                        inst)
         except Exception:
             # Best effort per instance, but let SystemExit and
             # KeyboardInterrupt propagate instead of swallowing them.
             APIContainer.log_error("exception fetching api status from %s",
                                    inst)
     APIContainer.log_debug("api status: %r", result)
     return result
Example #11
0
    def get_active_sessions():
        """Return the set of session ids reported across the cluster.

        Each instance from ``Compute.get_all_instances()`` is asked for its
        session status and the keys of the reply's ``data`` are merged into
        one set. Instances whose query or reply raises are logged and
        skipped.
        """
        active_sessions = set()
        for inst in Compute.get_all_instances():
            try:
                sessions = JBoxAsyncJob.sync_session_status(inst)['data']
                if len(sessions) > 0:
                    active_sessions.update(sessions.keys())
            except Exception:
                # Best effort per instance, but let SystemExit and
                # KeyboardInterrupt propagate instead of swallowing them.
                SessContainer.log_error(
                    "Error receiving sessions list from %r", inst)

        return active_sessions
Example #12
0
    def handle_if_instance_info(self, is_allowed):
        """Answer an ``instance_info`` request, if the request carries one.

        Returns False without writing anything when the ``instance_info``
        argument is absent. Otherwise a ``{'code': ..., 'data': ...}`` reply
        is written and True is returned:

        * ``is_allowed`` false -> code -1 with a permissions message;
        * ``load`` -> cluster average load plus one entry per instance;
        * ``sessions`` -> session status ``data`` per instance (instances
          that fail are logged and omitted);
        * ``apis`` -> ``APIContainer.get_cluster_api_status()``;
        * anything else, or any failure -> code -1, 'error getting stats'.
        """
        stats = self.get_argument('instance_info', None)
        if stats is None:
            return False

        if not is_allowed:
            AdminHandler.log_error("Show instance info not allowed for user")
            response = {'code': -1, 'data': 'You do not have permissions to view these stats'}
        else:
            try:
                if stats == 'load':
                    result = {}
                    # get cluster loads
                    average_load = Compute.get_cluster_average_stats('Load')
                    if average_load is not None:
                        result['Average Load'] = average_load

                    machine_loads = Compute.get_cluster_stats('Load')
                    if machine_loads is not None:
                        # items() exists on both Python 2 and 3 dicts,
                        # unlike iteritems().
                        for n, v in machine_loads.items():
                            result['Instance ' + n] = v
                elif stats == 'sessions':
                    result = dict()
                    # Iterate the instances directly so `inst` is always bound
                    # when the error handler logs it.
                    for inst in Compute.get_all_instances():
                        try:
                            result[inst] = JBoxAsyncJob.sync_session_status(inst)['data']
                        except Exception:
                            JBoxHandler.log_error("Error receiving sessions list from %r", inst)
                elif stats == 'apis':
                    result = APIContainer.get_cluster_api_status()
                else:
                    raise Exception("unknown command %s" % (stats,))

                response = {'code': 0, 'data': result}
            except Exception:
                # Report any failure to the client, but let SystemExit and
                # KeyboardInterrupt propagate.
                AdminHandler.log_error("exception while getting stats")
                AdminHandler._get_logger().exception("exception while getting stats")
                response = {'code': -1, 'data': 'error getting stats'}

        self.write(response)
        return True
Example #13
0
    def try_launch_container(cls, user_id, max_hop=False):
        """Queue a session container launch for ``user_id`` if allowed.

        With ``max_hop`` set, the launch is queued as soon as this instance's
        'Load' statistic is below 100. Otherwise (or when the load is not
        below 100) the request is refused when the user has no running
        container and ``Compute.should_accept_session`` declines; an existing
        non-running container is then invalidated and queued for
        backup-and-cleanup.

        Returns True when a launch job was queued, False when the session
        was refused.
        """
        sessname = unique_sessname(user_id)
        cont = SessContainer.get_by_name(sessname)
        cls.log_debug("have existing container for %s: %r", sessname, cont is not None)
        if cont is not None:
            cls.log_debug("container running: %r", cont.is_running())

        if max_hop:
            self_load = Compute.get_instance_stats(Compute.get_instance_id(), 'Load')
            # NOTE(review): if get_instance_stats can return None, this
            # comparison is True on Python 2 and a TypeError on Python 3 -
            # confirm the intended behaviour for an unknown load.
            if self_load < 100:
                SessContainer.invalidate_container(sessname)
                JBoxAsyncJob.async_launch_by_name(sessname, user_id, True)
                return True

        is_leader = is_proposed_cluster_leader()
        if ((cont is None) or (not cont.is_running())) and (not Compute.should_accept_session(is_leader)):
            # Refusing the session: clean up a leftover container, if any.
            if cont is not None:
                SessContainer.invalidate_container(cont.get_name())
                JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
            return False

        SessContainer.invalidate_container(sessname)
        JBoxAsyncJob.async_launch_by_name(sessname, user_id, True)
        return True
Example #14
0
    def maintain(max_timeout=0, inactive_timeout=0):
        """Clean up session containers that ran, idled or stayed dead too long.

        Every container returned by
        ``BaseContainer.session_containers(allcontainers=True)`` is inspected:

        * started more than ``max_timeout`` seconds ago (but within the last
          year) -> invalidated and queued for backup-and-cleanup;
        * otherwise, active with a last ping older than ``inactive_timeout``
          seconds -> invalidated and queued for backup-and-cleanup;
        * otherwise, not active and finished more than 10 minutes ago ->
          deleted without backup and dropped from the bookkeeping below.

        A timeout of 0 (or less) disables the corresponding check. Afterwards
        ping records for names that are not in the resulting mapping are
        removed, ``SessContainer.VALID_CONTAINERS`` is replaced by that
        name -> id mapping, and ``VolMgr.refresh_disk_use_status`` is called
        with the ids of the remaining containers.

        NOTE(review): comparisons use timezone-aware UTC datetimes, so
        ``time_started()``, ``time_finished()`` and the ping timestamps are
        presumably aware too - confirm.
        """
        SessContainer.log_info("Starting container maintenance...")
        tnow = datetime.datetime.now(pytz.utc)
        tmin = datetime.datetime(datetime.MINYEAR, 1, 1, tzinfo=pytz.utc)
        one_year_secs = 365 * 24 * 60 * 60
        dead_grace_secs = 10 * 60

        # With a disabled timeout the threshold is the smallest datetime, so
        # no real timestamp can compare as earlier than it.
        stop_before = (tnow - datetime.timedelta(seconds=max_timeout)) if (
            max_timeout > 0) else tmin
        stop_inactive_before = (tnow - datetime.timedelta(
            seconds=inactive_timeout)) if (inactive_timeout > 0) else tmin

        all_containers = BaseContainer.session_containers(allcontainers=True)
        all_cnames = {}
        container_id_list = []
        for cdesc in all_containers:
            cid = cdesc['Id']
            cont = SessContainer(cid)
            container_id_list.append(cid)
            cname = cont.get_name()

            if cname is None:
                SessContainer.log_debug("Ignoring %s", cont.debug_str())
                continue

            all_cnames[cname] = cid

            c_is_active = cont.is_running() or cont.is_restarting()
            last_ping = SessContainer._get_last_ping(cname)

            # if we don't have a ping record, create one (we must have restarted)
            if (last_ping is None) and c_is_active:
                SessContainer.log_info("Discovered new container %s",
                                       cont.debug_str())
                SessContainer.record_ping(cname)

            start_time = cont.time_started()
            # check that start time is not absurdly small (indicates a container that's starting up)
            start_time_not_zero = (tnow - start_time).total_seconds() < one_year_secs
            if (start_time < stop_before) and start_time_not_zero:
                # don't allow running beyond the limit for long running sessions
                SessContainer.log_warn(
                    "Running beyond allowed time %s. Scheduling cleanup.",
                    cont.debug_str())
                SessContainer.invalidate_container(cname)
                JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
            elif (last_ping is not None) and c_is_active and (
                    last_ping < stop_inactive_before):
                # if inactive for too long, stop it
                SessContainer.log_warn(
                    "Inactive beyond allowed time %s. Scheduling cleanup.",
                    cont.debug_str())
                SessContainer.invalidate_container(cname)
                JBoxAsyncJob.async_backup_and_cleanup(cont.dockid)
            elif not c_is_active and (
                    (tnow - cont.time_finished()).total_seconds() > dead_grace_secs):
                # stopped for a while: remove it and forget about it so it is
                # neither listed as valid nor included in the disk refresh
                SessContainer.log_warn("Dead container %s. Deleting.",
                                       cont.debug_str())
                cont.delete(backup=False)
                del all_cnames[cname]
                container_id_list.remove(cid)

        # delete ping entries for non-existent containers; iterate over a
        # snapshot of the keys because entries are removed inside the loop
        for cname in list(SessContainer.PINGS.keys()):
            if cname not in all_cnames:
                del SessContainer.PINGS[cname]

        SessContainer.VALID_CONTAINERS = all_cnames
        VolMgr.refresh_disk_use_status(container_id_list=container_id_list)
        SessContainer.log_info("Finished container maintenance.")