def do_periodic_task(_mode):
    """Terminate every cluster whose session is not among the active sessions.

    Lists all cluster group ids, logs how many there are and, for each one,
    derives the session id as ``"/"`` + ``UserCluster.sessname_for_cluster``.
    A cluster whose session id is missing from
    ``SessContainer.get_active_sessions()`` is handed to
    ``ParallelHousekeep.terminate_or_delete_cluster``.

    :param _mode: not used by this task.
    :return: None
    """
    cluster_ids = UserCluster.list_all_groupids()
    cluster_count = len(cluster_ids)
    ParallelHousekeep.log_info("%d active clusters", cluster_count)
    if cluster_count == 0:
        # Nothing to reconcile; skip the session lookup entirely.
        return

    live_sessions = SessContainer.get_active_sessions()
    for cid in cluster_ids:
        # Active session ids carry a leading "/" -- presumably container
        # names; confirm against SessContainer.
        session_key = "/" + UserCluster.sessname_for_cluster(cid)
        if session_key in live_sessions:
            continue
        ParallelHousekeep.log_info("Session (%s) corresponding to cluster (%s) not found. Terminating cluster.",
                                   session_key, cid)
        ParallelHousekeep.terminate_or_delete_cluster(cid)
def do_periodic_task(_mode):
    """Clean up clusters that no longer have a matching active session.

    For each group id returned by ``UserCluster.list_all_groupids()`` the
    session id (``"/"`` followed by the cluster's session name) is looked up
    in ``SessContainer.get_active_sessions()``. Clusters without a match are
    logged and passed to ``ParallelHousekeep.terminate_or_delete_cluster``.

    :param _mode: ignored.
    :return: None
    """
    groups = UserCluster.list_all_groupids()
    total = len(groups)
    ParallelHousekeep.log_info("%d active clusters", total)
    if total == 0:
        return

    sessions = SessContainer.get_active_sessions()
    for group_id in groups:
        sess_key = "/" + UserCluster.sessname_for_cluster(group_id)
        session_alive = sess_key in sessions
        if not session_alive:
            ParallelHousekeep.log_info(
                "Session (%s) corresponding to cluster (%s) not found. Terminating cluster.",
                sess_key,
                group_id)
            ParallelHousekeep.terminate_or_delete_cluster(group_id)
def do_periodic_task(_mode):
    """Terminate clusters whose session record has no instance id.

    For every cluster group id, the session properties are loaded with
    ``JBoxSessionProps(Compute.get_install_id(), sessname)``. When
    ``get_instance_id()`` returns a falsy value the cluster is logged and
    passed to ``ParallelHousekeep.terminate_or_delete_cluster``.

    A ``JBoxDBItemNotFound`` raised while loading the properties, or while
    terminating, is ignored and the loop moves on to the next cluster.

    :param _mode: not used by this task.
    :return: None
    """
    cluster_ids = UserCluster.list_all_groupids()
    cluster_count = len(cluster_ids)
    ParallelHousekeep.log_info("%d active clusters", cluster_count)
    if cluster_count == 0:
        return

    for cid in cluster_ids:
        sessname = UserCluster.sessname_for_cluster(cid)
        try:
            props = JBoxSessionProps(Compute.get_install_id(), sessname)
            if props.get_instance_id():
                # Session is still bound to an instance; leave the cluster.
                continue
            ParallelHousekeep.log_info(
                "Session (%s) corresponding to cluster (%s) not found. Terminating cluster.",
                sessname,
                cid)
            ParallelHousekeep.terminate_or_delete_cluster(cid)
        except JBoxDBItemNotFound:
            # Deliberately best-effort: a missing DB item leaves this
            # cluster untouched for this pass.
            pass
def terminate_or_delete_cluster(cluster_id):
    """Terminate or delete the cluster identified by ``cluster_id``.

    Builds a ``UserCluster`` with no user id and ``gname=cluster_id``, then
    delegates to its ``terminate_or_delete()``.

    :param cluster_id: group id of the cluster to remove.
    :return: None
    """
    UserCluster(None, gname=cluster_id).terminate_or_delete()
def post(self):
    """Handle a cluster operation requested for the current session's user.

    The operation is selected by the ``cluster`` request argument:

    * ``status``    -- returns ``uc.status()`` extended with a ``limits``
      entry (``max_cores``, ``credits``) and calls
      ``self.write_machinefile(cont, uc)``.
    * ``terminate`` -- calls ``uc.terminate_or_delete()`` and reports
      ``'terminate'`` if the cluster was active beforehand, else ``'delete'``.
    * ``create``    -- validates ``ninsts`` and ``spot_price`` and then
      deletes, re-creates and starts the cluster. Also reads ``avzone``.

    Every outcome past the initial guards is written as a dict of the form
    ``{'code': 0 | -1, 'data': ...}``. ``self.send_error()`` is used when the
    session id, user id, ``cluster`` argument or session container is
    missing.

    Any exception raised by the cluster operation (including ``ValueError``
    from parsing ``ninsts`` / ``spot_price``) is logged and reported to the
    client as ``code: -1`` with the exception text.

    :return: None
    """
    sessname = self.get_session_id()
    user_id = self.get_user_id()
    if (sessname is None) or (user_id is None):
        self.send_error()
        return

    mode = self.get_argument('cluster', False)
    if mode is False:
        ParallelHandler.log_error("Unknown mode for parallel handler")
        self.send_error()
        return

    user = JBoxUserV2(user_id)
    if not user.has_resource_profile(JBoxUserV2.RES_PROF_CLUSTER):
        ParallelHandler.log_error("Cluster access not allowed for user")
        self.write({'code': -1, 'data': 'You do not have permissions to use any clusters'})
        # Bare return, like every other exit: a non-None handler result is
        # not meaningful to the framework.
        return

    cont = SessContainer.get_by_name(sessname)
    if cont is None:
        self.send_error()
        return

    ParallelHandler.log_debug("Parallel request %s for %s", mode, cont.debug_str())

    try:
        max_cores = user.get_max_cluster_cores()
        balance = user.get_balance()
        uc = UserCluster(user.get_user_id())

        if mode == 'status':
            status = uc.status()
            status['limits'] = {
                'max_cores': max_cores,
                'credits': balance
            }
            self.write_machinefile(cont, uc)
            response = {'code': 0, 'data': status}
        elif mode == 'terminate':
            # Decide the reported action before the cluster state changes.
            action = 'terminate' if uc.isactive() else 'delete'
            uc.terminate_or_delete()
            response = {'code': 0, 'data': action}
        elif mode == 'create':
            ninsts = int(self.get_argument('ninsts', 0))
            avzone = self.get_argument('avzone', '')
            spot_price = float(self.get_argument('spot_price', 0.0))
            if ninsts > (max_cores / UserCluster.INSTANCE_CORES):
                response = {'code': -1, 'data': 'You are allowed a maximum of ' + str(max_cores) + ' cores.'}
            elif not (0 <= spot_price <= UserCluster.INSTANCE_COST):
                # Positive range check on purpose: float('nan') fails every
                # comparison, so "price > max or price < 0" would let a NaN
                # bid through to cluster creation.
                response = {
                    'code': -1,
                    'data': 'Bid price must be between $0 - $' + str(UserCluster.INSTANCE_COST) + '.'
                }
            else:
                # Drop any previous cluster definition before creating anew.
                uc.delete()
                user_data = ParallelHandler.create_user_script(cont)
                uc.create(ninsts, avzone, user_data, spot_price=spot_price)
                uc.start()
                response = {'code': 0, 'data': ''}
        else:
            response = {'code': -1, 'data': 'Unknown cluster operation ' + mode}
    except Exception as ex:
        ParallelHandler.log_error("exception in cluster operation")
        ParallelHandler._get_logger().exception("exception in cluster operation")
        # ``message`` is not a standard exception attribute on Python 3 (and
        # is empty for multi-argument exceptions on Python 2); fall back to
        # str(ex) so the error handler itself cannot raise AttributeError.
        response = {'code': -1, 'data': getattr(ex, 'message', None) or str(ex)}

    self.write(response)