def pre_allocate_expr(self, context):
    """Start pre-allocated (user-less) experiments for a hackathon that is short of them.

    The hackathon's templates are visited in order. For the first template
    whose count of STARTING/RUNNING experiments without a user is below
    ``HackathonConfig.PRE_ALLOCATE_NUMBER``, new experiments are requested
    through ``start_pre_alloc_exprs`` and the loop ends. No more than
    ``HackathonConfig.PRE_ALLOCATE_CONCURRENT`` user-less experiments of a
    template may be STARTING at the same time; when that limit is already
    reached the whole method returns and leaves the work for a later run.
    Exceptions raised while handling one template are logged and the next
    template is tried.

    :type context: Context
    :param context: execution context; only ``hackathon_id`` is read.
    """
    # TODO: too complex, not check
    hackathon_id = context.hackathon_id
    self.log.debug("executing pre_allocate_expr for hackathon %s " % hackathon_id)
    hackathon = Hackathon.objects(id=hackathon_id).first()
    if hackathon is None:
        # Nothing to pre-allocate for; avoid AttributeError on `None.templates`.
        self.log.error("pre_allocate_expr: hackathon %s not found" % hackathon_id)
        return

    for template in hackathon.templates:
        try:
            pre_num = int(hackathon.config.get(HackathonConfig.PRE_ALLOCATE_NUMBER, 1))
            query = Q(status=ExprStatus.STARTING) | Q(status=ExprStatus.RUNNING)
            curr_num = Experiment.objects(user=None, hackathon=hackathon, template=template).filter(query).count()
            self.log.debug("pre_alloc_exprs: pre_num is %d, curr_num is %d, remain_num is %d " %
                           (pre_num, curr_num, pre_num - curr_num))

            # TODO Should support VirtualEnvProvider.K8S only in future after k8s Template is supported
            # if template.provider == VirtualEnvProvider.K8S:
            if curr_num >= pre_num:
                continue

            # Note: this count is not restricted to the current hackathon.
            start_num = Experiment.objects(user=None, template=template, status=ExprStatus.STARTING).count()
            allowed_currency = int(hackathon.config.get(HackathonConfig.PRE_ALLOCATE_CONCURRENT, 1))
            if start_num >= allowed_currency:
                self.log.debug(
                    "there are already %d Experiments starting, will check later ... " % start_num)
                return

            # Start only what is still missing (pre_num - curr_num), capped by
            # the free concurrency slots. Both operands are > 0 here, so the
            # result is always a positive count and never exceeds the target.
            remain_num = min(allowed_currency - start_num, pre_num - curr_num)
            self.log.debug(
                "no starting template: %s , remain num is %d ... " % (template.name, remain_num))
            self.start_pre_alloc_exprs(None, template.name, hackathon.name, remain_num)
            break
        except Exception as e:
            self.log.error(e)
            self.log.error("check default experiment failed")
def getConnectInfo(self):
    """Return the remote (Guacamole) connection parameters of a virtual environment.

    The virtual environment is looked up by the ``name`` query argument of
    the current request. Only the user the experiment is assigned to may
    read its parameters.

    :return: the ``remote_paras`` of the matching virtual environment, the
        result of ``not_found`` when no experiment owns such an environment,
        or the result of ``forbidden`` when the experiment is unassigned or
        assigned to a different user.
    """
    connection_name = request.args.get("name")
    self.log.debug("Guacamole connecion request, connection name: %s" % connection_name)
    expr = Experiment.objects(virtual_environments__name=connection_name).no_dereference().first()
    if not expr:
        return not_found("not_found")

    # An experiment without a user (e.g. a pre-allocated one) has
    # `expr.user is None`; treat it as not accessible instead of crashing on
    # `None.id`.
    if expr.user is None or expr.user.id != g.user.id:
        return forbidden("forbidden")

    remote_paras = expr.virtual_environments.get(name=connection_name).remote_paras
    # TODO Support DYNAMIC host/port in case of they cannot be determined on provision phase
    if K8S_UNIT.REMOTE_PARAMETER_HOST_NAME not in remote_paras:
        # TTT
        available_public_ips = self.util.safe_get_config(
            "ukylin.k8s.ips", ["119.3.202.71", "49.4.90.39"])
        # The host is filled in on the in-memory document only; it is not saved.
        remote_paras[K8S_UNIT.REMOTE_PARAMETER_HOST_NAME] = random.choice(available_public_ips)

    self.log.debug("get guacamole config by id: %s, paras: %r" % (connection_name, remote_paras))
    return remote_paras
def _on_virtual_environment_unexpected_error(self, context):
    """Record an unexpected error on a virtual environment of an experiment.

    :param context: execution context; ``experiment_id`` is required. When
        ``virtual_environment_name`` is present, the environment with that
        name is set to ``ExprEnvStatus.UNEXPECTED_ERROR``.
    """
    # Use %-formatting rather than `+`: the id is not necessarily a str, and
    # str + non-str raises TypeError before anything is recorded.
    self.log.warn("experiment unexpected error: %s" % context.experiment_id)
    expr = Experiment.objects(id=context.experiment_id).no_dereference() \
        .only("status", "virtual_environments").first()
    if expr is None:
        self.log.warn("experiment %s not found, unexpected error not recorded" % context.experiment_id)
        return

    if "virtual_environment_name" in context:
        failed_env = expr.virtual_environments.get(name=context.virtual_environment_name)
        failed_env.status = ExprEnvStatus.UNEXPECTED_ERROR
    expr.save()
def get_registration_detail(self, user, hackathon, registration=None):
    """Build a dict describing a user's participation in a hackathon.

    :param user: the user; ``user.id`` is used for lookups.
    :param hackathon: the hackathon; ``hackathon.id`` is used for lookups.
    :param registration: optional registration; when falsy it is looked up
        via ``get_registration_by_user_and_hackathon``.
    :return: dict with keys "hackathon" and "user"; "registration" is added
        when a registration exists, and "experiment" when the user has a
        STARTING or RUNNING experiment in the hackathon.
    """
    result = {}
    result["hackathon"] = hackathon.dic()
    result["user"] = self.user_manager.user_display_info(user)

    found = registration
    if not found:
        found = self.get_registration_by_user_and_hackathon(user.id, hackathon.id)
        if not found:
            # not registered: only the basic info is available
            return result

    # "asset" is already part of the registration dict
    result["registration"] = found.dic()

    # attach the active experiment, if any; failures here are only logged
    try:
        active_statuses = [ExprStatus.STARTING, ExprStatus.RUNNING]
        active = Experiment.objects(
            user=user.id,
            hackathon=hackathon.id,
            status__in=active_statuses).first()
        if active:
            result["experiment"] = active.dic()
    except Exception as e:
        self.log.error(e)

    return result
def heart_beat(self, expr_id):
    """Refresh the last heart-beat time of a RUNNING experiment.

    :param expr_id: experiment id
    :return: result of ``ok()`` after the time stamp is saved, or of
        ``not_found`` when no RUNNING experiment has that id.
    """
    running = Experiment.objects(id=expr_id, status=ExprStatus.RUNNING).first()
    if running is not None:
        running.last_heart_beat_time = self.util.get_now()
        running.save()
        return ok()

    return not_found('Experiment is not running')
def start_expr(self, context):
    """Create an Experiment document and kick off its start-up.

    The experiment is saved with status INIT, the template content is
    loaded, and the status is moved to STARTING before
    ``_internal_start_expr`` is invoked with a reduced context.

    :type context: Context
    :param context: the execution context; ``template``, ``hackathon``,
        ``user`` and ``pre_alloc_enabled`` are read.
    :return: the reduced context (with ``experiment`` attached) when
        ``_internal_start_expr`` returns a truthy value; otherwise
        ``rollback`` is called and None is returned.
    """
    experiment = Experiment(
        status=ExprStatus.INIT,
        template=context.template,
        user=context.user,
        virtual_environments=[],
        hackathon=context.hackathon)
    experiment.save()

    content = self.template_library.load_template(context.template)
    experiment.status = ExprStatus.STARTING
    experiment.save()

    # The incoming context holds complex objects; hand over a serializable
    # one that carries simple fields only.
    slim_context = Context(
        template_content=content,
        template_name=context.template.name,
        hackathon_id=context.hackathon.id,
        experiment_id=experiment.id,
        pre_alloc_enabled=context.pre_alloc_enabled)
    if context.get("user", None):
        slim_context.user_id = context.user.id

    if not self._internal_start_expr(slim_context):
        self.rollback(slim_context)
        return None

    slim_context.experiment = experiment
    return slim_context
def _on_virtual_environment_stopped(self, context):
    """Mark one virtual environment STOPPED and, if all are, the experiment too.

    :param context: execution context; ``experiment_id`` and
        ``virtual_environment_name`` are read.
    """
    partial_query = Experiment.objects(id=context.experiment_id).no_dereference()
    expr = partial_query.only("status", "virtual_environments").first()

    stopped_env = expr.virtual_environments.get(name=context.virtual_environment_name)
    stopped_env.status = ExprEnvStatus.STOPPED

    # the experiment counts as stopped once no environment is in another state
    everything_stopped = True
    for env in expr.virtual_environments:
        if not (env.status == ExprEnvStatus.STOPPED):
            everything_stopped = False
            break

    if everything_stopped:
        expr.status = ExprStatus.STOPPED
    expr.save()
def _on_virtual_environment_success(self, context):
    """React to a virtual environment that started successfully.

    When every virtual environment of the experiment is RUNNING, the
    experiment itself is set to RUNNING, saved, and ``_on_expr_started`` is
    called. ``_hooks_on_virtual_environment_success`` is called in any case.

    :param context: execution context; ``experiment_id`` is read.
    """
    partial_query = Experiment.objects(id=context.experiment_id).no_dereference()
    expr = partial_query.only("status", "virtual_environments").first()

    env_running = (env.status == ExprEnvStatus.RUNNING for env in expr.virtual_environments)
    if all(env_running):
        expr.status = ExprStatus.RUNNING
        expr.save()
        self._on_expr_started(context)

    self._hooks_on_virtual_environment_success(context)
def get_expr_list_by_hackathon_id(self, hackathon, context):
    """Return one page of a hackathon's experiments with their details.

    :param hackathon: the hackathon whose experiments are listed.
    :param context: optional keys ``user_name`` and ``status`` narrow the
        result; ``page`` (default 1) and ``per_page`` (default 10) select
        the page.
    :return: result of ``self.util.paginate`` applied to the page.
    """
    user_name = None
    if "user_name" in context:
        user_name = context.user_name
    status = None
    if "status" in context:
        status = context.status
    page = 1
    if "page" in context:
        page = int(context.page)
    per_page = 10
    if "per_page" in context:
        per_page = int(context.per_page)

    # always filter by hackathon; add the optional filters only when given
    criteria = {"hackathon": hackathon}
    if status:
        criteria["status"] = status
    if user_name:
        criteria["user__in"] = User.objects(name=user_name).all()

    experiments_page = Experiment.objects(**criteria).paginate(page, per_page)
    return self.util.paginate(experiments_page, self.__get_expr_with_detail)
def restart_stopped_expr(self, experiment_id):
    """Start the non-running docker containers of an experiment again.

    :param experiment_id: experiment id
    :return: ``experiment.dic()`` after the real status has been re-checked.
    :raises NotImplementedError: when a virtual environment uses the AZURE
        provider.
    """
    experiment = Experiment.objects(id=experiment_id).first()
    docker_proxy = self.hosted_docker_proxy

    for env in experiment.virtual_environments:
        provider = env.provider
        if provider == VirtualEnvProvider.DOCKER:
            container = env.docker_container
            if docker_proxy.is_container_running(container):
                continue
            docker_proxy.start_container(container.host_server, container.container_id)
        elif provider == VirtualEnvProvider.AZURE:
            raise NotImplementedError()

    self.__check_expr_real_status(experiment)
    return experiment.dic()
def stop_expr(self, expr_id):
    """Ask the matching starter to stop an experiment.

    :param expr_id: experiment id
    :return: ``ok('OK')`` when the experiment exists (whether or not a
        starter was found for it), ``ok()`` when it does not exist.
    """
    self.log.debug("begin to stop %s" % str(expr_id))
    expr = Experiment.objects(id=expr_id).first()
    if expr is None:
        return ok()

    starter = self.get_starter(expr.hackathon, expr.template)
    if starter:
        stop_context = Context(experiment_id=expr.id, experiment=expr)
        starter.stop_expr(stop_context)

    self.log.debug("experiment %s ended success" % expr_id)
    return ok('OK')
def _internal_stop_expr(self, context):
    """Stop every virtual environment of the experiment named in ``context``.

    Does nothing when the experiment cannot be found. An experiment without
    any virtual environment is saved with status ROLL_BACKED instead.

    :param context: execution context; ``experiment_id`` is read.
    """
    expr = Experiment.objects(id=context.experiment_id).first()
    if not expr:
        return

    environments = expr.virtual_environments
    if len(environments) == 0:
        expr.status = ExprStatus.ROLL_BACKED
        expr.save()
        return

    # delete containers and change expr status
    for env in environments:
        # every virtual environment gets its own context object
        context = context.copy()
        context.virtual_environment_name = env.name
        self._stop_virtual_environment(env, expr, context)
def __check_expr_status(self, user, hackathon, template):
    """Find an experiment the user can use right away.

    First the user's own RUNNING/STARTING experiment in the hackathon is
    looked up (for hackathon admins additionally restricted to
    ``template``). Failing that, a RUNNING pre-allocated experiment (one
    without a user) of the same hackathon and template is assigned to the
    user.

    :param user: the requesting user
    :param hackathon: the hackathon the experiment belongs to
    :param template: the requested template
    :return: the experiment found or assigned, or None when there is none.
    """
    active_statuses = [ExprStatus.RUNNING, ExprStatus.STARTING]
    criterion = Q(status__in=active_statuses, hackathon=hackathon, user=user)
    if self.admin_manager.is_hackathon_admin(hackathon.id, user.id):
        criterion &= Q(template=template)

    own_expr = Experiment.objects(criterion).first()
    if own_expr:
        # user has a running/starting experiment
        return own_expr

    # try to hand a pre-allocated experiment over to the user
    spare_expr = Experiment.objects(
        status=ExprStatus.RUNNING,
        hackathon=hackathon,
        template=template,
        user=None).first()
    if not spare_expr:
        return None

    spare_expr.user = user
    spare_expr.save()
    return spare_expr
def roll_back(self, expr_id):
    """Roll back an experiment after an exception occurred.

    :param expr_id: experiment id
    :return: the result of the starter's ``rollback``; None when the
        experiment or a starter for it cannot be found.
    """
    self.log.debug("Starting rollback experiment %s..." % expr_id)
    # `.first()` is required: without it `expr` is a QuerySet, which has no
    # `hackathon`/`template` attributes and cannot be passed on as an
    # experiment.
    expr = Experiment.objects(id=expr_id).first()
    if not expr:
        self.log.warn("rollback failed due to experiment not found")
        return

    starter = self.get_starter(expr.hackathon, expr.template)
    if not starter:
        self.log.warn("rollback failed due to no starter found")
        return

    return starter.rollback(Context(experiment=expr))
def delete_template(self, template_id):
    """Delete a template unless it is in use or the caller lacks permission.

    :param template_id: id of the template to delete
    :return: ``ok`` when the template is deleted or was already gone,
        ``forbidden`` when the current user is neither its creator nor a
        super admin or when an experiment references the template, and
        ``internal_server_error`` when any exception is raised.
    """
    self.log.debug("delete template [%s]" % template_id)
    try:
        template = self.get_template_info_by_id(template_id)
        if template is None:
            return ok("already removed")

        # only the creator or a super admin may delete a template
        created_by_other = g.user.id != template.creator.id
        if created_by_other and not g.user.is_super:
            return forbidden()

        usage_count = Experiment.objects(template=template).count()
        if usage_count > 0:
            return forbidden("template already in use")

        # remove record in DB
        # the Hackathon used this template will imply the mongoengine's PULL reverse_delete_rule
        self.log.debug("delete template {}".format(template.name))
        template.delete()
        return ok("delete template success")
    except Exception as ex:
        self.log.error(ex)
        return internal_server_error("delete template failed")
def scheduler_recycle_expr(self):
    """Recycle RUNNING experiments that are older than their hackathon allows.

    For every hackathon returned by
    ``hackathon_manager.get_recyclable_hackathon_list`` the recycle period
    (in minutes) is read, and each RUNNING experiment created before
    "now minus that period" is passed to ``__recycle_expr``. An exception
    while handling one hackathon is logged and the next one is processed.

    :return: None
    """
    self.log.debug("start checking recyclable experiment ... ")
    for hackathon in self.hackathon_manager.get_recyclable_hackathon_list():
        try:
            recycle_minutes = self.hackathon_manager.get_recycle_minutes(hackathon)
            # experiments created before this moment have timed out
            created_before = self.util.get_now() - timedelta(minutes=recycle_minutes)
            expired_exprs = Experiment.objects(
                create_time__lt=created_before,
                status=ExprStatus.RUNNING,
                hackathon=hackathon)
            for expired in expired_exprs:
                self.__recycle_expr(expired)
        except Exception as e:
            self.log.error(e)
def _enable_guacd_file_transfer(self, context):
    """Copy the local ``guacctl`` script into a virtual environment via scp.

    This function should be invoked after container is started in hosted_docker.py

    The target is ``/usr/local/sbin/guacctl`` on the host described by the
    virtual environment's ``remote_paras`` ("hostname", "port", "username",
    "password").

    :param context: execution context; ``experiment_id`` and
        ``virtual_environment_name`` are read.
    """
    expr = Experiment.objects(id=context.experiment_id).no_dereference().first()
    virtual_env = expr.virtual_environments.get(name=context.virtual_environment_name)
    remote = virtual_env.remote_paras

    local_script = abspath("%s/../expr/guacctl" % dirname(realpath(__file__)))
    destination = "%s@%s:/usr/local/sbin/guacctl" % (remote["username"], remote["hostname"])
    # Pass the arguments as a list so that pexpect does not re-split values
    # containing whitespace or quotes.
    p = pexpect.spawn("scp", ["-P", str(remote["port"]), local_script, destination])
    try:
        i = p.expect([pexpect.TIMEOUT, 'yes/no', 'password: '])
        # NOTE(review): the source had a corrupted span between the two
        # password patterns ('password: '******'password:'); the host-key
        # confirmation below is a reconstruction -- confirm against the
        # repository history.
        if i == 1:
            p.sendline("yes")
            i = p.expect([pexpect.TIMEOUT, 'password:'])

        # index 0 is TIMEOUT in both pattern lists; anything else is a
        # password prompt
        if i != 0:
            p.sendline(remote["password"])
            p.expect(pexpect.EOF)
    finally:
        # release the child process even when expect() raises
        p.close()
def __start_virtual_environment(self, context, docker_template_unit):
    """Register a new docker virtual environment on the experiment and start it.

    The template unit is renamed to
    ``<first 9 chars of experiment id>-<original name>-<8 random chars>``,
    a ``VirtualEnvironment`` with status INIT is appended to the experiment,
    and ``_internal_start_virtual_environment`` is called with a copy of the
    context carrying ``virtual_environment_name`` and ``unit``.

    :param context: execution context; ``experiment_id`` is read.
    :param docker_template_unit: the template unit to start; its name is
        changed in place.
    """
    origin_name = docker_template_unit.get_name()
    id_prefix = str(context.experiment_id)[0:9]
    alphabet = string.ascii_letters + string.digits
    random_suffix = "".join(random.sample(alphabet, 8)).lower()
    new_name = '%s-%s-%s' % (id_prefix, origin_name, random_suffix)
    docker_template_unit.set_name(new_name)
    self.log.debug("starting to start container: %s" % new_name)

    # db document for VirtualEnvironment
    new_env = VirtualEnvironment(
        provider=VirtualEnvProvider.DOCKER,
        name=new_name,
        image=docker_template_unit.get_image_with_tag(),
        status=ExprEnvStatus.INIT,
        remote_provider=VERemoteProvider.Guacamole)

    # work on a context that belongs to this virtual environment only
    context = context.copy()
    partial_query = Experiment.objects(id=context.experiment_id).no_dereference()
    experiment = partial_query.only("virtual_environments").first()
    experiment.virtual_environments.append(new_env)
    experiment.save()

    # start container remotely , use hosted docker
    context.virtual_environment_name = new_env.name
    context.unit = docker_template_unit
    self._internal_start_virtual_environment(context)
def get_expr_status_and_confirm_starting(self, expr_id):
    """Report the status of an experiment with start confirmation enabled.

    :param expr_id: experiment id
    :return: result of ``__report_expr_status`` called with
        ``isToConfirmExprStarting=True``, or of ``not_found`` when no
        experiment has that id.
    """
    expr = Experiment.objects(id=expr_id).first()
    if not expr:
        return not_found('Experiment Not found')

    return self.__report_expr_status(expr, isToConfirmExprStarting=True)