def start_expr(self, context):
    """Kick off a new Experiment asynchronously.

    :type context: Context
    :param context: the execution context.

    :return: a serializable Context carrying the experiment on success,
        None when the internal start failed and was rolled back
    """
    # persist first so the experiment has an id for the background job
    experiment = Experiment(status=ExprStatus.INIT,
                            template=context.template,
                            user=context.user,
                            virtual_environments=[],
                            hackathon=context.hackathon)
    experiment.save()

    template_content = self.template_library.load_template(context.template)
    experiment.status = ExprStatus.STARTING
    experiment.save()

    # the incoming context holds complex objects; build a serializable
    # sibling that carries only simple fields
    job_context = Context(template_content=template_content,
                          template_name=context.template.name,
                          hackathon_id=context.hackathon.id,
                          experiment_id=experiment.id,
                          pre_alloc_enabled=context.pre_alloc_enabled)
    if context.get("user", None):
        job_context.user_id = context.user.id

    if self._internal_start_expr(job_context):
        job_context.experiment = experiment
        return job_context

    self.rollback(job_context)
    return None
def stop_vm(self, resource_id, azure_key, template_units, virtual_environments, expr_id):
    """Stop the virtual machine and deallocate its resources.

    NOTE: virtual_environments and expr_id are just a workaround to update
    db status; they will be eliminated in the future.

    :param resource_id: id used to derive unique VM names
    :param azure_key: credentials (subscription id, pem url, management host)
    :param template_units: azure template units, one per virtual environment
    :param virtual_environments: db documents matching template_units
    :param expr_id: experiment id, for db-status callbacks only
    """
    assert len(template_units) == len(virtual_environments)

    job_ctxs = []
    ctx = Context(job_ctxs=job_ctxs,
                  current_job_index=0,
                  resource_id=resource_id)

    # pair each template unit with its virtual environment directly
    # instead of indexing through xrange
    for unit, ve in zip(template_units, virtual_environments):
        job_ctxs.append(
            Context(
                cloud_service_name=unit.get_cloud_service_name(),
                deployment_slot=unit.get_deployment_slot(),
                virtual_machine_name=self.get_virtual_machine_name(
                    unit.get_virtual_machine_name(), resource_id),

                # NOTE: ONLY callback purpose functions can depend on
                # virtual_environment_id and expr_id
                virtual_environment_id=ve.id,
                expr_id=expr_id,

                subscription_id=azure_key.subscription_id,
                pem_url=azure_key.pem_url,
                management_host=azure_key.management_host))

    self.__schedule_stop(ctx)
def start_expr(self, context):
    """Start a new Experiment asynchronously.

    :type context: Context
    :param context: the execution context.

    :return: serializable Context with the experiment attached
    """
    # save the experiment first so it owns an id for the async job
    expr = Experiment(status=EStatus.INIT,
                      template=context.template,
                      user=context.user,
                      virtual_environments=[],
                      hackathon=context.hackathon)
    expr.save()

    template_content = self.template_library.load_template(context.template)
    expr.status = EStatus.STARTING
    expr.save()

    # context carries complex objects; hand the background job a
    # serializable twin with only simple fields
    slim_context = Context(template_content=template_content,
                           template_name=context.template.name,
                           hackathon_id=context.hackathon.id,
                           experiment_id=expr.id)
    if context.get("user", None):
        slim_context.user_id = context.user.id

    self._internal_start_expr(slim_context)
    slim_context.experiment = expr
    return slim_context
def context(self):
    """Convert request input to a Context.

    By default, convert the json body to a Context for put/post requests,
    and convert the query args for get/delete requests.

    :rtype: Context
    :return Context object from request body or query
    """
    # Inspect the immediate caller's function name (e.g. an HTTP verb
    # handler named "post"/"put") via the call stack.  NOTE(review): this
    # depends on being called directly from the verb handler -- adding a
    # wrapper frame would break the detection.
    caller = sys._getframe().f_back.f_code.co_name.lower()
    # file-upload endpoint sends multipart data, not json, so it is
    # excluded from the json-body path
    if caller in ["post", "put"] and not request.path == "/api/user/file":
        return Context.from_object(request.get_json(force=True))
    else:
        return Context.from_object(request.args)
def __construct_setup_job_context(self, unit, azure_key, vm_name):
    """Build the per-virtual-environment context for one setup job."""
    # resolve the cloud service name once up front
    service_name = self.__get_cloud_service_name(
        unit.get_cloud_service_name(), azure_key.subscription_id)

    return Context(
        cloud_service_name=service_name,
        cloud_service_label=unit.get_cloud_service_label(),
        cloud_service_host=unit.get_cloud_service_location(),
        storage_account_name=unit.get_storage_account_name(),
        storage_account_description=unit.get_storage_account_description(),
        storage_account_label=unit.get_storage_account_label(),
        storage_account_location=unit.get_storage_account_location(),
        virtual_machine_name=vm_name,
        virtual_machine_label=unit.get_virtual_machine_label(),
        deployment_name=unit.get_deployment_name(),
        deployment_slot=unit.get_deployment_slot(),
        system_config=unit.get_system_config(),
        raw_system_config=unit.get_raw_system_config(),
        os_virtual_hard_disk=unit.get_os_virtual_hard_disk(),
        virtual_machine_size=unit.get_virtual_machine_size(),
        image_name=unit.get_image_name(),
        raw_network_config=unit.get_raw_network_config(),
        resource_extension_references=unit.get_resource_extension_references(),
        is_vm_image=unit.is_vm_image(),
        remote=unit.get_remote(),
        remote_endpoint_name=unit.get_remote_port_name())
def get_docker_host_server(self, context):
    """Find an available docker host for the hackathon, retrying while busy."""
    hackathon = Hackathon.objects(
        id=context.hackathon_id).no_dereference().first()
    try:
        host_resp = self.docker_host_manager.get_available_docker_host(
            hackathon)
    except Exception as e:
        # treat a lookup failure as "still in progress" so we retry below
        self.log.error(e)
        host_resp = Context(state=DHS_QUERY_STATE.ONGOING)

    context.trial = context.get("trial", 0) + 1
    state = host_resp.state
    if state == DHS_QUERY_STATE.SUCCESS:
        # assign ports
        self.__assign_ports(context, host_resp.docker_host_server)
    elif state == DHS_QUERY_STATE.ONGOING and context.trial < 20:
        # tried up to 20 times
        self.log.debug(
            "host servers are all busy, %d times tried, will retry in 3 seconds"
            % context.trial)
        self.scheduler.add_once(FEATURE,
                                "get_docker_host_server",
                                context,
                                seconds=3)
    else:
        self.log.error("no available host server")
        self._on_virtual_environment_failed(context)
def __save_template_to_storage(self, args):
    """Save a template to a file in storage chosen by configuration.

    Parse the template out of args, merge it with default template values,
    generate a file name, and save it to a physical file in storage.

    :type args: dict
    :param args: description of template

    :return: storage context on success, None if an exception is raised
    """
    try:
        docker_template_units = [
            DockerTemplateUnit(ve)
            for ve in args[BaseTemplate.VIRTUAL_ENVIRONMENTS]
        ]
        docker_template = DockerTemplate(args[BaseTemplate.TEMPLATE_NAME],
                                         args[BaseTemplate.DESCRIPTION],
                                         docker_template_units)
        # short uuid suffix keeps file names unique per user/template
        file_name = '%s-%s-%s.js' % (g.user.name,
                                     args[BaseTemplate.TEMPLATE_NAME],
                                     str(uuid.uuid1())[0:8])
        context = Context(file_name=file_name,
                          file_type=FILE_TYPE.TEMPLATE,
                          content=docker_template.dic)
        # fix: log message typo "save=ing" -> "saving"
        self.log.debug("saving template as file [%s]" % file_name)
        context = self.storage.save(context)
        return context
    except Exception as ex:
        self.log.error(ex)
        return None
def start_pre_alloc_exprs(self, user, template_name, hackathon_name=None, pre_alloc_num=0):
    """Pre-allocate a number of experiments for a hackathon template.

    :param user: owner of the pre-allocated experiments
    :param template_name: name of the template to start
    :param hackathon_name: hackathon to allocate for
    :param pre_alloc_num: how many experiments to start; 0 is a no-op

    :raise PreconditionFailed: when no starter supports the template or
        hackathon resources are not configured
    """
    self.log.debug("start_pre_alloc_exprs: %d " % pre_alloc_num)
    if pre_alloc_num == 0:
        return

    hackathon = self.__verify_hackathon(hackathon_name)
    template = self.__verify_template(hackathon, template_name)

    starter = self.get_starter(hackathon, template)
    if not starter:
        raise PreconditionFailed("either template not supported or hackathon resource not configured")

    while pre_alloc_num > 0:
        context = starter.start_expr(Context(
            template=template,
            user=user,
            hackathon=hackathon,
            pre_alloc_enabled=True))
        # fix: compare against None with identity, not equality
        if context is None:
            self.log.debug("pre_alloc_num left: %d " % pre_alloc_num)
            break
        self.__report_expr_status(context.experiment)
        pre_alloc_num -= 1
def upload_files(self, user_id, file_type):
    """Handle uploaded files from an http request.

    :param user_id: id used when generating server-side file names
    :param file_type: storage file type applied to every uploaded file
    :return: dict with a "files" list on success, or a bad_request
        response when validation fails
    """
    from os.path import splitext

    try:
        self.__validate_upload_files()
    except Exception as e:
        self.log.error(e)
        return bad_request("file size or file type unsupport")

    file_list = []
    storage = RequiredFeature("storage")
    # avoid shadowing the builtin `file`
    for key in request.files:
        file_content = request.files[key]
        pre_file_name = file_content.filename
        # fix: rfind('.') returned the last character for extension-less
        # names; splitext correctly yields '' in that case
        file_suffix = splitext(pre_file_name)[1]
        new_file_name = self.__generate_file_name(user_id, file_type,
                                                  file_suffix)
        self.log.debug("upload file: " + new_file_name)
        context = Context(file_name=new_file_name,
                          file_type=file_type,
                          content=file_content)
        context = storage.save(context)

        # file_name is a random name created by server,
        # pre_file_name is the original name
        file_info = {
            "file_name": new_file_name,
            "pre_file_name": pre_file_name,
            "url": context.url
        }
        file_list.append(file_info)

    return {"files": file_list}
def pull_images_for_hackathon(self, context):
    """Schedule docker-image pulls so every host of the hackathon has the
    images required by its templates.

    :param context: Context carrying hackathon_id
    """
    hackathon_id = context.hackathon_id
    templates = self.__get_templates_for_pull(hackathon_id)
    images = [self.__get_images_from_template(x) for x in templates]
    images_to_pull = flatten(images)
    self.log.debug('expected images: %s on hackathon: %s' %
                   (images_to_pull, hackathon_id))

    # get all docker host server on hackathon
    hosts = self.db.find_all_objects_by(DockerHostServer,
                                        hackathon_id=hackathon_id)
    # loop to get every docker host
    for docker_host in hosts:
        download_images = self.__get_undownloaded_images_on_docker_host(
            docker_host, images_to_pull)
        self.log.debug('need to pull images: %s on host: %s' %
                       (download_images, docker_host.vm_name))
        for dl_image in download_images:
            # fix: split from the right so registry hosts with ports
            # ("host:5000/img:tag") keep the full image name; also stop
            # shadowing the `context` parameter
            image, tag = dl_image.rsplit(':', 1)
            job_context = Context(image=image,
                                  tag=tag,
                                  docker_host=docker_host.id)
            self.scheduler.add_once(feature="hosted_docker",
                                    method="pull_image",
                                    context=job_context,
                                    seconds=3)
def __generate_create_cloud_service_context(self, context):
    """Derive the context needed to create a cloud service."""
    unit = context.template_unit
    return Context(azure_key_id=context.azure_key_id,
                   experiment_id=context.experiment_id,
                   name=unit.get_cloud_service_name(),
                   label=unit.get_cloud_service_label(),
                   location=unit.get_cloud_service_location())
def __create_storage_account_in_azure_service(self, context, args_context):
    """Create a storage account in Azure and schedule an async-status poll.

    :param context: original job context, forwarded to the poll callbacks
    :param args_context: Context with azure_key_id, experiment_id, name,
        description, label and location for the storage account
    :raise InternalServerError: when the Azure SDK call fails
    """
    try:
        commit_azure_log(args_context.experiment_id,
                         ALOperation.CREATE_STORAGE_ACCOUNT, ALStatus.START)
        result = self.azure_adapter.create_storage_account(
            args_context.azure_key_id, args_context.name,
            args_context.description, args_context.label,
            args_context.location)
    except Exception as e:
        m = '%s [%s] %s' % (AZURE_RESOURCE_TYPE.STORAGE_ACCOUNT,
                            args_context.name, e.message)
        commit_azure_log(args_context.experiment_id,
                         ALOperation.CREATE_STORAGE_ACCOUNT, ALStatus.FAIL,
                         m, 0)
        self.log.error(e)
        # fix: grammar in error message ("a exception" -> "an exception")
        raise InternalServerError(
            "azure service raised an exception when create storage account")

    # query async operation status
    # NOTE(review): sibling pollers schedule with feature='azure_service';
    # 'azure_adapter' here looks inconsistent -- confirm before changing
    query_context = Context(
        request_id=result.id,
        azure_key_id=context.azure_key_id,
        feature='azure_storage_account_service',
        true_method='create_storage_account_async_true',
        false_method='create_storage_account_async_false',
        method_args_context=context)
    self.scheduler.add_once(feature='azure_adapter',
                            method='query_async_operation_status',
                            context=query_context,
                            seconds=3)
def __azure_service_create_vm(self, deployment_name, context):
    """Create a virtual machine in Azure and poll the async operation."""
    commit_azure_log(context.experiment_id,
                     ALOperation.CREATE_VIRTUAL_MACHINE, ALStatus.START)
    try:
        add_result = self.azure_adapter.add_virtual_machine(
            context.cloud_service_name, deployment_name,
            context.virtual_machine_name, context.system_config,
            context.os_virtual_hard_disk, context.network_config,
            context.virtual_machine_size, context.vm_image_name)
    except Exception as e:
        msg = '%s [%s] %s' % (AZURE_RESOURCE_TYPE.VIRTUAL_MACHINE,
                              context.virtual_machine_name, e.message)
        commit_azure_log(context.experiment_id,
                         ALOperation.CREATE_VIRTUAL_MACHINE, ALStatus.FAIL,
                         msg, 0)
        self.log.error(e)
        raise InternalServerError("Azure service create vm failed")

    # query async operation status
    poll_context = Context(
        request_id=add_result.id,
        azure_key_id=context.azure_key_id,
        feature='azure_vm_service',
        true_method='create_virtual_machine_async_true_1',
        false_method='create_virtual_machine_async_false_1',
        method_args_context=self.__generate_base_context(
            context.azure_key_id, context.experiment_id,
            context.template_unit))
    self.scheduler.add_once(feature='azure_service',
                            method='query_async_operation_status',
                            context=poll_context,
                            seconds=3)
def pull_images_for_hackathon(self, context):
    """Schedule pulls of template images missing from the hackathon's
    docker hosts.

    :param context: Context carrying hackathon_id
    """
    hackathon_id = context.hackathon_id
    # get templates which are online and provided by docker
    templates = self.__get_templates_for_pull(hackathon_id)

    # fix: list comprehension instead of map(lambda ...)
    images = [self.__get_images_from_template(t) for t in templates]
    expected_images = flatten(images)
    self.log.debug('expected images: %s on hackathon: %s' %
                   (expected_images, hackathon_id))

    # get all docker host server on hackathon
    hosts = self.db.find_all_objects_by(DockerHostServer,
                                        hackathon_id=hackathon_id)
    # loop to get every docker host
    for docker_host in hosts:
        download_images = self.__get_undownloaded_images_on_docker_host(
            docker_host, expected_images)
        self.log.debug('need to pull images: %s on host: %s' %
                       (download_images, docker_host.vm_name))
        for dl_image in download_images:
            # fix: single right-split keeps registry ports intact and
            # avoids shadowing the `context` parameter
            image, tag = dl_image.rsplit(':', 1)
            job_context = Context(image=image,
                                  tag=tag,
                                  docker_host=docker_host)
            self.scheduler.add_once(feature="hosted_docker",
                                    method="pull_image",
                                    context=job_context,
                                    seconds=3)
def create_virtual_machine_dm_true(self, context):
    """Deployment-created callback: record the deployment, then poll the
    virtual machine until it is ready."""
    unit = context.template_unit
    cloud_service_name = unit.get_cloud_service_name()
    deployment_slot = unit.get_deployment_slot()
    deployment_name = unit.get_deployment_name()
    virtual_machine_name = '%s-%d' % (unit.get_virtual_machine_name(),
                                      context.experiment_id)

    m = '%s [%s] created' % (AZURE_RESOURCE_TYPE.DEPLOYMENT, deployment_slot)
    commit_azure_deployment(deployment_name, deployment_slot,
                            ADStatus.RUNNING, cloud_service_name,
                            context.experiment_id)
    commit_azure_log(context.experiment_id, ALOperation.CREATE_DEPLOYMENT,
                     ALStatus.END, m, 0)
    self.log.debug(m)

    # query virtual machine status
    args_context = Context(
        cloud_service_name=cloud_service_name,
        deployment_name=deployment_name,
        virtual_machine_name=virtual_machine_name,
        status=AVMStatus.READY_ROLE,
        feature='azure_vm_service',
        true_method='create_virtual_machine_async_true_2',
        method_args_context=context)
    self.scheduler.add_once(feature='azure_service',
                            method='query_virtual_machine_status',
                            context=args_context,
                            seconds=3)
def stop_virtual_machine_async_true(self, context):
    """Stop-VM async callback: poll until the VM reaches the wanted status.

    :param context: contains azure_key_id, experiment_id, template_unit,
        need_status
    """
    unit = context.template_unit
    cloud_service_name = unit.get_cloud_service_name()
    deployment_slot = unit.get_deployment_slot()
    deployment_name = self.azure_adapter.get_deployment_name(
        context.azure_key_id, cloud_service_name, deployment_slot)
    vm_name = '%s-%d' % (unit.get_virtual_machine_name(),
                         context.experiment_id)

    query_context = Context(
        cloud_service_name=cloud_service_name,
        deployment_slot=deployment_slot,
        deployment_name=deployment_name,
        virtual_machine_name=vm_name,
        status=context.need_status,
        feature='azure_vm_service',
        true_method='stop_virtual_machine_vm_true',
        method_args_context=context)
    self.scheduler.add_once(feature='azure_service',
                            method='query_virtual_machine_status',
                            context=query_context,
                            seconds=VIRTUAL_MACHINE_TICK)
def create_virtual_machine_vm_true_1(self, context):
    """VM-ready callback: for vm-images, push the network config update and
    poll it; otherwise finish VM creation directly."""
    unit = context.template_unit
    if not unit.is_vm_image():
        self.__create_virtual_machine_helper(context.azure_key_id,
                                             context.experiment_id,
                                             context.template_unit)
        return

    # check updating network_config operation
    cloud_service_name = unit.get_cloud_service_name()
    deployment_slot = unit.get_deployment_slot()
    deployment_name = self.azure_adapter.get_deployment_name(
        cloud_service_name, deployment_slot)
    virtual_machine_name = '%s-%d' % (unit.get_virtual_machine_name(),
                                      context.experiment_id)
    network_config = unit.get_network_config(self.azure_adapter, True)
    result = self.azure_adapter.update_virtual_machine_network_config(
        cloud_service_name, deployment_name, virtual_machine_name,
        network_config)

    query_context = Context(
        request_id=result.id,
        azure_key_id=context.azure_key_id,
        feature='azure_vm_service',
        true_method='create_virtual_machine_async_true_2',
        false_method='create_virtual_machine_async_false_2',
        method_args_context=context)
    self.scheduler.add_once(feature='azure_service',
                            method='query_async_operation_status',
                            context=query_context,
                            seconds=3)
def check_hackathon_for_pre_allocate_expr(self):
    """Check all hackathons for pre-allocation.

    Adds an interval job for a hackathon when its pre-allocate flag is
    enabled; otherwise removes any existing schedule job.
    """
    for hack in self.db.find_all_objects(Hackathon):
        job_id = "pre_allocate_expr_" + str(hack.id)
        job_exists = self.scheduler.has_job(job_id)

        if not hack.is_pre_allocate_enabled():
            if job_exists:
                self.log.debug(
                    "remove job for hackathon %s since pre_allocate is disabled"
                    % str(hack.id))
                self.scheduler.remove_job(job_id)
            continue

        if job_exists:
            self.log.debug(
                "pre_allocate job already exists for hackathon %s" %
                str(hack.id))
            continue

        self.log.debug("add pre_allocate job for hackathon %s" %
                       str(hack.id))
        # first run is offset by hack.id * 10 seconds
        next_run_time = self.util.get_now() + timedelta(seconds=hack.id * 10)
        pre_allocate_interval = self.__get_pre_allocate_interval(hack)
        self.scheduler.add_interval(feature="expr_manager",
                                    method="pre_allocate_expr",
                                    id=job_id,
                                    context=Context(hackathon_id=hack.id),
                                    next_run_time=next_run_time,
                                    seconds=pre_allocate_interval)
    return True
def __ensure_images_for_hackathon(self, hackathon):
    """Maintain the image-pull schedule job for one hackathon."""
    # only ensure images when alauda is not the cloud provider
    provider = hackathon.config.get(HACKATHON_CONFIG.CLOUD_PROVIDER)
    if provider == CLOUD_PROVIDER.ALAUDA:
        self.log.debug(
            "schedule job of hackathon '%s(%d)' removed for alauda enabled" %
            (hackathon.name, hackathon.id))
        self.scheduler.remove_job(self.__get_schedule_job_id(hackathon))
        return

    job_id = self.__get_schedule_job_id(hackathon)
    job_exist = self.scheduler.has_job(job_id)

    # drop the job once the event has ended
    if hackathon.event_end_time < self.util.get_now():
        if job_exist:
            self.scheduler.remove_job(job_id)
        return

    if job_exist:
        self.log.debug("job %s existed" % job_id)
        return

    self.log.debug(
        "adding schedule job to ensure images for hackathon %s" %
        hackathon.name)
    next_run_time = self.util.get_now() + timedelta(seconds=3)
    self.scheduler.add_interval(feature="hackathon_template_manager",
                                method="pull_images_for_hackathon",
                                id=job_id,
                                context=Context(hackathon_id=hackathon.id),
                                next_run_time=next_run_time,
                                minutes=60)
def test_creat_hosted_service(self):
    # NOTE(review): "creat" typo kept so the test name stays stable
    create_mock = Mock()
    wait_mock = Mock()
    self.service.service.create_cloud_service = create_mock
    self.service.service.wait_for_operation_status = wait_mock
    create_mock.return_value = Context(request_id=test_conf.meanless_id)
    wait_mock.return_value = Context(status=ASYNC_OP_RESULT.SUCCEEDED)

    # happy path: creation succeeds
    self.assertTrue(
        self.service.create_cloud_service(test_conf.meanless_name,
                                          test_conf.meanless_name,
                                          test_conf.meanless_name))

    # Azure raising an HTTP error must surface as False
    create_mock.side_effect = AzureHttpError(233, 233)
    self.assertFalse(
        self.service.create_cloud_service(test_conf.meanless_name,
                                          test_conf.meanless_name,
                                          test_conf.meanless_name))
def stop_expr(self, expr_id, force=0):
    """Stop (and optionally delete) a running experiment.

    :param expr_id: experiment id
    :param force: 0: only stop container and release ports, 1: force stop and delete container and release ports.
    :return: ok response on success, internal_server_error on failure
    """
    self.log.debug("begin to stop %d" % expr_id)
    # only RUNNING experiments are eligible for stopping
    expr = self.db.find_first_object_by(Experiment, id=expr_id, status=EStatus.RUNNING)
    if expr is not None:
        # Docker
        docker = self.docker.get_docker(expr.hackathon)
        if expr.template.provider == VE_PROVIDER.DOCKER:
            # stop containers one by one; any failure rolls back the whole
            # experiment and aborts
            for c in expr.virtual_environments.all():
                try:
                    self.log.debug("begin to stop %s" % c.name)
                    if force:
                        docker.delete(c.name,
                                      virtual_environment=c,
                                      container=c.container,
                                      expr_id=expr_id)
                        c.status = VEStatus.DELETED
                    else:
                        docker.stop(c.name,
                                    virtual_environment=c,
                                    container=c.container,
                                    expr_id=expr_id)
                        c.status = VEStatus.STOPPED
                except Exception as e:
                    self.log.error(e)
                    self.__roll_back(expr_id)
                    return internal_server_error(
                        'Failed stop/delete container')
            # mirror the per-container status on the experiment itself
            if force:
                expr.status = EStatus.DELETED
            else:
                expr.status = EStatus.STOPPED
            self.db.commit()
        else:
            try:
                # todo support delete azure vm
                azure_key_id = self.docker.load_azure_key_id(expr.id)
                context = Context(azure_key_id=azure_key_id,
                                  experiment_id=expr.id,
                                  action=AVMStatus.STOPPED_DEALLOCATED)
                self.azure_vm_service.stop_vm_entry(context)
            except Exception as e:
                self.log.error(e)
                return internal_server_error('Failed stopping azure')
        self.log.debug("experiment %d ended success" % expr_id)
        return ok('OK')
    else:
        # not found or not RUNNING -- nothing to stop
        return ok('expr not exist')
def __on_add_virtual_machine_success(self, sctx):
    """Record the created VM for rollback, then wait until it is ready."""
    ctx = sctx.job_ctxs[sctx.current_job_index]

    # add virtual machine success: record it so rollback can find it
    record = Context(type=REMOTE_CREATED_RECORD.TYPE_ADD_VIRTUAL_MACHINE,
                     cloud_service_name=ctx.cloud_service_name,
                     deployment_name=ctx.deployment_name,
                     virtual_machine_name=ctx.virtual_machine_name)
    sctx.remote_created.append(record)

    self.__wait_for_virtual_machine_ready(sctx)
def __start_new_expr(self, hackathon, template, user_id):
    """Create and start a new experiment for the given template.

    :return: expr status report on success, None when the pre-allocate
        quota is already satisfied, or an error response on failure
    """
    # new expr
    expr = self.db.add_object_kwargs(Experiment,
                                     user_id=user_id,
                                     hackathon_id=hackathon.id,
                                     status=EStatus.INIT,
                                     template_id=template.id)
    self.db.commit()

    # count pre-allocated experiments already starting/running
    curr_num = self.db.count(
        Experiment, Experiment.user_id == ReservedUser.DefaultUserID,
        Experiment.template == template,
        (Experiment.status == EStatus.STARTING) |
        (Experiment.status == EStatus.RUNNING))

    if template.provider == VE_PROVIDER.DOCKER:
        try:
            template_dic = self.template_manager.load_template(template)
            virtual_environments_list = template_dic[
                BaseTemplate.VIRTUAL_ENVIRONMENTS]
            if curr_num != 0 and curr_num >= self.util.get_config(
                    "pre_allocate.docker"):
                return
            expr.status = EStatus.STARTING
            self.db.commit()
            # fix: plain loop instead of map() used only for side effects
            for virtual_environment_dic in virtual_environments_list:
                self.__remote_start_container(hackathon, expr,
                                              virtual_environment_dic)
            expr.status = EStatus.RUNNING
            self.db.commit()
        except Exception as e:
            self.log.error(e)
            self.log.error("Failed starting containers")
            self.__roll_back(expr.id)
            return internal_server_error('Failed starting containers')
    else:
        if curr_num != 0 and curr_num >= self.util.get_config(
                "pre_allocate.azure"):
            return
        expr.status = EStatus.STARTING
        self.db.commit()
        try:
            azure_key_id = self.docker.load_azure_key_id(expr.id)
            context = Context(azure_key_id=azure_key_id,
                              experiment_id=expr.id)
            self.azure_vm_service.create_vm_for_expr_entry(context)
        except Exception as e:
            self.log.error(e)
            return internal_server_error('Failed starting azure vm')

    # after everything is ready, set the expr state to running
    # response to caller
    return self.__report_expr_status(expr)
def __stop_vm(self, experiment, azure_key, template_units):
    """Build a stop-job context per template unit and schedule the stops.

    :param experiment: experiment whose VMs should be stopped
    :param azure_key: credentials used to talk to Azure
    :param template_units: azure template units of the experiment
    """
    job_ctxs = []
    ctx = Context(job_ctxs=job_ctxs,
                  current_job_index=0,
                  experiment_id=experiment.id,
                  subscription_id=azure_key.subscription_id,
                  pem_url=azure_key.get_local_pem_url(),
                  management_host=azure_key.management_host)

    # fix: iterate the units directly instead of indexing via xrange
    for unit in template_units:
        job_ctxs.append(
            Context(cloud_service_name=self.__get_cloud_service_name(
                        unit.get_cloud_service_name(),
                        azure_key.subscription_id),
                    deployment_slot=unit.get_deployment_slot(),
                    virtual_machine_name=self.__get_virtual_machine_name(
                        unit.get_virtual_machine_name(), experiment.id)))

    self.__schedule_stop(ctx)
def create_virtual_machine_async_true_3(self, context):
    """Schedule a deployment-status query after VM creation succeeded."""
    unit = context.template_unit
    args_context = Context(
        azure_key_id=context.azure_key_id,
        cloud_service_name=unit.get_cloud_service_name(),
        deployment_name=unit.get_deployment_name(),
        feature='azure_vm_service',
        true_method='create_virtual_machine_dm_true',
        method_args_context=context)
    self.scheduler.add_once(feature='azure_service',
                            method='query_deployment_status',
                            context=args_context,
                            seconds=3)
def __start_new_expr(self, hackathon, template, user):
    """Start a new experiment through the matching starter.

    :raise PreconditionFailed: when no starter supports the template or
        hackathon resources are not configured
    """
    starter = self.get_starter(hackathon, template)
    if not starter:
        raise PreconditionFailed(
            "either template not supported or hackathon resource not configured"
        )

    # NOTE(review): some starters return None from start_expr after a
    # rollback -- confirm this caller never hits that path
    context = starter.start_expr(
        Context(template=template, user=user, hackathon=hackathon))
    return self.__report_expr_status(context.experiment)
def __create_hackathon(self, creator, context):
    """Insert a hackathon and its creator (admin, of course) into the db.

    Default config values are enforced during creation.

    :type context: Context
    :param context: args used to create the new hackathon

    :rtype: Hackathon
    :return: the new hackathon instance
    """
    new_hack = Hackathon(
        name=context.name,
        display_name=context.display_name,
        ribbon=context.get("ribbon"),
        description=context.get("description"),
        short_description=context.get("short_description"),
        location=context.get("location"),
        banners=context.get("banners", []),
        status=HACK_STATUS.INIT,
        creator=creator,
        type=context.get("type", HACK_TYPE.HACKATHON),
        config=context.get("config", Context()).to_dict(),
        tags=context.get("tags", []),
        event_start_time=context.get("event_start_time"),
        event_end_time=context.get("event_end_time"),
        registration_start_time=context.get("registration_start_time"),
        registration_end_time=context.get("registration_end_time"),
        judge_start_time=context.get("judge_start_time"),
        judge_end_time=context.get("judge_end_time"))

    # basic xss prevention; description may be None
    if new_hack.description:
        new_hack.description = self.cleaner.clean_html(new_hack.description)
    new_hack.save()

    # register the current login user as admin and creator
    try:
        admin = UserHackathon(user=creator,
                              hackathon=new_hack,
                              role=HACK_USER_TYPE.ADMIN,
                              status=HACK_USER_STATUS.AUTO_PASSED,
                              remark='creator')
        admin.save()
    except Exception as ex:
        # TODO: send out a email to remind administrator to deal with this problems
        self.log.error(ex)
        raise InternalServerError(
            "fail to create the default administrator")

    return new_hack
def get_available_docker_host(self, hackathon):
    """Find a docker host server with spare container capacity.

    :return: Context whose state is SUCCESS (with docker_host_server),
        ONGOING (retry later) or FAILED
    """
    vms = DockerHostServer.objects.filter(__raw__={'$where': 'this.container_count+1 < this.container_max_count'}) \
        .filter(hackathon=hackathon, state=DockerHostServerStatus.DOCKER_READY, disabled=False).all()

    # local development: take the first candidate without lock checks
    if self.util.is_local():
        if len(vms) > 0:
            return Context(state=DHS_QUERY_STATE.SUCCESS,
                           docker_host_server=vms[0])
        return Context(state=DHS_QUERY_STATE.FAILED)

    has_locked_host = False
    for host in vms:
        # check docker status
        if not self.docker.ping(host):
            host.state = DockerHostServerStatus.UNAVAILABLE
            host.save()
            continue
        # cloud service locked?
        if self.is_host_server_locked(host):
            has_locked_host = True
            continue
        return Context(state=DHS_QUERY_STATE.SUCCESS,
                       docker_host_server=host)

    if has_locked_host:
        # still has available host but locked
        return Context(state=DHS_QUERY_STATE.ONGOING)
    if self.start_new_docker_host_vm(hackathon):
        # new VM is starting
        return Context(state=DHS_QUERY_STATE.ONGOING)
    # no VM found or starting
    return Context(state=DHS_QUERY_STATE.FAILED)
def stop_expr(self, expr_id):
    """Stop an experiment through the matching starter.

    :param expr_id: experiment id
    :return: ok response; 'OK' when the experiment existed
    """
    self.log.debug("begin to stop %s" % str(expr_id))
    expr = Experiment.objects(id=expr_id).first()
    if expr is None:
        return ok()

    starter = self.get_starter(expr.hackathon, expr.template)
    if starter:
        starter.stop_expr(Context(experiment_id=expr.id, experiment=expr))
    self.log.debug("experiment %s ended success" % expr_id)
    return ok('OK')
def __start_vm(self, experiment, hackathon, template_units):
    """Create virtual environments for the template units and schedule the
    serial Azure setup jobs.

    :param experiment: experiment being started (saved before scheduling)
    :param hackathon: hackathon owning the azure keys
    :param template_units: azure template units to set up
    """
    azure_keys = hackathon.azure_keys
    # TODO: which key to use?
    azure_key = azure_keys[0]
    experiment.azure_key = azure_key

    # job context
    job_ctxs = []
    ctx = Context(
        job_ctxs=job_ctxs,
        current_job_index=0,
        subscription_id=azure_key.subscription_id,
        pem_url=azure_key.get_local_pem_url(),
        management_host=azure_key.management_host,
        experiment_id=experiment.id,

        # remote_created stores the resources we create remotely
        # so we can do rollback
        # TODO: if the user creates a virtual machine with vm_image, we have
        # to config its network, but so far we have no way to roll back the
        # network settings of it
        remote_created=[])

    # Create virtual environments for the units and set up each job context.
    # The setup of each unit must be SERIALLY EXECUTED to avoid creating the
    # same resource at the same time.
    # TODO: we still haven't avoided parallel execution of the setup of the
    # same template
    # fix: iterate the units directly instead of indexing via xrange(len())
    for unit in template_units:
        vm_name = self.__get_virtual_machine_name(
            unit.get_virtual_machine_name(), experiment.id)

        # set up virtual environment
        experiment.virtual_environments.append(
            VirtualEnvironment(provider=VE_PROVIDER.AZURE,
                               name=vm_name,
                               image=unit.get_image_name(),
                               status=VEStatus.INIT,
                               remote_provider=VERemoteProvider.Guacamole))

        # construct job context
        job_ctxs.append(
            self.__construct_setup_job_context(unit, azure_key, vm_name))

    # save constructed experiment, and execute from the first job context
    experiment.save()
    self.__schedule_setup(ctx)
def roll_back(self, expr_id):
    """Roll back an experiment after an exception occurred.

    :param expr_id: experiment id
    """
    self.log.debug("Starting rollback experiment %s..." % expr_id)
    # fix: Experiment.objects(...) yields a queryset, not a document;
    # take .first() (same pattern as stop_expr) so expr.hackathon and
    # expr.template below resolve on the document
    expr = Experiment.objects(id=expr_id).first()
    if not expr:
        self.log.warn("rollback failed due to experiment not found")
        return

    starter = self.get_starter(expr.hackathon, expr.template)
    if not starter:
        self.log.warn("rollback failed due to no starter found")
        return

    return starter.rollback(Context(experiment=expr))