def submit_experiment(template: str, name: str = None, run_kind: RunKinds = RunKinds.TRAINING,
                      script_location: str = None, script_parameters: Tuple[str, ...] = None,
                      pack_params: List[Tuple[str, str]] = None,
                      parameter_range: List[Tuple[str, str]] = None,
                      parameter_set: Tuple[str, ...] = None,
                      script_folder_location: str = None,
                      env_variables: List[str] = None,
                      requirements_file: str = None) -> Tuple[List[Run], Dict[str, str], str]:
    """
    Prepare local environments for all runs of an experiment and submit them.

    The namespace is taken from the current kubectl context. The function opens
    a K8sProxy to the docker registry application (plus a socat tunnel on
    Windows and macOS), prepares one environment folder per run, creates the
    Experiment object and then creates and submits every Run.

    Side effects: sets the module-level ``submitted_namespace`` and
    ``submitted_experiment`` variables, appends created runs to the module-level
    ``submitted_runs`` list and installs ``ctrl_c_handler_for_submit`` as the
    SIGINT and SIGTERM handler.

    :param template: name of the pack/template; used as template name of the
                     runs and of the experiment and as pack type of environments
    :param name: requested experiment name, forwarded to
                 generate_exp_name_and_labels
    :param run_kind: kind of the runs; its value is stored in the ``runKind``
                     label of every run
    :param script_location: location of the script; forwarded to
                            generate_exp_name_and_labels and to
                            prepare_experiment_environment
    :param script_parameters: parameters passed to the script in every run
    :param pack_params: (name, value) pairs forwarded to
                        prepare_experiment_environment and stored as annotations
                        of every run; ``None`` results in empty annotations
    :param parameter_range: (name, value) pairs of the ``-pr`` option
    :param parameter_set: values of the ``-ps`` option
    :param script_folder_location: forwarded to prepare_experiment_environment
    :param env_variables: forwarded to prepare_experiment_environment
    :param requirements_file: forwarded to prepare_experiment_environment
    :return: tuple of (list of runs, dictionary mapping names of failed runs to
             the text of the error, script location as returned by the last
             call of prepare_experiment_environment)
    :raises SubmitExperimentError: when the namespace cannot be read, when the
            preparation of runs or of their environments fails, when the proxy
            cannot be opened or on any other unexpected error
    :raises K8sProxyCloseError: when closing the proxy or the socat tunnel fails
    :raises SystemExit: when the user declines the submit confirmation shown
            for parameter range / parameter set submissions
    """
    # Function-scope import keeps this block self-contained; sys.exit() is used
    # instead of the interactive-only exit() helper injected by the site module.
    import sys

    global submitted_namespace, submitted_experiment

    script_parameters = script_parameters if script_parameters else ()
    parameter_set = parameter_set if parameter_set else ()
    parameter_range = parameter_range if parameter_range else []

    log.debug("Submit experiment - start")
    try:
        namespace = get_kubectl_current_context_namespace()
        submitted_namespace = namespace
    except Exception as exc:
        message = Texts.GET_NAMESPACE_ERROR_MSG
        log.exception(message)
        raise SubmitExperimentError(message) from exc

    try:
        with spinner(text=Texts.PREPARING_RESOURCE_DEFINITIONS_MSG):
            experiment_name, labels = generate_exp_name_and_labels(script_name=script_location,
                                                                   namespace=namespace, name=name,
                                                                   run_kind=run_kind)
            runs_list = prepare_list_of_runs(experiment_name=experiment_name,
                                             parameter_range=parameter_range,
                                             parameter_set=parameter_set, template_name=template)
    except SubmitExperimentError as exe:
        log.exception(str(exe))
        raise
    except Exception as exc:
        message = Texts.SUBMIT_PREPARATION_ERROR_MSG
        log.exception(message)
        raise SubmitExperimentError(message) from exc

    submitted_experiment = experiment_name

    # Ctrl-C handling
    signal.signal(signal.SIGINT, ctrl_c_handler_for_submit)
    signal.signal(signal.SIGTERM, ctrl_c_handler_for_submit)

    try:
        config = Config()

        # start port forwarding
        # noinspection PyBroadException
        with K8sProxy(NAUTAAppNames.DOCKER_REGISTRY, port=config.local_registry_port) as proxy:
            # Save port that was actually used in configuration
            if proxy.tunnel_port != config.local_registry_port:
                config.local_registry_port = proxy.tunnel_port

            experiment_run_folders = []  # List of local directories used by experiment's runs
            try:
                # run socat if on Windows or Mac OS
                if get_current_os() in (OS.WINDOWS, OS.MACOS):
                    # noinspection PyBroadException
                    try:
                        with spinner(text=Texts.CLUSTER_CONNECTION_MSG):
                            socat.start(proxy.tunnel_port)
                    except Exception as exc:
                        error_msg = Texts.LOCAL_DOCKER_TUNNEL_ERROR_MSG
                        log.exception(error_msg)
                        raise SubmitExperimentError(error_msg) from exc

                cluster_registry_port = get_app_service_node_port(nauta_app_name=NAUTAAppNames.DOCKER_REGISTRY)

                # prepare environments for all experiment's runs
                for experiment_run in runs_list:
                    if script_parameters and experiment_run.parameters:
                        current_script_parameters = script_parameters + experiment_run.parameters
                    elif script_parameters:
                        current_script_parameters = script_parameters
                    elif experiment_run.parameters:
                        current_script_parameters = experiment_run.parameters
                    else:
                        current_script_parameters = ""

                    # NOTE(review): script_location is overwritten with the value returned here and
                    # is passed back in for the next run - confirm this is intended.
                    run_folder, script_location, pod_count = \
                        prepare_experiment_environment(experiment_name=experiment_name,
                                                       run_name=experiment_run.name,
                                                       local_script_location=script_location,
                                                       script_folder_location=script_folder_location,
                                                       script_parameters=current_script_parameters,
                                                       pack_type=template, pack_params=pack_params,
                                                       local_registry_port=proxy.tunnel_port,
                                                       cluster_registry_port=cluster_registry_port,
                                                       env_variables=env_variables,
                                                       requirements_file=requirements_file)
                    # Set correct pod count
                    if not pod_count or pod_count < 1:
                        raise SubmitExperimentError('Unable to determine pod count: make sure that values.yaml '
                                                    'file in your pack has podCount field with positive integer value.')
                    experiment_run.pod_count = pod_count

                    experiment_run_folders.append(run_folder)
                    script_name = None
                    if script_location is not None:
                        script_name = os.path.basename(script_location)

                    # Prepend script_name parameter to run description only for display purposes.
                    experiment_run.parameters = script_parameters if not experiment_run.parameters \
                        else experiment_run.parameters + script_parameters
                    if experiment_run.parameters and script_name:
                        experiment_run.parameters = (script_name, ) + experiment_run.parameters
                    elif script_name:
                        experiment_run.parameters = (script_name, )
            except SubmitExperimentError as e:
                log.exception(Texts.ENV_CREATION_ERROR_MSG)
                e.message += f' {Texts.ENV_CREATION_ERROR_MSG}'
                raise
            except Exception as exc:
                # any error in this step breaks execution of this command
                message = Texts.ENV_CREATION_ERROR_MSG
                log.exception(message)
                # just in case - remove folders that were created with a success
                for experiment_run_folder in experiment_run_folders:
                    delete_environment(experiment_run_folder)
                # Without this raise the submission would go on using the folders deleted above.
                raise SubmitExperimentError(message) from exc

            # if ps or pr option is used - first ask whether experiment(s) should be submitted
            if parameter_range or parameter_set:
                click.echo(Texts.CONFIRM_SUBMIT_MSG)
                click.echo(tabulate({RUN_NAME: [run.name for run in runs_list],
                                     RUN_PARAMETERS: ["\n".join(run.parameters) if run.parameters
                                                      else "" for run in runs_list]},
                                    headers=[RUN_NAME, RUN_PARAMETERS], tablefmt="orgtbl"))

                if not click.confirm(Texts.CONFIRM_SUBMIT_QUESTION_MSG, default=True):
                    for experiment_run_folder in experiment_run_folders:
                        delete_environment(experiment_run_folder)
                    sys.exit()

            # create Experiment model
            # TODO template_name & template_namespace should be filled after Template implementation
            parameter_range_spec = [f'-pr {param_name} {param_value}'
                                    for param_name, param_value in parameter_range]
            parameter_set_spec = [f'-ps {ps_spec}' for ps_spec in parameter_set]
            experiment_parameters_spec = list(script_parameters) + parameter_range_spec + parameter_set_spec
            experiment = experiments_model.Experiment(name=experiment_name, template_name=template,
                                                      parameters_spec=experiment_parameters_spec,
                                                      template_namespace="template-namespace")

            experiment.create(namespace=namespace, labels=labels)

            # submit runs
            run_errors = {}
            for run, run_folder in zip(runs_list, experiment_run_folders):
                try:
                    run.state = RunStatus.QUEUED
                    with spinner(text=Texts.CREATING_RESOURCES_MSG.format(run_name=run.name)):
                        # Add Run object with runKind label and pack params as annotations.
                        # pack_params defaults to None, so fall back to an empty sequence.
                        run.create(namespace=namespace, labels={'runKind': run_kind.value},
                                   annotations={pack_param_name: pack_param_value
                                                for pack_param_name, pack_param_value
                                                in (pack_params or ())})
                        submitted_runs.append(run)
                        submit_draft_pack(run_folder, namespace=namespace)
                except Exception as exe:
                    delete_environment(run_folder)
                    try:
                        run.state = RunStatus.FAILED
                        run_errors[run.name] = str(exe)
                        run.update()
                    except Exception as rexe:
                        # update of non-existing run may fail
                        log.debug(Texts.ERROR_DURING_PATCHING_RUN.format(str(rexe)))

            # Delete experiment if no Runs were submitted
            if not submitted_runs:
                click.echo(Texts.SUBMISSION_FAIL_ERROR_MSG)
                delete_k8s_object("experiment", experiment_name)

            # Change experiment status to submitted
            # NOTE(review): this update is executed also when the experiment has just been deleted
            # above - confirm whether that is intended.
            experiment.state = experiments_model.ExperimentStatus.SUBMITTED
            experiment.update()
    except LocalPortOccupiedError as exe:
        click.echo(exe.message)
        raise SubmitExperimentError(exe.message) from exe
    except K8sProxyCloseError as exe:
        log.exception('Error during closing of a proxy for a {}'.format(NAUTAAppNames.DOCKER_REGISTRY))
        raise K8sProxyCloseError(Texts.PROXY_CLOSE_ERROR_MSG) from exe
    except K8sProxyOpenError as exe:
        error_msg = Texts.PROXY_OPEN_ERROR_MSG
        log.exception(error_msg)
        raise SubmitExperimentError(error_msg) from exe
    except SubmitExperimentError:
        raise
    except Exception as exe:
        error_msg = Texts.SUBMIT_OTHER_ERROR_MSG
        log.exception(error_msg)
        raise SubmitExperimentError(error_msg) from exe
    finally:
        with spinner(text=Texts.CLUSTER_CONNECTION_CLOSING_MSG):
            # noinspection PyBroadException
            try:
                socat.stop()
            except Exception as exe:
                log.exception("Error during closing of a proxy for a local docker-host tunnel")
                raise K8sProxyCloseError(Texts.DOCKER_TUNNEL_CLOSE_ERROR_MSG) from exe
        # remove semaphores from all exp folders
        remove_sempahore(experiment_name)

    log.debug("Submit - finish")
    return runs_list, run_errors, script_location