def jupyter_edit(conf: AiscalatorConfig, param=None, param_raw=None):
    """
    Starts a Jupyter Lab environment configured to edit the focused step

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    param : list
        list of tuples of parameters
    param_raw : list
        list of tuples of raw parameters

    Returns
    -------
    string
        Url of the running jupyter lab

    Raises
    ------
    Exception
        when the docker image for the step cannot be built
    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    docker_image = build(conf)
    if not docker_image:
        raise Exception("Failed to build docker image")
    # TODO: shutdown other jupyter lab still running
    notebook, _ = notebook_file(conf.step_field('task.code_path'))
    notebook = os.path.basename(notebook)
    if conf.step_extract_parameters():
        # Pre-fill the notebook's parameter cell (prepare_only) before
        # the user opens it in the lab environment.
        jupyter_run(conf, prepare_only=True,
                    param=param,
                    param_raw=param_raw)
    commands = _prepare_docker_env(
        conf, [docker_image, "start.sh", 'jupyter', 'lab'], "edit")
    return wait_for_jupyter_lab(commands, logger, notebook,
                                10000, "work/notebook")
def _docker_compose(conf: AiscalatorConfig, extra_commands: list):
    """
    Run the docker-compose command

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    extra_commands : list
        list of sub-commands to run in docker-compose
    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    dockerfile = join(conf.app_config_home(), "config",
                      conf.airflow_docker_compose_file())
    commands = ["docker-compose"]
    # Stage the merged .env in a throwaway folder so it never lingers
    with TemporaryDirectory(prefix="aiscalator_") as tmp:
        tmp_env = join(tmp, ".env")
        with open(tmp_env, mode="w") as env_file:
            # Concatenate every user-provided env file into a single one
            for env in conf.user_env_file(conf.dag_field("definition.env")):
                if isfile(env):
                    with open(env, mode="r") as file:
                        env_file.writelines(file)
        # Publish the merged env file next to the docker-compose file
        utils.copy_replace(tmp_env, join(dirname(dockerfile), ".env"))
        commands += ["-f", dockerfile] + extra_commands
        logger.info("Running...: %s", " ".join(commands))
        utils.subprocess_run(commands, no_redirect=True)
def airflow_edit(conf: AiscalatorConfig):
    """
    Starts an airflow environment

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    Returns
    -------
    string
        Url of the running jupyter lab

    Raises
    ------
    Exception
        when the airflow docker image cannot be built
    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    docker_image = _airflow_docker_build(conf)
    if not docker_image:
        raise Exception("Failed to build docker image")
    # TODO: shutdown other jupyter lab still running
    port = 10001
    notebook = basename(conf.dag_field('definition.code_path'))
    notebook, notebook_py = utils.notebook_file(notebook)
    # The .py twin of the notebook is linked from work/ into dags/ so
    # airflow picks it up while it is being edited.
    commands = _prepare_docker_env(conf, [
        "aiscalator/airflow:" + docker_image, "bash",
        "/start-jupyter.sh",
        "/usr/local/airflow/work/" + notebook_py +
        ":/usr/local/airflow/dags/" + notebook_py
    ], port)
    return utils.wait_for_jupyter_lab(commands, logger, notebook,
                                      port, "work")
def jupyter_run(conf: AiscalatorConfig, prepare_only=False,
                param=None, param_raw=None):
    """
    Executes the step in browserless mode using papermill

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    prepare_only : bool
        Indicates if papermill should replace the parameters of the
        notebook only or it should execute all the cells too
    param : list
        list of tuples of parameters
    param_raw : list
        list of tuples of raw parameters

    Returns
    -------
    string
        the path to the output notebook resulting from the execution
        of this step

    Raises
    ------
    Exception
        when the docker image for the step cannot be built
    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    docker_image = build(conf)
    if not docker_image:
        raise Exception("Failed to build docker image")
    notebook, _ = notebook_file(conf.step_file_path('task.code_path'))
    # Path of the notebook as seen from inside the container
    notebook = os.path.join("/home/jovyan/work/notebook/",
                            os.path.basename(notebook))
    notebook_output = conf.step_notebook_output_path(notebook)
    commands = _prepare_docker_env(
        conf, [
            docker_image, "bash", "start-papermill.sh", "papermill",
            notebook, notebook_output
        ],
        "run_" + conf.step_name() + "_"
        # add timestamp to name to handle multiple concurrent runs
        + datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
    if prepare_only:
        commands.append("--prepare-only")
    parameters = conf.step_extract_parameters()
    if parameters:
        commands += parameters
    if param:
        for parameter in param:
            commands += ["-p", parameter[0], parameter[1]]
    if param_raw:
        for raw_parameter in param_raw:
            commands += ["-r", raw_parameter[0], raw_parameter[1]]
    log = LogRegexAnalyzer()
    logger.info("Running...: %s", " ".join(commands))
    returncode = subprocess_run(commands, log_function=log.grep_logs)
    if returncode:
        # Lazy %-formatting defers message construction to the logger
        logger.error("Run was not successful, "
                     "returned status code is: %s", returncode)
        sys.exit(returncode)
    return os.path.join(conf.step_file_path('task.execution_dir_path'),
                        os.path.basename(notebook_output))
def airflow_setup(conf: AiscalatorConfig, config_home: str, workspace: list,
                  append: bool = True):
    """
    Setup the airflow configuration files and environment

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    config_home : str
        path to the configuration home directory
    workspace : list
        List of path to directories to mount as volumes
        to airflow workers to use as workspaces
    append : bool
        flag to tell if workspace should be appended to
        the list in the config or replace it.

    Raises
    ------
    Exception
        when the airflow docker image cannot be built
    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    if config_home:
        makedirs(config_home, exist_ok=True)
        conf.redefine_app_config_home(config_home)
    ws_path = "airflow.setup.workspace_paths"
    if conf.app_config_has(ws_path) and append:
        workspace += conf.app_config()[ws_path]
    conf.redefine_airflow_workspaces(workspace)
    image = 'latest'
    if _docker_compose_grep(conf):
        image = _airflow_docker_build(conf)
        if not image:
            raise Exception("Failed to build docker image")
    src = utils.data_file("../config/docker/airflow/config/")
    dst = join(conf.app_config_home(), "config")
    logger.info("Generating a new configuration folder for aiscalator:\n\t%s",
                dst)
    makedirs(dst, exist_ok=True)
    makedirs(join(conf.app_config_home(), "dags"), exist_ok=True)
    makedirs(join(conf.app_config_home(), "pgdata"), exist_ok=True)
    makedirs(join(conf.app_config_home(), "workspace"), exist_ok=True)
    # Placeholders in the template files that get substituted below
    pattern = [
        r"(\s+)# - workspace #",
        "aiscalator/airflow:latest",
    ]
    mounts = []
    for entry in conf.app_config()[ws_path]:
        host_path, _mount_point = _split_workspace_string(conf, entry)
        # bind the same path from host in the container (after creating a
        # symbolic link at container_dst path)
        mounts.append(r"\1- " + host_path + ':' + host_path)
    # Keep the placeholder comment so the file can be regenerated later
    mounts.append(r"\1# - workspace #")
    value = [
        "\n".join(mounts),
        "aiscalator/airflow:" + image,
    ]
    for template in listdir(src):
        utils.copy_replace(join(src, template), join(dst, template),
                           pattern=pattern, replace_value=value)