def _airflow_docker_build(conf: AiscalatorConfig):
    """
    Build the aiscalator/airflow docker image and tag it.

    The image is built as ``aiscalator/airflow:latest``. When an image ID
    can be extracted from the build log, the image is tagged a second time
    with the first 12 characters of ``utils.sha256`` applied to the
    Dockerfile path.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    Returns
    -------
    str
        the 12-character tag given to the image, or None when no image ID
        was found in the build log
    """
    logger = logging.getLogger(__name__)
    # TODO get airflow dockerfile from conf?
    conf.app_config_home()
    dockerfile_dir = utils.data_file("../config/docker/airflow")
    # TODO customize dockerfile with apt_packages, requirements etc
    docker_gid, docker_group = _find_docker_gid_group()

    build_cmd = ["docker", "build"]
    build_args = (
        ("DOCKER_GID", docker_gid),
        ("DOCKER_GROUP", docker_group),
    )
    for arg_name, arg_value in build_args:
        build_cmd += ["--build-arg", arg_name + "=" + str(arg_value)]
    build_cmd += ["--rm", "-t", "aiscalator/airflow:latest", dockerfile_dir]

    # The analyzer scans the build output for the "Successfully built" line
    analyzer = LogRegexAnalyzer(b'Successfully built ([a-zA-Z0-9]+)\n')
    logger.info("Running...: %s", " ".join(build_cmd))
    utils.subprocess_run(build_cmd, log_function=analyzer.grep_logs)

    image_id = analyzer.artifact()
    if not image_id:
        return None

    # tag the image built with the sha256 of the dockerfile
    tag = utils.sha256(join(dockerfile_dir, 'Dockerfile'))[:12]
    tag_cmd = ["docker", "tag", image_id, "aiscalator/airflow:" + tag]
    logger.info("Running...: %s", " ".join(tag_cmd))
    utils.subprocess_run(tag_cmd)
    return tag
def _docker_compose(conf: AiscalatorConfig,
                    extra_commands: list):
    """
    Run the docker-compose command

    All the existing env files listed by the configuration are merged
    into a single ``.env`` file, which is then placed next to the
    docker-compose file before docker-compose is invoked.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    extra_commands : list
        list of sub-commands to run in docker-compose

    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    dockerfile = join(conf.app_config_home(), "config",
                      conf.airflow_docker_compose_file())
    # Assemble the merged .env file in a temp folder first, then move it
    # next to the docker-compose file once it is completely written.
    with TemporaryDirectory(prefix="aiscalator_") as tmp:
        merged_env = join(tmp, ".env")
        with open(merged_env, mode="w") as env_file:
            # concatenate all the env files into one
            for env in conf.user_env_file(conf.dag_field("definition.env")):
                if not isfile(env):
                    continue
                # Stays "\n" for an empty file so nothing extra is written
                last_line = "\n"
                with open(env, mode="r") as file:
                    for last_line in file:
                        env_file.write(last_line)
                # A file without a trailing newline would otherwise have
                # its last entry glued to the first entry of the next file
                if not last_line.endswith("\n"):
                    env_file.write("\n")
        # env_file is closed (flushed) here and tmp still exists
        utils.copy_replace(merged_env,
                           join(dirname(dockerfile), ".env"))
    commands = ["docker-compose", "-f", dockerfile] + list(extra_commands)
    logger.info("Running...: %s", " ".join(commands))
    utils.subprocess_run(commands, no_redirect=True)
def _run_build(conf: AiscalatorConfig):
    """
    Build the docker image from the current directory and optionally tag it.

    The image is tagged ``<name>:latest`` when the step defines
    ``docker_image.output_docker_name``. If the build reports an image ID
    and the step also defines ``docker_image.output_docker_tag``, the image
    is additionally tagged ``<name>:<tag>``.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for this step

    Returns
    -------
    str
        the value extracted from the "Successfully built" line of the
        build log (the docker image ID), as reported by the log analyzer
    """
    logger = logging.getLogger(__name__)

    image_name = None
    build_cmd = ["docker", "build", "--rm"]
    if conf.has_step_field("docker_image.output_docker_name"):
        image_name = conf.step_field("docker_image.output_docker_name")
        build_cmd.extend(["-t", image_name + ":latest"])
    build_cmd.append(".")

    analyzer = LogRegexAnalyzer(b'Successfully built ([a-zA-Z0-9]+)\n')
    logger.info("Running...: %s", " ".join(build_cmd))
    utils.subprocess_run(build_cmd, log_function=analyzer.grep_logs)
    image_id = analyzer.artifact()

    # Extra tagging needs a built image, an image name and a configured tag
    if not image_id or image_name is None:
        return image_id
    if not conf.has_step_field("docker_image.output_docker_tag"):
        return image_id

    image_tag = conf.step_field("docker_image.output_docker_tag")
    tag_cmd = ["docker", "tag", image_id, image_name + ":" + image_tag]
    # TODO implement docker tag output_docker_tag_commit_hash
    logger.info("Running...: %s", " ".join(tag_cmd))
    utils.subprocess_run(tag_cmd)
    return image_id
def jupyter_run(conf: AiscalatorConfig, prepare_only=False,
                param=None,
                param_raw=None):
    """
    Executes the step in browserless mode using papermill

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    prepare_only : bool
        Indicates if papermill should replace the parameters of the
        notebook only or it should execute all the cells too
    param : iterable
        Optional pairs; each one is forwarded to papermill as
        ``-p <first> <second>``
    param_raw : iterable
        Optional pairs; each one is forwarded to papermill as
        ``-r <first> <second>``

    Returns
    -------
    string
        the path to the output notebook resulting from the execution
        of this step

    Raises
    ------
    Exception
        when building the docker image did not produce an image
    """
    logger = logging.getLogger(__name__)
    conf.validate_config()

    docker_image = build(conf)
    if not docker_image:
        raise Exception("Failed to build docker image")

    source_notebook, _ = notebook_file(conf.step_file_path('task.code_path'))
    # Path under which the notebook is addressed in the papermill command
    notebook = os.path.join("/home/jovyan/work/notebook/",
                            os.path.basename(source_notebook))
    notebook_output = conf.step_notebook_output_path(notebook)

    papermill_cmd = [
        docker_image, "bash", "start-papermill.sh",
        "papermill",
        notebook, notebook_output,
    ]
    commands = _prepare_docker_env(conf, papermill_cmd, "run")
    if prepare_only:
        commands.append("--prepare-only")

    # Parameters declared in the step configuration come first
    step_parameters = conf.step_extract_parameters()
    if step_parameters:
        commands.extend(step_parameters)
    # ... followed by the ones handed in by the caller
    for pair in param or ():
        commands.extend(["-p", pair[0], pair[1]])
    for raw_pair in param_raw or ():
        commands.extend(["-r", raw_pair[0], raw_pair[1]])

    analyzer = LogRegexAnalyzer()
    logger.info("Running...: %s", " ".join(commands))
    subprocess_run(commands, log_function=analyzer.grep_logs)

    execution_dir = conf.step_file_path('task.execution_dir_path')
    return os.path.join(execution_dir, os.path.basename(notebook_output))