def _airflow_docker_build(conf: AiscalatorConfig):
    """ Build the aiscalator/airflow image and return its ID."""
    logger = logging.getLogger(__name__)
    # TODO get airflow dockerfile from conf?
    conf.app_config_home()
    dockerfile_dir = utils.data_file("../config/docker/airflow")
    # TODO customize dockerfile with apt_packages, requirements etc
    docker_gid, docker_group = _find_docker_gid_group()
    commands = [
        "docker", "build",
        "--build-arg", "DOCKER_GID=" + str(docker_gid),
        "--build-arg", "DOCKER_GROUP=" + str(docker_group),
        "--rm", "-t", "aiscalator/airflow:latest",
        dockerfile_dir
    ]
    log = LogRegexAnalyzer(b'Successfully built ([a-zA-Z0-9]+)\n')
    logger.info("Running...: %s", " ".join(commands))
    utils.subprocess_run(commands, log_function=log.grep_logs)
    result = log.artifact()
    if result:
        # tag the image built with the sha256 of the dockerfile
        tag = utils.sha256(join(dockerfile_dir, 'Dockerfile'))[:12]
        commands = [
            "docker", "tag", result, "aiscalator/airflow:" + tag
        ]
        logger.info("Running...: %s", " ".join(commands))
        utils.subprocess_run(commands)
        return tag
    return None
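
# Usage sketch (not from the original module): build the airflow image and
# report the content-addressed tag. Assumes an AiscalatorConfig can be
# constructed with its defaults.
#
#     conf = AiscalatorConfig()
#     tag = _airflow_docker_build(conf)
#     if tag:
#         print("image available as aiscalator/airflow:" + tag)
#     else:
#         print("no 'Successfully built' line found in the docker output")
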
def _generate_global_config() -> str:
    """
    Generate a standard configuration file for the application in the
    user's home folder ~/.aiscalator/config/aiscalator.conf from the
    template file in aiscalator/config/template/aiscalator.conf
    """
    logger = getLogger(__name__)
    dst = os.path.join(os.path.expanduser("~"),
                       ".aiscalator/config/aiscalator.conf")
    logger.info("Generating a new configuration file for aiscalator:\n\t%s",
                dst)
    pattern = [
        "testUserID",
        "generation_date",
    ]
    replace_value = [
        generate_user_id(),
        '"' + str(datetime.utcnow()
                  .replace(tzinfo=timezone("UTC"))) + '" // in UTC timezone',
    ]
    dst_dir = os.path.dirname(dst)
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
    copy_replace(data_file("../config/template/aiscalator.conf"),
                 dst, pattern=pattern, replace_value=replace_value)
    open(os.path.join(dst_dir, "apt_packages.txt"), 'a').close()
    open(os.path.join(dst_dir, "requirements.txt"), 'a').close()
    open(os.path.join(dst_dir, "lab_extensions.txt"), 'a').close()
    return dst
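
# Usage sketch (illustrative only; the existence check is an assumption about
# how callers guard this helper, not code from the original module):
#
#     global_conf = os.path.join(os.path.expanduser("~"),
#                                ".aiscalator/config/aiscalator.conf")
#     if not os.path.isfile(global_conf):
#         global_conf = _generate_global_config()
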
def _prepare_build_dir(conf, dst, input_docker_src):
    """
    Copies all necessary files for building docker images into a
    temporary folder, substituting specific macros to handle
    customized images, such as:
    - add-apt-repository
    - apt-install packages
    - pip install packages
    - jupyter lab extensions

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for this step
    dst : str
        temporary folder where to prepare the files
    input_docker_src : str
        name of the dockerfile package to use
    """
    input_docker_dir = utils.data_file("../config/docker/" + input_docker_src)
    if conf.app_config_has("jupyter.dockerfile_src"):
        # dockerfile is redefined in application configuration
        dockerfile_src = conf.app_config()["jupyter.dockerfile_src"]
        input_docker_dir = _find_docker_src(input_docker_src, dockerfile_src)
    if isdir(input_docker_dir):
        dockerfile = input_docker_dir + "/Dockerfile"
        with TemporaryDirectory(prefix="aiscalator_") as tmp:
            stg = "jupyter.docker_image"
            allow = (conf.app_config_has(stg + ".allow_apt_repository") and
                     conf.app_config()[stg + ".allow_apt_repository"])
            if allow:
                dockerfile = _include_apt_repo(conf, dockerfile,
                                               join(tmp, "apt_repository"))
            allow = (conf.app_config_has(stg + ".allow_apt_packages") and
                     conf.app_config()[stg + ".allow_apt_packages"])
            if allow:
                dockerfile = _include_apt_package(conf, dockerfile,
                                                  join(tmp, "apt_package"))
            allow = (conf.app_config_has(stg + ".allow_requirements") and
                     conf.app_config()[stg + ".allow_requirements"])
            if allow:
                dockerfile = _include_requirements(conf, dockerfile,
                                                   join(tmp, "requirements"),
                                                   dst)
            allow = (conf.app_config_has(stg + ".allow_lab_extensions") and
                     conf.app_config()[stg + ".allow_lab_extensions"])
            if allow:
                dockerfile = _include_lab_extensions(
                    conf, dockerfile, join(tmp, "lab_extension"))
            copy(dockerfile, dst + '/Dockerfile')
        # copy the remaining files (everything except the Dockerfile)
        for file in listdir(input_docker_dir):
            if file != "Dockerfile":
                copy(join(input_docker_dir, file), join(dst, file))
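
# Usage sketch (hypothetical; "jupyter-spark" is an example package name, not
# necessarily one shipped with aiscalator): stage a docker build context in a
# temporary directory before invoking `docker build` on it.
#
#     with TemporaryDirectory(prefix="aiscalator_build_") as build_dir:
#         _prepare_build_dir(conf, build_dir, "jupyter-spark")
#         # build_dir now contains the (possibly customized) Dockerfile plus
#         # any support files from the dockerfile package
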
def jupyter_new(name, path, output_format="hocon"):
    """
    Starts a Jupyter Lab environment configured to edit a brand new step

    Parameters
    ----------
    name : str
        name of the new step
    path : str
        path to where the new step files should be created
    output_format : str
        the format of the new configuration file to produce

    Returns
    -------
    string
        Url of the running jupyter lab
    """
    step_file = os.path.join(path, name, name) + '.conf'
    if os.path.dirname(step_file):
        makedirs(os.path.dirname(step_file), exist_ok=True)
    copy_replace(data_file("../config/template/step.conf"),
                 step_file,
                 pattern="Untitled",
                 replace_value=name)
    if output_format != 'hocon':
        file = os.path.join(path, name, name) + '.' + output_format
        step_file = convert_to_format(step_file, output=file,
                                      output_format=output_format)
    notebook = os.path.join(path, name, 'notebook', name) + '.ipynb'
    if os.path.dirname(notebook):
        makedirs(os.path.dirname(notebook), exist_ok=True)
    copy_replace(data_file("../config/template/notebook.json"), notebook)
    open(os.path.join(path, name, "apt_repository.txt"), 'a').close()
    open(os.path.join(path, name, "apt_packages.txt"), 'a').close()
    open(os.path.join(path, name, "requirements.txt"), 'a').close()
    open(os.path.join(path, name, "lab_extensions.txt"), 'a').close()
    # return the url reported by jupyter_edit, as documented above
    return jupyter_edit(AiscalatorConfig(config=step_file,
                                         step_selection=name))
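
# Usage sketch (not from the original module): scaffold a step named
# "example_step" under ./workspace and open it in Jupyter Lab, relying on the
# documented return value (the Jupyter Lab URL).
#
#     url = jupyter_new("example_step", "./workspace")
#     print("Jupyter Lab running at", url)
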
def validate_config(self):
    """
    Check that all the fields required by the reference configs are
    also defined in the application configuration and in the focused
    step/dag. Otherwise raise an Exception (either
    pyhocon.ConfigMissingException or
    pyhocon.ConfigWrongTypeException)
    """
    reference = data_file("../config/template/minimum_aiscalator.conf")
    ref = pyhocon.ConfigFactory.parse_file(reference)
    msg = "In Global Application Configuration file "
    _validate_configs(self._app_conf, ref, msg,
                      missing_exception=True,
                      type_mismatch_exception=True)
    reference = data_file("../config/template/aiscalator.conf")
    ref = pyhocon.ConfigFactory.parse_file(reference)
    msg = "In Global Application Configuration file "
    _validate_configs(self._app_conf, ref, msg,
                      missing_exception=False,
                      type_mismatch_exception=True)
    if self._step_name:
        reference = data_file("../config/template/minimum_step.conf")
        ref = pyhocon.ConfigFactory.parse_file(reference)
        msg = "in step named " + self._step_name
        _validate_configs(self._step, ref["steps"]["Untitled"], msg,
                          missing_exception=True,
                          type_mismatch_exception=True)
        reference = data_file("../config/template/step.conf")
        ref = pyhocon.ConfigFactory.parse_file(reference)
        msg = "in step named " + self._step_name
        _validate_configs(self._step, ref["steps"]["Untitled"], msg,
                          missing_exception=False,
                          type_mismatch_exception=True)
    if self._dag_name:
        reference = data_file("../config/template/minimum_dag.conf")
        ref = pyhocon.ConfigFactory.parse_file(reference)
        msg = "in dag named " + self._dag_name
        _validate_configs(self._dag, ref["dags"]["Untitled"], msg,
                          missing_exception=True,
                          type_mismatch_exception=True)
        reference = data_file("../config/template/step.conf")
        ref = pyhocon.ConfigFactory.parse_file(reference)
        msg = "in dag named " + self._dag_name
        _validate_configs(self._dag, ref["dags"]["Untitled"], msg,
                          missing_exception=False,
                          type_mismatch_exception=True)
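
# Usage sketch (illustrative): validation failures surface as pyhocon
# exceptions, so callers can report them and abort early.
#
#     try:
#         conf.validate_config()
#     except (pyhocon.ConfigMissingException,
#             pyhocon.ConfigWrongTypeException) as err:
#         logging.getLogger(__name__).error("Invalid configuration: %s", err)
#         raise
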
def _find_docker_src(input_docker_src, dirs):
    """
    Finds a pre-configured dockerfile package or returns the default one.

    Parameters
    ----------
    input_docker_src : str
        name of the dockerfile package to use
    dirs : list
        list of directories to check

    Returns
    -------
    str
        path to the corresponding dockerfile package
    """
    for src in dirs:
        if isfile(join(src, input_docker_src, "Dockerfile")):
            # return the package directory that actually contains the
            # Dockerfile, consistent with the default branch below
            return join(src, input_docker_src)
    return utils.data_file("../config/docker/" + input_docker_src)
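
# Usage sketch (the directories are placeholders): look up a "custom"
# dockerfile package in user-provided locations before falling back to the
# package bundled with aiscalator.
#
#     docker_dir = _find_docker_src("custom",
#                                   ["/home/user/.aiscalator/docker",
#                                    "/opt/aiscalator/docker"])
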
def airflow_setup(conf: AiscalatorConfig,
                  config_home: str,
                  workspace: list,
                  append: bool = True):
    """
    Setup the airflow configuration files and environment

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    config_home : str
        path to the configuration home directory
    workspace : list
        list of paths to directories to mount as volumes
        to airflow workers to use as workspaces
    append : bool
        flag to tell if workspace should be appended to
        the list in the config or replace it.
    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    if config_home:
        makedirs(config_home, exist_ok=True)
        conf.redefine_app_config_home(config_home)
    ws_path = "airflow.setup.workspace_paths"
    if conf.app_config_has(ws_path):
        if append:
            workspace += conf.app_config()[ws_path]
    conf.redefine_airflow_workspaces(workspace)
    image = 'latest'
    if _docker_compose_grep(conf):
        image = _airflow_docker_build(conf)
        if not image:
            raise Exception("Failed to build docker image")
    src = utils.data_file("../config/docker/airflow/config/")
    dst = join(conf.app_config_home(), "config")
    logger.info("Generating a new configuration folder for aiscalator:\n\t%s",
                dst)
    makedirs(dst, exist_ok=True)
    makedirs(join(conf.app_config_home(), "dags"), exist_ok=True)
    makedirs(join(conf.app_config_home(), "pgdata"), exist_ok=True)
    makedirs(join(conf.app_config_home(), "workspace"), exist_ok=True)
    pattern = [
        r"(\s+)# - workspace #",
        "aiscalator/airflow:latest",
    ]
    workspace = []
    for line in conf.app_config()[ws_path]:
        host_src, container_dst = _split_workspace_string(conf, line)
        # bind the same path from host in the container (after creating a
        # symbolic link at container_dst path)
        workspace += [r"\1- " + host_src + ':' + host_src]
    workspace += [r"\1# - workspace #"]
    value = [
        "\n".join(workspace),
        "aiscalator/airflow:" + image,
    ]
    for file in listdir(src):
        utils.copy_replace(join(src, file), join(dst, file),
                           pattern=pattern, replace_value=value)
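
# Usage sketch (paths are placeholders, not defaults from the original code):
# point the setup at a config home and mount a single workspace directory,
# replacing any workspaces already recorded in the configuration.
#
#     conf = AiscalatorConfig()
#     airflow_setup(conf,
#                   config_home="/home/user/.aiscalator/airflow",
#                   workspace=["/home/user/projects/data-pipelines"],
#                   append=False)
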