Example #1
0
def _airflow_docker_build(conf: AiscalatorConfig):
    """
    Build the aiscalator/airflow docker image and tag it.

    The image is built as ``aiscalator/airflow:latest`` from the packaged
    airflow dockerfile folder. When the build output reports a successfully
    built image, that image is additionally tagged with the first 12
    characters of the sha256 of the Dockerfile.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    Returns
    -------
    str or None
        the 12-character tag applied to the image, or None when no
        built image could be found in the build output
    """
    logger = logging.getLogger(__name__)
    # TODO get airflow dockerfile from conf?
    conf.app_config_home()
    dockerfile_dir = utils.data_file("../config/docker/airflow")
    # TODO customize dockerfile with apt_packages, requirements etc
    docker_gid, docker_group = _find_docker_gid_group()

    build_cmd = ["docker", "build"]
    build_cmd += ["--build-arg", "DOCKER_GID=" + str(docker_gid)]
    build_cmd += ["--build-arg", "DOCKER_GROUP=" + str(docker_group)]
    build_cmd += ["--rm", "-t", "aiscalator/airflow:latest"]
    build_cmd.append(dockerfile_dir)

    # the analyzer captures the image reference printed by docker build
    analyzer = LogRegexAnalyzer(b'Successfully built ([a-zA-Z0-9]+)\n')
    logger.info("Running...: %s", " ".join(build_cmd))
    utils.subprocess_run(build_cmd, log_function=analyzer.grep_logs)

    image = analyzer.artifact()
    if not image:
        return None

    # tag the image built with the sha256 of the dockerfile
    tag = utils.sha256(join(dockerfile_dir, 'Dockerfile'))[:12]
    tag_cmd = ["docker", "tag", image, "aiscalator/airflow:" + tag]
    logger.info("Running...: %s", " ".join(tag_cmd))
    utils.subprocess_run(tag_cmd)
    return tag
Example #2
0
def _generate_global_config() -> str:
    """
    Create the application configuration file in the user's home folder.

    The template aiscalator/config/template/aiscalator.conf is copied to
    ~/.aiscalator/config/aiscalator.conf, substituting the ``testUserID``
    and ``generation_date`` placeholders. Empty ``apt_packages.txt``,
    ``requirements.txt`` and ``lab_extensions.txt`` files are created next
    to it when they do not exist yet.

    Returns
    -------
    str
        path to the generated configuration file
    """
    logger = getLogger(__name__)
    dst = os.path.join(os.path.expanduser("~"),
                       ".aiscalator/config/aiscalator.conf")
    logger.info("Generating a new configuration file for aiscalator:\n\t%s",
                dst)

    user_id = generate_user_id()
    now_utc = datetime.utcnow().replace(tzinfo=timezone("UTC"))
    generated_on = '"' + str(now_utc) + '" // in UTC timezone'

    dst_dir = os.path.dirname(dst)
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
    copy_replace(data_file("../config/template/aiscalator.conf"),
                 dst,
                 pattern=["testUserID", "generation_date"],
                 replace_value=[user_id, generated_on])

    # opening in append mode creates the file without truncating existing ones
    for filename in ("apt_packages.txt",
                     "requirements.txt",
                     "lab_extensions.txt"):
        with open(os.path.join(dst_dir, filename), 'a'):
            pass
    return dst
Example #3
0
def _prepare_build_dir(conf, dst, input_docker_src):
    """
    Populate a build folder with everything needed to build a docker image.

    The Dockerfile of the selected dockerfile package is passed through the
    optional customization steps enabled in the application configuration
    (under ``jupyter.docker_image``):
    - add-apt-repository
    - apt-install packages
    - pip install packages
    - jupyter lab extensions
    and the resulting Dockerfile is copied to ``dst``, followed by every
    other entry of the package folder.

    Nothing is copied when the resolved package folder is not a directory.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for this step
    dst : str
        temporary folder where to prepare the files
    input_docker_src : str
        name of the dockerfile package to use

    """
    package_dir = utils.data_file("../config/docker/" + input_docker_src)

    if conf.app_config_has("jupyter.dockerfile_src"):
        # dockerfile is redefined in application configuration
        candidates = conf.app_config()["jupyter.dockerfile_src"]
        package_dir = _find_docker_src(input_docker_src, candidates)

    if not isdir(package_dir):
        return

    stg = "jupyter.docker_image"

    def _enabled(key):
        """Tell whether the option is present and truthy in the app config."""
        return conf.app_config_has(key) and conf.app_config()[key]

    current = package_dir + "/Dockerfile"
    # intermediate Dockerfiles live in a scratch folder, so the final copy
    # must happen before the folder is cleaned up
    with TemporaryDirectory(prefix="aiscalator_") as scratch:
        if _enabled(stg + ".allow_apt_repository"):
            current = _include_apt_repo(
                conf, current, join(scratch, "apt_repository"))
        if _enabled(stg + ".allow_apt_packages"):
            current = _include_apt_package(
                conf, current, join(scratch, "apt_package"))
        if _enabled(stg + ".allow_requirements"):
            current = _include_requirements(
                conf, current, join(scratch, "requirements"), dst)
        if _enabled(stg + ".allow_lab_extensions"):
            current = _include_lab_extensions(
                conf, current, join(scratch, "lab_extension"))
        copy(current, dst + '/Dockerfile')

    # copy the other files other than Dockerfile
    extras = [entry for entry in listdir(package_dir)
              if entry != "Dockerfile"]
    for entry in extras:
        copy(join(package_dir, entry), join(dst, entry))
Example #4
0
def jupyter_new(name, path, output_format="hocon"):
    """
    Starts a Jupyter Lab environment configured to edit a brand new step

    The step folder ``<path>/<name>`` is populated with a configuration
    file generated from the step template (converted to ``output_format``
    when it is not hocon), a notebook generated from the notebook template
    and empty apt_repository/apt_packages/requirements/lab_extensions
    text files. The new step is then opened with ``jupyter_edit``.

    Parameters
    ----------
    name : str
        name of the new step
    path : str
        path to where the new step files should be created
    output_format : str
        the format of the new configuration file to produce
    Returns
    -------
    string
        Url of the running jupyter lab (the value returned by
        ``jupyter_edit``)
    """
    step_dir = os.path.join(path, name)
    step_file = os.path.join(step_dir, name) + '.conf'
    if os.path.dirname(step_file):
        makedirs(os.path.dirname(step_file), exist_ok=True)
    copy_replace(data_file("../config/template/step.conf"),
                 step_file,
                 pattern="Untitled",
                 replace_value=name)
    if output_format != 'hocon':
        file = os.path.join(step_dir, name) + '.' + output_format
        step_file = convert_to_format(step_file,
                                      output=file,
                                      output_format=output_format)

    notebook = os.path.join(step_dir, 'notebook', name) + '.ipynb'
    if os.path.dirname(notebook):
        makedirs(os.path.dirname(notebook), exist_ok=True)
    copy_replace(data_file("../config/template/notebook.json"), notebook)

    # opening in append mode creates the file without truncating existing ones
    for filename in ("apt_repository.txt",
                     "apt_packages.txt",
                     "requirements.txt",
                     "lab_extensions.txt"):
        with open(os.path.join(step_dir, filename), 'a'):
            pass
    # propagate the result to the caller: it used to be dropped, so the
    # function always returned None despite its documented return value
    return jupyter_edit(AiscalatorConfig(config=step_file,
                                         step_selection=name))
Example #5
0
    def validate_config(self):
        """
        Validate the loaded configurations against the reference templates.

        The application configuration is always checked; the step and dag
        configurations are checked only when a step name, respectively a
        dag name, is set. Each configuration is compared first with its
        "minimum" template (missing fields are reported as exceptions)
        and then with its full template (missing fields are tolerated).
        Type mismatches are reported as exceptions in both passes.

        Failures surface as exceptions raised by the validation helper
        (documented as pyhocon.ConfigMissingException or
        pyhocon.ConfigWrongTypeException).

        """
        app_msg = "In Global Application Configuration file "
        app_templates = (
            ("../config/template/minimum_aiscalator.conf", True),
            ("../config/template/aiscalator.conf", False),
        )
        for template, required in app_templates:
            ref = pyhocon.ConfigFactory.parse_file(data_file(template))
            _validate_configs(self._app_conf,
                              ref,
                              app_msg,
                              missing_exception=required,
                              type_mismatch_exception=True)

        if self._step_name:
            step_templates = (
                ("../config/template/minimum_step.conf", True),
                ("../config/template/step.conf", False),
            )
            for template, required in step_templates:
                ref = pyhocon.ConfigFactory.parse_file(data_file(template))
                step_msg = "in step named " + self._step_name
                _validate_configs(self._step,
                                  ref["steps"]["Untitled"],
                                  step_msg,
                                  missing_exception=required,
                                  type_mismatch_exception=True)

        if self._dag_name:
            # NOTE(review): the second pass reads step.conf but looks up
            # its "dags" section - confirm this template is the intended
            # reference for dags.
            dag_templates = (
                ("../config/template/minimum_dag.conf", True),
                ("../config/template/step.conf", False),
            )
            for template, required in dag_templates:
                ref = pyhocon.ConfigFactory.parse_file(data_file(template))
                dag_msg = "in dag named " + self._dag_name
                _validate_configs(self._dag,
                                  ref["dags"]["Untitled"],
                                  dag_msg,
                                  missing_exception=required,
                                  type_mismatch_exception=True)
Example #6
0
def _find_docker_src(input_docker_src, dirs):
    """
    Finds a pre-configured dockerfile package or return the default one.

    Each directory is expected to contain one sub-folder per dockerfile
    package; the first one holding ``<input_docker_src>/Dockerfile`` wins.

    Parameters
    ----------
    input_docker_src : str
        name of the dockerfile package to use
    dirs : list
        list of directories to check (a single path given as a string
        is treated as a one-element list, None as an empty list)

    Returns
    -------
    str
        path to the corresponding dockerfile package, i.e. the folder
        that directly contains the Dockerfile
    """
    if isinstance(dirs, str):
        # iterating a bare string would test each character as a directory
        dirs = [dirs]
    for src in dirs or []:
        package_dir = join(src, input_docker_src)
        if isfile(join(package_dir, "Dockerfile")):
            # return the package folder itself (not its parent) so the
            # result has the same shape as the default returned below
            return package_dir
    return utils.data_file("../config/docker/" + input_docker_src)
Example #7
0
def airflow_setup(conf: AiscalatorConfig,
                  config_home: str,
                  workspace: list,
                  append: bool = True):
    """
    Setup the airflow configuration files and environment

    The configuration is validated, the application config home and the
    airflow workspaces are redefined, the airflow docker image is rebuilt
    when ``_docker_compose_grep`` requests it, and the packaged airflow
    config files are copied into ``<app config home>/config`` with the
    workspace volume bindings and the image tag substituted.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    config_home : str
        path to the configuration home directory
    workspace : list
        List of path to directories to mount as volumes
        to airflow workers to use as workspaces.
        The list passed by the caller is left unmodified.
    append : bool
        flag to tell if workspace should be appended to
        the list in the config or replace it.

    Raises
    ------
    Exception
        when the airflow docker image had to be built and the build
        did not produce an image

    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    if config_home:
        makedirs(config_home, exist_ok=True)
        conf.redefine_app_config_home(config_home)
    ws_path = "airflow.setup.workspace_paths"
    if conf.app_config_has(ws_path) and append:
        # build a new list rather than extending in place, so the
        # caller's list is not silently modified
        workspace = list(workspace) + list(conf.app_config()[ws_path])
    conf.redefine_airflow_workspaces(workspace)
    image = 'latest'
    if _docker_compose_grep(conf):
        image = _airflow_docker_build(conf)
        if not image:
            raise Exception("Failed to build docker image")
    src = utils.data_file("../config/docker/airflow/config/")
    home = conf.app_config_home()
    dst = join(home, "config")
    logger.info("Generating a new configuration folder for aiscalator:\n\t%s",
                dst)
    makedirs(dst, exist_ok=True)
    for folder in ("dags", "pgdata", "workspace"):
        makedirs(join(home, folder), exist_ok=True)
    pattern = [
        r"(\s+)# - workspace #",
        "aiscalator/airflow:latest",
    ]
    volumes = []
    for line in conf.app_config()[ws_path]:
        host_src, _ = _split_workspace_string(conf, line)
        # bind the same path from host in the container (after creating a
        # symbolic link at container_dst path)
        volumes.append(r"\1- " + host_src + ':' + host_src)
    # keep the marker line so later runs can substitute it again
    volumes.append(r"\1# - workspace #")
    value = [
        "\n".join(volumes),
        "aiscalator/airflow:" + image,
    ]
    for file in listdir(src):
        utils.copy_replace(join(src, file),
                           join(dst, file),
                           pattern=pattern,
                           replace_value=value)