예제 #1
0
def _airflow_docker_build(conf: AiscalatorConfig):
    """ Build the aiscalator/airflow image and return its ID."""
    logger = logging.getLogger(__name__)
    # TODO get airflow dockerfile from conf?
    conf.app_config_home()
    dockerfile_dir = utils.data_file("../config/docker/airflow")
    # TODO customize dockerfile with apt_packages, requirements etc
    docker_gid, docker_group = _find_docker_gid_group()
    commands = [
        "docker", "build",
        "--build-arg", "DOCKER_GID=" + str(docker_gid),
        "--build-arg", "DOCKER_GROUP=" + str(docker_group),
        "--rm", "-t", "aiscalator/airflow:latest",
        dockerfile_dir
    ]
    log = LogRegexAnalyzer(b'Successfully built ([a-zA-Z0-9]+)\n')
    logger.info("Running...: %s", " ".join(commands))
    utils.subprocess_run(commands, log_function=log.grep_logs)
    result = log.artifact()
    if result:
        # tag the image built with the sha256 of the dockerfile
        tag = utils.sha256(join(dockerfile_dir, 'Dockerfile'))[:12]
        commands = [
            "docker", "tag", result, "aiscalator/airflow:" + tag
        ]
        logger.info("Running...: %s", " ".join(commands))
        utils.subprocess_run(commands)
        return tag
    return None
예제 #2
0
def _docker_compose(conf: AiscalatorConfig,
                    extra_commands: list):
    """
    Run the docker-compose command

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    extra_commands : list
        list of sub-commands to run in docker-compose

    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    dockerfile = join(conf.app_config_home(), "config",
                      conf.airflow_docker_compose_file())
    commands = ["docker-compose"]
    # Prepare a temp folder to run the command from
    with TemporaryDirectory(prefix="aiscalator_") as tmp:
        with open(join(tmp, ".env"), mode="w") as env_file:
            # concatenate all the env files into one
            for env in conf.user_env_file(conf.dag_field("definition.env")):
                if isfile(env):
                    with open(env, mode="r") as file:
                        for line in file:
                            env_file.write(line)
        utils.copy_replace(join(tmp, ".env"),
                           join(dirname(dockerfile), ".env"))
    commands += ["-f", dockerfile] + extra_commands
    logger.info("Running...: %s", " ".join(commands))
    utils.subprocess_run(commands, no_redirect=True)
예제 #3
0
def _docker_compose_grep(conf: AiscalatorConfig):
    """
    Checks if the docker-compose file is using the
    aiscalator/airflow docker image. In which case,
    we need to make sure that image is properly built
    and available.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    Returns
    -------
    bool
        Returns if aiscalator/airflow docker image is
        needed and should be built.
    """
    docker_compose_file = join(conf.app_config_home(), "config",
                               conf.airflow_docker_compose_file())
    pattern = re.compile(r"\s+image:\s+aiscalator/airflow")
    try:
        with open(docker_compose_file, "r") as file:
            for line in file:
                if re.match(pattern, line):
                    # docker compose needs the image
                    return True
    except FileNotFoundError:
        # no docker compose, default will need the image
        return True
    return False
예제 #4
0
def airflow_push(conf: AiscalatorConfig):
    """
    Starts an airflow environment

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application

    """
    # TODO to implement
    logging.error("Not implemented yet %s", conf.app_config_home())
예제 #5
0
def _split_workspace_string(conf: AiscalatorConfig, workspace):
    """
    Interprets the workspace string and split into src and dst
    paths:
    - The src is a path on the host machine.
    - The dst is a path on the container.
    In case, the workspace string doesn't specify both paths
    separated by a ':', this function will automatically mount it
    in the $app_config_home_directory/work/ folder creating a
    symbolic link with the same basename as the workspace.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    workspace : str
        the workspace string to interpret
    Returns
    -------
    (str, str)
        A tuple with both src and dst paths
    """
    logger = logging.getLogger(__name__)
    root_dir = conf.app_config_home()
    if workspace.strip():
        if ':' in workspace:
            src = abspath(workspace.split(':')[0])
            if not src.startswith('/'):
                src = abspath(join(root_dir, src))
            dst = workspace.split(':')[1]
            if not dst.startswith('/'):
                dst = abspath(join(root_dir, dst))
        else:
            src = abspath(workspace)
            if not src.startswith('/'):
                src = abspath(join(root_dir, src))
            # in the workspace special folder, we'll create the same named
            # folder (basename) that is actually a link back to the one
            # we want to include in our workspace.
            dst = join("workspace", basename(workspace.strip('/')))
            link = join(root_dir, dst)
            if realpath(src) != realpath(link):
                if exists(link) and islink(link):
                    logger.warning("Removing an existing symbolic"
                                   " link %s -> %s",
                                   link, realpath(link))
                    remove(link)
                if not exists(link):
                    logger.info("Creating a symbolic link %s -> %s", link, src)
                    symlink(src, link)
            dst = "/usr/local/airflow/" + dst
        return src, dst
    return None, None
예제 #6
0
def airflow_setup(conf: AiscalatorConfig,
                  config_home: str,
                  workspace: list,
                  append: bool = True):
    """
    Setup the airflow configuration files and environment

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the application
    config_home : str
        path to the configuration home directory
    workspace : list
        List of path to directories to mount as volumes
        to airflow workers to use as workspaces
    append : bool
        flag to tell if workspace should be appended to
        the list in the config or replace it.

    """
    logger = logging.getLogger(__name__)
    conf.validate_config()
    if config_home:
        makedirs(config_home, exist_ok=True)
        conf.redefine_app_config_home(config_home)
    ws_path = "airflow.setup.workspace_paths"
    if conf.app_config_has(ws_path):
        if append:
            workspace += conf.app_config()[ws_path]
    conf.redefine_airflow_workspaces(workspace)
    image = 'latest'
    if _docker_compose_grep(conf):
        image = _airflow_docker_build(conf)
        if not image:
            raise Exception("Failed to build docker image")
    src = utils.data_file("../config/docker/airflow/config/")
    dst = join(conf.app_config_home(), "config")
    logger.info("Generating a new configuration folder for aiscalator:\n\t%s",
                dst)
    makedirs(dst, exist_ok=True)
    makedirs(join(conf.app_config_home(), "dags"), exist_ok=True)
    makedirs(join(conf.app_config_home(), "pgdata"), exist_ok=True)
    makedirs(join(conf.app_config_home(), "workspace"), exist_ok=True)
    pattern = [
        r"(\s+)# - workspace #",
        "aiscalator/airflow:latest",
    ]
    workspace = []
    for line in conf.app_config()[ws_path]:
        host_src, container_dst = _split_workspace_string(conf, line)
        # bind the same path from host in the container (after creating a
        # symbolic link at container_dst path)
        workspace += [r"\1- " + host_src + ':' + host_src]
    workspace += [r"\1# - workspace #"]
    value = [
        "\n".join(workspace),
        "aiscalator/airflow:" + image,
    ]
    for file in listdir(src):
        utils.copy_replace(join(src, file),
                           join(dst, file),
                           pattern=pattern,
                           replace_value=value)
예제 #7
0
def _prepare_docker_env(conf: AiscalatorConfig, program, port):
    """
    Assembles the list of commands to execute a docker run call

    When calling "docker run ...", this function also adds a set of
    additional parameters to mount the proper volumes and expose the
    correct environment for the call in the docker image.

    Parameters
    ----------
    conf : AiscalatorConfig
        Configuration object for the step
    program : List
        the rest of the commands to execute as part of
        the docker run call

    Returns
    -------
    List
        The full Array of Strings representing the commands to execute
        in the docker run call
    """
    logger = logging.getLogger(__name__)
    commands = [
        "docker", "run", "--name", conf.dag_container_name() + "_edit",
        "--rm",
        # TODO improve port publishing
        "-p", str(port) + ":8888",
        "-p", "18080:8080",
    ]
    for env in conf.user_env_file(conf.dag_field("definition.env")):
        if isfile(env):
            commands += ["--env-file", env]
    commands += [
        "--mount", "type=bind,source=/var/run/docker.sock,"
                   "target=/var/run/docker.sock",
    ]
    code_path = conf.dag_file_path('definition.code_path')
    notebook, _ = utils.notebook_file(code_path)
    utils.check_notebook_dir(logger, notebook)
    commands += [
        "--mount", "type=bind,source=" + dirname(notebook) +
        ",target=/usr/local/airflow/work/",
    ]
    if conf.config_path() is not None:
        commands += [
            "--mount",
            "type=bind,source=" + abspath(conf.config_path()) +
            ",target="
            "/usr/local/airflow/" + basename(conf.config_path()),
        ]
    workspace = []
    ws_path = "airflow.setup.workspace_paths"
    if conf.app_config_has(ws_path):
        ws_home = join(conf.app_config_home(),
                       "workspace")
        makedirs(ws_home, exist_ok=True)
        for folder in conf.app_config()[ws_path]:
            src, dst = _split_workspace_string(conf, folder)
            # bind the same path from host in the container (after creating
            # a symbolic link at dst path)
            workspace += [src + ":" + src]
            commands += [
                "--mount", "type=bind,source=" + src +
                ",target=" + src
            ]
        commands += [
            "--mount", "type=bind,source=" + ws_home +
            ",target=/usr/local/airflow/workspace/"
        ]
    commands += program + workspace
    return commands