def remove_serialized_deployment_definitions(node: NodeInternal):
    """Removes the deployment definitions file, if it exists.

        :param node: Node to run commands on.
    """
    path = get_deployment_definitions_file_path(node=node)
    node.run("rm -f '{path}'".format(path=path))
Example #2
def install_compute_node_access_key(access_node: NodeInternal):
    """Adds the local key to authorized keys file for compute nodes,
        in case it was not added, which is a plausible scenario, when pulling
        deployments.

        :param access_node: Access node to install keys on.

    """
    access_node.run_impl("echo Testing connection...", install_keys=True)
Example #3
def upload_entry_point(contents: str,
                       node: NodeInternal,
                       runtime_dir: Optional[str] = None) -> str:
    """Uploads the entry point script and returns its path.

        :param contents: Script contents.

        :param node: Node to upload the entry point to.

        :param runtime_dir: Runtime dir for deployment script.
                            Default: ~/.idact/entry_points.

    """
    log = get_logger(__name__)

    result = []

    entry_point_location = (runtime_dir
                            if runtime_dir
                            else ENTRY_POINT_LOCATION)

    @fabric.decorators.task
    def task():
        """Creates the entry point dir and file.
            Fails if it couldn't be created."""
        with capture_fabric_output_to_log():
            run("mkdir -p {entry_point_location}"
                " && chmod 700 {entry_point_location}".format(
                    entry_point_location=entry_point_location))

            file_name = get_random_file_name(
                length=ENTRY_POINT_FILE_NAME_LENGTH)
            file_path = run("echo {entry_point_location}/{file_name}".format(
                entry_point_location=entry_point_location,
                file_name=file_name))
            file_exists = exists(file_path)

        if file_exists:
            log.warning("Overwriting randomly named entry point file:"
                        " %s", file_path)

        with stage_debug(log, "Uploading the entry point script."):
            with capture_fabric_output_to_log():
                real_path = run("echo {file_path}".format(file_path=file_path))
                file = BytesIO(contents.encode('ascii'))
                put(file, real_path, mode=0o700)

        with stage_debug(log, "Checking the entry point script was uploaded."):
            with capture_fabric_output_to_log():
                run("cat {real_path} > /dev/null".format(real_path=real_path))
        result.append(real_path)

    node.run_task(task)

    return result[0]
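
# Hedged usage sketch (not part of the original module): a minimal call to
# upload_entry_point. The script contents are purely illustrative; real
# deployments generate them with the script builders shown in later examples.
# With runtime_dir left as None, the file lands in the default location
# (~/.idact/entry_points).
def example_upload_entry_point(node: NodeInternal) -> str:
    contents = "#!/usr/bin/env bash\necho Entry point executed.\n"
    return upload_entry_point(contents=contents, node=node)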
Example #4
def deploy_generic(node: NodeInternal,
                   script_contents: str,
                   runtime_dir: str) -> GenericDeployment:
    """Deploys a program on the node.

        :param node: Node to deploy the program on.

        :param script_contents: Deployment script contents.

        :param runtime_dir: Runtime dir of the deployment, kept so it can
                            be removed when the deployment is cancelled.

    """
    log = get_logger(__name__)
    with stage_debug(log, "Uploading entry point."):
        script_path = upload_entry_point(contents=script_contents,
                                         node=node,
                                         runtime_dir=runtime_dir)

    with stage_debug(log, "Executing the deployment command."):
        output = node.run(get_deployment_command(
            script_path=script_path))

    lines = output.splitlines()
    pid = int(lines[0])
    return GenericDeployment(node=node,
                             pid=pid,
                             runtime_dir=runtime_dir)
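
# Hedged usage sketch (not part of the original module): deploying an
# arbitrary script. The runtime dir is created with create_runtime_dir (shown
# in a later example) and is stored in the returned GenericDeployment so it
# can be cleaned up when the deployment is cancelled. deploy_generic expects
# the deployment command to print the PID of the detached process as the
# first line of its output.
def example_deploy_sleep(node: NodeInternal) -> GenericDeployment:
    runtime_dir = create_runtime_dir(node=node)
    return deploy_generic(node=node,
                          script_contents="#!/usr/bin/env bash\nsleep 600\n",
                          runtime_dir=runtime_dir)
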
def serialize_deployment_definitions_to_cluster(
        node: NodeInternal, deployments: DeploymentDefinitions):  # noqa
    """Uploads deployment definitions to the cluster, replacing
        any definitions file already there.

        :param node: Node to serialize definitions to.

        :param deployments: Deployments to upload.

    """
    log = get_logger(__name__)
    with stage_debug(log, "Serializing deployment definitions to cluster."):
        serialized = deployments.serialize()
        file_contents = json.dumps(serialized, sort_keys=True, indent=4)
        parent_path = get_deployment_definitions_parent_path(node=node)
        node.run("mkdir -p {parent_path}"
                 " && chmod 700 {parent_path}".format(parent_path=parent_path))
        path = get_deployment_definitions_file_path(node=node)
        put_file_on_node(node=node, remote_path=path, contents=file_contents)
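
# Hedged round-trip sketch (not part of the original module): after
# serializing, the definitions file can be read back with get_file_from_node
# (shown in a later example) and parsed as plain JSON.
def example_read_back_definitions(node: NodeInternal) -> dict:
    path = get_deployment_definitions_file_path(node=node)
    return json.loads(get_file_from_node(node=node, remote_path=path))
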
def create_runtime_dir(node: NodeInternal) -> str:
    """Creates and returns the path to a random dir on node.

        The created dir is a subdir of `~/.idact/runtime`, see
        :attr:`.DEPLOYMENT_RUNTIME_DIR_FORMAT`.

        :param node: Node to create a runtime dir on.

    """
    deployment_id = get_random_file_name(length=DEPLOYMENT_ID_LENGTH)
    formatted_runtime_dir = DEPLOYMENT_RUNTIME_DIR_FORMAT.format(
        deployment_id=deployment_id)

    node.run("mkdir -p {formatted_runtime_dir}"
             " && chmod 700 {formatted_runtime_dir}".format(
                 formatted_runtime_dir=formatted_runtime_dir))
    runtime_dir = node.run("readlink -vf {}".format(formatted_runtime_dir))

    return runtime_dir
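
# Illustration only (an assumption, not the real constant): given the
# docstring above, DEPLOYMENT_RUNTIME_DIR_FORMAT presumably expands to a
# subdirectory of ~/.idact/runtime, e.g. "~/.idact/runtime/{deployment_id}".
# The readlink -vf call then resolves the tilde and any symlinks, so the
# function returns an absolute path such as
# "/home/user/.idact/runtime/<random deployment id>".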
Example #7
def put_file_on_node(node: NodeInternal, remote_path: str, contents: str):
    """Runs a task on the node that uploads a file.

        :param node: Node to upload the file to.

        :param remote_path: Remote file path.

        :param contents: File contents.

    """
    log = get_logger(__name__)
    with stage_debug(log, "Putting file on node %s: %s", node.host,
                     remote_path):

        @fabric.decorators.task
        def file_upload_task():
            with capture_fabric_output_to_log():
                put_remote_file(remote_path=remote_path, contents=contents)

        node.run_task(task=file_upload_task)
Example #8
def get_scratch_from_environment_variable(node: NodeInternal) -> str:
    """Returns the scratch path by getting the value of the environment
        variable defined in config.

        :param node: Node to get the environment variable from.

    """
    assert node.config.scratch.startswith('$')
    variable_name = node.config.scratch[1:]
    variable_name_quoted = shlex.quote(variable_name)
    scratch = node.run("printenv {}".format(variable_name_quoted))
    return scratch
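
# Hedged example (not part of the original module): with node.config.scratch
# set to "$SCRATCH", the leading "$" is stripped and the node effectively runs
# `printenv SCRATCH`, returning a path such as "/scratch/username".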
Example #9
def create_scratch_subdir(node: NodeInternal) -> str:
    """Creates and returns the path to a scratch subdirectory on this node.

        The created dir is a subdirectory of :meth:`.ClusterConfig.scratch`.
        It is not unique to a deployment; Dask takes care of that.

        :param node: Node to create the scratch subdir on.

    """
    if node.config.scratch.startswith('/'):
        scratch = node.config.scratch
    else:
        scratch = get_scratch_from_environment_variable(node=node)

    scratch_subdir = shlex.quote("{scratch}/{subdir}".format(
        scratch=scratch, subdir=SCRATCH_SUBDIR))
    node.run(
        "mkdir -p {scratch_subdir}"
        " && chmod 700 {scratch_subdir}".format(scratch_subdir=scratch_subdir))
    scratch_subdir_realpath = node.run(
        "readlink -vf {}".format(scratch_subdir))

    return scratch_subdir_realpath
Example #10
def file_exists_on_node(node: NodeInternal, path: str) -> bool:
    """Returns True if the file exists on the node.

        :param node: Node to run commands on.

        :param path: File path.

    """
    @fabric.decorators.task
    def task():
        with capture_fabric_output_to_log():
            return exists(path)

    return node.run_task(task=task)
Example #11
def run_sbatch(args: SbatchArguments, node: NodeInternal) -> Tuple[int, str]:
    """Runs sbatch on the given node. Returns the job id and the path
        to the entry point script.

        :param args: Arguments to use for allocation.

        :param node: Node to run sbatch on.

    """
    log = get_logger(__name__)

    request, entry_point_script_path = prepare_sbatch_allocation_request(
        args=args, config=node.config, node=node)
    log.debug("Allocation request: %s", request)
    output = node.run_impl(request, install_keys=True)
    job_id = int(output.split(';')[0])

    return job_id, entry_point_script_path
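
# Illustration of the parsing above (an assumption about the request built by
# prepare_sbatch_allocation_request): sbatch invoked with --parsable prints
# "<job id>" or "<job id>;<cluster name>", so splitting on ';' and taking the
# first field yields the numeric job id, e.g.:
#
#   int("12345;cluster1".split(';')[0])  # -> 12345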
Example #12
def get_file_from_node(node: NodeInternal, remote_path: str) -> str:
    """Runs a task on the node that downloads a file and returns its contents.

        :param node: Node to download the file from.

        :param remote_path: Remote file path.

    """
    log = get_logger(__name__)

    @fabric.decorators.task
    def file_download_task():
        with capture_fabric_output_to_log():
            return get_remote_file(remote_path=remote_path)

    with stage_debug(log, "Getting file from node %s: %s", node.host,
                     remote_path):
        return node.run_task(task=file_download_task)
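
# Hedged round-trip sketch (not part of the original module): put_file_on_node
# and get_file_from_node are symmetric, so uploading and then downloading the
# same remote path should return the original contents.
def example_file_round_trip(node: NodeInternal,
                            remote_path: str,
                            contents: str) -> bool:
    put_file_on_node(node=node, remote_path=remote_path, contents=contents)
    return get_file_from_node(node=node, remote_path=remote_path) == contents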
Example #13
def deploy_dask_worker(node: NodeInternal,
                       scheduler: DaskSchedulerDeployment) -> DaskWorkerDeployment:  # noqa, pylint: disable=line-too-long
    """Deploys a Dask worker on the node.

        :param node: Node to deploy on.

        :param scheduler: Already deployed scheduler.

    """
    log = get_logger(__name__)

    with ExitStack() as stack:
        with stage_debug(log, "Creating a runtime dir."):
            runtime_dir = create_runtime_dir(node=node)
            stack.enter_context(
                remove_runtime_dir_on_failure(node=node,
                                              runtime_dir=runtime_dir))

        with stage_debug(log, "Obtaining a free remote port."):
            bokeh_port = get_free_remote_port(node=node)

        with stage_debug(log, "Creating a scratch subdirectory."):
            scratch_subdir = create_scratch_subdir(node=node)

        log_file = create_log_file(node=node, runtime_dir=runtime_dir)

        script_contents = get_worker_deployment_script(
            scheduler_address=scheduler.address,
            bokeh_port=bokeh_port,
            scratch_subdir=scratch_subdir,
            cores=node.cores,
            memory_limit=node.memory,
            log_file=log_file,
            config=node.config)

        log.debug("Deployment script contents: %s", script_contents)

        with stage_debug(log, "Deploying script."):
            deployment = deploy_generic(node=node,
                                        script_contents=script_contents,
                                        runtime_dir=runtime_dir)
            stack.enter_context(cancel_on_failure(deployment))

        @fabric.decorators.task
        def validate_worker_started_from_log():
            """Checks that the worker has started correctly based on
                the log file."""
            with capture_fabric_output_to_log():
                output = get_remote_file(remote_path=log_file)
            log.debug("Log file: %s", output)
            validate_worker_started(output=output)

        with stage_debug(log, "Checking if worker started."):
            retry_with_config(
                lambda: node.run_task(task=validate_worker_started_from_log),
                name=Retry.CHECK_WORKER_STARTED,
                config=node.config)

        with stage_debug(log, "Opening a tunnel to bokeh diagnostics server."):
            bokeh_tunnel = node.tunnel(here=bokeh_port, there=bokeh_port)
            stack.enter_context(close_tunnel_on_failure(bokeh_tunnel))
            log.debug("Diagnostics local port: %d", bokeh_tunnel.here)

        return DaskWorkerDeployment(deployment=deployment,
                                    bokeh_tunnel=bokeh_tunnel)
Example #14
def deploy_jupyter(node: NodeInternal, local_port: int) -> JupyterDeployment:
    """Deploys a Jupyter Notebook server on the node, and creates a tunnel
        to a local port.

        :param node: Node to deploy Jupyter Notebook on.

        :param local_port: Local tunnel binding port.

    """
    log = get_logger(__name__)

    with stage_debug(log, "Creating a runtime dir."):
        runtime_dir = create_runtime_dir(node=node)

    with stage_debug(log, "Obtaining a free remote port."):
        remote_port = get_free_remote_port(node=node)

    if node.config.use_jupyter_lab:
        jupyter_version = 'lab'
    else:
        jupyter_version = 'notebook'

    deployment_commands = [
        'export JUPYTER_RUNTIME_DIR="{runtime_dir}"'.format(
            runtime_dir=runtime_dir),
        get_command_to_append_local_bin()
    ]

    log_file = create_log_file(node=node, runtime_dir=runtime_dir)

    deployment_commands.append('jupyter {jupyter_version}'
                               ' --ip 127.0.0.1'
                               ' --port "{remote_port}"'
                               ' --no-browser > {log_file} 2>&1'.format(
                                   jupyter_version=jupyter_version,
                                   remote_port=remote_port,
                                   log_file=log_file))

    script_contents = get_deployment_script_contents(
        deployment_commands=deployment_commands,
        setup_actions=node.config.setup_actions.jupyter)

    log.debug("Deployment script contents: %s", script_contents)
    with stage_debug(log, "Deploying script."):
        deployment = deploy_generic(node=node,
                                    script_contents=script_contents,
                                    runtime_dir=runtime_dir)

    with cancel_on_failure(deployment):

        @fabric.decorators.task
        def load_nbserver_json():
            """Loads notebook parameters from a json file."""
            with capture_fabric_output_to_log():
                with cd(runtime_dir):
                    nbserver_json_path = run(
                        "readlink -vf $PWD/nbserver-*.json").splitlines()[0]
                run("cat '{log_file}' || exit 0".format(log_file=log_file))
                run("cat '{nbserver_json_path}' > /dev/null".format(
                    nbserver_json_path=nbserver_json_path))
                nbserver_json_str = get_remote_file(nbserver_json_path)
                nbserver_json = json.loads(nbserver_json_str)
                return int(nbserver_json['port']), nbserver_json['token']

        with stage_debug(log, "Obtaining info about notebook from json file."):
            actual_port, token = retry_with_config(
                lambda: node.run_task(task=load_nbserver_json),
                name=Retry.JUPYTER_JSON,
                config=node.config)

        with stage_debug(log, "Opening a tunnel to notebook."):
            tunnel = node.tunnel(there=actual_port, here=local_port)

        return JupyterDeploymentImpl(deployment=deployment,
                                     tunnel=tunnel,
                                     token=token)
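
# Hedged usage sketch (not part of the original module): deploy Jupyter and
# build a local URL for it. Exposing the token on the returned deployment
# object is an assumption; it is the same token passed to
# JupyterDeploymentImpl above.
def example_jupyter_url(node: NodeInternal) -> str:
    jupyter = deploy_jupyter(node=node, local_port=8080)
    return "http://localhost:8080/?token={token}".format(token=jupyter.token)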
Example #15
def deploy_dask_scheduler(node: NodeInternal) -> DaskSchedulerDeployment:
    """Deploys a Dask scheduler on the node.

        :param node: Node to deploy on.

    """
    log = get_logger(__name__)

    with ExitStack() as stack:
        with stage_debug(log, "Creating a runtime dir."):
            runtime_dir = create_runtime_dir(node=node)
            stack.enter_context(
                remove_runtime_dir_on_failure(node=node,
                                              runtime_dir=runtime_dir))

        with stage_debug(log, "Obtaining free remote ports."):
            remote_port, bokeh_port = get_free_remote_ports(count=2, node=node)

        with stage_debug(log, "Creating a scratch subdirectory."):
            scratch_subdir = create_scratch_subdir(node=node)

        log_file = create_log_file(node=node, runtime_dir=runtime_dir)

        script_contents = get_scheduler_deployment_script(
            remote_port=remote_port,
            bokeh_port=bokeh_port,
            scratch_subdir=scratch_subdir,
            log_file=log_file,
            config=node.config)

        log.debug("Deployment script contents: %s", script_contents)

        with stage_debug(log, "Deploying script."):
            deployment = deploy_generic(node=node,
                                        script_contents=script_contents,
                                        runtime_dir=runtime_dir)
            stack.enter_context(cancel_on_failure(deployment))

        @fabric.decorators.task
        def extract_address_from_log() -> str:
            """Extracts scheduler address from a log file."""
            with capture_fabric_output_to_log():
                output = get_remote_file(remote_path=log_file)
            log.debug("Log file: %s", output)
            return extract_address_from_output(output=output)

        with stage_debug(log, "Obtaining scheduler address."):
            address = retry_with_config(
                lambda: node.run_task(task=extract_address_from_log),
                name=Retry.GET_SCHEDULER_ADDRESS,
                config=node.config)

        with stage_debug(log, "Opening a tunnel to scheduler."):
            tunnel = node.tunnel(here=remote_port, there=remote_port)
            stack.enter_context(close_tunnel_on_failure(tunnel))
            log.debug("Scheduler local port: %d", tunnel.here)

        with stage_debug(log, "Opening a tunnel to bokeh diagnostics server."):
            bokeh_tunnel = node.tunnel(here=bokeh_port, there=bokeh_port)
            stack.enter_context(close_tunnel_on_failure(bokeh_tunnel))
            log.debug("Diagnostics local port: %d", bokeh_tunnel.here)

        return DaskSchedulerDeployment(deployment=deployment,
                                       tunnel=tunnel,
                                       bokeh_tunnel=bokeh_tunnel,
                                       address=address)
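
# Hedged end-to-end sketch (not part of the original module): the scheduler is
# deployed first, and the resulting DaskSchedulerDeployment (carrying the
# extracted scheduler address) is handed to deploy_dask_worker for every node.
# List is assumed to be imported from typing alongside Tuple and Optional.
def example_deploy_dask(nodes: List[NodeInternal]) \
        -> Tuple[DaskSchedulerDeployment, List[DaskWorkerDeployment]]:
    scheduler = deploy_dask_scheduler(node=nodes[0])
    workers = [deploy_dask_worker(node=node, scheduler=scheduler)
               for node in nodes]
    return scheduler, workers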