# Example #1
def run_scancel(job_id: int, node: Node):
    """Cancels the job by running `scancel`.

        :param job_id: Job id to cancel. It is converted with `int()`
                       before being placed on the command line.

        :param node:   Node to run scancel on.

    """
    # int() ensures that only a number is interpolated into the command.
    node.run("scancel {}".format(int(job_id)))
# Example #2
def check_address_reachable_from_node(node: Node, ip_address: str, port: int):
    """Attempts to connect to this address from node using TCP.

        The connection is made by a Python one-liner executed through
        `node.run`: it creates an IPv4 stream socket, connects it to
        the given address and port, and closes it.

        :param node:        Node to test the connection from.
        :param ip_address:  Ip address to connect to.
        :param port:        Port to connect to.

    """
    # Adjacent literals are joined first, so `.format` fills in the
    # placeholders of the whole command.
    node.run("python -c 'import socket; s=socket.socket("
             "socket.AF_INET, socket.SOCK_STREAM);"
             " s.connect((\"{ip_address}\", {port})); "
             " s.close()'".format(ip_address=ip_address, port=port))
def get_remote_environment_path(node: Node, path: Optional[str]) -> str:
    """Returns the remote path to config file, or raises a RuntimeError.

        If no path is given, it is looked up on the node with
        `printenv IDACT_CONFIG_PATH`, with an `echo` fallback.
        The path is then resolved on the node with `readlink -vf`.

        :param node: Remote node.

        :param path: Optional remote path.

        :raises RuntimeError: If the resolved path is empty.

    """
    if path is None:
        # NOTE(review): this call is truncated in the visible source - the
        # argument of `.format(` (the fallback echoed when the variable is
        # unset) and the closing parentheses are missing. They cannot be
        # inferred from here; restore them from the original file.
        path = node.run("printenv IDACT_CONFIG_PATH || echo {}".format(

    # Resolve the path on the node; an empty result is treated as a failure.
    path = node.run("readlink -vf {}".format(path))
    if not path:
        raise RuntimeError("Unable to determine remote config path.")

    return path
# Example #4
def remove_runtime_dir(node: Node, runtime_dir: str):
    """Removes a runtime dir for deployment.

        Removes all files in it that do not start with a dot.
        Does not remove nested directories. On failure, produces a warning.

        The removal is best-effort: a RuntimeError raised by `node.run`
        is logged and not propagated to the caller.

        :param node: Node to run commands on.

        :param runtime_dir: Path to the deployment dir.

    """
    try:
        # `rmdir` only runs if `rm` succeeded, and only removes the
        # directory if it is empty by then.
        node.run("rm -f {runtime_dir}/*"
                 " && rmdir {runtime_dir}".format(runtime_dir=runtime_dir))
    except RuntimeError:
        log = get_logger(__name__)
        log.warning("Failed to remove runtime dir: '%s'.", runtime_dir)
        # The traceback is only attached to the debug-level record.
        log.debug("Failed to remove runtime dir due to exception.",
                  exc_info=True)
# Example #5
def get_free_remote_port(node: Node) -> int:
    """Returns a free remote port.

        Uses a Python snippet to determine a free port by binding a socket
        to port 0 and immediately releasing it.

        The socket is closed before the number is returned, so the port
        is not reserved for the caller.

        :param node: Node to find a port on.

    """
    output = node.run("python -c 'import socket; s=socket.socket();"
                      " s.bind((str(), 0)); print(s.getsockname()[1]);"
                      " s.close()'")
    # The snippet prints just the port number; int() tolerates
    # surrounding whitespace such as a trailing newline.
    return int(output)
# Example #6
def extract_squeue_format_R(value: str, node: Node) -> Optional[List[str]]:
    """Extracts the job node list `%R` from `squeue` output, and calls
        `scontrol` to extract each hostname.

        Returns None if the value starts with `(`, or if `scontrol`
        did not print any hostnames.

        :param value: Job node list in a compact format, e.g. `node[1-7]`.

        :param node:  Node to call scontrol on.

    """
    # NOTE(review): presumably squeue prints a parenthesized reason here
    # instead of a node list when no nodes are allocated - confirm.
    if value.startswith('('):
        return None

    # The value is quoted, because brackets are special to the shell.
    output = node.run("scontrol show hostname {}".format(shlex.quote(value)))
    hosts = [validate_hostname(i) for i in output.splitlines()]
    return hosts if hosts else None
# Example #7
def get_node_memory_usage(node: Node) -> bitmath.KiB:
    """Adds up the RES (Resident Memory Size) of every user process
        on the node, as reported by a single batch run of top."""
    # Pipeline executed on the node:
    # - `top -b -n 1 -u $USER` runs top for one iteration,
    # - `NR>7` skips the header and the column names,
    # - `print $6` keeps only the column with kibibytes used per process.
    # The values are summed here rather than in awk, because awk does not
    # support big integers.
    top_output = node.run("top -b -n 1 -u $USER | awk 'NR>7 { print $6; }'")
    total_kib = sum(parse_top_res_format_to_kib(entry)
                    for entry in top_output.splitlines())
    return bitmath.KiB(total_kib)
# Example #8
def ptree(pid: int, node: Node) -> List[int]:
    """Returns a list containing this PID and all its descendants.

        The given PID always comes first, followed by the descendant
        PIDs in the order they were printed on the node.

        :param pid: Parent process pid.

        :param node: Node to run pgrep on.

    """
    # The shell snippet from LIST_DESCENDANTS is run first, then
    # `list_descendants` is invoked for the pid and its output is echoed.
    result = node.run("{list_descendants};"
                      " echo $(list_descendants {pid})".format(
                          list_descendants=LIST_DESCENDANTS, pid=pid))
    if not result:
        return [pid]

    # Split on any whitespace, so that repeated spaces or a trailing
    # newline cannot produce an empty token that int() would reject.
    descendant_pids = [
        int(descendant_pid) for descendant_pid in result.split()
    ]

    return [pid] + descendant_pids
# Example #9
def run_squeue(node: Node) -> Dict[int, SqueueResult]:
    """Runs `squeue` and extracts job statuses as results.

        Jobs of the current user (`$USER`) are listed using the format
        `%A|%D|%L|%r|%R|%T`. The header line is skipped, and lines for
        which `extract_squeue_line` returns None are left out.

        :param node: Node to run `squeue` on.

        :returns: Results keyed by their job id.

    """
    # Taken before squeue is run, and passed on to `extract_squeue_line`.
    now = utc_now()
    output = node.run("squeue"
                      " --user $USER"
                      " --format '%A|%D|%L|%r|%R|%T'")
    lines = output.splitlines()[1:]  # Ignore header.
    # NOTE(review): the `extract_squeue_line(` call below is truncated in
    # the visible source - its remaining arguments and the closing
    # parenthesis are missing. Restore them from the original file.
    results = {squeue_result.job_id: squeue_result
               for squeue_result
               in [extract_squeue_line(now=now,
                   for line in lines]
               if squeue_result is not None}

    return results