Esempio n. 1
0
def slurm_nodes_status():
    """
    Issue a sinfo command to get the reasons for the down, drained, fail or
    failing state of nodes and parse its output.

    Command is sinfo -R --format='%100E|%19H|%30N|%t'
    Output to parse: Not responding |2020-07-25T22:39:23|skylake106|down*

    Lines which do not hold exactly four "|" separated fields, or whose
    timestamp can't be parsed, are reported via error() and skipped.

    :return: list of dictionaries, one per parsed line, with the keys "date",
             "date_full", "reason", "status" and "node". An empty dictionary
             is returned when the command produced no output (kept as is so
             existing callers are not affected).
    """
    header = ["REASON", "TIMESTAMP", "NODELIST", "STATE"]
    cmd = ["sinfo", "-R", "--format='%100E|%19H|%30N|%t'"]
    run = " ".join(cmd)
    data, err = ssh_wrapper(run)
    if not data:
        debug("No data received, returning empty dictionary")
        return {}
    result = []
    for line in data:
        info = [field.strip() for field in line.split("|")]
        # Compare whole column titles instead of searching for a substring,
        # so that a node whose reason text merely contains one of the titles
        # is not mistaken for the headline.
        if info == header:
            debug("Skipping headline: %s" % line)
            continue
        if len(info) != 4:
            error("Wrong format: %s" % line)
            continue
        reason, stamp, node, stat = info
        try:
            date = dt.strptime(stamp, "%Y-%m-%dT%H:%M:%S")
        except ValueError:
            # One node with an unparsable timestamp must not prevent the
            # remaining nodes from being reported.
            error("Wrong timestamp format: %s" % line)
            continue
        result.append({
            "date": date.strftime("%Y-%m-%d %X %Z"),
            "date_full": date.strftime("%c"),
            "reason": reason,
            "status": stat,
            "node": node
        })
    return result
Esempio n. 2
0
def web_admin_user_info():
    """
    Execute the linux w command on a remote server and return the parsed
    result as JSON.

    The name of the server is read from the "server" key of the JSON body of
    the current request.

    :return: JSON response whose "data" key holds a list of dictionaries with
             user information like:
             {"username": login, "from": host, "process": cmd}
    :raises ValueError: if the request holds no JSON data, if no server is
                        given or if the remote command returned nothing
    """

    data = request.get_json()
    if not data:
        raise ValueError("Expecting application/json requests")

    # A missing or null "server" entry has to end up as "Server is not
    # defined" too, not as a KeyError or as the literal string "None".
    server = data.get("server") if isinstance(data, dict) else None
    server = str(server or "").strip()
    if not server:
        raise ValueError("Server is not defined")
    result, err = ssh_wrapper("PROCPS_USERLEN=32 PROCPS_FROMLEN=90 w -s -h",
                              host=server)
    if not result:
        raise ValueError("Error getting user information: %s" % err)

    users = []
    for user in result:
        output = user.split()
        # Fields 0 (login) and 2 (host) are required; blank or truncated
        # lines are ignored instead of raising IndexError.
        if len(output) < 3:
            continue
        login = output[0].strip()
        host = output[2].strip()
        # Everything from the fifth field on is taken as the command line
        cmd = " ".join(output[4:]).strip()

        users.append({"username": login, "from": host, "process": cmd})
    return jsonify(data=users)
Esempio n. 3
0
def get_scratch():
    """
    Query the BeeGFS quota of the current user on the remote side and return
    the scratch space usage.

    The second line of the command output is parsed as a comma separated
    record of six fields; the third and the fourth fields are taken as the
    used and the total size (presumably the first line is the CSV header -
    to be confirmed against the beegfs-ctl output).

    :return: dictionary with the keys "usage" (percentage string), "total"
             and "used" (strings as received), "free" (float), "used_label"
             and "free_label" (results of bytes2human)
    :raises ValueError: if no quota record has been received or if the
                        record can't be parsed
    """
    cmd = "beegfs-ctl --getquota --csv --uid %s" % current_user.login
    result, err = ssh_wrapper(cmd)
    if not result:
        raise ValueError("No scratch space info found")

    try:
        info = result[1]
    except IndexError:
        # Output without a quota record is reported the same way as no
        # output at all.
        raise ValueError("No scratch space info found")
    name, uid, used, total, files, hard = info.split(",")
    used_size = float(used)
    total_size = float(total)
    # A zero total would make the ratio undefined; report 0.0% instead of
    # failing with ZeroDivisionError.
    ratio = used_size / total_size if total_size else 0.0
    usage = "{0:.1%}".format(ratio)
    free = total_size - used_size
    return {"usage": usage, "total": total, "used": used, "free": free,
            "used_label": bytes2human(used), "free_label": bytes2human(free)}
Esempio n. 4
0
def slurm_consumption_raw(name, start, finish):
    """
    Run sreport on the remote side to fetch the CPU consumption, in hours,
    of a project and hand back the unparsed output.

    :param name: Account name, in our case it's a project's name
    :param start: starting date of the accounting query
    :param finish: end date of the accounting query
    :return: Tuple of the raw sreport output (None if nothing has been
             received) and the command line which has been executed
    """
    run = " ".join([
        "sreport", "cluster", "AccountUtilizationByUser", "-t", "hours",
        "-nP", "format=Account,Login,Used", "Accounts=%s" % name,
        "start=%s" % start, "end=%s" % finish,
    ])
    output, _err = ssh_wrapper(run)
    if output:
        debug("Got raw consumption values for project %s: %s"
              % (name, output))
        return output, run
    debug("No data received, nothing to return")
    return None, run
Esempio n. 5
0
def slurm_partition_info():
    """
    Run "sinfo -s" on the remote side and summarise the node counts of every
    partition.

    :return: list of dictionaries with the keys "name", "allocated", "idle",
             "other" (strings as received) and "total" (int)
    :raises ValueError: if the command returned nothing
    """
    output, err = ssh_wrapper("sinfo -s")
    if not output:
        raise ValueError("Error getting partition information: %s" % err)

    summary = []
    for row in output:
        if "PARTITION" in row:
            continue
        # Exactly five whitespace separated columns are expected, only the
        # first (partition name) and the fourth (node counts) are used.
        label, _avail, _limit, counts, _nodelist = row.split()
        allocated, idle, other, total = counts.split("/")
        summary.append(dict(
            name=label,
            allocated=allocated,
            idle=idle,
            other=other,
            total=int(total),
        ))
    return summary
Esempio n. 6
0
def get_server_info(server):
    """
    Run "uptime && free -m" on the given server and return the parsed load
    and memory information.

    :param server: host passed to ssh_wrapper
    :return: dictionary with the keys "server", "uptime" and "mem", where
             "mem" merges the results of parse_memory and parse_swap; an
             empty dictionary if the remote command returned nothing
    """
    output, err = ssh_wrapper("uptime && free -m", host=server)
    if not output:
        error("Error getting information from the remote server: %s" % err)
        return {}

    # Markers are tested in this order and only the first one found in a
    # line counts; a later matching line replaces an earlier one.
    markers = ("load average", "Mem", "Swap")
    found = dict.fromkeys(markers, "")
    for text in output:
        for marker in markers:
            if marker in text:
                found[marker] = text
                break

    uptime = parse_uptime(found["load average"])
    swap = parse_swap(found["Swap"])
    memory = parse_memory(found["Mem"])
    # Swap entries win over memory entries which share the same key
    merged = dict(memory.items())
    merged.update(swap.items())
    return {"server": server, "uptime": uptime, "mem": merged}
Esempio n. 7
0
def get_project_conso(name, start, finish):
    """
    Run sreport on the remote side and return the consumption of a project
    broken down by login.

    :param name: account name used in the sreport query
    :param start: starting date of the accounting query
    :param finish: end date of the accounting query
    :return: dictionary mapping a login to its consumption as int; the value
             of a record with an empty login is stored under the project
             name. None if the command returned nothing
    """
    run = " ".join([
        "sreport", "cluster", "AccountUtilizationByUser", "-t", "hours",
        "-nP", "format=Account,Login,Used", "Accounts=%s" % name,
        "start=%s" % start, "end=%s" % finish,
    ])
    output, _err = ssh_wrapper(run)
    if not output:
        debug("No data received, nothing to return")
        return None

    consumption = {}
    for raw in output:
        fields = raw.strip().split("|")
        # Only records made of exactly three fields are taken into account
        if len(fields) == 3:
            _account, login, hours = fields
            consumption[login or name] = int(hours)
    debug("Project '%s' consumption: %s" % (name, consumption))
    return consumption
Esempio n. 8
0
def get_jobs(start, end, last=10):
    """
    Query sacct on the remote side for the jobs of the current user and
    return them as a list of dictionaries.

    The sacct output is piped through "sort -n -r" and "head" on the remote
    side before it is parsed.

    :param start: value of the sacct --start option
    :param end: value of the sacct --end option
    :param last: number of lines kept by head, 10 by default
    :return: list of dictionaries with the keys "id", "project", "state",
             "partition", "date", "name" and "duration"
    :raises ValueError: if the command returned nothing
    """
    sacct = " ".join([
        "sacct", "-nPX",
        "--format=JobID,State,Start,Account,JobName,CPUTime,Partition",
        "--start=%s" % start, "--end=%s" % end, "-u", current_user.login,
    ])
    run = " | ".join([sacct, "sort -n -r", "head -%s" % last])

    output, _err = ssh_wrapper(run)
    if not output:
        raise ValueError("No jobs found from %s to %s" % (start, end))

    # Output key paired with the position of its value in a sacct record
    layout = (
        ("id", 0), ("project", 3), ("state", 1), ("partition", 6),
        ("date", 2), ("name", 4), ("duration", 5),
    )
    jobs = []
    for record in output:
        fields = record.strip().split("|")
        jobs.append({key: fields[position] for key, position in layout})
    return jobs
Esempio n. 9
0
def space_info():
    """
    Run df -h command on a remote server and return parsed information as a
    list of dictionaries. Only a fixed set of mountpoints is reported.
    Dictionary format is:
    {"filesystem": ...,
        "size": ...,
        "used": ...,
        "available": ...,
        "use": ...,
        "mountpoint": ...}

    Lines which can't be split into six columns are ignored.

    :return: List of dict
    :raises ValueError: if the command returned nothing
    """
    result, err = ssh_wrapper("df -h")
    if not result:
        raise ValueError("Error getting disk space information: %s" % err)

    # Mountpoints to report, defined once instead of on every iteration
    keywords = (
        "/home", "/save", "/trinity/shared", "/scratch", "/scratchfast",
        "/scratchw"
    )
    space = []
    for record in result:
        if "Filesystem" in record:
            continue
        # Split from the right: none of the wanted mountpoints holds a
        # space, so the last five tokens are always the mountpoint and the
        # numeric columns, whatever the filesystem column looks like.
        columns = record.rsplit(None, 5)
        if len(columns) != 6:
            # An unrelated or incomplete line must not break the report
            continue
        filesystem, size, used, avail, use, mountpoint = columns
        if mountpoint.strip() not in keywords:
            continue
        space.append({
            "filesystem": filesystem.strip(),
            "size": size.strip(),
            "used": used.strip(),
            "available": avail.strip(),
            "use": use.strip(),
            "mountpoint": mountpoint.strip()
        })
    return space