Example #1
def get_nodes_dict(xml=None):
    """Similar to derived getnodes from vsc.pbs.interface.get_nodes_dict

    returns a dict of nodes, with a 'status' field which is a dict of statusses
    """
    if xml is None:
        cmd = "mdiag -n --format=xml"
        err, xml = RunTimeout.run(cmd.split(), timeout=60)
        if err:
            _log.error("Problem occurred running %s: %s (%s)" %
                       (cmd, err, xml))
            return None

    # build tree
    tree = etree.fromstring(xml)
    nodes = {}
    for node in tree:
        # <node AVLCLASS="[bshort][debug][short][long][special][workshop]"
        # CFGCLASS="[bshort][debug][short][long][special][workshop]"
        # FEATURES="hadoop,collectl" FLAGS="rmdetected" JOBLIST="3956525"
        # LASTUPDATETIME="1363206409" LOAD="8.160000" MAXJOB="0"
        # MAXJOBPERUSER="******" MAXLOAD="0.000000" NODEID="node001.gengar.gent.vsc"
        # NODEINDEX="1" NODESTATE="Busy" OS="linux" OSLIST="linux" PARTITION="gengar"
        # PRIORITY="0" PROCSPEED="0" RADISK="92194" RAMEM="16053" RAPROC="0" RASWAP="34219"
        # RCDISK="92381" RCMEM="16053" RCPROC="8" RCSWAP="36533" RESCOUNT="1"
        # RMACCESSLIST="gengar" RSVLIST="3956525" SPEED="1.000000" STATACTIVETIME="24357970"
        # STATMODIFYTIME="1363076905" STATTOTALTIME="25499884" STATUPTIME="24971920">
        try:
            host = node.get("NODEID")
            nodes[host] = {}
            nodes[host]['xml'] = node.items()
            states = MOAB_PBS_NODEMAP[node.get("NODESTATE").lower()]
            derived = {
                'states': states,
                'state': states[0],
                'size': str2byte(node.get("RCDISK") + "mb"),
                'physmem': str2byte(node.get("RCMEM") + "mb"),
                'np': int(node.get("RCPROC")),
            }
        except (TypeError, AttributeError) as e:
            del nodes[host]
            node_txt = etree.tostring(node, pretty_print=True)
            if host in ('localhost', ):
                _log.debug("Skipping %s (%s)" % (host, node_txt))
                continue
            else:
                raise type(e)("%s for node %s" % (e, node_txt))

        # add state mapping to derived
        pbs_nodes.make_state_map(derived)

        nodes[host]['derived'] = derived

    return nodes
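
A minimal usage sketch for the example above, assuming only get_nodes_dict() and the 'derived'/'state' keys it sets; the count_node_states helper is hypothetical and simply tallies nodes by their primary state.

def count_node_states(nodes):
    """Map each primary node state to the number of nodes in that state."""
    counts = {}
    for host, info in nodes.items():
        state = info['derived']['state']
        counts[state] = counts.get(state, 0) + 1
    return counts

nodes = get_nodes_dict()
if nodes is not None:
    print(count_node_states(nodes))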
Example #2
def get_nodes_dict(xml=None):
    """Similar to derived getnodes from vsc.pbs.interface.get_nodes_dict

    returns a dict of nodes, with a 'status' field which is a dict of statusses
    """
    if xml is None:
        cmd = "mdiag -n --format=xml"
        err, xml = RunTimeout.run(cmd.split(), timeout=60)
        if err:
            _log.error("Problem occurred running %s: %s (%s)" % (cmd, err, xml))
            return None

    # build tree
    tree = etree.fromstring(xml)
    nodes = {}
    for node in tree:
        # <node AVLCLASS="[bshort][debug][short][long][special][workshop]"
        # CFGCLASS="[bshort][debug][short][long][special][workshop]"
        # FEATURES="hadoop,collectl" FLAGS="rmdetected" JOBLIST="3956525"
        # LASTUPDATETIME="1363206409" LOAD="8.160000" MAXJOB="0"
        # MAXJOBPERUSER="******" MAXLOAD="0.000000" NODEID="node001.gengar.gent.vsc"
        # NODEINDEX="1" NODESTATE="Busy" OS="linux" OSLIST="linux" PARTITION="gengar"
        # PRIORITY="0" PROCSPEED="0" RADISK="92194" RAMEM="16053" RAPROC="0" RASWAP="34219"
        # RCDISK="92381" RCMEM="16053" RCPROC="8" RCSWAP="36533" RESCOUNT="1"
        # RMACCESSLIST="gengar" RSVLIST="3956525" SPEED="1.000000" STATACTIVETIME="24357970"
        # STATMODIFYTIME="1363076905" STATTOTALTIME="25499884" STATUPTIME="24971920">
        try:
            host = node.get("NODEID")
            nodes[host] = {}
            nodes[host]['xml'] = node.items()
            states = MOAB_PBS_NODEMAP[node.get("NODESTATE").lower()]
            derived = {
                'states': states,
                'state': states[0],
                'size': str2byte(node.get("RCDISK") + "mb"),
                'physmem': str2byte(node.get("RCMEM") + "mb"),
                'np': int(node.get("RCPROC")),
            }
        except (TypeError, AttributeError) as e:
            del nodes[host]
            node_txt = etree.tostring(node, pretty_print=True)
            if host in ('localhost', ):
                _log.debug("Skipping %s (%s)" % (host, node_txt))
                continue
            else:
                raise type(e)("%s for node %s" % (e, node_txt))

        # add state mapping to derived
        pbs_nodes.make_state_map(derived)

        nodes[host]['derived'] = derived


    return nodes
Example #3
def get_nodes_dict():
    """Get the pbs_nodes equivalent info as dict"""
    query = get_query()
    node_states = query.getnodes([])
    for name, full_state in node_states.items():
        # just add states
        states = full_state[ATTR_STATE]
        if ND_free in states and ATTR_JOBS in full_state:
            _log.debug('Added free_and_job node %s' % (name))
            states.insert(0, ND_free_and_job)
        if ND_free in states and ATTR_JOBS not in full_state:
            _log.debug('Append idle node %s' % (name))
            states.append(ND_idle)  # append it, not insert
        if ND_offline in states and ATTR_JOBS not in full_state:
            _log.debug('Append idle node %s' % (name))
            states.append(ND_idle)

        if ATTR_ERROR in full_state:
            _log.debug('Added error node %s' % (name))
            states.insert(0, ND_error)
        if ND_down in states and ATTR_ERROR in full_state:
            _log.debug('Added down_on_error node %s' % (name))
            states.insert(0, ND_down_on_error)

        # extend the node dict with derived dict (for convenience)
        derived = {}
        if ATTR_JOBS in full_state:
            jobs = full_state.get_jobs()
            if not all(JOBID_REG.search(x.strip()) for x in jobs):
                _log.debug('Added bad node %s for jobs %s' % (name, jobs))
                states.insert(0, ND_bad)
            derived[ATTR_JOBS] = jobs

        derived[ATTR_STATES] = [str(x) for x in states]
        make_state_map(derived)

        if ATTR_NP in full_state:
            derived[ATTR_NP] = int(full_state[ATTR_NP][0])
        if ATTR_STATUS in full_state:
            status = full_state[ATTR_STATUS]
            for prop in ['physmem', 'totmem', 'size']:
                if prop not in status:
                    continue
                val = status.get(prop)[0]
                if prop in ('size',):
                    # 'size': ['539214180kb:539416640kb']
                    # - use 2nd field
                    val = val.split(':')[1]
                derived[prop] = str2byte(val)

        full_state['derived'] = derived
        _log.debug("node %s derived data %s " % (name, derived))

    return node_states
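
A hedged usage sketch for the pbs_python-based variant above: physmem, totmem and size are stored in 'derived' as byte counts (via str2byte), so cluster-wide totals can be summed directly. Only get_nodes_dict() and the 'derived'/'physmem' keys come from the example; the rest is illustrative.

node_states = get_nodes_dict()
total_physmem = sum(
    full_state['derived'].get('physmem', 0)
    for full_state in node_states.values()
)
print("total physmem: %d bytes" % total_physmem)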
Example #4
def get_nodes_dict():
    """Get the pbs_nodes equivalent info as dict"""
    query = get_query()
    node_states = query.getnodes([])
    for name, full_state in node_states.items():
        # just add states
        states = full_state[ATTR_STATE]
        if ND_free in states and ATTR_JOBS in full_state:
            _log.debug('Added free_and_job node %s' % (name))
            states.insert(0, ND_free_and_job)
        if ND_free in states and ATTR_JOBS not in full_state:
            _log.debug('Append idle node %s' % (name))
            states.append(ND_idle)  # append it, not insert
        if ND_offline in states and ATTR_JOBS not in full_state:
            _log.debug('Append idle node %s' % (name))
            states.append(ND_idle)

        if ATTR_ERROR in full_state:
            _log.debug('Added error node %s' % (name))
            states.insert(0, ND_error)
        if ND_down in states and ATTR_ERROR in full_state:
            _log.debug('Added down_on_error node %s' % (name))
            states.insert(0, ND_down_on_error)

        # extend the node dict with derived dict (for convenience)
        derived = {}
        if ATTR_JOBS in full_state:
            jobs = full_state.get_jobs()
            if not all(JOBID_REG.search(x.strip()) for x in jobs):
                _log.debug('Added bad node %s for jobs %s' % (name, jobs))
                states.insert(0, ND_bad)
            derived[ATTR_JOBS] = jobs

        derived[ATTR_STATES] = [str(x) for x in states]
        make_state_map(derived)

        if ATTR_NP in full_state:
            derived[ATTR_NP] = int(full_state[ATTR_NP][0])
        if ATTR_STATUS in full_state:
            status = full_state[ATTR_STATUS]
            for prop in ['physmem', 'totmem', 'size']:
                if prop not in status:
                    continue
                val = status.get(prop)[0]
                if prop in ('size', ):
                    # 'size': ['539214180kb:539416640kb']
                    # - use 2nd field
                    val = val.split(':')[1]
                derived[prop] = str2byte(val)

        full_state['derived'] = derived
        _log.debug("node %s derived data %s " % (name, derived))

    return node_states
Example #5
def collect_nodeinfo():
    """Collect node information"""
    types = {}
    state_list = []
    node_list = []
    re_host_id = re.compile(r"(?P<id>\d+)")

    for idx, (node, full_state) in enumerate(get_nodes()):
        # A node can have several states. We are only interested in the first entry.
        derived = full_state['derived']

        # what state to report?
        state_list.append(derived[ATTR_STATE])

        if derived[ATTR_NODESTATE] == NDST_OK:
            cores = derived.get(ATTR_NP, None)
            physmem = derived.get('physmem', None)
            totmem = derived.get('totmem', None)
            size = derived.get('size', None)

            if all([cores, physmem, totmem, size]):  # there shouldn't be any value 0
                # round mem to 1 gb, size to 5gb
                GB = str2byte('gb')
                pmem = ceil(10 * physmem / GB) / 10
                tmem = ceil(10 * totmem / GB) / 10
                swap = tmem - pmem
                dsize = ceil(10 * size / (5 * GB)) / 2
                typ = (cores, pmem, swap, dsize)
                if typ not in types:
                    types[typ] = []
                types[typ].append(node)

        result = re_host_id.search(node)
        if result:
            node_list.append(result.group('id'))
        else:
            node_list.append(str(idx + 1))  # offset +1

    return node_list, state_list, types
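
A short, hypothetical consumer of collect_nodeinfo(): it prints how many nodes share each rounded hardware profile. The (cores, pmem, swap, dsize) key layout is taken from the example above; the output formatting is an assumption.

node_list, state_list, types = collect_nodeinfo()
for (cores, pmem, swap, dsize), members in sorted(types.items()):
    print("%d node(s): %s cores, %s GB mem, %s GB swap, %s GB disk" %
          (len(members), cores, pmem, swap, dsize))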
Example #6
def collect_nodeinfo():
    """Collect node information"""
    types = {}
    state_list = []
    node_list = []
    re_host_id = re.compile(r"(?P<id>\d+)")

    for idx, (node, full_state) in enumerate(get_nodes()):
        # A node can have several states. We are only interested in the first entry.
        derived = full_state['derived']

        # what state to report?
        state_list.append(derived['state'])

        if derived['nodestate'] == NDST_OK:
            cores = derived.get('np', None)
            physmem = derived.get('physmem', None)
            totmem = derived.get('totmem', None)
            size = derived.get('size', None)

            if all([cores, physmem, totmem, size]):  # there shouldn't be any value 0
                # round mem to 1 gb, size to 5gb
                GB = str2byte('gb')
                pmem = ceil(10 * physmem / GB) / 10
                tmem = ceil(10 * totmem / GB) / 10
                swap = tmem - pmem
                dsize = ceil(10 * size / (5 * GB)) / 2
                typ = (cores, pmem, swap, dsize)
                if typ not in types:
                    types[typ] = []
                types[typ].append(node)

        result = re_host_id.search(node)
        if result:
            node_list.append(result.group('id'))
        else:
            node_list.append(str(idx + 1))  # offset +1

    return node_list, state_list, types
Example #7
def get_jobs_dict():
    """Get jobs dict with derived info"""
    jobs = get_jobs()

    reg_user = re.compile(r"(?P<user>\w+)@\S+")

    nodes_cores = re.compile(r"(?P<nodes>\d+)(:ppn=(?P<cores>\d+))?")
    nodes_nocores = re.compile(r"(?P<nodes>node\d+).*?")

    for jobdata in jobs.values():
        derived = {}

        derived["state"] = jobdata["job_state"][0]

        r = reg_user.search(jobdata["Job_Owner"][0])
        if r:
            derived["user"] = r.group("user")

        if "Resource_List" in jobdata:
            resource_list = jobdata["Resource_List"]
            # walltime
            if "walltime" in resource_list:
                totalwallsec = str2sec(resource_list["walltime"][0])
                if totalwallsec is not None:
                    derived["totalwalltimesec"] = totalwallsec

            # nodes / cores
            m = None  # stays None when no node request is present
            if "neednodes" in resource_list:
                m = nodes_cores.match(resource_list["neednodes"][0])
                if not m:
                    if nodes_nocores.match(resource_list["neednodes"][0]):
                        m = nodes_cores.match("1")
            elif "nodes" in resource_list:
                m = nodes_cores.match(resource_list["nodes"][0])
            if m:
                nodes = int(m.group("nodes"))
                cores = 1
                if len(m.groups()) > 1 and m.group("cores"):
                    cores = int(m.group("cores"))
                derived["nodes"] = nodes
                derived["cores"] = cores

        # resource used
        if "resources_used" in jobdata:
            resources_used = jobdata["resources_used"]

            if "mem" in resources_used:
                derived["used_mem"] = str2byte(resources_used["mem"][0])

            if "vmem" in resources_used:
                derived["used_vmem"] = str2byte(resources_used["vmem"][0])

            if "walltime" in resources_used:
                sec = str2sec(resources_used["walltime"][0])
                if sec is not None:
                    derived["used_walltime"] = sec

            if "cput" in resources_used:
                sec = str2sec(resources_used["cput"][0])
                if sec is not None:
                    derived["used_cput"] = sec

        if "exec_host" in jobdata:
            nodes = jobdata.get_nodes()
            exec_hosts = {}
            for host in nodes:
                hostname = host.split("/")[0]
                if hostname not in exec_hosts:
                    exec_hosts[hostname] = 0
                exec_hosts[hostname] += 1
            derived["exec_hosts"] = exec_hosts

        jobdata["derived"] = derived

    return jobs
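
A hypothetical usage sketch: count running jobs per user from the derived info. Only get_jobs_dict() and the 'derived' keys come from the example; treating 'R' as the running state code is the usual PBS/TORQUE convention and is assumed here.

jobs = get_jobs_dict()
running_per_user = {}
for jobid, jobdata in jobs.items():
    derived = jobdata['derived']
    if derived.get('state') == 'R':
        user = derived.get('user', 'unknown')
        running_per_user[user] = running_per_user.get(user, 0) + 1
print(running_per_user)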
Example #8
def get_jobs_dict():
    """Get jobs dict with derived info"""
    jobs = get_jobs()

    reg_user = re.compile(r"(?P<user>\w+)@\S+")

    nodes_cores = re.compile(r"(?P<nodes>\d+)(:ppn=(?P<cores>\d+))?")
    nodes_nocores = re.compile(r"(?P<nodes>node\d+).*?")

    for jobdata in jobs.values():
        derived = {}

        derived['state'] = jobdata['job_state'][0]

        r = reg_user.search(jobdata['Job_Owner'][0])
        if r:
            derived['user'] = r.group('user')

        if 'Resource_List' in jobdata:
            resource_list = jobdata['Resource_List']
            # walltime
            if 'walltime' in resource_list:
                totalwallsec = str2sec(resource_list['walltime'][0])
                if totalwallsec is not None:
                    derived['totalwalltimesec'] = totalwallsec

            # nodes / cores
            m = None  # stays None when no node request is present
            if 'neednodes' in resource_list:
                m = nodes_cores.match(resource_list['neednodes'][0])
                if not m:
                    if nodes_nocores.match(resource_list['neednodes'][0]):
                        m = nodes_cores.match("1")
            elif 'nodes' in resource_list:
                m = nodes_cores.match(resource_list['nodes'][0])
            if m:
                nodes = int(m.group('nodes'))
                cores = 1
                if len(m.groups()) > 1 and m.group('cores'):
                    cores = int(m.group('cores'))
                derived['nodes'] = nodes
                derived['cores'] = cores

        # resource used
        if 'resources_used' in jobdata:
            resources_used = jobdata['resources_used']

            if 'mem' in resources_used:
                derived['used_mem'] = str2byte(resources_used['mem'][0])

            if 'vmem' in resources_used:
                derived['used_vmem'] = str2byte(resources_used['vmem'][0])

            if 'walltime' in resources_used:
                sec = str2sec(resources_used['walltime'][0])
                if sec is not None:
                    derived['used_walltime'] = sec

            if 'cput' in resources_used:
                sec = str2sec(resources_used['cput'][0])
                if sec is not None:
                    derived['used_cput'] = sec

        if 'exec_host' in jobdata:
            exec_hosts = {}
            for host in jobdata['exec_host'][0].split('+'):
                hostname = host.split('/')[0]
                if hostname not in exec_hosts:
                    exec_hosts[hostname] = 0
                exec_hosts[hostname] += 1
            derived['exec_hosts'] = exec_hosts

        jobdata['derived'] = derived

    return jobs
Example #9
def get_jobs_dict(attrs=None):
    """
    Get jobs dict with derived info

    attrs is passed to get_jobs
    """
    jobs = get_jobs(attrs=attrs)

    reg_user = re.compile(r"(?P<user>\w+)@\S+")

    nodes_cores = re.compile(r"(?P<nodes>\d+)(:ppn=(?P<cores>\d+))?")
    namednodes_cores = re.compile(r"(?P<nodes>node\d+[^:+]*)(:ppn=(?P<cores>\d+))?")
    nodes_nocores = re.compile(r"(?P<nodes>node\d+).*?")

    for jobdata in jobs.values():
        derived = {}

        derived['state'] = jobdata['job_state'][0]

        r = reg_user.search(jobdata['Job_Owner'][0])
        if r:
            derived['user'] = r.group('user')

        if 'Resource_List' in jobdata:
            resource_list = jobdata['Resource_List']
            # walltime
            if 'walltime' in resource_list:
                totalwallsec = str2sec(resource_list['walltime'][0])
                if totalwallsec is not None:
                    derived['totalwalltimesec'] = totalwallsec

            # nodes / cores
            need_nodes = None
            m = None  # stays None when no node request is present
            if 'neednodes' in resource_list:
                need_nodes = resource_list['neednodes'][0]
            elif 'nodes' in resource_list:
                need_nodes = resource_list['nodes'][0]
            if need_nodes is not None:
                m = nodes_cores.match(need_nodes)
                if not m:
                    namednode_m = namednodes_cores.match(need_nodes)
                    if namednode_m:
                        m = nodes_cores.match("1:ppn=%s" % (namednode_m.groups()[2] or "1"))
                    elif nodes_nocores.match(need_nodes):
                        m = nodes_cores.match("1")
            if m:
                nodes = int(m.group('nodes'))
                cores = 1
                if len(m.groups()) > 1 and m.group('cores'):
                    cores = int(m.group('cores'))
                derived['nodes'] = nodes
                derived['cores'] = cores

        # resource used
        if 'resources_used' in jobdata:
            resources_used = jobdata['resources_used']

            if 'mem' in resources_used:
                derived['used_mem'] = str2byte(resources_used['mem'][0])

            if 'vmem' in resources_used:
                derived['used_vmem'] = str2byte(resources_used['vmem'][0])

            if 'walltime' in resources_used:
                sec = str2sec(resources_used['walltime'][0])
                if sec is not None:
                    derived['used_walltime'] = sec

            if 'cput' in resources_used:
                sec = str2sec(resources_used['cput'][0])
                if sec is not None:
                    derived['used_cput'] = sec

        if 'exec_host' in jobdata:
            nodes = jobdata.get_nodes()
            exec_hosts = {}
            for host in nodes:
                hostname = host.split('/')[0]
                if hostname not in exec_hosts:
                    exec_hosts[hostname] = 0
                exec_hosts[hostname] += 1
            derived['exec_hosts'] = exec_hosts

        jobdata['derived'] = derived

    return jobs
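
A brief, hypothetical sketch for the attrs-aware variant: total requested cores across all jobs, computed as nodes * cores from the derived info. Passing attrs=None simply keeps whatever default attribute selection get_jobs() applies.

jobs = get_jobs_dict(attrs=None)
total_cores = sum(
    job['derived'].get('nodes', 0) * job['derived'].get('cores', 0)
    for job in jobs.values()
)
print("total requested cores: %d" % total_cores)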