Beispiel #1
0
def test_get_compute_nodes_info(pbsnodes_mocked_response, expected_output, mocker, test_datadir):
    """Verify get_compute_nodes_info against a canned pbsnodes response.

    The command runner is patched to return the content of the fixture file
    named by ``pbsnodes_mocked_response``; the test then checks both the
    command line that was issued and the parsed result.
    """
    response_file = test_datadir / pbsnodes_mocked_response
    canned_output = read_text(response_file)
    check_output_mock = mocker.patch(
        "common.schedulers.torque_commands.check_command_output",
        return_value=canned_output,
        autospec=True,
    )

    parsed_nodes = get_compute_nodes_info(hostname_filter=["host1"])

    # The hostname filter must be appended to the pbsnodes invocation.
    check_output_mock.assert_called_with("/opt/torque/bin/pbsnodes -x host1", raise_on_error=False)
    assert_that(parsed_nodes).is_equal_to(expected_output)
def get_busy_nodes():
    """Return how many compute nodes are counted as busy.

    A node is counted when it has jobs, or when one of its states is
    "offline" or "state-unknown" while "MOM-list-not-sent" is not among
    its states.
    """

    def _counts_as_busy(host):
        if host.jobs:
            return True
        # when a node is added it transitions from down,offline,MOM-list-not-sent -> down -> free
        flagged = any(status in ("offline", "state-unknown") for status in host.state)
        return flagged and "MOM-list-not-sent" not in host.state

    return sum(1 for host in get_compute_nodes_info().values() if _counts_as_busy(host))
def get_busy_nodes():
    """Return how many compute nodes are counted as busy, logging the nodes found.

    A node is counted when it has jobs, or when one of its states is
    "state-unknown" while "MOM-list-not-sent" is not among its states.
    """
    compute_nodes = get_compute_nodes_info()
    logging.info("Found the following compute nodes:\n%s", compute_nodes)

    busy_count = 0
    for host in compute_nodes.values():
        if not host.jobs:
            # when a node is added it transitions from down,offline,MOM-list-not-sent -> down -> free
            unknown = any(status in ("state-unknown",) for status in host.state)
            if not unknown or "MOM-list-not-sent" in host.state:
                continue
        busy_count += 1

    return busy_count
def is_node_down():
    """Check if node is down according to scheduler.

    Returns False only when the local host is known to the scheduler and
    none of TORQUE_NODE_ERROR_STATES appears in its state. In every other
    case, including any exception raised while checking, returns True.
    """
    try:
        local_hostname = check_command_output("hostname").strip()
        matching_hosts = get_compute_nodes_info(hostname_filter=[local_hostname])
        host_info = matching_hosts.get(local_hostname)

        if not host_info:
            log.warning("Node is not attached to scheduler. Reporting as down")
            return True

        log.info("Node is in state: '{0}'".format(host_info.state))
        has_error_state = any(
            error_state in host_info.state for error_state in TORQUE_NODE_ERROR_STATES
        )
        if not has_error_state:
            return False
    except Exception as e:
        # Best effort: a failed check is treated the same as a down node.
        log.error(
            "Failed when checking if node is down with exception %s. Reporting node as down.",
            e,
        )

    return True