예제 #1
0
def lockHost(hostname, unlock=False):
    """Lock or unlock a host by running ``qmod`` on its all.q queue instance.

    Runs ``qmod -d all.q@<hostname>`` to lock, or ``qmod -e all.q@<hostname>``
    when ``unlock`` is truthy. A ``subprocess.CalledProcessError`` raised by
    ``run_sge_command`` is logged as an error and not re-raised.

    :param hostname: host whose all.q queue instance is targeted
    :param unlock: when truthy, pass ``-e`` instead of ``-d``
    """
    # A conditional expression is used instead of ``cond and a or b``, which
    # silently picks the wrong branch whenever the "true" value is falsy.
    mod = "-e" if unlock else "-d"
    command = ["qmod", mod, "all.q@%s" % hostname]

    try:
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.error("Error %s host %s", "unlocking" if unlock else "locking", hostname)
예제 #2
0
def lockHost(hostname, unlock=False):
    """Run ``qmod -d`` (lock) or ``qmod -e`` (unlock) on ``all.q@<hostname>``.

    The command is best effort: if ``run_sge_command`` raises
    ``subprocess.CalledProcessError`` the failure is logged and swallowed.

    :param hostname: host whose all.q queue instance is targeted
    :param unlock: truthy to unlock (``-e``), falsy to lock (``-d``)
    """
    # Explicit conditional expression; the ``and``/``or`` pseudo-ternary only
    # works by accident of "-e" being a truthy string.
    mod = "-e" if unlock else "-d"
    command = ["qmod", mod, "all.q@%s" % hostname]

    try:
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.error("Error %s host %s", "unlocking" if unlock else "locking",
                  hostname)
def _run_sge_command_for_multiple_hosts(hosts, command_template):
    """
    Sequentially run an sge command on the master node for the given hostnames.

    :param hosts: iterable of objects exposing ``hostname`` and ``slots`` attributes
    :param command_template: ``str.format`` template; ``{hostname}`` and ``{slots}``
        are filled in from each host
    :return: list of the hosts for which the command ran without raising
    """
    succeeded_hosts = []
    for host in hosts:
        command = command_template.format(hostname=host.hostname,
                                          slots=host.slots)
        try:
            # The template is already fully rendered above. Formatting it a
            # second time would misinterpret any literal braces that remain.
            run_sge_command(command)
        except Exception as e:
            # Best effort: a failure on one host must not stop the others.
            logging.error("Failed when executing command %s with exception %s",
                          command, e)
        else:
            succeeded_hosts.append(host)
    return succeeded_hosts
예제 #4
0
def removeHost(hostname, cluster_user, max_cluster_size):
    """Remove ``hostname`` from the SGE configuration via ``qconf``.

    Steps run in this order: administrative host, purge from all.q, removal
    from the @allhosts host group, execution host, submission host. The role
    removals only run when ``_is_host_configured`` reports the host for the
    matching listing command; the two queue/group removals are best effort.

    :param hostname: host to remove
    :param cluster_user: not referenced in this function
    :param max_cluster_size: not referenced in this function
    """
    def drop_role_if_listed(listing_command, removal_template, absent_message):
        # Skip the removal (and just log) when the host is not reported as
        # configured for this role.
        if not _is_host_configured(listing_command, hostname):
            log.info(absent_message, hostname)
            return
        run_sge_command(removal_template % hostname, log)

    def run_best_effort(command_template, failure_message):
        # A CalledProcessError is downgraded to a warning so that the
        # remaining clean-up steps still run.
        try:
            run_sge_command(command_template % hostname, log)
        except subprocess.CalledProcessError:
            log.warning(failure_message, hostname)

    log.info('Removing %s', hostname)

    # Administrative host role
    drop_role_if_listed("qconf -sh", "qconf -dh %s",
                        'Host %s is not administrative host')

    # Queue instance in all.q
    run_best_effort("qconf -purge queue '*' all.q@%s",
                    "Unable to remove host %s from all.q")

    # Membership in the @allhosts host group
    run_best_effort("qconf -dattr hostgroup hostlist %s @allhosts",
                    "Unable to remove host %s from @allhosts group")

    # Execution host role
    drop_role_if_listed("qconf -sel", "qconf -de %s",
                        'Host %s is not execution host')

    # Submission host role
    drop_role_if_listed("qconf -ss", "qconf -ds %s",
                        'Host %s is not submission host')
예제 #5
0
def removeHost(hostname, cluster_user, max_cluster_size):
    """Deconfigure ``hostname`` from SGE, one ``qconf`` call per step.

    Order of operations: drop the administrative-host role, purge the host
    from all.q, remove it from the @allhosts group, drop the execution-host
    role, drop the submission-host role. A role is only dropped when
    ``_is_host_configured`` returns true for the matching listing command;
    otherwise an info message is logged. The all.q and @allhosts steps log a
    warning on ``subprocess.CalledProcessError`` and continue.

    :param hostname: host to remove
    :param cluster_user: not referenced in this function
    :param max_cluster_size: not referenced in this function
    """
    log.info('Removing %s', hostname)

    # Administrative host role (listing: qconf -sh, removal: qconf -dh)
    if not _is_host_configured("qconf -sh", hostname):
        log.info('Host %s is not administrative host', hostname)
    else:
        run_sge_command("qconf -dh %s" % hostname, log)

    # Purge the host's entries from all.q; a failure is not fatal.
    try:
        purge_command = "qconf -purge queue '*' all.q@%s" % hostname
        run_sge_command(purge_command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to remove host %s from all.q", hostname)

    # Remove the host from the @allhosts host group; a failure is not fatal.
    try:
        hostgroup_command = "qconf -dattr hostgroup hostlist %s @allhosts" % hostname
        run_sge_command(hostgroup_command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to remove host %s from @allhosts group", hostname)

    # Execution host role (listing: qconf -sel, removal: qconf -de)
    if not _is_host_configured("qconf -sel", hostname):
        log.info('Host %s is not execution host', hostname)
    else:
        run_sge_command("qconf -de %s" % hostname, log)

    # Submission host role (listing: qconf -ss, removal: qconf -ds)
    if not _is_host_configured("qconf -ss", hostname):
        log.info('Host %s is not submission host', hostname)
    else:
        run_sge_command("qconf -ds %s" % hostname, log)
def unlock_host(hostname):
    """Run ``qmod -e`` against the all.q queue instance of ``hostname``.

    Exceptions raised by ``run_sge_command`` are not handled here and
    propagate to the caller.

    :param hostname: host whose all.q queue instance is targeted
    """
    logging.info("Unlocking host %s", hostname)
    queue_instance = "all.q@{0}".format(hostname)
    run_sge_command(["qmod", "-e", queue_instance])
예제 #7
0
def addHost(hostname, cluster_user, slots, max_cluster_size):
    """Register ``hostname`` with SGE and open an SSH connection to it.

    The host is added as administrative, submission and execution host via
    ``qconf``. Each of these steps that fails with
    ``subprocess.CalledProcessError`` is logged as a warning and the next
    step still runs. An SSH connection is then attempted up to three times;
    if every attempt fails with a socket error, a critical message is logged
    and the function returns.

    :param hostname: name of the host to add
    :param cluster_user: user to log in as; its ``~/.ssh/id_rsa`` key is used
    :param slots: number of slots of the host (logged on entry)
    :param max_cluster_size: not referenced in this function body
    """
    log.info('Adding %s with %s slots', hostname, slots)

    # Adding host as administrative host
    try:
        command = ("qconf -ah %s" % hostname)
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to add host %s as administrative host", hostname)

    # Adding host as submit host
    try:
        command = ("qconf -as %s" % hostname)
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to add host %s as submission host", hostname)

    # Setup template to add execution host
    qconf_Ae_template = """hostname              %s
load_scaling          NONE
complex_values        NONE
user_lists            NONE
xuser_lists           NONE
projects              NONE
xprojects             NONE
usage_scaling         NONE
report_variables      NONE
"""

    # Write through the temporary file's own text-mode handle so that no
    # second, never-closed handle to the same path is needed.
    with NamedTemporaryFile(mode='w') as t:
        t.write(qconf_Ae_template % hostname)
        # qconf reads the file by name, so the content must be on disk first.
        t.flush()
        os.fsync(t.fileno())

        # Add host as an execution host
        try:
            command = ("qconf -Ae %s" % t.name)
            run_sge_command(command, log)
        except subprocess.CalledProcessError:
            log.warning("Unable to add host %s as execution host", hostname)

    # Connect and start SGE
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    hosts_key_file = os.path.expanduser("~" +
                                        cluster_user) + '/.ssh/known_hosts'
    user_key_file = os.path.expanduser("~" + cluster_user) + '/.ssh/id_rsa'
    max_attempts = 3
    attempt = 0  # named ``attempt`` to avoid shadowing the ``iter`` builtin
    connected = False
    while attempt < max_attempts and not connected:
        try:
            log.info('Connecting to host: %s iter: %d', hostname, attempt)
            ssh.connect(hostname,
                        username=cluster_user,
                        key_filename=user_key_file)
            connected = True
        except socket.error as e:
            log.error('Socket error: %s', e)
            # Wait slightly longer after each failed attempt.
            time.sleep(10 + attempt)
            attempt += 1
            if attempt == max_attempts:
                log.critical("Unable to provision host")
                return
예제 #8
0
    except IOError:
        ssh._host_keys_filename = None
        pass
    ssh.save_host_keys(hosts_key_file)
    command = (
        "sudo sh -c \'cd {0} && {0}/inst_sge -noremote -x -auto /opt/parallelcluster/templates/sge/sge_inst.conf\'"
    ).format(sge.SGE_ROOT)
    stdin, stdout, stderr = ssh.exec_command(command)
    while not stdout.channel.exit_status_ready():
        time.sleep(1)
    ssh.close()

    # Add the host to the all.q
    try:
        command = ("qconf -aattr hostgroup hostlist %s @allhosts" % hostname)
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to add host %s to all.q", hostname)

    # Set the numbers of slots for the host
    try:
        command = ('qconf -aattr queue slots ["%s=%s"] all.q' %
                   (hostname, slots))
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to set the number of slots for the host %s",
                    hostname)


def removeHost(hostname, cluster_user, max_cluster_size):
    log.info('Removing %s', hostname)
예제 #9
0
def addHost(hostname, cluster_user, slots, max_cluster_size):
    """Register ``hostname`` with SGE and open an SSH connection to it.

    The host is added as administrative, submission and execution host via
    ``qconf``. Each of these steps that fails with
    ``subprocess.CalledProcessError`` is logged as a warning and the next
    step still runs. An SSH connection is then attempted up to three times;
    if every attempt fails with a socket error, a critical message is logged
    and the function returns.

    :param hostname: name of the host to add
    :param cluster_user: user to log in as; its ``~/.ssh/id_rsa`` key is used
    :param slots: number of slots of the host (logged on entry)
    :param max_cluster_size: not referenced in this function body
    """
    log.info('Adding %s with %s slots', hostname, slots)

    # Adding host as administrative host
    try:
        command = ("qconf -ah %s" % hostname)
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to add host %s as administrative host", hostname)

    # Adding host as submit host
    try:
        command = ("qconf -as %s" % hostname)
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to add host %s as submission host", hostname)

    # Setup template to add execution host
    qconf_Ae_template = """hostname              %s
load_scaling          NONE
complex_values        NONE
user_lists            NONE
xuser_lists           NONE
projects              NONE
xprojects             NONE
usage_scaling         NONE
report_variables      NONE
"""

    # Write through the temporary file's own text-mode handle so that no
    # second, never-closed handle to the same path is needed.
    with NamedTemporaryFile(mode='w') as t:
        t.write(qconf_Ae_template % hostname)
        # qconf reads the file by name, so the content must be on disk first.
        t.flush()
        os.fsync(t.fileno())

        # Add host as an execution host
        try:
            command = ("qconf -Ae %s" % t.name)
            run_sge_command(command, log)
        except subprocess.CalledProcessError:
            log.warning("Unable to add host %s as execution host", hostname)

    # Connect and start SGE
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    hosts_key_file = os.path.expanduser("~" + cluster_user) + '/.ssh/known_hosts'
    user_key_file = os.path.expanduser("~" + cluster_user) + '/.ssh/id_rsa'
    max_attempts = 3
    attempt = 0  # named ``attempt`` to avoid shadowing the ``iter`` builtin
    connected = False
    while attempt < max_attempts and not connected:
        try:
            log.info('Connecting to host: %s iter: %d', hostname, attempt)
            ssh.connect(hostname, username=cluster_user, key_filename=user_key_file)
            connected = True
        except socket.error as e:
            log.error('Socket error: %s', e)
            # Wait slightly longer after each failed attempt.
            time.sleep(10 + attempt)
            attempt += 1
            if attempt == max_attempts:
                log.critical("Unable to provision host")
                return
예제 #10
0
    except IOError:
        ssh._host_keys_filename = None
        pass
    ssh.save_host_keys(hosts_key_file)
    command = (
        "sudo sh -c \'cd {0} && {0}/inst_sge -noremote -x -auto /opt/parallelcluster/templates/sge/sge_inst.conf\'"
    ).format(sge.SGE_ROOT)
    stdin, stdout, stderr = ssh.exec_command(command)
    while not stdout.channel.exit_status_ready():
        time.sleep(1)
    ssh.close()

    # Add the host to the all.q
    try:
        command = ("qconf -aattr hostgroup hostlist %s @allhosts" % hostname)
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to add host %s to all.q", hostname)

    # Set the numbers of slots for the host
    try:
        command = ('qconf -aattr queue slots ["%s=%s"] all.q' % (hostname, slots))
        run_sge_command(command, log)
    except subprocess.CalledProcessError:
        log.warning("Unable to set the number of slots for the host %s", hostname)


def removeHost(hostname, cluster_user, max_cluster_size):
    log.info('Removing %s', hostname)

    # Check if host is administrative host