def exec_qconf_command(hosts, qhost_command):
    """
    Run a single qconf operation against a group of hosts.

    :param hosts: sequence of objects exposing a ``hostname`` attribute
    :param qhost_command: object exposing ``command_flags``, ``description`` and
        ``successful_messages`` (an iterable of regex patterns)
    :return: the hosts whose corresponding output line matched one of the successful
        patterns; an empty list when no hosts are given or when any exception is raised
    """
    if not hosts:
        return []

    host_list = ",".join(host.hostname for host in hosts)

    def _reports_success(message):
        # A line counts as a success when it matches at least one of the known patterns.
        for pattern in qhost_command.successful_messages:
            if re.match(pattern, message) is not None:
                return True
        return False

    try:
        logging.info("Executing operation '%s' for hosts %s", qhost_command.description, host_list)
        command = "qconf {flags} {hostnames}".format(flags=qhost_command.command_flags, hostnames=host_list)
        # raise_on_error is disabled on purpose: success is decided by inspecting the output.
        output = check_sge_command_output(command, raise_on_error=False)
        # The output is assumed to carry one message line per host, in the order given.
        messages = output.split("\n")
        return [host for host, message in zip(hosts, messages) if _reports_success(message)]
    except Exception as e:
        logging.error(
            "Unable to execute operation '%s' for hosts %s. Failed with exception %s",
            qhost_command.description,
            host_list,
            e,
        )
        return []
def _is_host_configured(command, hostname):
    """
    Tell whether a host is listed in the output of the given SGE command.

    The command is expected to print one host per line, for example:
        ip-172-31-66-16.ec2.internal
        ip-172-31-74-69.ec2.internal

    :param command: command line passed to check_sge_command_output
    :param hostname: short hostname (the label before the first dot) to look for
    :return: True if the short name of any listed host equals hostname, False otherwise
    """
    output = check_sge_command_output(command, log)
    # Compare for equality rather than substring containment: with a substring test,
    # looking up "ip-172-31-66-1" would wrongly match "ip-172-31-66-16".
    # Blank lines are skipped so that an empty hostname never matches.
    return any(
        line.strip().split(".")[0] == hostname
        for line in output.split("\n")
        if line.strip()
    )
def _is_host_configured(command, hostname):
    """
    Tell whether a host is listed in the output of the given SGE command.

    The command is expected to print one host per line, for example:
        ip-172-31-66-16.ec2.internal
        ip-172-31-74-69.ec2.internal

    :param command: command line passed to check_sge_command_output
    :param hostname: short hostname (the label before the first dot) to look for
    :return: True if the short name of any listed host equals hostname, False otherwise
    """
    output = check_sge_command_output(command, log)
    # Compare for equality rather than substring containment: with a substring test,
    # looking up "ip-172-31-66-1" would wrongly match "ip-172-31-66-16".
    # Blank lines are skipped so that an empty hostname never matches.
    return any(
        line.strip().split(".")[0] == hostname
        for line in output.split("\n")
        if line.strip()
    )
def _run_qstat(full_format=False, hostname_filter=None, job_state_filter=None):
    """
    Build a ``qstat -xml`` command line from the given options and execute it.

    :param full_format: when truthy, append the ``-f`` flag
    :param hostname_filter: when truthy, restrict the query with ``-l hostname=<value>``
    :param job_state_filter: when truthy, restrict the query with ``-s <value>``
    :return: whatever check_sge_command_output returns for the assembled command
    """
    fragments = ["qstat -xml -g dt -u '*'"]
    if full_format:
        fragments.append(" -f")
    if hostname_filter:
        fragments.append(" -l hostname={0}".format(hostname_filter))
    if job_state_filter:
        fragments.append(" -s {0}".format(job_state_filter))
    return check_sge_command_output("".join(fragments))
def get_required_nodes(instance_properties):
    """
    Compute how many nodes are needed to satisfy the slots requested by pending jobs.

    :param instance_properties: mapping whose ``slots`` entry is the number of slots per node
    :return: total requested slots divided by slots per node, rounded up
    """
    raw_output = check_sge_command_output("qstat -g d -s p -u '*'", log)

    requested_slots = 0
    # The first two lines of the output are skipped (header and separator).
    for row in raw_output.split("\n")[2:]:
        fields = row.split()
        if len(fields) < 8:
            continue
        # The eighth whitespace-separated field is read as the slot count of the job.
        requested_slots += int(fields[7])

    slots_per_node = instance_properties.get("slots")
    # Negating around floor division yields ceiling division on integers.
    return -(-requested_slots // slots_per_node)
def get_busy_nodes(instance_properties):
    """
    Count the queue instances reported by ``qstat -f`` that have used or reserved slots.

    :param instance_properties: not used by this function
    :return: number of output rows whose used or reserved slot count is greater than zero
    """
    qstat_output = check_sge_command_output("qstat -f", log)

    busy_count = 0
    # The first two lines of the output are skipped (header and separator).
    for row in qstat_output.split("\n")[2:]:
        columns = row.split()
        if len(columns) != 5:
            continue
        # Third column has the form resv/used/tot.
        reserved, in_use, _total = columns[2].split("/")
        if int(in_use) > 0 or int(reserved) > 0:
            busy_count += 1
    return busy_count
def hasPendingJobs():
    """
    Check whether the scheduler reports pending jobs.

    The qstat command prints the pending jobs in the following format:
        job-ID  prior   name       user         state submit/start at     queue      slots ja-task-ID
        ---------------------------------------------------------------------------------------------
             70 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:24                1
             71 0.55500 job.sh     ec2-user     qw    08/08/2018 22:37:24                1

    :return: tuple (has_pending, error). has_pending is True when the output contains more
        than one non-empty line; error is True when the command raised
        subprocess.CalledProcessError, in which case has_pending is False.
    """
    command = "qstat -g d -s p -u '*'"
    try:
        output = check_sge_command_output(command, log)
    except subprocess.CalledProcessError:
        return False, True

    # Build a real list: on Python 3 filter() returns an iterator, which has no len().
    lines = [line for line in output.split("\n") if line]
    return len(lines) > 1, False
def hasJobs(hostname):
    """
    Check whether qstat reports any job on the given host.

    The query uses the expanded parallel job view (-g t), which prints one row per task, e.g.:
        job-ID  prior   name   user      state submit/start at     queue             master ja-task-ID
        ----------------------------------------------------------------------------------------------
             16 0.60500 job.sh ec2-user  r     02/06/2019 11:06:30 all.q@<hostname>  SLAVE
                                                                   all.q@<hostname>  SLAVE
             17 0.50500 STDIN  ec2-user  r     02/06/2019 11:06:30 all.q@<hostname>  MASTER 1

    :param hostname: host to query, substituted into ``-l hostname=<hostname>``
    :return: True when the command output is not the empty string; False when it is empty
        or when the command raised subprocess.CalledProcessError
    """
    command = "qstat -g t -l hostname={0} -u '*'".format(hostname)
    try:
        return check_sge_command_output(command, log) != ""
    except subprocess.CalledProcessError:
        return False