def create_alloc_script(self, message, config, LOG):
    """
    Translate the "#TSI_" BSS parameters of the message into an
    allocation command line, followed by a command that extracts the
    allocation ID from the command's output.

    The allocation command is taken from config['tsi.alloc_cmd'] and
    extended with the options produced by parse_common_options().
    Its output goes to the STDOUT file, its exit code to the
    EXIT_CODE_FILE file.

    Returns the command(s) to be run (as a list of lines).
    """
    work_dir = extract_parameter(message, "USPACE_DIR")
    out_file = extract_parameter(message, "STDOUT", "stdout")
    id_file = extract_parameter(message, "ALLOCATION_ID", "ALLOCATION_ID")
    rc_file = extract_parameter(message, "EXIT_CODE_FILE",
                                "UNICORE_SCRIPT_EXIT_CODE")
    options = self.parse_common_options(message, config, LOG)

    # base command plus one blank-separated option per entry
    base_cmd = config['tsi.alloc_cmd']
    with_options = "".join([base_cmd] + [" " + option for option in options])
    allocate = "%s > %s 2>&1 ; echo $? > %s" % (with_options, out_file,
                                                rc_file)

    # the first run of digits in the output is written to the ID file
    extract_id = "grep -o '[[:digit:]]*' %s/%s | head -1 > %s" % (
        work_dir, out_file, id_file)

    return [allocate, extract_id]
def ls(message, connector, config, LOG):
    """List directory or get information about a file

    The message sent by the XNJS is scanned for:
        TSI_FILE    - name of file/path to list
        TSI_LS_MODE - "A" : just the file,
                      "R" : directory recursive
                      "N" : directory non-recursive

    Any other mode is rejected via connector.failed().

    The format of the output is as follows:

    Listing starts with the line:
        START_LISTING
    and ends with the line:
        END_LISTING

    The files are listed in depth-first order. Each time a sub-directory
    is found the entry for the sub-directory file is listed and then
    entries for all the file in the subdirectory are listed.

    The format for each listing line is the one produced by get_info().

    When all files in a sub-directory have been listed and the listing
    is continuing with the parent directory, a line with a single "<"
    is printed. This is required even when the listing is non-recursive.

    Errors while listing are logged and otherwise ignored: the listing
    is simply ended, and a non-existing path yields an empty listing.
    """
    path = extract_parameter(message, "FILE")
    path = expand_variables(path)
    mode = extract_parameter(message, "LS_MODE")
    if mode not in ("R", "A", "N"):
        connector.failed("Unknown TSI_LS mode: '%s', must be one of "
                         "'R', 'A' or 'N'." % mode)
        return
    as_single_file = mode == "A"
    recurse = mode == "R"

    connector.write_message("START_LISTING")
    if os.path.exists(path):
        try:
            if os.path.isdir(path) and not as_single_file:
                list_directory(connector, path, recurse)
            else:
                connector.write_message(get_info(path))
        except Exception as e:
            # Deliberately best-effort: this is somewhat weird, but the
            # perl TSI did it the same way. Only 'Exception' is caught so
            # that SystemExit / KeyboardInterrupt still propagate.
            LOG.debug("Error while listing %s: %s" % (path, str(e)))
    connector.write_message("END_LISTING")
def put_file_chunk(message, connector, config, LOG):
    """Write part of a file, reading data from the XNJS via the
    data_in stream.

    The message sent by the XNJS is scanned for:
        TSI_FILE        - name of file to write and mode, separated by
                          the last blank ("<path> <octal mode>")
        TSI_FILESACTION - what to do (overwrite = 1 , append = 3),
                          defaults to overwrite
        TSI_LENGTH      - how many bytes to write

    After the data has been written, the file's permissions are set to
    the requested mode.

    Raises:
        ValueError: if TSI_FILE contains no blank, or length / mode are
            not valid numbers.
        IOError: if the data stream ends before TSI_LENGTH bytes were
            received.
    """
    path_and_mode = extract_parameter(message, "FILE")
    mode_index = path_and_mode.rindex(" ")
    path = expand_variables(path_and_mode[:mode_index])
    mode = path_and_mode[mode_index + 1:]
    action = extract_parameter(message, "FILESACTION")
    if action is None:
        action = "1"
    length = int(extract_parameter(message, "LENGTH"))
    LOG.debug("Writing %d bytes of data to %s" % (length, path))
    open_mode = "ab" if action == "3" else "wb"

    with io.FileIO(path, open_mode) as f:
        # the next message tells the XNJS to start sending data
        connector.ok("ENDOFMESSAGE")
        remaining = length
        while remaining > 0:
            buf = connector.read_data(remaining)
            if not buf:
                # An empty read never decrements 'remaining'; without
                # this check the loop would spin forever on a closed
                # data channel.
                raise IOError(
                    "Premature end of data while writing %s: "
                    "%d of %d bytes missing" % (path, remaining, length))
            bytes_read = len(buf)
            remaining -= bytes_read
            # write it out, taking care to handle partial writes
            view = memoryview(buf)
            write_offset = 0
            while write_offset < bytes_read:
                write_offset += f.write(view[write_offset:])

    # change mode to requested mode
    os.chmod(path, int(mode, 8))
def get_file_chunk(message, connector, config, LOG):
    """Return part of a file to the XNJS via the data_out stream.

    The message sent by the XNJS is scanned for:
        TSI_FILE   - name of file to return
        TSI_START  - start byte
        TSI_LENGTH - how many bytes to return

    The reply reports the number of bytes actually read (which is
    smaller than TSI_LENGTH if the end of the file is reached), followed
    by the data itself.
    """
    path = extract_parameter(message, 'FILE')
    path = expand_variables(path)
    start = int(extract_parameter(message, 'START'))
    length = int(extract_parameter(message, 'LENGTH'))
    LOG.debug("Getting data from %s start at %d length %d"
              % (path, start, length))

    buf = bytearray(length)
    total_bytes_read = 0
    with io.FileIO(path, "rb") as f:
        if f.seekable():
            f.seek(start)
        # readinto() always fills its argument from index 0, so each
        # call gets a view of the still-unfilled tail of the buffer.
        # This keeps earlier data intact after a short read and makes
        # sure no more than 'length' bytes are read in total.
        with memoryview(buf) as view:
            while total_bytes_read < length:
                read = f.readinto(view[total_bytes_read:])
                if not read:
                    # 0 means end of file (None: nothing available)
                    break
                total_bytes_read += read

    # reply and report total bytes read
    connector.ok("TSI_LENGTH %s\nENDOFMESSAGE" % total_bytes_read)

    # write it out, taking care to handle partial writes
    write_offset = 0
    while write_offset < total_bytes_read:
        written = connector.write_data(buf[write_offset:total_bytes_read])
        if written is None:
            # the connector does not report a count; nothing to retry
            break
        write_offset += written
def process_acl(message, connector, config, LOG):
    """Handle an ACL request.

    The message is scanned for:
        TSI_ACL_OPERATION    - CHECK_SUPPORT, GETFACL or SETFACL
        TSI_ACL_PATH         - the file or directory to operate on
        TSI_ACL_COMMAND      - (SETFACL only) the command to apply
        TSI_ACL_COMMAND_SPEC - (SETFACL only) the argument of the command

    The kind of ACL support for the path ("POSIX", "NFS" or "NONE") is
    determined by check_support() using the 'tsi.acl' configuration.
    CHECK_SUPPORT replies "true" or "false"; it always replies "false"
    if 'tsi.acl_enabled' is not set. Unknown operations, unsupported
    file systems and missing SETFACL parameters are reported via
    connector.failed().
    """
    operation = extract_parameter(message, "ACL_OPERATION")
    path = extract_parameter(message, "ACL_PATH")
    acl = config.get('tsi.acl', {})

    if operation == "CHECK_SUPPORT":
        if not config.get('tsi.acl_enabled'):
            connector.ok("false")
            return
        support = check_support(path, acl)
        connector.ok("false" if support == "NONE" else "true")
    elif operation == "GETFACL":
        support = check_support(path, acl)
        if support == "POSIX":
            getfacl_posix(path, connector, config, LOG)
        elif support == "NFS":
            getfacl_nfs(path, connector, config, LOG)
        else:
            connector.failed(
                "ERROR: Getting ACL on this file system is unsupported.")
    elif operation == "SETFACL":
        support = check_support(path, acl)
        command = extract_parameter(message, "ACL_COMMAND")
        command_spec = extract_parameter(message, "ACL_COMMAND_SPEC")
        # Stop after reporting a missing parameter: otherwise setfacl
        # would be invoked with None and a second reply could be sent.
        if command_spec is None:
            connector.failed("Missing parameter TSI_ACL_COMMAND_SPEC")
            return
        if command is None:
            connector.failed("Missing parameter TSI_ACL_COMMAND")
            return
        if support == "POSIX":
            setfacl_posix(path, command, command_spec, connector, config,
                          LOG)
        elif support == "NFS":
            setfacl_nfs(path, command, command_spec, connector, config, LOG)
        else:
            connector.failed(
                "ERROR: Setting ACL on this file system is unsupported.")
    else:
        connector.failed("UNSUPPORTED_OPERATION: '%s'" % operation)
def create_submit_script(self, message, config, LOG):
    """
    parse the "#TSI_" BSS parameters from the message
    and convert them to the proper BSS instructions.

    The options shared with allocation requests come from
    parse_common_options(); this method adds the shell line, the job
    array settings, the output / error files and the working directory.

    Returns the script to submit to the BSS (as a list of lines)
    """
    submit_cmds = []
    outcome_dir = extract_parameter(message, "OUTCOME_DIR")
    stderr = extract_parameter(message, "STDERR", "stderr")
    stdout = extract_parameter(message, "STDOUT", "stdout")
    umask = extract_parameter(message, "UMASK")
    uspace_dir = extract_parameter(message, "USPACE_DIR")
    array_spec = extract_number(message, "ARRAY")
    array_limit = extract_number(message, "ARRAY_LIMIT")

    # first line has to be the shell
    submit_cmds.append("#!/bin/bash")
    for option in self.parse_common_options(message, config, LOG):
        submit_cmds.append("#SBATCH %s" % option)

    is_array_job = array_spec > 0
    if is_array_job:
        if array_limit > 0:
            array_spec = str(array_spec) + "%" + str(array_limit)
        submit_cmds.append("#SBATCH --array=%s" % array_spec)
        # one output / error file per array task
        stdout = stdout + "%a"
        stderr = stderr + "%a"

    submit_cmds.append("#SBATCH --output=%s/%s" % (outcome_dir, stdout))
    submit_cmds.append("#SBATCH --error=%s/%s" % (outcome_dir, stderr))
    submit_cmds.append("#SBATCH --chdir=%s" % uspace_dir)

    # Executable lines must come after ALL directives: sbatch stops
    # processing #SBATCH lines at the first non-comment line.
    if is_array_job:
        # no blanks around '=', otherwise the shell runs a command
        # named UC_ARRAY_TASK_ID instead of assigning the variable
        submit_cmds.append(
            "UC_ARRAY_TASK_ID=\"$SLURM_ARRAY_TASK_ID\"; "
            "export UC_ARRAY_TASK_ID")
    if umask is not None:
        submit_cmds.append("umask %s" % umask)

    return submit_cmds
def df(message, connector, config, LOG):
    """
    Report the space on the partition holding a given path, in the
    format that the XNJS expects.

    The path is taken from TSI_FILE and checked with 'df -P -B 1'.

    The format of the output is as follows:

    Output starts with the line:
        START_DF
    and ends with the line:
        END_DF

    In between, these values are reported (in bytes), one per line:
        TOTAL: The total space on the partition
        FREE:  The free space on the partition
        USER:  The user quota (optional)

    A value that could not be determined is reported as a placeholder.
    If the 'df' command fails, its output is sent as failure message.
    """
    target = expand_variables(extract_parameter(message, "FILE"))

    # TODO might want to add a cache or do not check
    # free space for certain paths
    (success, result) = run_command("df -P -B 1 %s" % target)
    if not success:
        connector.failed(result)
        return

    total = free = user = '******'
    try:
        for row in result.splitlines():
            hit = re.match(r"(\S+)\s+(\d+)\s+(\d+)\s+(\d+).+", row)
            if hit is None:
                continue
            # the last matching line wins
            total, free = hit.group(2), hit.group(4)
    except BaseException:
        # catch-all, exactly like the historic behaviour
        connector.failed("Wrong or unexpected output from 'df' "
                         "command: %s" % result)
        return

    report = (
        "START_DF",
        "TOTAL %s" % total,
        "FREE %s" % free,
        "USER %s" % user,
        "END_DF",
    )
    for entry in report:
        connector.write_message(entry)
def create_submit_script(self, message, config, LOG):
    """
    parse the "#TSI_" BSS parameters from the message
    and convert them to the proper BSS instructions.

    Numeric parameters are only used when they are positive (memory:
    when not negative); string parameters default to "NONE", meaning
    "not requested".

    Returns the script to submit to the BSS (as a list of lines)
    """
    submit_cmds = []

    email = extract_parameter(message, "EMAIL", "NONE")
    jobname = extract_parameter(message, "JOBNAME",
                                config['tsi.default_job_name'])
    outcome_dir = extract_parameter(message, "OUTCOME_DIR")
    project = extract_parameter(message, "PROJECT", "NONE")
    stderr = extract_parameter(message, "STDERR", "stderr")
    stdout = extract_parameter(message, "STDOUT", "stdout")
    umask = extract_parameter(message, "UMASK")
    uspace_dir = extract_parameter(message, "USPACE_DIR")
    memory = extract_number(message, "MEMORY")
    nodes = extract_number(message, "NODES")
    processors_per_node = extract_number(message, "PROCESSORS_PER_NODE")
    total_processors = extract_number(message, "TOTAL_PROCESSORS")
    array_spec = extract_number(message, "ARRAY")
    array_limit = extract_number(message, "ARRAY_LIMIT")
    queue = extract_parameter(message, "QUEUE", "NONE")
    reservation_id = extract_parameter(message, "RESERVATION_REFERENCE",
                                       "NONE")
    req_time = extract_number(message, "TIME")
    nodes_filter = config.get("tsi.nodes_filter", "")
    user_nodes_filter = extract_parameter(message, "BSS_NODES_FILTER",
                                          "NONE")
    qos = extract_parameter(message, "QOS", "NONE")

    # first line has to be the shell
    submit_cmds.append("#!/bin/bash")

    # jobname: check for illegal characters
    if re.search(r"[^0-9a-zA-Z\.:.=~/]", jobname) is not None:
        jobname = "UNICORE_job"
    submit_cmds.append("#SBATCH --job-name=%s" % jobname)

    if queue != "NONE":
        submit_cmds.append("#SBATCH --partition=%s" % queue)

    if project != "NONE":
        submit_cmds.append("#SBATCH --account=%s" % project)

    # nodes count
    if nodes > 0:
        # Multiple node and/or processors
        submit_cmds.append("#SBATCH --nodes=%s" % nodes)
        if processors_per_node > 0:
            submit_cmds.append("#SBATCH --ntasks-per-node=%s"
                               % processors_per_node)
    elif total_processors > 0:
        # request tasks and let Slurm figure out the nodes
        submit_cmds.append("#SBATCH --ntasks=%s" % total_processors)

    # nodes filter, can be both global and user defined
    if user_nodes_filter != "NONE":
        if nodes_filter != "":
            nodes_filter = nodes_filter + "&" + user_nodes_filter
        else:
            nodes_filter = user_nodes_filter
    if nodes_filter != "":
        submit_cmds.append("#SBATCH --constraint=%s" % nodes_filter)

    if qos != "NONE":
        submit_cmds.append("#SBATCH --qos=%s" % qos)

    if memory >= 0:
        # memory per node, '0' means that the job requests all of the
        # memory on each node
        submit_cmds.append("#SBATCH --mem=%s" % memory)

    if req_time > 0:
        # wall time. Convert to minutes, as accepted by SLURM. At least
        # one minute: '--time=0' would request NO time limit at all.
        time_in_minutes = max(int(req_time / 60), 1)
        submit_cmds.append("#SBATCH --time=%d" % time_in_minutes)

    if email != "NONE":
        submit_cmds.append("#SBATCH --mail-user=%s" % email)
        submit_cmds.append("#SBATCH --mail-type=ALL")

    if reservation_id != "NONE":
        submit_cmds.append("#SBATCH --reservation=%s" % reservation_id)

    is_array_job = array_spec > 0
    if is_array_job:
        if array_limit > 0:
            array_spec = str(array_spec) + "%" + str(array_limit)
        submit_cmds.append("#SBATCH --array=%s" % array_spec)
        # one output / error file per array task
        stdout = stdout + "%a"
        stderr = stderr + "%a"

    submit_cmds.append("#SBATCH --output=%s/%s" % (outcome_dir, stdout))
    submit_cmds.append("#SBATCH --error=%s/%s" % (outcome_dir, stderr))
    submit_cmds.append("#SBATCH --chdir=%s" % uspace_dir)

    # Executable lines must come after ALL directives: sbatch stops
    # processing #SBATCH lines at the first non-comment line.
    if is_array_job:
        # no blanks around '=', otherwise the shell runs a command
        # named UC_ARRAY_TASK_ID instead of assigning the variable
        submit_cmds.append(
            "UC_ARRAY_TASK_ID=\"$SLURM_ARRAY_TASK_ID\"; "
            "export UC_ARRAY_TASK_ID")
    if umask is not None:
        submit_cmds.append("umask %s" % umask)

    return submit_cmds
def create_submit_script(self, message, config, LOG):
    """
    parse the "#TSI_" BSS parameters from the message
    and convert them to the proper BSS instructions.

    String parameters default to "NONE", meaning "not requested".
    Numeric parameters are only used when they are positive.

    Returns the script to submit to the BSS (as a list of lines)
    """
    submit_cmds = []

    email = extract_parameter(message, "EMAIL", "NONE")
    jobname = extract_parameter(message, "JOBNAME",
                                config['tsi.default_job_name'])
    outcome_dir = extract_parameter(message, "OUTCOME_DIR")
    project = extract_parameter(message, "PROJECT", "NONE")
    stderr = extract_parameter(message, "STDERR", "stderr")
    stdout = extract_parameter(message, "STDOUT", "stdout")
    umask = extract_parameter(message, "UMASK", "NONE")
    memory = extract_number(message, "MEMORY")
    nodes = extract_number(message, "NODES")
    processors_per_node = extract_number(message, "PROCESSORS_PER_NODE")
    total_processors = extract_number(message, "TOTAL_PROCESSORS")
    gpus = extract_parameter(message, "SSR_GPUS", "NONE")
    array_spec = extract_number(message, "ARRAY")
    array_limit = extract_number(message, "ARRAY_LIMIT")
    queue = extract_parameter(message, "QUEUE", "NONE")
    reservation_id = extract_parameter(message, "RESERVATION_REFERENCE",
                                       "NONE")
    req_time = extract_number(message, "TIME")
    lsf_memory_conversion_factor = config.get(
        'tsi.lsf.memory_conversion_factor')

    if email != "NONE":
        submit_cmds.append("#BSUB -B -N -u %s" % email)

    if queue != "NONE":
        submit_cmds.append("#BSUB -q %s" % queue)

    if project != "NONE":
        submit_cmds.append("#BSUB -P %s" % project)

    # LSF slots:
    # use total_processors or nodes*processors per node
    slots = 0
    if total_processors > 0:
        slots = total_processors
    elif nodes > 0 and processors_per_node > 0:
        slots = nodes * processors_per_node
        submit_cmds.append("#BSUB -R \"span[ptile=%s]\""
                           % processors_per_node)
    if slots > 0:
        submit_cmds.append("#BSUB -n %s" % slots)

    # GPUs
    try:
        gpu_count = int(gpus)
    except (TypeError, ValueError):
        # not given ("NONE") or not a number: no GPUs requested
        gpu_count = 0
    if gpu_count > 0:
        submit_cmds.append("#BSUB -gpu \"num=%s:j_exclusive=yes\""
                           % gpu_count)

    # Wallclock time: LSF requires minutes.
    # The value is a number, so it is tested for being positive (a
    # comparison with "NONE" is always true); at least one minute is
    # requested so that short jobs do not end up with a zero limit.
    if req_time > 0:
        time_in_minutes = max(int(req_time) // 60, 1)
        submit_cmds.append("#BSUB -W %s" % time_in_minutes)

    # Memory: LSF specifies a limit per process
    if memory > 0:
        memory = int(memory)
        if processors_per_node > 0:
            # guard also avoids a division by zero
            memory = memory // int(processors_per_node)
        if lsf_memory_conversion_factor is not None:
            memory *= int(lsf_memory_conversion_factor)
        # NOTE(review): in the original it is unclear whether this
        # directive was active or commented out - confirm that the
        # memory limit is meant to be passed on to LSF.
        submit_cmds.append("#BSUB -M %s" % memory)

    if reservation_id != "NONE":
        submit_cmds.append("#BSUB -U %s" % reservation_id)

    # Jobname: check that it fits the rules
    match = re.search(r"([a-zA-Z]\S{0,14})", jobname)
    if match is not None:
        jobname = match.group(1)
    else:
        jobname = "UNICORE_job"

    is_array_job = array_spec > 0
    if is_array_job:
        if array_limit > 0:
            array_spec = "[" + str(array_spec) + "]" + "%" + str(
                array_limit)
        else:
            array_spec = "[" + str(array_spec) + "]"
        submit_cmds.append("#BSUB -J \"%s%s\"" % (jobname, array_spec))
        # one output / error file per array task
        stdout = stdout + "%I"
        stderr = stderr + "%I"
    else:
        submit_cmds.append("#BSUB -J %s" % jobname)

    submit_cmds.append("#BSUB -o %s/%s" % (outcome_dir, stdout))
    submit_cmds.append("#BSUB -e %s/%s" % (outcome_dir, stderr))

    # executable lines go after the block of #BSUB directives
    if is_array_job:
        # no blanks around '=', otherwise the shell runs a command
        # named UC_ARRAY_TASK_ID instead of assigning the variable
        submit_cmds.append(
            "UC_ARRAY_TASK_ID=\"$LSB_JOB_INDEX\"; export UC_ARRAY_TASK_ID")
    # the default is the string "NONE", which must not end up in the
    # script as 'umask NONE'
    if umask is not None and umask != "NONE":
        submit_cmds.append("umask %s" % umask)

    return submit_cmds
def create_submit_script(self, message, config, LOG):
    """
    parse the "#TSI_" BSS parameters from the message
    and convert them to the proper BSS instructions.

    String parameters default to "NONE", meaning "not requested".
    Numeric parameters are only used when they are positive.

    Returns the script to submit to the BSS (as a list of lines)
    """
    submit_cmds = []

    email = extract_parameter(message, "EMAIL", "NONE")
    jobname = extract_parameter(message, "JOBNAME",
                                config['tsi.default_job_name'])
    outcome_dir = extract_parameter(message, "OUTCOME_DIR")
    project = extract_parameter(message, "PROJECT", "NONE")
    stderr = extract_parameter(message, "STDERR", "stderr")
    stdout = extract_parameter(message, "STDOUT", "stdout")
    umask = extract_parameter(message, "UMASK")
    uspace_dir = extract_parameter(message, "USPACE_DIR")
    nodes = extract_number(message, "NODES")

    # node properties: the configured filter and the user's filter are
    # both appended to the node specification, each preceded by ':'
    nodes_filter = config.get("tsi.nodes_filter", "")
    if nodes_filter != "":
        nodes_filter = ":" + nodes_filter
    user_nodes_filter = extract_parameter(message, "BSS_NODES_FILTER",
                                          "NONE")
    if user_nodes_filter != "NONE":
        nodes_filter = nodes_filter + ":" + user_nodes_filter

    processors_per_node = extract_number(message, "PROCESSORS_PER_NODE")
    array_spec = extract_number(message, "ARRAY")
    array_limit = extract_number(message, "ARRAY_LIMIT")
    queue = extract_parameter(message, "QUEUE", "NONE")
    reservation_id = extract_parameter(message, "RESERVATION_REFERENCE",
                                       "NONE")
    req_time = extract_number(message, "TIME")

    # Jobname:
    # check that it fits the rules
    match = re.search(r"([a-zA-Z]\S{0,14})", jobname)
    if match is not None:
        jobname = match.group(1)
    else:
        jobname = "UNICORE_job"
    submit_cmds.append("#PBS -N %s" % jobname)

    if queue != "NONE":
        submit_cmds.append("#PBS -q %s" % queue)

    if project != "NONE":
        submit_cmds.append("#PBS -A %s" % project)

    # Nodes / CPUs
    if nodes > 0:
        node_spec = "nodes=%s" % nodes
        if processors_per_node > 0:
            # only when requested, to avoid 'ppn=<non-positive value>'
            node_spec += ":ppn=%s" % processors_per_node
        submit_cmds.append("#PBS -l %s%s" % (node_spec, nodes_filter))

    if req_time > 0:
        # Job time requirement. Wallclock time in seconds.
        submit_cmds.append("#PBS -l walltime=%s" % req_time)

    if email == "NONE":
        email = "n"
    else:
        email = "abe -M %s" % email
    submit_cmds.append("#PBS -m %s" % email)

    if reservation_id != "NONE":
        submit_cmds.append("#PBS -W x=FLAGS:ADVRES:%s" % reservation_id)

    is_array_job = array_spec > 0
    if is_array_job:
        if array_limit > 0:
            array_spec = str(array_spec) + "%" + str(array_limit)
        submit_cmds.append("#PBS -t %s" % array_spec)
        # one output / error file per array task
        stdout = stdout + "$PBS_ARRAYID"
        stderr = stderr + "$PBS_ARRAYID"

    submit_cmds.append("#PBS -o %s/%s" % (outcome_dir, stdout))
    submit_cmds.append("#PBS -e %s/%s" % (outcome_dir, stderr))
    submit_cmds.append("#PBS -d %s" % uspace_dir)
    if umask is not None:
        submit_cmds.append("#PBS -W umask=%s" % umask)

    # Executable lines must come after ALL directives: qsub only scans
    # for #PBS lines up to the first executable line.
    if is_array_job:
        # no blanks around '=', otherwise the shell runs a command
        # named UC_ARRAY_TASK_ID instead of assigning the variable
        submit_cmds.append(
            "UC_ARRAY_TASK_ID=\"$PBS_ARRAYID\"; export UC_ARRAY_TASK_ID")

    return submit_cmds
def parse_common_options(self, message, config, LOG):
    """
    Build the list of command line options that are common to
    both sbatch and salloc from the "#TSI_" BSS parameters
    in the message.

    String parameters default to "NONE", meaning "not requested";
    numeric ones are used only when positive (memory: when not
    negative). Returns the options as a list of strings.
    """
    mail_to = extract_parameter(message, "EMAIL", "NONE")
    name = extract_parameter(message, "JOBNAME",
                             config['tsi.default_job_name'])
    account = extract_parameter(message, "PROJECT", "NONE")
    mem = extract_number(message, "MEMORY")
    node_count = extract_number(message, "NODES")
    # read like all other parameters, but not mapped to any option
    _processors = extract_number(message, "PROCESSORS")
    tasks_per_node = extract_number(message, "PROCESSORS_PER_NODE")
    task_total = extract_number(message, "TOTAL_PROCESSORS")
    partition = extract_parameter(message, "QUEUE", "NONE")
    reservation = extract_parameter(message, "RESERVATION_REFERENCE",
                                    "NONE")
    seconds = extract_number(message, "TIME")
    constraint = config.get("tsi.nodes_filter", "")
    user_constraint = extract_parameter(message, "BSS_NODES_FILTER", "NONE")
    quality = extract_parameter(message, "QOS", "NONE")
    exclusive = extract_parameter(message, "SSR_EXCLUSIVE", "NONE")

    # a job name containing an illegal character is replaced as a whole
    if re.search(r"[^0-9a-zA-Z\.:.=~/]", name) is not None:
        name = "UNICORE_job"
    options = ["--job-name=%s" % name]

    if partition != "NONE":
        options.append("--partition=%s" % partition)

    if exclusive.lower() == "true":
        options.append("--exclusive")

    if account != "NONE":
        options.append("--account=%s" % account)

    if node_count > 0:
        # explicit node count, optionally with tasks per node
        options.append("--nodes=%s" % node_count)
        if tasks_per_node > 0:
            options.append("--ntasks-per-node=%s" % tasks_per_node)
    elif task_total > 0:
        # only a task count: Slurm works out the nodes
        options.append("--ntasks=%s" % task_total)

    # the configured and the user defined filter are AND-ed
    if user_constraint != "NONE":
        if constraint == "":
            constraint = user_constraint
        else:
            constraint = constraint + "&" + user_constraint
    if constraint != "":
        options.append("--constraint=%s" % constraint)

    if quality != "NONE":
        options.append("--qos=%s" % quality)

    if mem >= 0:
        # per node; '0' requests all of the memory on each node
        options.append("--mem=%s" % mem)

    if seconds > 0:
        # SLURM takes minutes; never request less than one
        minutes = int(seconds / 60)
        if minutes < 1:
            minutes = 1
        options.append("--time=%d" % minutes)

    if mail_to != "NONE":
        options.extend(["--mail-user=%s" % mail_to, "--mail-type=ALL"])

    if reservation != "NONE":
        options.append("--reservation=%s" % reservation)

    return options
def create_submit_script(self, message, config, LOG):
    """
    parse the "#TSI_" BSS parameters from the message
    and convert them to the proper BSS instructions.

    The job is always submitted as a Blue Gene job ('job_type =
    bluegene'). String parameters default to "NONE", meaning "not
    requested"; numeric parameters are only used when they are positive.

    Returns the script to submit to the BSS (as a list of lines)
    """
    submit_cmds = []

    email = extract_parameter(message, "EMAIL", "NONE")
    jobname = extract_parameter(message, "JOBNAME",
                                config['tsi.default_job_name'])
    outcome_dir = extract_parameter(message, "OUTCOME_DIR")
    project = extract_parameter(message, "PROJECT", "NONE")
    stderr = extract_parameter(message, "STDERR", "stderr")
    stdout = extract_parameter(message, "STDOUT", "stdout")
    umask = extract_parameter(message, "UMASK")
    memory = extract_number(message, "MEMORY")
    nodes = extract_number(message, "NODES")
    queue = extract_parameter(message, "QUEUE", "NONE")
    reservation_id = extract_parameter(message, "RESERVATION_REFERENCE",
                                       "NONE")
    req_time = extract_number(message, "TIME")
    # BlueGene topology
    topology = extract_parameter(message, "SSR_TOPOLOGY", "Either")

    # first line is shell ('#!' - without the '!' it is just a comment)
    submit_cmds.append("#!/bin/sh")

    # Jobname:
    # check that it fits the rules
    match = re.search(r"([a-zA-Z]\S{0,14})", jobname)
    if match is not None:
        jobname = match.group(1)
    else:
        jobname = "UNICORE_job"
    submit_cmds.append("# @ job_name = %s" % jobname)

    if queue != "NONE":
        submit_cmds.append("# @ class = %s" % queue)

    if project != "NONE":
        submit_cmds.append("# @ account_no = %s" % project)

    # Blue Gene stuff
    submit_cmds.append("# @ job_type = bluegene")
    submit_cmds.append("# @ bg_connectivity = %s" % topology)
    if memory > 0:
        submit_cmds.append("# @ bg_requirements = (Memory>= %s)" % memory)
    if nodes > 0:
        submit_cmds.append("# @ bg_size = %s" % nodes)

    # Job time requirement, in seconds. Only written when a time was
    # requested, so that no non-positive limit ends up in the script.
    # NOTE(review): the request is a wallclock time, but it is passed
    # as 'cpu_limit' - confirm that this is the intended keyword.
    if req_time > 0:
        submit_cmds.append("# @ cpu_limit = %s" % req_time)

    if email != "NONE":
        submit_cmds.append("# @ notification = always")
        submit_cmds.append("# @ notify_user = %s" % email)

    if reservation_id != "NONE":
        submit_cmds.append("# @ ll_res_id = %s" % reservation_id)

    submit_cmds.append("# @ output = %s/%s" % (outcome_dir, stdout))
    submit_cmds.append("# @ error = %s/%s" % (outcome_dir, stderr))

    if umask is not None:
        submit_cmds.append("umask %s" % umask)

    submit_cmds.append("# @ comment = UNICORE")

    return submit_cmds