def parse_version_tag(self, tag):
    """
    Validate a <version> tag and record the versions it declares.

    Every attribute on the tag must be listed in
    self.valid_version_attributes.  The 'directory' attribute must be an
    integer that appears in self.valid_directory_versions; when it is, the
    value is stored in self.directory_version.

    :param tag: the <version> XML element
    :raises civet_exceptions.ParseError: on an unknown attribute, or on a
        directory version that is not an integer or is not supported
    """
    xml_name = os.path.basename(self.xmlfile)

    for attr, raw_value in tag.attrib.items():
        if attr not in self.valid_version_attributes:
            msg = "{}: Illegal version attribute '{}'\n\n{}".format(
                xml_name, attr, ET.tostring(tag))
            raise civet_exceptions.ParseError(msg)

        if attr != 'directory':
            continue

        # A non-numeric value is a pipeline authoring error; report it the
        # same way as an unsupported number instead of leaking ValueError.
        try:
            version = int(raw_value)
        except ValueError:
            version = None

        if version is None or version not in self.valid_directory_versions:
            shown = raw_value if version is None else version
            msg = "{}: Invalid directory version '{}'\n\n{}".format(
                xml_name, shown, ET.tostring(tag))
            raise civet_exceptions.ParseError(msg)

        self.directory_version = version
def set_output_dir(self):
    """
    Register this object as the default output directory.

    PipelineFile.output_dir holds the single registered directory; a second
    registration is an error.

    :raises civet_exceptions.ParseError: if PipelineFile.output_dir is
        already set
    """
    if not PipelineFile.output_dir:
        PipelineFile.output_dir = self
        return

    # A default output directory has already been registered.
    message = ("ERROR: only one directory can be marked as the default "
               "output directory")
    raise civet_exceptions.ParseError(message)
def file(self, e):
    """
    Register a file-type child tag in this tool's file dictionary.

    The tag's id must not collide with an existing option name or with a
    file id already present in self.tool_files.  Parsing is delegated to
    PipelineFile.parse_xml(), which adds the new entry to self.tool_files.
    Ids of temporary files are also remembered in self.tempfile_ids.

    :param e: the XML element describing the file
    :raises civet_exceptions.ParseError: if the id duplicates an option
        name or another file id
    """
    fid = e.attrib['id']
    xml_name = os.path.basename(self.xml_file)

    # Ensure that the id is unique.
    if fid in self.options:
        # The original message had no placeholder for the id (and ran
        # "option" and "name" together), so the offending id was never
        # reported.
        raise civet_exceptions.ParseError(
            "{}: file id duplicates an option name: {}".format(
                xml_name, fid))
    if fid in self.tool_files:
        raise civet_exceptions.ParseError(
            "{}: file id is a duplicate: {}".format(xml_name, fid))

    PipelineFile.parse_xml(e, self.tool_files)

    # Track all the tool temporary files, so that we can
    # delete them at the end of the tool's execution.
    if self.tool_files[fid].is_temp:
        self.tempfile_ids.append(fid)
def parameter_to_path(self):
    """
    Replace a parameter reference with the parameter's actual value.

    When self.is_parameter is set, self.path holds a 1-based parameter
    number.  It is swapped for the matching entry of PipelineFile.params
    and self.is_parameter is cleared.  Files that are not parameters are
    left untouched.

    :raises civet_exceptions.ParseError: if the parameter number is less
        than 1 or greater than the number of registered parameters
    """
    if not self.is_parameter:
        return

    idx = self.path - 1
    # Reject numbers below 1 as well: a negative index would otherwise
    # silently pick a parameter counted from the end of the list.
    if idx < 0 or idx >= len(PipelineFile.params):
        msg = (
            "Parameter out of range, File: {} referenced parameter: "
            "{} (pipeline was passed {} parameters)").format(
                self.id, self.path, len(PipelineFile.params))
        raise civet_exceptions.ParseError(msg)

    self.path = PipelineFile.params[idx]
    self.is_parameter = False
def apply_in_dir_and_create_temp(self, files, circularity):
    """
    Resolve this file's containing directory and create anonymous temps.

    Nothing happens for a file that has no in_dir and is not temporary.
    Otherwise the containing directory is the (finalized) path of the
    in_dir file, or the default output directory when no in_dir is given.
    A list takes the directory itself as its path, an anonymous temp is
    created inside it, and any other file with an in_dir gets the directory
    joined in front of its path.

    :param files: dictionary mapping file ids to file objects
    :param circularity: list of files currently being finalized, passed on
        to finalize_file() of the in_dir file
    :raises civet_exceptions.ParseError: if in_dir names an unknown file,
        or in_dir is combined with an absolute path
    """
    parent_id = self.in_dir
    if not (parent_id or self.is_temp):
        return

    # Work out which directory this file lives in.
    if not parent_id:
        target_dir = PipelineFile.get_output_dir()
    elif parent_id in files:
        parent = files[parent_id]
        parent.finalize_file(files, circularity)
        target_dir = parent.path
    else:
        msg = ("ERROR: while processing file with id: '{}', "
               "in_dir is unknown file: '{}'".format(self.id, parent_id))
        raise civet_exceptions.ParseError(msg)

    if self.is_list:
        self.path = target_dir
        return

    if self.is_temp and not self.path:
        # An anonymous temp is created directly in the proper directory.
        if self._is_dir:
            self.path = tempfile.mkdtemp(dir=target_dir)
        else:
            with tempfile.NamedTemporaryFile(dir=target_dir,
                                             delete=False) as handle:
                created = handle.name
            self.path = created
        if parent_id:
            self.in_dir = None
        return

    if parent_id:
        if os.path.isabs(self.path):
            raise civet_exceptions.ParseError(
                "Can't combine 'in_dir' attribute with absolute path")
        # Prefix the containing directory, then clear in_dir because it
        # has now been applied.
        self.path = os.path.join(target_dir, self.path)
        self.in_dir = None
def finalize_file(self, files, circularity):
    """
    Resolve everything this file's final path depends on.

    Handles files passed in as parameters and the from_file, based_on and
    in_dir relationships, then (for anything that is not a string)
    finalizes the path.  Directories flagged for creation are created.
    A file that is already finalized is skipped.

    :param files: dictionary mapping file ids to file objects
    :param circularity: list of the files currently being finalized; used
        to detect dependency cycles
    :raises civet_exceptions.ParseError: if a dependency cycle is detected
    """
    if self.finalized:
        return

    # detect dependency cycles
    if self in circularity:
        chain = "".join("\n" + str(pending) for pending in circularity)
        msg = ("File dependency cycle detected processing '{}' ".format(
            self.id) + chain + "\n\n" + str(self))
        raise civet_exceptions.ParseError(msg)

    circularity.append(self)

    self.parameter_to_path()
    self.apply_from_file(files, circularity)
    self.apply_based_on(files, circularity)

    if not self.is_string:
        # A civet_exceptions.ParseError raised here is left for a higher
        # level to handle.
        self.apply_in_dir_and_create_temp(files, circularity)

        try:
            self.finalize_path()
        except Exception as e:
            # Added to help diagnose a programming error that made
            # finalize_path() fail in certain edge cases; should it happen
            # again, the output gives the user something to share with us.
            sys.exit("ERROR CALLING finalize_path() for {}: {}\n"
                     "{}".format(self.id, self.path, e))

    self.finalized = True

    # Create the directory unless explicitly asked not to.
    if self._is_dir and self.create:
        utilities.make_sure_path_exists(self.path)

    # We must be the most recent entry on the in-progress list.
    popped = circularity.pop()
    if popped != self:
        print("circularity.pop() failed!\ncheck:{}".format(popped),
              file=sys.stderr)
        print(" self:{}".format(self), file=sys.stderr)
        sys.exit(1)
def apply_based_on(self, files, circularity):
    """
    Derive this file's name from the file named by self.based_on.

    The referenced file is finalized first.  Only the base name of its
    path is used (for a file list passed as a parameter, the base name of
    the first entry).  The pattern/replace substitution is applied first,
    followed by append, datestamp_append and datestamp_prepend.  The
    result is stored in self.path.  Nothing happens when self.based_on is
    not set.

    :param files: dictionary mapping file ids to file objects
    :param circularity: list of files currently being finalized, passed on
        to finalize_file() of the referenced file
    :raises civet_exceptions.ParseError: if based_on names an unknown file
    """
    if not self.based_on:
        return

    # the based_on attribute is the fid of another file
    # whose path we're going to mangle to create ours.
    # make sure this references an actual file id:
    if self.based_on not in files:
        msg = "ERROR: '{}' is based on unknown file: '{}'".format(
            self.id, self.based_on)
        raise civet_exceptions.ParseError(msg)

    bof = files[self.based_on]
    bof.finalize_file(files, circularity)

    if bof.list_from_param:
        # based on a filelist passed as a parameter, use first file in the
        # filelist
        path = bof.path.split(',')[0]
    else:
        path = bof.path

    # strip out any path - based_on only operates on filenames
    temp_path = os.path.basename(path)
    now = datetime.datetime.now()

    # do the replace first, so there is no chance other based_on
    # actions could affect the pattern matching.
    # Test against None rather than truthiness: replace="" is a legitimate
    # request to delete whatever the pattern matches.
    if self.pattern is not None and self.replace is not None:
        temp_path = re.sub(self.pattern, self.replace, temp_path)
    if self.append:
        temp_path = temp_path + self.append
    if self.datestamp_append:
        temp_path += now.strftime(self.datestamp_append)
    if self.datestamp_prepend:
        temp_path = now.strftime(self.datestamp_prepend) + temp_path

    self.path = temp_path
def _parse_XML(self, xmlfile, params, skip_validation=False, queue=None,
               submit_jobs=True, completion_mail=True, search_path="",
               user_override_file=None, keep_temp=False, release_jobs=True,
               force_conditional_steps=False, delay=None, email_address=None,
               error_email_address=None, walltime_multiplier=1,
               write_pipeline_files=False,
               tool_exec_mode=ToolExecModes.BATCH_STANDARD,
               error_email=True, job_name_prefix="CIVET__"):
    """
    Parse a pipeline XML file and populate this pipeline object.

    The run options are stored on self, option override files are read,
    all file-type tags are parsed and finalized, and only then are the
    <step> and <foreach> tags turned into Step / ForEach objects.

    :param xmlfile: path of the pipeline XML file
    :param params: parameters registered with PipelineFile; they may be
        file paths
    :param search_path: user search path for tool XML files
    :param user_override_file: optional option override file; an IOError
        while reading it terminates the program
    :param delay: optional delay string, parsed with
        utilities.parse_delay_string(); an unparsable value terminates
        the program
    :param email_address: defaults to the current user when not given
    :param error_email_address: defaults to email_address when not given
    :param write_pipeline_files: when true, the file summary is written to
        pipeline_files.json in the log directory
    :param job_name_prefix: None is stored as an empty string

    The remaining keyword arguments are stored unchanged as attributes of
    the same name (tool_exec_mode is stored as self.execution_mode).

    :raises civet_exceptions.ParseError: if the XML is malformed or the
        pipeline definition is invalid
    """
    try:
        pipe = ET.parse(xmlfile).getroot()
    except ET.ParseError as e:
        raise civet_exceptions.ParseError(
            "XML ParseError when parsing {}: {}".format(xmlfile, e))

    self.job_name_prefix = (job_name_prefix
                            if job_name_prefix is not None else "")

    # Register the directory of the master (pipeline) XML.
    # We'll use it to locate tool XML files.
    self.master_XML_dir = os.path.abspath(os.path.dirname(xmlfile))

    # Save some that we only need for reporting use, later.
    self.xmlfile = xmlfile
    self.params = params
    self.search_path = search_path
    self.user_override_file = user_override_file

    # search path for tool XML files
    self.user_search_path = search_path
    self.default_tool_search_path = ""

    # option overrides
    self.option_overrides = {}

    # used while generating a summary of all file ids and their final path
    # for logging
    self.file_summary = {}

    override_file = os.path.splitext(xmlfile)[0] + '.options'
    if os.path.exists(override_file):
        self.parse_override_file(override_file, "pipeline")

    try:
        if user_override_file:
            self.parse_override_file(user_override_file, "user")
    except IOError as e:
        sys.exit("Error opening options file '{}':\n\t".format(
            user_override_file) + os.strerror(e.errno))

    self.execution_mode = tool_exec_mode

    # Register the parameters that may be file paths
    PipelineFile.register_params(params)

    # The outermost tag must be pipeline; it must have a name
    # and must not have text
    if pipe.tag != "pipeline":
        raise civet_exceptions.ParseError(
            "Outermost tag of pipeline definition must be "
            "<pipeline></pipeline>")
    if 'name' not in pipe.attrib:
        raise civet_exceptions.ParseError(
            "<pipeline> 'name' attribute is required")
    self.name = pipe.attrib['name']
    # Element.text is None (not "") when nothing precedes the first child,
    # so guard before calling strip().
    if pipe.text and pipe.text.strip():
        raise civet_exceptions.ParseError(
            "<pipeline> tag may not contain text")

    # We need to process all our files before we process anything
    # else.  Stash anything not a file and process it in a later pass.
    pending = []

    # Set up for some properties
    self._output_dir = None
    self._log_dir = None
    self._job_runner = None
    self.validation_file = os.path.splitext(xmlfile)[0] + '_validation.data'
    self.queue = queue
    self.submit_jobs = submit_jobs
    self.completion_mail = completion_mail
    self.error_email = error_email
    self.keep_temp = keep_temp
    self.release_jobs = release_jobs
    self.force_conditional_steps = force_conditional_steps
    self.skip_validation = skip_validation
    self.delay = delay
    self.walltime_multiplier = walltime_multiplier

    if email_address:
        self.email_address = os.path.expandvars(email_address)
    else:
        self.email_address = getpass.getuser()

    if error_email_address:
        self.error_email_address = os.path.expandvars(error_email_address)
    else:
        self.error_email_address = self.email_address

    if self.delay:
        try:
            hours, minutes = utilities.parse_delay_string(self.delay)
        except ValueError as e:
            message = "Error parsing delay parameter '{}'. {}".format(
                self.delay, e)
            sys.exit(message)
        self.delay_timestamp = (datetime.datetime.now() +
                                datetime.timedelta(hours=hours,
                                                   minutes=minutes))

    if 'tool_search_path' in pipe.attrib:
        self.default_tool_search_path = pipe.attrib['tool_search_path']

    if 'path' in pipe.attrib:
        # Relative entries are taken relative to the pipeline XML's
        # directory.
        path_dirs = []
        file_dir = os.path.abspath(self.master_XML_dir)
        for d in pipe.attrib['path'].split(':'):
            if os.path.isabs(d):
                path_dirs.append(d)
            else:
                path_dirs.append(os.path.join(file_dir, d))
        self.path = ':'.join(path_dirs)
    else:
        self.path = None

    self.display_name = pipe.attrib.get('display_name', None)

    # And track the major components of the pipeline
    self.description = None
    self._steps = []
    self._files = {}
    self.foreach_barriers = {}
    self.foreach_tasks = {}

    # create some implicitly defined file IDs
    PipelineFile.add_simple_dir("PIPELINE_ROOT", self.master_XML_dir,
                                self._files, input=True)

    # Walk the child tags.
    for child in pipe:
        t = child.tag
        if t not in Pipeline.valid_tags:
            msg = "{}: Illegal tag: {}".format(
                os.path.basename(self.xmlfile), t)
            raise civet_exceptions.ParseError(msg)

        if t == 'step' or t == 'foreach':
            pending.append(child)
        elif t == 'description':
            # currently only used by the Civet UI, ignored by the civet
            # framework, but we will make sure the tag only occurs once
            if self.description:
                raise civet_exceptions.ParseError(
                    "a pipeline can only contain one <description> tag")
            else:
                # since we aren't actually using the description, just
                # set it to True for now
                self.description = True
        else:
            # <file> <dir> <filelist> and <string> are all handled by
            # PipelineFile
            PipelineFile.parse_xml(child, self._files)

    # Here we have finished parsing the files in the pipeline XML.
    # Time to fix up various aspects of files that need to have
    # all files done first.
    try:
        PipelineFile.finalize_file_paths(self._files)
    except civet_exceptions.ParseError as e:
        # add the xml file path to the exception message
        msg = "{}: {}".format(os.path.basename(self.xmlfile), e)
        raise civet_exceptions.ParseError(msg)

    if write_pipeline_files:
        sumarize_files(self._files, 'pipeline_files')
        with open(os.path.join(self.log_dir, "pipeline_files.json"),
                  'w') as f:
            f.write(json.dumps(self.file_summary, indent=4, sort_keys=True))

    # Now that our files are all processed and fixed up, we can
    # process the rest of the XML involved with this pipeline.
    for child in pending:
        t = child.tag
        if t == 'step':
            self._steps.append(Step(child, self._files))
        elif t == 'foreach':
            self._steps.append(ForEach(child, self._files))
def __init__(self, id, path, files, is_file=False, is_temp=False,
             is_input=False, is_dir=False, is_string=False, based_on=None,
             pattern=None, replace=None, append=None, datestamp_prepend=None,
             datestamp_append=None, in_dir=None, is_parameter=False,
             is_list=False, from_file=None, create=True, default_output=False,
             foreach_dep=None, description=None, paired=False):
    """
    Create a file object and register it under its id in `files`.

    The keyword arguments are stored unchanged as attributes; is_file and
    is_dir are stored as _is_file and _is_dir.

    :param id: unique identifier of the file
    :param path: path, string value or parameter number of the file
    :param files: dictionary of known files; the new object is added to it
    :param default_output: when true, the object is also registered as the
        default output directory via set_output_dir()
    :raises civet_exceptions.ParseError: if `id` is already in `files`
    """
    # identity
    self.id = id
    self.path = path
    self.cloud_path = None
    self.description = description

    # what kind of thing this is
    self._is_file = is_file
    self._is_dir = is_dir
    self.is_string = is_string
    self.is_list = is_list
    self.is_temp = is_temp
    self.is_input = is_input
    self.is_parameter = is_parameter
    self.paired = paired
    self.create = create

    # how the final path is derived
    self.in_dir = in_dir
    self.from_file = from_file
    self.based_on = based_on
    self.pattern = pattern
    self.replace = replace
    self.append = append
    self.datestamp_prepend = datestamp_prepend
    self.datestamp_append = datestamp_append

    # bookkeeping
    self.finalized = False
    self.creator_job = None
    self.consumer_jobs = []
    self.foreach_dep = foreach_dep

    # Kept separately because is_parameter is reset to False once the
    # parameter number has been converted to its value.
    self.list_from_param = bool(is_list and is_parameter)

    if self.id in files:
        # We've already seen this file ID.
        raise civet_exceptions.ParseError(
            "File with ID '{}' was already defined".format(self.id))

    # Register this file in the files/options namespace
    files[self.id] = self

    # Mark this as the default output directory if necessary.
    if default_output:
        self.set_output_dir()
def parse_xml(e, files):
    """
    Parse a <file>, <dir>, <filelist> or <string> tag into a PipelineFile.

    The tag's attributes are checked against the attribute lists that are
    valid for its type, mutually exclusive combinations are rejected, and
    a PipelineFile is constructed; its constructor registers the object
    in `files`.

    :param e: the XML element to parse
    :param files: dictionary of known files that receives the new object
    :raises civet_exceptions.ParseError: if the tag or its attributes are
        invalid, the 'id' attribute is missing, or 'parameter' is not an
        integer
    """
    import pipeline_parse as PL

    t = e.tag
    att = e.attrib

    # Make sure that we have the right kind of tag.
    if t not in PipelineFile.validFileTags:
        msg = "{}: Invalid tag '{}:'\n\n{}".format(
            os.path.basename(PL.xmlfile), t, ET.tostring(e))
        raise civet_exceptions.ParseError(msg)

    # The id attribute is required.  Report its absence as a parse error
    # rather than a bare KeyError.
    if 'id' not in att:
        msg = "{}: '{}' tag is missing the required 'id' attribute\n\n{}".format(
            os.path.basename(PL.xmlfile), t, ET.tostring(e))
        raise civet_exceptions.ParseError(msg)
    file_id = att['id']

    # What kind of file...
    is_file = t == 'file'
    is_dir = t == 'dir'
    is_list = t == 'filelist'
    is_string = t == 'string'

    # Init some variables.
    path = None
    based_on = None
    pattern = None
    replace = None
    append = None
    datestamp_prepend = None
    datestamp_append = None
    is_parameter = False
    default_output = False
    foreach_dep = None
    from_file = None
    description = None
    paired = False

    # make sure that the attributes make sense with the type of tag we are
    if is_file:
        for a in att:
            if a not in (PipelineFile.valid_common_attributes +
                         PipelineFile.valid_file_attributes):
                msg = "Illegal pipeline file attribute: '{}'\n\n{}".format(
                    a, ET.tostring(e))
                raise civet_exceptions.ParseError(msg)
    elif is_dir:
        for a in att:
            if a not in (PipelineFile.valid_common_attributes +
                         PipelineFile.valid_dir_attributes):
                msg = "Illegal pipeline dir attribute: '{}'\n\n{}".format(
                    a, ET.tostring(e))
                raise civet_exceptions.ParseError(msg)
        if 'default_output' in att:
            default_output = att['default_output'].upper() == 'TRUE'
            if 'in_dir' in att:
                msg = ("Must not combine default_output and "
                       "in_dir attributes.\n\n{}").format(ET.tostring(e))
                raise civet_exceptions.ParseError(msg)
        if 'from_file' in att:
            from_file = att['from_file']
            if 'filespec' in att:
                msg = ("Must not combine 'from_file' and "
                       "'filespec'\n\n{}").format(ET.tostring(e))
                raise civet_exceptions.ParseError(msg)
        # A directory has to get its location from somewhere.
        valid_source = ['filespec', 'based_on', 'parameter', 'from_file']
        if not any(x in att for x in valid_source):
            msg = ("dir tag must contain one of: {}"
                   "\n\n{}").format(", ".join(valid_source), ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
    elif is_string:
        for a in att:
            if a not in (PipelineFile.valid_common_attributes +
                         PipelineFile.valid_string_attributes):
                msg = "Illegal pipeline string attribute '{}'\n\n{}".format(
                    a, ET.tostring(e))
                raise civet_exceptions.ParseError(msg)
    elif is_list:
        for a in att:
            if a not in PipelineFile.valid_list_attributes:
                msg = "Illegal pipeline filelist attribute '{}'\n\n{}".format(
                    a, ET.tostring(e))
                raise civet_exceptions.ParseError(msg)

    # What kind of file?
    is_temp = False
    if 'temp' in att:
        is_temp = att['temp'].upper() == 'TRUE'

    # Input?
    is_input = False
    if 'input' in att:
        is_input = att['input'].upper() == 'TRUE'

    # Create directory?
    create = False
    if is_dir:
        if 'create' in att:
            create = att['create'].upper() == 'TRUE'
        elif not is_input:
            # if the directory is not flagged as an input (should exist at
            # submit time) then we will make sure it exists.
            create = True

    in_dir = None
    if 'in_dir' in att:
        in_dir = att['in_dir']

    if 'filespec' in att:
        path = att['filespec']

    if 'value' in att:
        path = att['value']

    if 'parameter' in att:
        if path:
            msg = ("Must not have both 'filespec' and 'parameter' "
                   "attributes:\n\n{}").format(ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
        if in_dir:
            msg = ("Must not have both 'in_dir' and 'parameter' "
                   "attributes:\n\n{}").format(ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
        # The parameter number is kept in `path` until it is converted to
        # the parameter's value later on.
        try:
            path = int(att['parameter'])
        except ValueError:
            msg = ("'parameter' attribute must be an integer:"
                   "\n\n{}").format(ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
        is_parameter = True

    if is_list and 'pattern' in att:
        pattern = att['pattern']

    if 'foreach_id' in att:
        foreach_dep = att['foreach_id']

    if 'based_on' in att:
        if path or from_file:
            msg = ("Must not combined 'based_on' with 'filespec', "
                   "'parameter', or 'from_file' "
                   "attributes:\n\n{}").format(ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
        based_on = att['based_on']

        if 'pattern' in att:
            pattern = att['pattern']
            if 'replace' not in att:
                msg = ("'pattern' attribute specified without 'replace' "
                       "attribute:\n\n{}").format(ET.tostring(e))
                raise civet_exceptions.ParseError(msg)
            replace = att['replace']

        if 'datestamp_append' in att:
            datestamp_append = att['datestamp_append']
        if 'datestamp_prepend' in att:
            datestamp_prepend = att['datestamp_prepend']
        if 'append' in att:
            if datestamp_append:
                msg = ("'append' attribute is incompatible with "
                       "'datestamp_append' attribute:"
                       "\n\n{}").format(ET.tostring(e))
                raise civet_exceptions.ParseError(msg)
            append = att['append']

    if 'description' in att:
        description = att['description']

    paired = att.get('paired', 'FALSE').upper() == 'TRUE'

    if is_list and not ((pattern and in_dir) or is_parameter):
        msg = ("'filelist' requires 'in_dir' and 'pattern' or it must be "
               "passed as a parameter\n\n{}".format(ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)

    if is_list and pattern and is_input:
        msg = ("pattern based filelist may not be specified as "
               "input:\n\n{}").format(ET.tostring(e))
        raise civet_exceptions.ParseError(msg)

    # Constructing the object registers it in `files`.
    PipelineFile(file_id, path, files, is_file, is_temp, is_input, is_dir,
                 is_string, based_on, pattern, replace, append,
                 datestamp_prepend, datestamp_append, in_dir, is_parameter,
                 is_list, from_file, create, default_output, foreach_dep,
                 description, paired)
def parse_XML(self, xmlfile, params, skip_validation=False, queue=None,
              submit_jobs=True, completion_mail=True, search_path="",
              user_override_file=None, keep_temp=False, release_jobs=True,
              force_conditional_steps=False, delay=None, email_address=None,
              error_email_address=None, walltime_multiplier=1):
    """
    Parse a pipeline XML file and populate this pipeline object.

    The run options are stored on self, option override files are read,
    <version> and file-type tags are parsed, the files are fixed up, and
    only then are the <step> and <foreach> tags turned into Step / ForEach
    objects.

    :param xmlfile: path of the pipeline XML file
    :param params: parameters registered with PipelineFile; they may be
        file paths
    :param search_path: user search path for tool XML files
    :param user_override_file: optional option override file; it is only
        read when it exists
    :param delay: optional delay string, parsed with
        utilities.parse_delay_string(); an unparsable value terminates
        the program
    :param email_address: defaults to the current user when not given
    :param error_email_address: defaults to email_address when not given

    The remaining keyword arguments are stored unchanged as attributes of
    the same name.

    :raises civet_exceptions.ParseError: if the pipeline definition is
        invalid
    """
    pipe = ET.parse(xmlfile).getroot()

    # Register the directory of the master (pipeline) XML.
    # We'll use it to locate tool XML files.
    self.master_XML_dir = os.path.abspath(os.path.dirname(xmlfile))

    # Save some that we only need for reporting use, later.
    self.xmlfile = xmlfile
    self.params = params
    self.search_path = search_path
    self.user_override_file = user_override_file

    # search path for tool XML files
    self.user_search_path = search_path
    self.default_tool_search_path = ""

    # option overrides
    self.option_overrides = {}

    override_file = os.path.splitext(xmlfile)[0] + '.options'
    if os.path.exists(override_file):
        self.parse_override_file(override_file, "pipeline")

    if user_override_file and os.path.exists(user_override_file):
        self.parse_override_file(user_override_file, "user")

    # Register the parameters that may be file paths
    PipelineFile.register_params(params)

    # The outermost tag must be pipeline; it must have a name
    # and must not have text
    if pipe.tag != "pipeline":
        raise civet_exceptions.ParseError(
            "Outermost tag of pipeline definition must be "
            "<pipeline></pipeline>")
    # Report a missing name as a parse error instead of a bare KeyError.
    if 'name' not in pipe.attrib:
        raise civet_exceptions.ParseError(
            "<pipeline> 'name' attribute is required")
    self.name = pipe.attrib['name']
    # Element.text is None (not "") when nothing precedes the first child,
    # so guard before calling strip().
    if pipe.text and pipe.text.strip():
        raise civet_exceptions.ParseError(
            "<pipeline> tag may not contain text")

    # We need to process all our files before we process anything
    # else.  Stash anything not a file and process it in a later pass.
    pending = []

    # Set up for some properties
    self._output_dir = None
    self._log_dir = None
    self._job_runner = None
    self.validation_file = os.path.splitext(xmlfile)[0] + '_validation.data'
    self.queue = queue
    self.submit_jobs = submit_jobs
    self.completion_mail = completion_mail
    self.keep_temp = keep_temp
    self.release_jobs = release_jobs
    self.force_conditional_steps = force_conditional_steps
    self.skip_validation = skip_validation
    self.delay = delay
    self.walltime_multiplier = walltime_multiplier

    if email_address:
        self.email_address = os.path.expandvars(email_address)
    else:
        self.email_address = getpass.getuser()

    if error_email_address:
        self.error_email_address = os.path.expandvars(error_email_address)
    else:
        self.error_email_address = self.email_address

    # Default; a <version> tag may replace it (see parse_version_tag).
    self.directory_version = 1

    if self.delay:
        try:
            hours, minutes = utilities.parse_delay_string(self.delay)
        except ValueError as e:
            message = "Error parsing delay parameter '{}'. {}".format(
                self.delay, e)
            sys.exit(message)
        self.delay_timestamp = (datetime.datetime.now() +
                                datetime.timedelta(hours=hours,
                                                   minutes=minutes))

    if 'tool_search_path' in pipe.attrib:
        self.default_tool_search_path = pipe.attrib['tool_search_path']

    if 'path' in pipe.attrib:
        # Relative entries are taken relative to the pipeline XML's
        # directory.
        path_dirs = []
        file_dir = os.path.abspath(self.master_XML_dir)
        for d in pipe.attrib['path'].split(':'):
            if os.path.isabs(d):
                path_dirs.append(d)
            else:
                path_dirs.append(os.path.join(file_dir, d))
        self.path = ':'.join(path_dirs)
    else:
        self.path = None

    # And track the major components of the pipeline
    self._steps = []
    self._files = {}
    self.foreach_barriers = {}

    # create some implicitly defined file IDs
    PipelineFile.add_simple_dir("PIPELINE_ROOT", self.master_XML_dir,
                                self._files)

    # Walk the child tags.
    for child in pipe:
        t = child.tag
        if t not in Pipeline.valid_tags:
            msg = "{}: Illegal tag: {}".format(
                os.path.basename(self.xmlfile), t)
            raise civet_exceptions.ParseError(msg)

        if t == 'step' or t == 'foreach':
            pending.append(child)
        elif t == 'version':
            self.parse_version_tag(child)
        else:
            # <file> <dir> <filelist> and <string> are all handled by
            # PipelineFile
            PipelineFile.parse_XML(child, self._files)

    # Here we have finished parsing the files in the pipeline XML.
    # Time to fix up various aspects of files that need to have
    # all files done first.
    try:
        PipelineFile.fix_up_files(self._files)
    except civet_exceptions.ParseError as e:
        # fix_up_files can throw a civet_exceptions.ParseError, however
        # it doesn't know what file it is in at the time, so we catch it
        # here, add the filename to the message, and raise an exception
        msg = "{}: {}".format(os.path.basename(self.xmlfile), e)
        raise civet_exceptions.ParseError(msg)

    # Now that our files are all processed and fixed up, we can
    # process the rest of the XML involved with this pipeline.
    for child in pending:
        t = child.tag
        if t == 'step':
            self._steps.append(Step(child, self._files))
        elif t == 'foreach':
            self._steps.append(ForEach(child, self._files))
def __init__(self, e, tool):
    """
    Parse a <command> tag and append the new command to tool.commands.

    Validates the tag's attributes, compiles the pattern used to find
    delimited placeholders in the command text, resolves the file ids
    named by 'if_exists' / 'if_not_exists' to paths, and collects the
    command text (and the optional <version_command> child) with runs of
    white space collapsed to single spaces.

    :param e: the <command> XML element
    :param tool: the tool object that will contain this command; its
        options and tool_files dictionaries are kept for fixing up the
        command line
    :raises civet_exceptions.ParseError: if the tag is invalid
    """
    # Stash the options and tool_files dictionaries.  We'll need
    # them to fix up the command lines.
    # tool is a reference to the tool object that will contain
    # this command.
    self.tool = tool
    self.options = tool.options
    self.tool_files = tool.tool_files
    self.version_command = None
    self.real_version_command = None
    self.if_exists_files = []
    self.if_not_exists_files = []

    # get current pipeline symbols
    import pipeline_parse as PL

    atts = e.attrib
    for a in atts:
        if a not in Command.validAtts:
            msg = "{}: Unknown attribute in command tag: {}\n\n{}".format(
                os.path.basename(tool.xml_file), a, ET.tostring(e))
            raise civet_exceptions.ParseError(msg)

    # The program attribute is required.  The remainder are optional.
    try:
        self.program = atts['program']
    except KeyError:
        msg = "{}: program attribute is required for <command> tag.\n\n{}"
        raise civet_exceptions.ParseError(
            msg.format(tool.xml_file, ET.tostring(e)))

    # Delimiters are optional (and unusual!)
    if 'delimiters' in atts:
        self.delims = atts['delimiters']
        if len(self.delims) != 2:
            msg = ("{}: command tag delimiters must be exactly two "
                   "characters.\n\n{}").format(
                       os.path.basename(tool.xml_file), ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
    else:
        self.delims = '{}'

    # Escape both delimiters so that any character, not just '|', '(' and
    # ')', is matched literally (e.g. '[' ']' or '.').
    delim_1 = re.escape(self.delims[0])
    delim_2 = re.escape(self.delims[1])
    self.replacePattern = re.compile(delim_1 + '(.*?)' + delim_2)

    # Capture desired output redirection
    if 'stdout_id' in atts:
        self.stdout_id = atts['stdout_id']
    else:
        self.stdout_id = None
    if 'stderr_id' in atts:
        self.stderr_id = atts['stderr_id']
    else:
        self.stderr_id = None

    # The conditional attributes are skipped altogether when conditional
    # steps are being forced.
    if 'if_exists' in atts and not PL.force_conditional_steps:
        for f in atts['if_exists'].split(','):
            f = f.strip()
            if f not in self.tool_files:
                msg = ("{}: unknown file ID in command 'if_exists' "
                       "attribute: {}\n\n{}").format(
                           os.path.basename(tool.xml_file), f,
                           ET.tostring(e))
                raise civet_exceptions.ParseError(msg)
            self.if_exists_files.append(self.tool_files[f].path)

    if 'if_not_exists' in atts and not PL.force_conditional_steps:
        for f in atts['if_not_exists'].split(','):
            f = f.strip()
            if f not in self.tool_files:
                msg = ("{}: unknown file ID in command 'if_not_exists' "
                       "attribute: {}\n\n{}").format(
                           os.path.basename(tool.xml_file), f,
                           ET.tostring(e))
                raise civet_exceptions.ParseError(msg)
            self.if_not_exists_files.append(self.tool_files[f].path)

    if 'if_exists_logic' in atts:
        logic_type = atts['if_exists_logic'].strip().upper()
        if logic_type not in ['AND', 'OR']:
            msg = ("{}: value of 'if_exists_logic' must be 'AND' or "
                   "'OR'\n\n{}").format(
                       os.path.basename(tool.xml_file), ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
        self.if_exists_logic = logic_type
    else:
        self.if_exists_logic = 'AND'

    # The command text can be either in the command element's text,
    # or as the "tail" of the child <version_command> tag.  Sigh.
    # Therefore we'll process it in parts.
    if e.text:
        command_text = e.text
    else:
        command_text = ""

    # Only allow one child in a Command tag
    child_found = False
    for child in e:
        if child_found:
            msg = "{}: only one subtag allowed in command tag:\n\n{}".format(
                os.path.basename(tool.xml_file), ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
        child_found = True

        t = child.tag
        if t != 'version_command':
            msg = "{}: unknown child tag '{}' in command:\n\n{}".format(
                os.path.basename(tool.xml_file), t, ET.tostring(e))
            raise civet_exceptions.ParseError(msg)

        # child.text is None for an empty <version_command/>; treat that
        # as an empty string instead of crashing in re.sub().
        self.version_command = re.sub(r'\s+', ' ', child.text or '').strip()

        # Get any command text that the parser considers part of this
        # child.
        if child.tail:
            command_text += child.tail

    # Strip out excess white space in the command
    if command_text:
        self.command_template = re.sub(r'\s+', ' ', command_text).strip()
    else:
        self.command_template = ''

    tool.commands.append(self)
def __init__(self, xml_file, ins, outs, pipeline_files, name, walltime,
             tool_config_prefix, step_name):
    """
    Parse a tool XML file and build the tool's files, options and commands.

    The pipeline files named by `ins` and `outs` are exposed to the tool
    as in_1..in_N and out_1..out_N, together with PIPELINE_ROOT.  The
    tool's own file tags are parsed and finalized before any other child
    tag is processed, so the tags may appear in any order in the XML.

    :param xml_file: name of the tool XML file; it is located with
        self.search_for_xml()
    :param ins: ids of the pipeline files used as the tool's inputs
    :param outs: ids of the pipeline files used as the tool's outputs
    :param pipeline_files: dictionary of the pipeline's files
    :param name: name given to this tool by the pipeline
    :param walltime: walltime passed in by the caller; it takes priority
        over the tool XML's own value but not over an option override
    :param tool_config_prefix: option override prefix; takes priority over
        the tool XML's 'tool_config_prefix' attribute
    :param step_name: name of the step containing this tool
    :raises civet_exceptions.ParseError: if the tool XML cannot be found
        or parsed, or its content is invalid
    """
    # Don't understand why this has to be here as well to get some
    # symbols.  But it seems to be needed.
    import pipeline_parse as PL

    self.options = {}
    self.commands = []
    self.tempfile_ids = []
    self.ins = ins
    self.outs = outs
    self.skip_validation = PL.skip_validation
    self.option_overrides = {}
    self.thread_option_max = 0
    self.modules = list(config.default_modules)
    self.name_from_pipeline = name
    self.verify_files = []
    self.tool_files = {}
    self.pipeline_files = pipeline_files
    self.step_name = step_name
    self.docker_image = None

    # check the search path for the XML file, otherwise fall back to
    # the same directory as the pipeline XML.  CLIA pipelines do not pass
    # in a search path, so the tool XML needs to be in the same directory
    # as the pipeline XML
    self.xml_file = self.search_for_xml(xml_file)
    if not self.xml_file:
        msg = "Could not find tool XML file: {0}".format(xml_file)
        raise civet_exceptions.ParseError(msg)

    bad_inputs = []
    bad_outputs = []

    # ADD PIPELINE_ROOT to tool_files
    self.tool_files['PIPELINE_ROOT'] = pipeline_files['PIPELINE_ROOT']

    # Inputs and outputs are numbered from 1; unknown ids are collected so
    # that they can all be reported together.
    for n, file_id in enumerate(ins, 1):
        try:
            self.tool_files['in_' + str(n)] = pipeline_files[file_id]
        except KeyError:
            bad_inputs.append(file_id)

    for n, file_id in enumerate(outs, 1):
        try:
            self.tool_files['out_' + str(n)] = pipeline_files[file_id]
        except KeyError:
            bad_outputs.append(file_id)

    msg = ["{}: Tool input error".format(self.name_from_pipeline)]
    for f in bad_inputs:
        msg.append("\tunknown file ID: {}".format(f))
    for f in bad_outputs:
        msg.append("\tunknown file ID: {}".format(f))
    if bad_inputs or bad_outputs:
        raise civet_exceptions.ParseError("\n".join(msg))

    # Verify that the tool definition file has not changed.
    self.verify_files.append(os.path.abspath(self.xml_file))

    try:
        tool = ET.parse(self.xml_file).getroot()
    except ET.ParseError as e:
        raise civet_exceptions.ParseError(
            "XML ParseError when parsing {}: {}".format(xml_file, e))

    atts = tool.attrib

    # Validate the attributes
    for a in atts:
        if a not in Tool.validAtts:
            msg = ("Unknown attribute in tool '{}': {}\n"
                   "Valid Attributes: '{}'".format(
                       self.name_from_pipeline, a,
                       ", ".join(Tool.validAtts)))
            raise civet_exceptions.ParseError(msg)

    # The name attribute is required.  All others are optional.
    try:
        self.name = atts['name'].replace(' ', '_')
    except KeyError:
        raise civet_exceptions.ParseError(
            "'{}' is mising required attribute 'name'".format(
                os.path.basename(self.xml_file)))

    if 'error_strings' in atts:
        self.error_strings = []
        # The error strings are a comma-sep list of strings
        # to search for.  Spaces have to be quoted or escaped.
        estrings = atts['error_strings'].split(',')
        for es in estrings:
            self.error_strings.append(es.strip())
    else:
        self.error_strings = None

    if tool_config_prefix:
        self.config_prefix = tool_config_prefix
    elif 'tool_config_prefix' in atts:
        self.config_prefix = atts['tool_config_prefix']
    else:
        self.config_prefix = None

    if self.config_prefix in PL.option_overrides:
        self.option_overrides = PL.option_overrides[self.config_prefix]

    if 'threads' in atts:
        try:
            self.default_threads = int(atts['threads'])
        except ValueError:
            msg = ("{}: tool threads attribute must be an integer. "
                   "Value was '{}'").format(
                       os.path.basename(self.xml_file), atts['threads'])
            raise civet_exceptions.ParseError(msg)
    else:
        self.default_threads = 1

    if 'walltime' in self.option_overrides:
        # walltime set in an option override file takes the highest
        # priority
        self.walltime = self.option_overrides['walltime'][0].replace(
            '"', '')
    elif walltime:
        # did we pass a walltime as a parameter? this would be set in the
        # <tool> tag in the pipeline and should have priority over the
        # walltime in the tool XML file
        self.walltime = walltime
    elif 'walltime' in atts:
        # walltime set in the tool's XML file
        self.walltime = atts['walltime']
    else:
        # no walltime set anywhere.  Use a Civet default walltime.
        self.walltime = BatchJob.DEFAULT_WALLTIME

    if 'exit_if_exists' in atts and not PL.force_conditional_steps:
        # this is going to have to be fixed later, since it may contain
        # files that need to be expanded to a real path
        self.exit_if_exists = atts['exit_if_exists']
    else:
        self.exit_if_exists = None

    if 'exit_test_logic' in atts:
        # if this is invalid, then BatchJob __init__() will throw a
        # ValueError
        # should be "and" or "or" (case insensitive)
        self.exit_test_logic = atts['exit_test_logic']
        if self.exit_test_logic.upper() not in ['AND', 'OR']:
            msg = ("'{}': exit_test_logic attribute must be 'AND' or 'OR' "
                   "(case insensitive). Value was: {}").format(
                       os.path.basename(self.xml_file),
                       self.exit_test_logic)
            raise civet_exceptions.ParseError(msg)
    else:
        self.exit_test_logic = "AND"  # default to AND

    if 'mem' in atts:
        self.mem = atts['mem']
        # The value stays a string.  It has to consist of digits and be
        # greater than zero; the previous check compared the string with
        # an int and could never reject a bad value correctly.
        try:
            mem_is_positive = self.mem.isdigit() and int(self.mem) > 0
        except ValueError:
            mem_is_positive = False
        if not mem_is_positive:
            msg = "'{}': mem attribute must be a positive integer: {}".format(
                os.path.basename(self.xml_file), self.mem)
            raise civet_exceptions.ParseError(msg)
    else:
        self.mem = None

    if 'path' in atts:
        # Relative entries are taken relative to the tool XML's directory.
        path_dirs = []
        file_dir = os.path.dirname(os.path.abspath(self.xml_file))
        for d in atts['path'].split(':'):
            if os.path.isabs(d):
                path_dirs.append(d)
            else:
                path_dirs.append(os.path.join(file_dir, d))
        self.path = ':'.join(path_dirs)
    else:
        self.path = None

    # We can't process any non-file tags until all our files
    # are processed and fixed up.  Rather than force an order
    # in the user's file, we simply stash the other tags in
    # a "pending tags" list.
    pending = []

    # Now process our child tags
    for child in tool:
        t = child.tag
        if t not in Tool.validTags:
            msg = "'{}': Unknown tag {}\n\n{}".format(
                os.path.basename(self.xml_file), t, ET.tostring(child))
            raise civet_exceptions.ParseError(msg)

        if t == 'file' or t == 'dir' or t == 'string':
            # Register the file in the tool's file dictionary
            self.file(child)
        else:
            pending.append(child)

    # Now we can fix up our files.
    try:
        PipelineFile.finalize_file_paths(self.tool_files)
    except civet_exceptions.ParseError as e:
        # finalize_file_paths can throw a civet_exceptions.ParseError,
        # however it doesn't know what file it is in at the time, so we
        # catch it here, add the filename to the message, and raise an
        # exception
        msg = "{}: {}".format(os.path.basename(self.xml_file), e)
        raise civet_exceptions.ParseError(msg)

    sumarize_files(self.tool_files, self.name_from_pipeline)

    # Now we can process self.exit_if_exists
    if self.exit_if_exists:
        files_to_test = []
        for f in self.exit_if_exists.split(","):
            f = f.strip()
            if f not in self.tool_files:
                raise civet_exceptions.ParseError(
                    "unkown file ID in exit_if_exists attribute: {}".
                    format(f))
            files_to_test.append(self.tool_files[f].path)
        self.exit_if_exists = files_to_test

    # Now, finally, we can process the rest of the tags.
    for child in pending:
        t = child.tag
        if t == 'description':
            # This one is so simple we process it inline here, instead of
            # having a different class to process it.
            self.description = child.text
        elif t == 'option':
            Option(child, self)
        elif t == 'command':
            Command(child, self)
        elif t == 'module':
            if child.text not in self.modules:
                self.modules.append(child.text)
        elif t == 'validate':
            a = child.attrib
            if 'id' in a:
                try:
                    name = self.tool_files[a['id']].path
                except KeyError:
                    msg = "{}: Unknown file ID '{}'\n\n{}".format(
                        os.path.basename(self.xml_file), a['id'],
                        ET.tostring(child))
                    raise civet_exceptions.ParseError(msg)
                # If not key error; let the exception escape.
            else:
                name = child.text
            self.verify_files.append(name)
        else:
            print('Unprocessed tag:' + t, file=sys.stderr)

    # Do we need to adjust the walltime?
    if PL.walltime_multiplier > 0 and PL.walltime_multiplier != 1:
        self.walltime = BatchJob.adjust_walltime(self.walltime,
                                                 PL.walltime_multiplier)
def __init__(self, e, tool):
    """
    Parse one <option> element and register the result in tool.options.

    e is the <option> XML element.  Its attributes are validated, its
    type is resolved (either from the 'type' attribute or from the
    deprecated 'threads' / 'binary' attributes), its value is computed,
    and any <select> children are collected.

    tool is the owning tool.  It is read for xml_file, options,
    tool_files, option_overrides, default_threads, thread_option_max,
    step_name, name_from_pipeline and config_prefix.  On success
    tool.options[name] is set to this object; for a threads option
    tool.thread_option_max may be raised.

    Raises civet_exceptions.ParseError for a missing or duplicate name,
    unknown attributes, types or child tags, conflicting attributes, a
    non-integer threads override, a select value that is not one of the
    choices, an invalid boolean value, or a file/directory value that
    does not exist.
    """
    # valid attributes for the <option> tag
    valid_attributes = [
        'name', 'command_text', 'value', 'from_file', 'threads', 'binary',
        'type', 'display_name', 'description'
    ]

    valid_types = [
        'string', 'numeric', 'select', 'boolean', 'threads', 'file',
        'directory'
    ]

    # valid child tags
    valid_tags = ['select']

    # these attributes are still valid, but are deprecated. we will warn
    # about them
    deprecated_attributes = {
        'threads': 'use type="threads" instead',
        'binary': 'use type="boolean" instead'
    }

    xml_name = os.path.basename(tool.xml_file)

    self.command_text = ''
    self.value = ''
    self.select_choices = []
    self.type = None

    # None means "no <select default=...> seen yet".  It must be compared
    # with "is None": an empty string is a legitimate choice.
    select_default = None

    if 'name' not in e.attrib:
        raise civet_exceptions.ParseError(
            "{}: option missing required 'name' attribute".format(
                xml_name))

    name = e.attrib['name'].strip()
    self.name = name

    # We don't allow the same option name in a tool twice
    if self.name in tool.options:
        msg = "{}: Option {} is a duplicate".format(xml_name, self.name)
        raise civet_exceptions.ParseError(msg)
    if self.name in tool.tool_files:
        msg = "{}: Option {} is a duplicate of a file ID".format(
            xml_name, self.name)
        raise civet_exceptions.ParseError(msg)

    # Reject unknown attributes before emitting any deprecation warning,
    # so a failing parse does not also print warnings.
    for attr in e.attrib:
        if attr not in valid_attributes:
            msg = ("{}: Unknown attribute in option '{}': {}\n"
                   "Valid Attributes: '{}'".format(
                       xml_name, self.name, attr,
                       ", ".join(valid_attributes)))
            raise civet_exceptions.ParseError(msg)

    for attr in e.attrib:
        if attr in deprecated_attributes:
            print("Warning {}::{}::{}:\n"
                  "\tdeprecated attribute '{}' in option '{}'\n"
                  "\t{}".format(tool.step_name, tool.name_from_pipeline,
                                xml_name, attr, self.name,
                                deprecated_attributes[attr]),
                  file=sys.stderr)

    # these are optional attributes, currently only used by the civet-ui
    # project, not by the civet framework itself
    self.display_name = e.attrib.get('display_name', self.name)
    self.description = e.attrib.get('description', self.name)

    # don't allow mixing newer "type" attribute with "threads",
    # or "binary" attributes. they can conflict with "type".
    # binary and threads are deprecated. type="boolean" or type="threads"
    # should be used instead.
    # also 'from_file' can't be combined with type
    if 'type' in e.attrib and ('from_file' in e.attrib
                               or 'threads' in e.attrib
                               or 'binary' in e.attrib):
        msg = ("Error in option '{}:{}':\n"
               "Can not combine 'type' attribute with 'from_file', "
               "'threads', or 'binary' attributes\n".format(
                   tool.name_from_pipeline, self.name))
        raise civet_exceptions.ParseError(msg)

    if 'type' in e.attrib:
        if e.attrib['type'] not in valid_types:
            msg = ("Unknown option type {} in '{}:{}'\n"
                   "Valid types: {}".format(e.attrib['type'],
                                            tool.name_from_pipeline,
                                            self.name,
                                            ", ".join(valid_types)))
            raise civet_exceptions.ParseError(msg)
        self.type = e.attrib['type']
    elif 'threads' in e.attrib and e.attrib['threads'].upper() == 'TRUE':
        self.type = 'threads'
    elif 'binary' in e.attrib:
        self.type = 'boolean'

    # some attributes/types are mutually exclusive
    if self.type == 'boolean' and 'from_file' in e.attrib:
        msg = ("{}: Option {}: from_file attribute can not be combined "
               "with 'boolean' type\n\n{}".format(
                   xml_name, self.name, ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)
    if 'binary' in e.attrib and 'threads' in e.attrib:
        msg = ("{}: Option {}: binary and threads attributes are mutually"
               " exclusive\n\n{}".format(xml_name, self.name,
                                         ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)
    if 'value' in e.attrib and 'from_file' in e.attrib:
        msg = ("{}: Option {}: value and from_file attributes are mutually"
               " exclusive\n\n{}".format(xml_name, self.name,
                                         ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)
    if 'value' in e.attrib and self.type == 'threads':
        msg = ("{}: Option {}: value and threads are mutually"
               " exclusive\n\n{}".format(xml_name, self.name,
                                         ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)
    if 'from_file' in e.attrib and self.type == 'threads':
        msg = (
            "{}: Option {}: from_file and threads attributes are mutually"
            " exclusive\n\n{}".format(xml_name, self.name,
                                      ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)

    if 'command_text' in e.attrib:
        self.command_text = e.attrib['command_text'].strip()

    if 'value' in e.attrib:
        # An override replaces the value given in the XML.
        if name in tool.option_overrides:
            self.value = tool.option_overrides[name][0]
        else:
            self.value = e.attrib['value'].strip()
    elif 'from_file' in e.attrib:
        fid = e.attrib['from_file']
        try:
            fn = tool.tool_files[fid].path
        except KeyError:
            msg = "{}: Unknown file ID '{}' in option 'from_file' attribute:\n\n{}".format(
                xml_name, fid, ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
        # The value is a shell command substitution that reads the file.
        self.value = '$(cat ' + fn + ') '
    elif self.type == 'threads':
        if name in tool.option_overrides:
            try:
                self.value = int(tool.option_overrides[name][0])
            except ValueError:
                msg = "{}: Invalid value for option override '{}' (must be integer): {}".format(
                    xml_name, name, tool.option_overrides[name][0])
                raise civet_exceptions.ParseError(msg)
        else:
            self.value = tool.default_threads

        if self.value > tool.thread_option_max:
            tool.thread_option_max = self.value

        # now that we've made sure it is an integer and we've set
        # thread_option_max we need to turn it back into a string for
        # substitution in the command line
        self.value = str(self.value)

    # process child tags, this needs to be done after we process the
    # type attribute
    for child in e:
        t = child.tag
        if t not in valid_tags:
            msg = ("{}: unknown child tag '{}' in option '{}'\n"
                   "Valid child tags: '{}'").format(
                       xml_name, t, self.name, ", ".join(valid_tags))
            raise civet_exceptions.ParseError(msg)

        if t == 'select':
            # option contains <select> child tag(s)
            if self.type != 'select':
                msg = ("{}: Option tag {} contains <select> tags, but its "
                       "type attribute is not 'select'").format(
                           xml_name, self.name)
                raise civet_exceptions.ParseError(msg)

            # ElementTree reports the text of an empty element
            # (<select/>) as None; treat it like a whitespace-only
            # element, i.e. as the empty-string choice, instead of
            # crashing on None.strip().
            select_value = (child.text or '').strip()
            self.select_choices.append(select_value)

            if ('default' in child.attrib
                    and utilities.eval_boolean_string(
                        child.attrib['default'])):
                if select_default is None:
                    select_default = select_value
                else:
                    msg = ("{}: Option tag {} has more than one one "
                           "default <select> tag").format(
                               xml_name, self.name)
                    raise civet_exceptions.ParseError(msg)

    if self.type == 'select':
        # if self.type == 'select' we need to do a little checking
        # first make sure the pipeline developer included at least one
        # <select> tag
        if not self.select_choices:
            msg = ("{}\nSelect option '{}.{}' must include one or more "
                   "<select> tags.").format(
                       xml_name, tool.config_prefix, self.name)
            raise civet_exceptions.ParseError(msg)

        # NOTE(review): a 'value' attribute on a select option is always
        # replaced below (override, default, or first choice) -- confirm
        # that this is intended.
        if name in tool.option_overrides:
            self.value = tool.option_overrides[name][0]

            # make sure the value is one of the valid options
            if self.value not in self.select_choices:
                msg = ("{}\n"
                       "invalid value '{}' for option '{}.{}'.\n"
                       "value must be one the specified choices:\n"
                       "\t{}").format(
                           xml_name, self.value, tool.config_prefix,
                           self.name, ', '.join([
                               "'{}'".format(c)
                               for c in self.select_choices
                           ]))
                raise civet_exceptions.ParseError(msg)
        elif select_default is not None:
            self.value = select_default
        else:
            # No override and no declared default: use the first choice.
            self.value = self.select_choices[0]

    if self.type == 'boolean':
        # Convert the textual value into a real bool.
        if self.value.upper() == 'TRUE' or self.value == '1':
            self.value = True
        elif self.value.upper() == 'FALSE' or self.value == '0':
            self.value = False
        else:
            msg = "{}: invalid value '{}' for boolean option, must be 'true' or 'false'\n\n{}".format(
                xml_name, self.value, ET.tostring(e))
            raise civet_exceptions.ParseError(msg)

    if self.type == 'file' or self.type == 'directory':
        # this option type specifies a path to an existing file
        # at least make sure it exists
        self.value = os.path.abspath(self.value)
        if not os.path.exists(self.value):
            msg = "{}: Option {} -- {} '{}' does not exist".format(
                xml_name, self.name, self.type, self.value)
            raise civet_exceptions.ParseError(msg)

    # TODO do some validation of other types (like numeric) to make sure
    # the values make sense

    tool.options[name] = self
def fix_up_file(self, files, circularity):
    """
    Take care of all the inter-file dependencies such as
    in_dir and based_on, as well as files passed in as
    parameters.

    files is the mapping of file id to file object that in_dir
    references are looked up in.  circularity is a list used as a stack
    of the files currently being fixed up; it is how dependency cycles
    are detected.

    Returns immediately if this file has already been fixed up.  Raises
    civet_exceptions.ParseError when this file is already on the
    circularity stack.  Exits the process with status 1 if the stack is
    found to be out of sync when this file is popped.
    """
    # Imported here rather than at module level.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    import pipeline_parse as PL

    if self._is_fixed_up:
        return

    # detect dependency cycles
    if self in circularity:
        # List every file on the stack, then this file, in the message.
        msg = "File dependency cycle detected processing '{}' ".format(
            self.id)
        for f in circularity:
            msg = msg + "\n" + str(f)
        msg = msg + "\n\n" + str(self)
        raise civet_exceptions.ParseError(msg)

    # Mark this file as in progress; it is popped again at the end.
    circularity.append(self)

    self.parameter_to_path()
    self.apply_from_file(files, circularity)
    self.apply_based_on(files, circularity)

    if self is PipelineFile.output_dir:
        if PL.directory_version == 2:
            # Directory version 2 nests the output in a sub-directory
            # named "<YYYYmmdd_HHMMSS>-<pid>".
            stamp_dir = "{0}-{1}".format(
                datetime.datetime.now().strftime('%Y%m%d_%H%M%S'),
                os.getpid())
            self.path = os.path.join(self.path, stamp_dir)
        # NOTE(review): nesting reconstructed from a whitespace-mangled
        # source; the directory is assumed to be created for every
        # directory version -- confirm.
        utilities.make_sure_path_exists(self.path)
    else:
        # might raise civet_exception.ParseError, to be handled at a higher level
        self.apply_in_dir_and_create_temp(files, circularity)

    # Turn all the paths into an absolute path, so changes in
    # working directory throughout the pipeline lifetime don't
    # foul us up. First check if the file doesn't have a path at all
    # i.e., just a filename. If so, and it is not an input file,
    # place it in the output directory.
    if self.is_list:
        if self.in_dir:
            # filelist is comprised of a directory and pattern,
            # convert the directory to an absolute path
            self.in_dir = os.path.abspath(files[self.in_dir].path)
        elif self.list_from_param:
            # file list is passed as a parameter, might be comma delimited
            # convert paths in list to absolute path
            file_list = []
            for f in self.path.split(','):
                file_list.append(os.path.abspath(f))
            self.path = ','.join(file_list)
    elif not self.is_string:
        path = self.path
        # A bare filename (no directory part) that is not an input and is
        # not the output directory itself is placed in the output
        # directory, but only when no output directory is registered or
        # the registered one has already been fixed up.
        if (os.path.split(path)[0] == '' and
                (not self.is_input) and
                self != PipelineFile.output_dir and
                (PipelineFile.output_dir is None or
                 PipelineFile.output_dir._is_fixed_up)):
            path = os.path.join(PipelineFile.get_output_dir(), path)
        self.path = os.path.abspath(path)

    self._is_fixed_up = True

    # Make sure a directory exists, unless explicitly requested
    # to not do so.
    if self._is_dir and self.create:
        utilities.make_sure_path_exists(self.path)

    # Sanity check: the entry popped must be the one pushed above.
    check = circularity.pop()

    if check != self:
        print("circularity.pop() failed!\ncheck:{}".format(check),
              file=sys.stderr)
        print(" self:{}".format(self), file=sys.stderr)
        sys.exit(1)
def __init__(self, e, tool):
    """
    Parse one <option> element (legacy 'threads' / 'binary' attribute
    style) and register the result in tool.options.

    e is the <option> XML element; it must carry a 'name' attribute.
    tool is the owning tool.  It is read for xml_file, options,
    tool_files, option_overrides, default_threads and thread_option_max.
    On success tool.options[name] is set to this object; for a threads
    option tool.thread_option_max may be raised.

    All validation (duplicate names, mutually exclusive attributes) is
    done before the value is computed, so a rejected option never
    modifies tool.thread_option_max.

    Raises civet_exceptions.ParseError for a duplicate name, conflicting
    attributes, an unknown from_file ID, a non-integer threads override,
    or an invalid binary value.
    """
    xml_name = os.path.basename(tool.xml_file)

    self.command_text = ''
    self.value = ''
    self.binary = False
    self.isFile = False

    name = e.attrib['name'].strip()
    self.name = name

    # We don't allow the same option name in a tool twice
    if self.name in tool.options:
        msg = "{}: Option {} is a duplicate".format(xml_name, self.name)
        raise civet_exceptions.ParseError(msg)
    if self.name in tool.tool_files:
        msg = "{}: Option {} is a duplicate of a file ID".format(
            xml_name, self.name)
        raise civet_exceptions.ParseError(msg)

    # some attributes are mutually exclusive
    if 'binary' in e.attrib and 'from_file' in e.attrib:
        msg = (
            "{}: Option {}: binary and from_file attributes are mutually"
            " exclusive\n\n{}".format(xml_name, self.name,
                                      ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)
    if 'binary' in e.attrib and 'threads' in e.attrib:
        msg = ("{}: Option {}: binary and threads attributes are mutually"
               " exclusive\n\n{}".format(xml_name, self.name,
                                         ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)
    if 'value' in e.attrib and 'from_file' in e.attrib:
        msg = ("{}: Option {}: value and from_file attributes are mutually"
               " exclusive\n\n{}".format(xml_name, self.name,
                                         ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)
    if 'value' in e.attrib and 'threads' in e.attrib:
        msg = ("{}: Option {}: value and threads attributes are mutually"
               " exclusive\n\n{}".format(xml_name, self.name,
                                         ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)
    if 'from_file' in e.attrib and 'threads' in e.attrib:
        msg = (
            "{}: Option {}: from_file and threads attributes are mutually"
            " exclusive\n\n{}".format(xml_name, self.name,
                                      ET.tostring(e)))
        raise civet_exceptions.ParseError(msg)

    if 'command_text' in e.attrib:
        self.command_text = e.attrib['command_text'].strip()

    # Default for options that carry none of 'value', 'from_file' or
    # threads="true".  Without it such an option crashed with an
    # UnboundLocalError further down.
    value = ''

    if 'value' in e.attrib:
        # An override replaces the value given in the XML.
        if name in tool.option_overrides:
            value = tool.option_overrides[name][0]
        else:
            value = e.attrib['value'].strip()
    elif 'from_file' in e.attrib:
        fid = e.attrib['from_file']
        try:
            fn = tool.tool_files[fid].path
        except KeyError:
            msg = "{}: Unknown file ID '{}' in option 'from_file' attribute:\n\n{}".format(
                xml_name, fid, ET.tostring(e))
            raise civet_exceptions.ParseError(msg)
        # The value is a shell command substitution that reads the file.
        value = '$(cat ' + fn + ') '
    elif 'threads' in e.attrib and e.attrib['threads'].upper() == 'TRUE':
        if name in tool.option_overrides:
            try:
                value = int(tool.option_overrides[name][0])
            except ValueError:
                msg = "{}: Invalid value for option override '{}' (must be integer): {}".format(
                    xml_name, name, tool.option_overrides[name][0])
                raise civet_exceptions.ParseError(msg)
        else:
            value = tool.default_threads

        if value > tool.thread_option_max:
            tool.thread_option_max = value

        # now that we've made sure it is an integer and we've set
        # thread_option_max we need to turn it back into a string for
        # substitution in the command line
        value = str(value)

    if 'binary' in e.attrib:
        self.binary = True
        # Convert the textual value into a real bool.
        if value.upper() == 'TRUE' or value == '1':
            value = True
        elif value.upper() == 'FALSE' or value == '0':
            value = False
        else:
            msg = "{}: invalid value '{}' for binary option, must be 'True' or 'False'\n\n{}".format(
                xml_name, value, ET.tostring(e))
            raise civet_exceptions.ParseError(msg)

    self.value = value

    tool.options[name] = self