def from_dict(self, doc, identifier=None, base_dir=None, validate=True):
    """Build a benchmark template from its dictionary serialization.

    The document has to contain the element named by LABEL_RESULTS. All
    remaining elements are handled by the from_dict() implementation of the
    super class. The result schema is then parsed from the LABEL_RESULTS
    element and combined with the components of the handle returned by the
    super class.

    Parameters
    ----------
    doc: dict
        Dictionary serialization of a benchmark template
    identifier: string, optional
        Unique template identifier (passed on to the super class loader)
    base_dir: string, optional
        Optional path to directory on disk that contains static files that
        are required to run the represented workflow (passed on to the
        super class loader)
    validate: bool, optional
        Flag indicating if given template parameter declarations are to be
        validated against the parameter schema or not (passed on to the
        super class loader)

    Returns
    -------
    benchtmpl.workflow.benchmark.base.BenchmarkTemplate

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If the LABEL_RESULTS element is missing or if parsing the result
        schema raises a ValueError
    benchtmpl.error.UnknownParameterError
        NOTE(review): not raised here directly; presumably propagated from
        the super class loader -- confirm.
    """
    # The result schema is the only element this loader requires on top of
    # what the super class expects.
    if LABEL_RESULTS not in doc:
        message = 'missing element \'{}\''.format(LABEL_RESULTS)
        raise err.InvalidTemplateError(message)
    # Let the super class create the handle for the workflow template
    base_loader = super(BenchmarkTemplateLoader, self)
    handle = base_loader.from_dict(
        doc=doc,
        identifier=identifier,
        base_dir=base_dir,
        validate=validate
    )
    # De-serialize the result schema. Schema errors are reported as invalid
    # templates.
    try:
        result_schema = BenchmarkResultSchema.from_dict(doc[LABEL_RESULTS])
    except ValueError as ex:
        raise err.InvalidTemplateError(str(ex))
    return BenchmarkTemplate(
        identifier=handle.identifier,
        base_dir=handle.base_dir,
        workflow_spec=handle.workflow_spec,
        parameters=handle.parameters.values(),
        schema=result_schema
    )
def from_dict(self, doc, identifier=None, base_dir=None, validate=True):
    """Create an instance of the template handle for a dictionary
    serialization. The document has to contain the workflow specification
    (element LABEL_WORKFLOW). The template identifier (LABEL_ID) and the
    list of parameter declarations (LABEL_PARAMETERS) are optional.

    Parameters
    ----------
    doc: dict
        Dictionary serialization of a workflow template
    identifier: string, optional
        Unique template identifier. If given, this value overrides the
        identifier in the document. The document identifier is only used
        when this argument is None.
    base_dir: string, optional
        Optional path to directory on disk that contains static files that
        are required to run the represented workflow. The value is passed
        to the template handle as given; the document is not consulted.
    validate: bool, optional
        Flag indicating if given template parameter declarations are to be
        validated against the parameter schema or not. If True, the
        workflow specification is also checked for references to
        undeclared parameters.

    Returns
    -------
    benchtmpl.workflow.template.base.TemplateHandle

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If the workflow specification element is missing
    benchtmpl.error.UnknownParameterError
        If validate is True and the workflow specification references a
        parameter that is not declared
    """
    # Ensure that the mandatory elements are present
    if LABEL_WORKFLOW not in doc:
        raise err.InvalidTemplateError(
            'missing element \'{}\''.format(LABEL_WORKFLOW)
        )
    workflow_spec = doc[LABEL_WORKFLOW]
    # Add given parameter declarations to the parameter index
    parameters = dict()
    if LABEL_PARAMETERS in doc:
        parameters = putil.create_parameter_index(
            doc[LABEL_PARAMETERS],
            validate=validate
        )
    # Ensure that the workflow specification does not reference undefined
    # parameters if validate flag is True.
    if validate:
        for key in tmpl.get_parameter_references(workflow_spec):
            if key not in parameters:
                raise err.UnknownParameterError(key)
    # An explicitly given identifier takes precedence over the identifier in
    # the document. Callers such as a template store rely on this to keep
    # the handle identifier in sync with the identifier they generated.
    if identifier is None and LABEL_ID in doc:
        identifier = doc[LABEL_ID]
    return tmpl.TemplateHandle(
        identifier=identifier,
        base_dir=base_dir,
        workflow_spec=workflow_spec,
        parameters=list(parameters.values())
    )
def create_parameter_index(parameters, validate=True):
    """Create instances of template parameters from a list of dictionaries
    containing parameter declarations. The result is a dictionary containing
    all declared parameters, indexed by their unique identifier. Parameters
    that reference a parent are added as children of that parent.

    Parameters
    ----------
    parameters: list(dict)
        List of dictionaries containing template parameter declarations
    validate: bool, optional
        Flag indicating if given template parameter declarations are to be
        validated against the parameter schema or not.

    Returns
    -------
    dict(benchtmpl.workflow.parameter.base.TemplateParameter)

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If two declarations share the same identifier
    benchtmpl.error.UnknownParameterError
        If a declaration names a parent that is not declared
    """
    # The declarations are traversed twice. Materialize them once so that
    # one-shot iterables (e.g., generators) are handled correctly as well.
    parameters = list(parameters)
    result = dict()
    for para in parameters:
        # Validate the template parameters if the validate flag is True
        if validate:
            pd.validate_parameter(para)
        p_id = para[pd.LABEL_ID]
        # Ensure that the identifier of all parameters are unique
        if p_id in result:
            raise err.InvalidTemplateError(
                'parameter \'{}\' not unique'.format(p_id)
            )
        # Only parameters of type DT_LIST or DT_RECORD get a list for their
        # children. The children themselves are added in the second pass,
        # after all parameters have been instantiated.
        children = None
        if para[pd.LABEL_DATATYPE] in [pd.DT_LIST, pd.DT_RECORD]:
            children = list()
        result[p_id] = TemplateParameter(
            pd.set_defaults(para),
            children=children
        )
    # Add parameter templates to the list of children for their respective
    # parent (if given). We currently only support one level of nesting.
    for para in parameters:
        parent = para.get(pd.LABEL_PARENT)
        if parent is None:
            continue
        # Report references to undeclared parents as template errors instead
        # of letting a bare KeyError escape.
        if parent not in result:
            raise err.UnknownParameterError(parent)
        result[parent].add_child(result[para[pd.LABEL_ID]])
    return result
def get_parameter_references(spec, parameters=None):
    """Collect the identifier of all template parameters that are referenced
    in a workflow specification. Identifier are added to the given set. A
    new set is created if none is given.

    String values (directly in the dictionary or as list elements) are
    tested with is_parameter(). Dictionaries (directly in the dictionary or
    as list elements) are searched recursively. Values of any other type
    are ignored.

    Parameters
    ----------
    spec: dict
        Parameterized workflow specification
    parameters: set, optional
        Result set of parameter identifier

    Returns
    -------
    set

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If a list contains another list as one of its elements
    """
    if parameters is None:
        parameters = set()
    for value in spec.values():
        # Handle single values and list elements with the same loop. A
        # value that is not a list is treated like a one-element sequence.
        in_list = isinstance(value, list)
        items = value if in_list else [value]
        for item in items:
            if isinstance(item, str):
                # Strings may be references to template parameters
                if is_parameter(item):
                    parameters.add(get_parameter_name(item))
            elif isinstance(item, dict):
                # Descend into nested dictionaries
                get_parameter_references(item, parameters=parameters)
            elif in_list and isinstance(item, list):
                # We currently do not support lists of lists
                raise err.InvalidTemplateError('nested lists not supported')
    return parameters
def _is_text(value):
    """Test whether a value is a text string on Python 2 and Python 3."""
    if isinstance(value, str):
        return True
    try:
        return isinstance(value, basestring)
    except NameError:
        # Python 3 without a compatibility shim: there is no basestring and
        # str is the only text type.
        return False


def replace_args(spec, arguments, parameters):
    """Replace template parameter references in the workflow specification
    with their respective values in the argument dictionary or their defined
    default value.

    The type of the result depends on the type of the spec object:
    dictionaries and lists are copied with all their elements replaced
    recursively, strings are passed to replace_value(), and values of any
    other type are returned unchanged.

    Parameters
    ----------
    spec: any
        Parameterized workflow specification
    arguments: dict(benchtmpl.workflow.parameter.value.TemplateArgument)
        Dictionary that associates template parameter identifiers with
        argument values
    parameters: dict(benchtmpl.workflow.parameter.base.TemplateParameter)
        Dictionary of parameter declarations

    Returns
    -------
    type(spec)

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If a list contains another list as one of its elements
    """
    if isinstance(spec, dict):
        # The new object will contain the modified workflow specification
        return {
            key: replace_args(val, arguments, parameters)
            for key, val in spec.items()
        }
    if isinstance(spec, list):
        obj = list()
        for val in spec:
            if isinstance(val, list):
                # We currently do not support lists of lists
                raise err.InvalidTemplateError('nested lists not supported')
            obj.append(replace_args(val, arguments, parameters))
        return obj
    if _is_text(spec):
        return replace_value(spec, arguments, parameters)
    # Non-string leaf values (numbers, booleans, None, ...) are kept as is
    return spec
def get_commands(template, arguments):
    """Get expanded commands from template workflow specification. In this
    simple implementation the commands within each step of the workflow
    specification are expanded for the given set of arguments and appended
    to the result list of commands.

    The substitution values are taken from the 'inputs.parameters' section
    of the workflow specification after references to template parameters
    have been replaced. Steps are read from
    'workflow.specification.steps'. All of these elements are optional.

    Parameters
    ----------
    template: benchtmpl.workflow.template.base.TemplateHandle
        Workflow template containing the parameterized specification and the
        parameter declarations
    arguments: dict(benchtmpl.workflow.parameter.value.TemplateArgument)
        Dictionary of argument values for parameters in the template

    Returns
    -------
    list(string)

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If a command references a name that is not in the workflow
        parameters or if a command contains a malformed placeholder
    """
    spec = template.workflow_spec
    # Get the input/parameters dictionary from the workflow specification
    # and replace all references to template parameters with the given
    # arguments or default values
    workflow_parameters = tmpl.replace_args(
        spec=spec.get('inputs', {}).get('parameters', {}),
        arguments=arguments,
        parameters=template.parameters
    )
    # Add all command strings in workflow steps to result after replacing
    # references to parameters
    result = list()
    steps = spec.get('workflow', {}).get('specification', {}).get('steps', [])
    for step in steps:
        for command in step.get('commands', []):
            # Template is presumably string.Template: substitute() signals
            # an unknown name with a KeyError and a malformed placeholder
            # (e.g., a lone '$') with a ValueError. Both mean that the
            # template is invalid.
            try:
                expanded = Template(command).substitute(workflow_parameters)
            except (KeyError, ValueError) as ex:
                raise err.InvalidTemplateError(str(ex))
            result.append(expanded)
    return result
def __init__(self, workflow_spec, identifier=None, base_dir=None, parameters=None):
    """Initialize the components of the workflow template. The given
    parameter declarations are stored in a dictionary that is indexed by
    the parameter identifier. An error is raised if the identifier of
    template parameters are not unique.

    Parameters
    ----------
    workflow_spec: dict
        Workflow specification object
    identifier: string, optional
        Unique template identifier. If no value is given the identifier is
        generated by util.get_unique_identifier().
    base_dir: string, optional
        Optional path to directory on disk that contains static files that
        are required to run the represented workflow
    parameters: list(benchtmpl.workflow.parameter.base.TemplateParameter), optional
        List of workflow template parameter declarations

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If two parameters in the given list have the same identifier
    """
    self.workflow_spec = workflow_spec
    # Generate an identifier only if the caller did not provide one
    if identifier is None:
        self.identifier = util.get_unique_identifier()
    else:
        self.identifier = identifier
    self.base_dir = base_dir
    # Index of parameter declarations. The attribute is set before the
    # index is filled.
    index = dict()
    self.parameters = index
    if parameters is None:
        return
    for para in parameters:
        key = para.identifier
        # Ensure that the identifier of all parameters are unique
        if key in index:
            raise err.InvalidTemplateError(
                'parameter \'{}\' not unique'.format(key)
            )
        index[key] = para
def add_template(self, src_dir=None, src_repo_url=None, template_spec_file=None):
    """Create file and folder structure for a new workflow template. Assumes
    that either a workflow source directory or the Url of a remote Git
    repository is given.

    Creates a new folder with unique name in the base directory of the
    template store. The source folder is copied (or the repository is
    cloned) into the sub-folder STATIC_FILES_DIR of the new folder. The
    template folder may also contain other sub-folders, e.g., for
    individual runs containing downloaded result files.

    The template specification is read from the first existing file in the
    following list of candidates: the given template_spec_file (if any),
    followed by all combinations of the file names and file suffixes that
    are configured for the store (looked up in the copied workflow folder).
    The loaded template is written to the file TEMPLATE_FILE in the new
    template folder.

    The created template folder is removed again if any error occurs or if
    no template specification file is found.

    Parameters
    ----------
    src_dir: string, optional
        Directory containing the workflow components, i.e., the fixed files
        and the template specification (optional).
    src_repo_url: string, optional
        Git repository that contains the workflow components
    template_spec_file: string, optional
        Path to the workflow template specification file (absolute or
        relative to the workflow directory)

    Returns
    -------
    benchtmpl.workflow.template.TemplateHandle

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If no template specification file is found or if copying, cloning,
        loading or writing raises an IOError, OSError, ValueError or
        benchtmpl.error.TemplateError
    RuntimeError
        If no unused folder name is found after more than max_attempts
        name collisions
    ValueError
        If both or none of src_dir and src_repo_url are given

    NOTE(review): previous versions of this documentation also listed
    benchtmpl.error.InvalidParameterError. Nothing in this method raises it
    directly -- confirm whether the loader can.
    """
    # Exactly one of src_dir and src_repo_url has to be not None. If both
    # are None (or not None) a ValueError is raised.
    if src_dir is None and src_repo_url is None:
        raise ValueError(
            'both \'src_dir\' and \'src_repo_url\' are missing'
        )
    elif src_dir is not None and src_repo_url is not None:
        raise ValueError(
            'cannot have both \'src_dir\' and \'src_repo_url\''
        )
    # Create a new unique folder for the template resources. Only name
    # collisions count as failed attempts.
    attempt = 0
    while True:
        identifier = self.id_func()
        template_dir = os.path.join(self.base_dir, identifier)
        if not os.path.isdir(template_dir):
            break
        attempt += 1
        if attempt > self.max_attempts:
            raise RuntimeError('could not create unique directory')
    # Create the new folder (this should be unique now)
    os.makedirs(template_dir)
    try:
        # Copy either the given workflow directory into the created
        # template folder or clone the Git repository.
        static_dir = os.path.join(template_dir, STATIC_FILES_DIR)
        if src_dir is not None:
            shutil.copytree(src=src_dir, dst=static_dir)
        else:
            git.Repo.clone_from(src_repo_url, static_dir)
        # Collect the candidate locations of the template specification
        # file. An explicitly given file takes precedence over the default
        # file names.
        candidates = list()
        if template_spec_file is not None:
            # Resolve relative paths against the workflow folder. For
            # absolute paths os.path.join() returns the path unchanged.
            resolved = os.path.join(static_dir, template_spec_file)
            candidates.append(resolved)
            # Keep the path as given as a fallback for callers that pass a
            # path relative to the current working directory.
            if resolved != template_spec_file:
                candidates.append(template_spec_file)
        for name in self.filenames:
            for suffix in self.suffixes:
                candidates.append(os.path.join(static_dir, name + suffix))
        for filename in candidates:
            if os.path.isfile(filename):
                # Read template from file. If no error occurs the folder
                # contains a valid template.
                template = self.loader.load(
                    filename=filename,
                    identifier=identifier,
                    base_dir=static_dir,
                    validate=True
                )
                # Store serialized template handle on disk
                self.loader.write(
                    template=template,
                    filename=os.path.join(template_dir, TEMPLATE_FILE)
                )
                return template
    except (IOError, OSError, ValueError, err.TemplateError) as ex:
        # Make sure to cleanup by removing the created template folder
        shutil.rmtree(template_dir)
        raise err.InvalidTemplateError(str(ex))
    except Exception:
        # Any other error (e.g., a failed repository clone) is passed on
        # unchanged. Do not leave the half-created folder behind. Errors
        # during cleanup are ignored to not mask the original exception.
        shutil.rmtree(template_dir, ignore_errors=True)
        raise
    # No template file found. Cleanup and raise error
    shutil.rmtree(template_dir)
    raise err.InvalidTemplateError('no template file found')
def upload_files(template, files, arguments, loader):
    """Upload all files that are referenced in a given list. Each entry in
    the list is either a file name or a reference to a template parameter.
    The list of files, for example, corresponds to the entries in the
    'inputs.files' section of a REANA workflow specification.

    Uses a loader function to allow use of this method in cases where the
    workflow is executed locally or remote using a REANA cluster instance.

    For entries that are not parameter references the file in the template
    base directory is uploaded using the entry as the target path. For
    parameter references the source and target are taken from the argument
    value. If no argument is given, the default value of the parameter is
    used as the path of the source file (relative to the template base
    directory).

    Parameters
    ----------
    template: benchtmpl.workflow.template.base.TemplateHandle
        Workflow template containing the parameterized specification and the
        parameter declarations
    files: list(string)
        List of file references
    arguments: dict(benchtmpl.workflow.parameter.value.TemplateArgument)
        Dictionary of argument values for parameters in the template
    loader: func
        File (up)load function that takes a filepath as the first argument
        and a (remote) target path as the second argument

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If a referenced parameter is not of type file
    benchtmpl.error.MissingArgumentError
        If there is neither an argument nor a default value for a
        referenced parameter
    benchtmpl.error.UnknownParameterError
        NOTE(review): not raised here directly; presumably raised by
        template.get_parameter() for unknown parameters -- confirm.
    """
    for entry in files:
        if not tmpl.is_parameter(entry):
            # Plain file name: upload the file from the template base
            # directory under the same relative path.
            loader(os.path.join(template.base_dir, entry), entry)
            continue
        name = tmpl.get_parameter_name(entry)
        # Only parameters of type file can be referenced in the file list
        para = template.get_parameter(name)
        if not para.is_file():
            raise err.InvalidTemplateError('expected file parameter for \'{}\''.format(name))
        arg = arguments.get(name)
        if arg is None:
            if para.default_value is None:
                raise err.MissingArgumentError(name)
            # Create an argument from the default value (assuming that the
            # default points to a file in the template base directory). The
            # constant value of the parameter is the upload target unless
            # the target is user input.
            if para.has_constant() and not para.as_input():
                upload_target = para.get_constant()
            else:
                upload_target = para.default_value
            default_file = FileHandle(
                filepath=os.path.join(template.base_dir, para.default_value)
            )
            arg = TemplateArgument(
                parameter=para,
                value=InputFile(f_handle=default_file, target_path=upload_target)
            )
        # Source file and target path are defined by the input file handle
        loader(arg.value.source(), arg.value.target())