def validate(file: TextIO):
    """
    Check an orca document against the JSON schema selected by its 'apiVersion'.

    The schema is read from ``ORCA_SCHEMA_<apiVersion>.json`` inside the
    directory returned by ``_get_schema_location()``.

    :param file: open text stream holding the orca yaml document; its ``name``
        attribute, when present, is used in the validation error message
    :return: the raw, unparsed text that was read from ``file``
    :raises ConfigurationError: if the document has no 'apiVersion' (this
        includes empty and non-mapping documents), if no schema file exists
        for that version, or if the document violates the schema
    """
    data = file.read()
    log.debug("Raw yaml: {0}".format(data))
    # NOTE(review): yaml.Loader can construct arbitrary Python objects. If orca
    # files may come from untrusted sources, confirm whether yaml.SafeLoader
    # is sufficient before relying on this.
    orca_data = yaml.load(data, yaml.Loader)

    try:
        version = orca_data['apiVersion']
    except (KeyError, TypeError):
        # TypeError: the document is empty (None) or is not a mapping, so it
        # cannot carry an apiVersion either.
        raise ConfigurationError(
            "'apiVersion is missing. An API version must be specified on an orca document," +
            " the latest current version is {0}'".format(LATEST_SCHEMA_VERSION)
        )

    schema_file = os.path.join(_get_schema_location(), "ORCA_SCHEMA_{0}.json".format(version))
    try:
        # A missing schema file means the apiVersion is not supported.
        with open(schema_file) as fp:
            schema_data = json.load(fp)
    except FileNotFoundError:
        raise ConfigurationError(
            "'The apiVersion {0}, is an invalid apiVersion version. It did not match one of the"
            .format(version) + " supported apiVersions: {0}'".format(AVAILABLE_SCHEMA_VERSIONS))

    validator = Draft7Validator(schema_data)
    errors = list(validator.iter_errors(orca_data))
    # In-memory streams (e.g. io.StringIO) have no ``name``; _handle_errors
    # accepts a falsy filename.
    _handle_errors(errors, _handle_generic_error, getattr(file, 'name', None))
    return data
def _check_symtable(self, name: str, task: Dict):
    """
    Register a task name in ``self.symtable`` and reject invalid or reused names.

    :param name: task name; must be a valid Python identifier
    :param task: the task dictionary the name belongs to
    :raises ConfigurationError: if ``name`` is None or not an identifier, or if
        the name is already registered for a different task dictionary
    """
    is_valid = name is not None and name.isidentifier()
    if not is_valid:
        raise ConfigurationError('Invalid task name: "{0}"'.format(name))

    # The identity of the task dict is stored, not the dict itself, so that the
    # same task visited again (inside a loop) is not reported as a duplicate.
    marker = id(task)
    registered = self.symtable.get(name, marker)
    if registered != marker:
        raise ConfigurationError("Duplicate task name: {0}".format(name))
    self.symtable[name] = marker
def __set_vars(self, variables: Dict, args: List[str]) -> None:
    """
    Publish every entry of ``variables`` as an attribute of the global ``var``.

    Each value is formatted into an assignment statement and executed, so it is
    interpreted as a Python expression rather than stored verbatim.

    :param variables: mapping of variable identifier to value expression
    :param args: not used by this method
    :raises ConfigurationError: if a key is not a valid identifier or the
        assignment cannot be executed
    """
    log.debug("setting job variables:")
    for ident, expr in variables.items():
        if not ident.isidentifier():
            raise ConfigurationError(
                'Invalid variable identifier: "{0}"'.format(ident))
        try:
            statement = "var.{0}={1}".format(ident, expr)
            exec(statement, globals())
            # Read the attribute back so the log shows the evaluated value.
            resolved = str(eval("var." + ident))
            log.debug(" set var.{0} = {1} -> {2}".format(ident, str(expr), resolved))
        except Exception as err:
            raise ConfigurationError(
                "Cannot set variable: {0}".format(ident), err)
def __process_config(file: TextIO) -> Dict:
    """
    Validate an orca yaml document and load it into a dictionary.

    The text is first validated with ``validate``. Values written as
    single-quoted literals are then wrapped in double quotes so the inner
    quotes survive yaml parsing, and the adjusted text is loaded.

    :param file: open text stream holding the orca yaml document
    :return: the loaded configuration
    :raises ConfigurationError: if validation fails or the yaml cannot be
        loaded; the error is logged before it is raised
    """
    try:
        # first pass: start by validating the yaml file against the schema version.
        data = validate(file)
        # processing single quote string literals: key: 'text' -> key: "'text'"
        repl = r"^(?P<key>\s*[^#:]*):\s+(?P<value>['].*['])\s*$"
        # The template must be a raw string: "\g" is not a valid escape in a
        # normal string literal and triggers a warning on current Python.
        fixed_data = re.sub(repl, r'\g<key>: "\g<value>"', data, flags=re.MULTILINE)
        log.debug("Processed yaml: {0}".format(fixed_data))
        # second pass: load the yaml text with the strings re-quoted.
        config = yaml.load(fixed_data, Loader=yaml.Loader)
        if log.isEnabledFor(logging.DEBUG):
            # guard so the json dump is only built when it will be logged
            log.debug("Loaded yaml: {0}".format(
                json.dumps(config, indent=2)))
        return config
    except yaml.YAMLError as e:
        log.error(e)
        raise ConfigurationError("error loading yaml file.", e)
    except ConfigurationError as e:
        # capture it, log it and re-raise it unchanged.
        log.error(e)
        raise
def handle_python(self, _task: OrcaTask):
    """
    Check that the python script named by the task can be resolved.

    :param _task: task whose ``python`` attribute names the script
    :raises ConfigurationError: if ``_resolve_file_path`` reports the script
        as missing; the original error is passed along as second argument
    """
    try:
        self._resolve_file_path(_task.python, ".py")
    except ConfigurationError as err:
        template = 'Task {0} defines a python script that cannot be found: {1}'
        raise ConfigurationError(template.format(_task.name, _task.python), err)
def handle_http(self, _task: OrcaTask) -> Dict:
    """
    Call the http service described by the task and return its handled result.

    A GET request sends ``_task.config['params']`` as query parameters. A POST
    request sends ``_task.locals`` as the body, converted with ``to_python()``
    when it is a ``DottedCollection``. The response body is decoded as JSON and
    passed to ``handle_service_result`` with the task outputs and name.

    :param _task: task providing ``http`` (the url), ``name``, ``locals``,
        ``config`` and ``outputs``
    :return: whatever ``handle_service_result`` returns for the response
    :raises ConfigurationError: if the task config has no 'method' or the
        method is neither 'GET' nor 'POST'
    """
    url = _task.http
    name = _task.name
    inputs = _task.locals

    # Check the method first. The previous header lookup crashed on tasks
    # without a 'header' entry, and its result was never used.
    if 'method' not in _task.config:
        raise ConfigurationError(
            "requests service operator must include method: service {0}".
            format(name))

    method = _task.config['method']
    if method == 'GET':
        response = requests.get(url, params=_task.config.get('params', None))
    elif method == 'POST':
        if isinstance(inputs, DottedCollection):
            payload = inputs.to_python()
        else:
            payload = inputs
        response = requests.post(url, payload)
    else:
        # Previously this fell through and returned None without any request.
        raise ConfigurationError(
            'Unsupported http method "{0}": service {1}'.format(method, name))

    return handle_service_result(json.loads(response.content), _task.outputs, name)
def handle_bash(self, _task: OrcaTask):
    """
    Check that the bash script named by the task can be resolved.

    :param _task: task whose ``bash`` attribute names the script
    :raises ConfigurationError: if ``_resolve_file_path`` reports the script
        as missing; the original error is passed along as second argument
    """
    try:
        self._resolve_file_path(_task.bash, ".sh")
    except ConfigurationError as err:
        template = 'Task {0} defines a bash script that cannot be found: {1}'
        raise ConfigurationError(template.format(_task.name, _task.bash), err)
def _resolve_file_path(self, name: str, ext: str) -> str:
    """
    Resolve ``name`` to the path of an existing file.

    ``name`` is first evaluated as an expression against the module globals so
    that it may refer to a variable holding the file name. The result is only
    used when it is a string. The name is then tried as given and relative to
    the yaml directory (``self.config.get_yaml_dir()``, or "." when the
    handler has no ``config``).

    :param name: file name, or an expression evaluating to one
    :param ext: extension that marks ``name`` as definitely meant to be a file
    :return: the path of the existing file, or None when no file is found and
        ``name`` does not end with ``ext``
    :raises ConfigurationError: if no file is found and ``name`` ends with ``ext``
    """
    # NOTE(review): eval runs arbitrary expressions taken from the task
    # definition; confirm task files are trusted input.
    try:
        evaluated = eval(str(name), globals())
    except Exception as e:
        # ok, never mind: the name is used literally
        log.debug(e)
    else:
        # A name that happens to match a builtin or module evaluates to a
        # non-string object; keep the literal name in that case.
        if isinstance(evaluated, str):
            name = evaluated

    if os.path.isfile(name):
        return name

    # maybe change this, because of testing
    if hasattr(self, 'config'):
        yaml_dir = self.config.get_yaml_dir()
    else:
        yaml_dir = "."
    rel_path = os.path.join(yaml_dir, name)
    if os.path.isfile(rel_path):
        # path relative to yaml file
        return rel_path

    if name.endswith(ext):
        # this should be a file but it's not.
        raise ConfigurationError(
            'File not found: "{0}"'.format(name))
    return None
def __resolve_dependencies(self):
    """
    Import every entry of ``self.deps`` into the module globals.

    :raises ConfigurationError: if an import statement fails; the original
        error is passed along as second argument
    """
    for module_name in self.deps:
        try:
            # The import is executed as a statement against globals() so the
            # imported name is bound at module level.
            exec("import " + module_name, globals())
            log.debug("importing dependency: '{0}'".format(module_name))
        except Exception as err:
            raise ConfigurationError(
                "Cannot not resolve the '{0}' dependency".format(module_name), err)
def select_handler(self, task_dict: Dict):
    """
    Pick the handler method that matches the kind of task.

    The kind is the first of 'csip', 'http', 'bash', 'python' that appears as
    a key of ``task_dict``.

    :param task_dict: task definition
    :return: the bound ``handle_<kind>`` method
    :raises ConfigurationError: if none of the known kinds is present
    """
    # Order is significant: it decides which kind wins when several are present.
    for kind in ('csip', 'http', 'bash', 'python'):
        if kind in task_dict:
            return getattr(self, 'handle_' + kind)
    raise ConfigurationError(
        'Invalid task type: "{0}"'.format(task_dict))
def _handle_for(self, condition_block: Dict) -> None:
    """
    Handle 'for': run the 'do' sequence once per value of an iterable.

    The 'for' entry has the form ``<variable>,<expression>``. The expression
    is evaluated against the module globals, and before each pass the current
    value is bound to ``<variable>`` in the module globals.

    :param condition_block: step with a 'for' expression and a 'do' sequence
    :raises ConfigurationError: if the expression has no comma or the variable
        part is not a valid identifier
    """
    var_expr = condition_block['for']
    # Split on the first comma only; the expression may contain more commas.
    var, comma, expr = var_expr.partition(",")
    if not comma:
        raise ConfigurationError(
            'Invalid "for" expression: "{0}"'.format(var_expr))
    if not var.isidentifier():
        raise ConfigurationError(
            'Not a valid identifier: "{0}"'.format(var))

    # NOTE(review): eval runs an expression taken from the job definition;
    # confirm job files are trusted input.
    for item in eval(expr, globals()):
        # Bind the value directly. Building an assignment string and exec-ing
        # it broke on strings containing quotes, backslashes or newlines, and
        # on values whose str() is not a Python literal.
        globals()[var] = item
        self._handle_sequence(condition_block['do'])
def _handle_errors(errors: List[ValidationError], fmt_err_func, filename):
    """
    Turn a collection of schema validation errors into one ConfigurationError.

    :param errors: validation errors; they are reported sorted by their string form
    :param fmt_err_func: callable that formats one error as a line of text
    :param filename: name of the validated file; omitted from the message when falsy
    :return: None when ``errors`` is empty
    :raises ConfigurationError: when at least one error is present
    """
    ordered = sorted(errors, key=str)
    if not ordered:
        return

    lines = [fmt_err_func(err) for err in ordered]
    error_msg = '\n'.join(lines)
    file_label = " ' {}'".format(filename) if filename else ""
    message = "The Orca configuration: {file} is invalid because:\n {error_msg}".format(
        file=file_label, error_msg=error_msg)
    raise ConfigurationError(message)
def resolve_file_path(handler: OrcaHandler, _name: str) -> str:
    """
    Resolve ``_name`` to the path of an existing file.

    The name is tried as given first, then relative to the yaml directory
    (``handler.config.get_yaml_dir()``, or "." when the handler has no ``config``).

    :param handler: handler that may carry a ``config`` attribute
    :param _name: file name to resolve
    :return: the path under which the file was found
    :raises ConfigurationError: if neither location holds a file
    """
    if os.path.isfile(_name):
        return _name

    base_dir = handler.config.get_yaml_dir() if hasattr(handler, 'config') else "."
    candidate = os.path.join(base_dir, _name)
    if not os.path.isfile(candidate):
        # this should be a file but it's not.
        raise ConfigurationError(
            'File not found: "{0}"'.format(_name))
    # path relative to yaml file
    return candidate
def _handle_sequence(self, sequence: Dict) -> None:
    """
    Walk the steps of a job sequence and dispatch each to its handler.

    ``sequence`` is iterated, and the first key of every step selects the
    handler: 'task', 'if', 'for', 'switch', or any key starting with 'fork'.

    :param sequence: iterable of step mappings
    :raises ConfigurationError: if a step is empty or its first key is not a
        known step kind
    """
    for step in sequence:
        # None stands in for an empty step so it reaches the error branch
        # instead of leaking StopIteration.
        node = next(iter(step), None)
        if node == "task":
            log.debug(" ---- task: '{}'".format(step['task']))
            self._handle_task(step)
        elif node == "if":
            log.debug(" ---- if: '{}'".format(step['if']))
            self._handle_if(step)
        elif node == "for":
            log.debug(" ---- for: '{}'".format(step['for']))
            self._handle_for(step)
        elif isinstance(node, str) and node.startswith("fork"):
            log.debug(" ---- fork: ")
            # Look the body up under the key that matched; a fixed 'fork'
            # lookup failed for every other key accepted by startswith.
            self._handle_fork(step[node])
        elif node == "switch":
            log.debug(" ---- switch: '{}'".format(step['switch']))
            self._handle_switch(step)
        else:
            raise ConfigurationError(
                'Invalid step in job: "{0}"'.format(node))
def handle_http(self, _task: OrcaTask):
    """
    Check that the http url of the task answers a HEAD request.

    :param _task: task whose ``http`` attribute holds the url
    :raises ConfigurationError: if the request cannot be made or the response
        status code is 400 or higher
    """
    try:
        r = requests.head(_task.http)
    except requests.exceptions.RequestException as e:
        # Connection failures, malformed urls etc. also mean the url is not
        # accessible; report them like a bad status, with the cause attached.
        raise ConfigurationError(
            'Task {0} defines a Http url that is not accessible: "{1}"'.format(_task.name, _task.http),
            e
        )
    if r.status_code >= 400:
        raise ConfigurationError(
            'Task {0} defines a Http url that is not accessible: "{1}"'.format(_task.name, _task.http)
        )
def handle_csip(self, _task: OrcaTask):
    """
    Check that the CSIP endpoint of the task answers a HEAD request.

    :param _task: task whose ``csip`` attribute holds the endpoint url
    :raises ConfigurationError: if the request cannot be made or the response
        status code is 400 or higher
    """
    try:
        r = requests.head(_task.csip)
    except requests.exceptions.RequestException as e:
        # Connection failures, malformed urls etc. also mean the endpoint is
        # not accessible; report them like a bad status, with the cause attached.
        raise ConfigurationError(
            'Task {0} defines a CSIP endpoint that is not accessible: "{1}"'.format(_task.name, _task.csip),
            e
        )
    if r.status_code >= 400:
        raise ConfigurationError(
            'Task {0} defines a CSIP endpoint that is not accessible: "{1}"'.format(_task.name, _task.csip)
        )