def resolve_all(self, document, base_url):
    """Recursively resolve references in *document* against *base_url*.

    Lists are walked element by element.  Dicts first have any
    'id'/'import'/'include' directive resolved through resolve_ref, then
    every configured URL field expanded.  Scalars pass through unchanged.
    Raises validate.ValidationException annotated with the failing field
    or list position.
    """
    if isinstance(document, list):
        entries = enumerate(document)
    elif isinstance(document, dict):
        was_include = 'include' in document
        if any(k in document for k in ('id', 'import', 'include')):
            document = self.resolve_ref(document, base_url)
        if was_include:
            # raw-text inclusion: nothing further to walk
            return document
        for field in self.url_fields:
            if field not in document:
                continue
            value = document[field]
            if isinstance(value, basestring):
                document[field] = expand_url(value, base_url)
            elif isinstance(value, list):
                document[field] = [expand_url(item, base_url)
                                   if isinstance(item, basestring) else item
                                   for item in value]
        entries = document.iteritems()
    else:
        return document
    for key, val in entries:
        try:
            document[key] = self.resolve_all(val, base_url)
        except validate.ValidationException as v:
            detail = validate.indent(str(v))
            if isinstance(key, basestring):
                raise validate.ValidationException(
                    "Validation error in field %s:\n%s" % (key, detail))
            raise validate.ValidationException(
                "Validation error in position %i:\n%s" % (key, detail))
    return document
def validate_link(self, field, link):
    """Check that *link* (string, list or dict) only references known ids.

    Fields listed in self.nolinkcheck are skipped entirely.  String links
    must resolve through the vocabulary, index, reverse vocabulary, or an
    existing file; lists collect every failure into one exception; dicts
    are recursed via validate_links.  Returns True on success.
    """
    if field in self.nolinkcheck:
        return True
    if isinstance(link, basestring):
        if field in self.vocab_fields:
            unresolved = (link not in self.vocab and link not in self.idx
                          and link not in self.rvocab)
        else:
            unresolved = link not in self.idx and link not in self.rvocab
        if unresolved and not self.check_file(link):
            raise validate.ValidationException(
                "Field `%s` contains undefined reference to `%s`" % (field, link))
    elif isinstance(link, list):
        problems = []
        for entry in link:
            try:
                self.validate_link(field, entry)
            except validate.ValidationException as v:
                problems.append(v)
        if problems:
            raise validate.ValidationException("\n".join(
                [str(p) for p in problems]))
    elif isinstance(link, dict):
        self.validate_links(link)
    return True
def validate_links(self, document):
    """Walk *document* and ensure every URL field points at an indexed id.

    Raises validate.ValidationException identifying the offending field or
    list position; non-container documents are accepted as-is.
    """
    if isinstance(document, list):
        children = enumerate(document)
    elif isinstance(document, dict):
        for field in self.url_fields:
            if field not in document:
                continue
            value = document[field]
            if isinstance(value, basestring):
                if value not in self.idx:
                    raise validate.ValidationException(
                        "Invalid link `%s` in field `%s`" % (value, field))
            elif isinstance(value, list):
                for entry in value:
                    if isinstance(entry, basestring) and entry not in self.idx:
                        raise validate.ValidationException(
                            "Invalid link `%s` in field `%s`" % (entry, field))
        children = document.iteritems()
    else:
        return
    try:
        for key, val in children:
            self.validate_links(val)
    except validate.ValidationException as v:
        # `key` is still bound to the child that failed; report its location.
        if isinstance(key, basestring):
            raise validate.ValidationException(
                "At field `%s`\n%s" % (key, validate.indent(str(v))))
        raise validate.ValidationException(
            "At position %s\n%s" % (key, validate.indent(str(v))))
    return
def validate_links(self, document):
    """Recursively validate all links in *document*, accumulating errors.

    Unlike the per-field validate_link, this walks the whole tree and
    raises a single ValidationException aggregating every problem found
    (or the sole error when there is exactly one).
    """
    docid = self.getid(document)
    if docid is None:
        docid = ""
    errors = []
    if isinstance(document, list):
        iterator = enumerate(document)
    elif isinstance(document, dict):
        try:
            for d in self.url_fields:
                # identity fields are definitions, not references, so skip them
                if d not in self.identity_links and d in document:
                    self.validate_link(d, document[d])
        except validate.ValidationException as v:
            errors.append(v)
        iterator = document.iteritems()
    else:
        return
    for key, val in iterator:
        try:
            self.validate_links(val)
        except validate.ValidationException as v:
            # errors under a nolinkcheck field are deliberately swallowed
            if key not in self.nolinkcheck:
                # prefer reporting the child's own id when it has one
                docid = self.getid(val)
                if docid:
                    errors.append(
                        validate.ValidationException(
                            "While checking object `%s`\n%s" % (docid, validate.indent(str(v)))))
                else:
                    if isinstance(key, basestring):
                        errors.append(
                            validate.ValidationException(
                                "While checking field `%s`\n%s" % (key, validate.indent(str(v)))))
                    else:
                        errors.append(
                            validate.ValidationException(
                                "While checking position %s\n%s" % (key, validate.indent(str(v)))))
    if errors:
        if len(errors) > 1:
            raise validate.ValidationException("\n".join(
                [str(e) for e in errors]))
        else:
            raise errors[0]
    return
def validate_requirements(self, tool, field):
    """Validate each entry of tool[field] (a requirements/hints list).

    Unknown or invalid entries raise validate.ValidationException, except
    when *field* is "hints", where problems are only logged as warnings.
    Recurses into each entry's own nested requirements/hints.
    """
    for req in tool.get(field, []):
        try:
            if self.names.get_name(req["class"], "") is None:
                raise validate.ValidationException(
                    "Unknown requirement %s" % (req["class"]))
            validate.validate_ex(self.names.get_name(req["class"], ""), req)
            for nested in ("requirements", "hints"):
                if nested in req:
                    self.validate_requirements(req, nested)
        except validate.ValidationException as v:
            message = "While validating %s %s\n%s" % (
                field, req["class"], validate.indent(str(v)))
            if field == "hints":
                # hints are advisory: report but do not fail
                _logger.warn(message)
            else:
                raise validate.ValidationException(message)
def validate_doc(schema_names, validate_doc, loader, strict):
    """Validate each item of *validate_doc* against the schema's root types.

    An item passes when at least one documentRoot schema accepts it;
    otherwise the per-schema failure messages for that item are collected.
    Raises validate.ValidationException listing every failed item, or when
    the schema defines no document roots or the input is not a dict/list.
    """
    roots = [r for r in schema_names.names.values()
             if r.get_prop("documentRoot")]
    if not roots:
        raise validate.ValidationException(
            "No document roots defined in the schema")
    if isinstance(validate_doc, dict):
        # normalize a single document to a one-element list
        validate_doc = [validate_doc]
    elif not isinstance(validate_doc, list):
        raise validate.ValidationException("Document must be dict or list")
    anyerrors = []
    for pos, item in enumerate(validate_doc):
        failures = []
        matched = False
        for root in roots:
            try:
                validate.validate_ex(
                    root, item, loader.identifiers, strict,
                    foreign_properties=loader.foreign_properties)
                matched = True
                break
            except validate.ValidationException as e:
                failures.append("Could not validate as `%s` because\n%s" % (
                    root.get_prop("name"),
                    validate.indent(str(e), nolead=False)))
        if matched:
            continue
        objerr = "Validation error at position %i" % pos
        for ident in loader.identifiers:
            # name the object by its identifier when it has one
            if ident in item:
                objerr = "Validation error in object %s" % (item[ident])
                break
        anyerrors.append("%s\n%s" % (
            objerr, validate.indent("\n".join(failures))))
    if anyerrors:
        raise validate.ValidationException("\n".join(anyerrors))
def resolve_ref(self, ref, base_url=None):
    """Resolve *ref* (a URL string or a directive dict) to a document.

    Dict refs carry one of the directives 'import', 'include' or 'id';
    'include' returns raw text, the others load/index a structured
    document.  Resolved objects are cached in self.idx.  Raises ValueError
    on malformed refs and RuntimeError when the resolved URL never lands
    in the index.
    """
    base_url = base_url or 'file://%s/' % os.path.abspath('.')
    obj = None
    # If `ref` is a dict, look for special directives.
    if isinstance(ref, dict):
        obj = ref
        if "import" in ref:
            if len(obj) == 1:
                ref = obj["import"]
                obj = None  # cleared so the structured-load branch runs below
            else:
                raise ValueError("'import' must be the only field in %s" % (str(obj)))
        elif "include" in obj:
            if len(obj) == 1:
                ref = obj["include"]
            else:
                raise ValueError("'include' must be the only field in %s" % (str(obj)))
        else:
            if "id" in obj:
                ref = obj["id"]
            else:
                raise ValueError("Object `%s` does not have `id` field" % obj)
    if not isinstance(ref, basestring):
        raise ValueError("Must be string: `%s`" % str(ref))
    url = expand_url(ref, base_url)
    # Has this reference been loaded already?
    if url in self.idx:
        return self.idx[url]
    # "include" directive means load raw text
    if obj and "include" in obj:
        return self.fetch_text(url)
    if obj:
        obj["id"] = url
        self.idx[url] = obj
    else:
        # Load structured document
        doc_url, frg = urlparse.urldefrag(url)
        if doc_url in self.idx:
            # the containing document was loaded but `url` was not indexed,
            # so the fragment cannot exist in it
            raise validate.ValidationException("Reference `#%s` not found in file `%s`." % (frg, doc_url))
        obj = self.fetch(doc_url)
    # Recursively expand urls and resolve directives
    self.resolve_all(obj, url)
    # Requested reference should be in the index now, otherwise it's a bad reference
    if self.idx.get(url) is not None:
        return self.idx[url]
    else:
        raise RuntimeError("Reference `%s` is not in the index. Index contains:\n %s" % (url, "\n ".join(self.idx)))
def add_context(self, newcontext, baseuri=""): if self.vocab: raise validate.ValidationException( "Refreshing context that already has stuff in it") self.url_fields = set() self.vocab_fields = set() self.identifiers = set() self.identity_links = set() self.standalone = set() self.nolinkcheck = set() self.vocab = {} self.rvocab = {} self.ctx.update( {k: v for k, v in newcontext.iteritems() if k != "@context"}) _logger.debug("ctx is %s", self.ctx) for c in self.ctx: if self.ctx[c] == "@id": self.identifiers.add(c) self.identity_links.add(c) elif isinstance(self.ctx[c], dict) and self.ctx[c].get("@type") == "@id": self.url_fields.add(c) if self.ctx[c].get("identity", False): self.identity_links.add(c) elif isinstance(self.ctx[c], dict) and self.ctx[c].get("@type") == "@vocab": self.url_fields.add(c) self.vocab_fields.add(c) if isinstance(self.ctx[c], dict) and self.ctx[c].get("noLinkCheck"): self.nolinkcheck.add(c) if isinstance(self.ctx[c], dict) and "@id" in self.ctx[c]: self.vocab[c] = self.ctx[c]["@id"] elif isinstance(self.ctx[c], basestring): self.vocab[c] = self.ctx[c] for k, v in self.vocab.items(): self.rvocab[self.expand_url(v, "", scoped=False)] = k _logger.debug("identifiers is %s", self.identifiers) _logger.debug("identity_links is %s", self.identity_links) _logger.debug("url_fields is %s", self.url_fields) _logger.debug("vocab_fields is %s", self.vocab_fields) _logger.debug("vocab is %s", self.vocab)
def fetch(self, url):
    """Fetch and parse the YAML document at *url*, caching it in self.idx.

    Dict results are indexed under their (expanded) "id", which defaults
    to *url*; non-dict results are indexed under *url* itself.  Raises
    validate.ValidationException on YAML parse errors.
    """
    if url in self.idx:
        return self.idx[url]
    try:
        # NOTE(review): yaml.load without SafeLoader can construct arbitrary
        # Python objects from untrusted documents; consider yaml.safe_load
        # if inputs are not trusted.
        result = yaml.load(self.fetch_text(url))
    except yaml.parser.ParserError as e:
        raise validate.ValidationException("Error loading '%s' %s" % (url, str(e)))
    if isinstance(result, dict):
        if "id" not in result:
            result["id"] = url
        self.idx[expand_url(result["id"], url)] = result
    else:
        self.idx[url] = result
    return result
def fetch(self, url):
    """Fetch, parse and index the YAML document at *url* (cached in idx).

    Each configured identifier field is defaulted to *url* and every
    identifier value becomes an index key for the document.  Raises
    validate.ValidationException on YAML syntax errors.
    """
    if url in self.idx:
        return self.idx[url]
    try:
        stream = StringIO.StringIO(self.fetch_text(url))
        stream.name = url  # gives yaml error messages a filename
        # NOTE(review): yaml.load without SafeLoader can construct arbitrary
        # objects; consider yaml.safe_load for untrusted input.
        result = yaml.load(stream)
    except yaml.parser.ParserError as e:
        raise validate.ValidationException("Syntax error %s" % (e))
    if isinstance(result, dict) and self.identifiers:
        for identifier in self.identifiers:
            result.setdefault(identifier, url)
            self.idx[self.expand_url(result[identifier], url)] = result
    else:
        self.idx[url] = result
    return result
def expand_url(self, url, base_url, scoped=False, vocab_term=False):
    """Expand *url* using the vocabulary and resolve it against *base_url*.

    - "@id"/"@type" and known vocabulary terms (when *vocab_term*) pass
      through unchanged.
    - A "prefix:rest" form is expanded when the prefix is in self.vocab.
    - Absolute URLs and parameter references ("$(...)", "${...}") are
      left as-is; otherwise the URL is joined to *base_url*, and with
      *scoped* a fragment-less URL is appended to the base's fragment path.
    - Finally, when *vocab_term*, a fully-expanded URL that maps back
      through self.rvocab is collapsed to its term.

    BUGFIX: removed leftover debugging code that raised a profane
    ValidationException whenever the (legitimate) term "cwl:hints" was
    expanded.
    """
    if url in ("@id", "@type"):
        return url
    if vocab_term and url in self.vocab:
        return url
    if self.vocab and ":" in url:
        prefix = url.split(":")[0]
        if prefix in self.vocab:
            url = self.vocab[prefix] + url[len(prefix) + 1:]
    split = urlparse.urlsplit(url)
    if split.scheme or url.startswith("$(") or url.startswith("${"):
        # already absolute, or a runtime parameter/expression reference
        pass
    elif scoped and not split.fragment:
        # scoped identifier: extend the base document's fragment path
        splitbase = urlparse.urlsplit(base_url)
        frg = ""
        if splitbase.fragment:
            frg = splitbase.fragment + "/" + split.path
        else:
            frg = split.path
        url = urlparse.urlunsplit(
            (splitbase.scheme, splitbase.netloc, splitbase.path,
             splitbase.query, frg))
    else:
        url = urlparse.urljoin(base_url, url)
    if vocab_term and url in self.rvocab:
        return self.rvocab[url]
    else:
        return url
def extend_and_specialize(items, loader):
    """Apply 'extend' and 'specialize' to fully materialize derived record types.

    Returns a new list of types in which every type with an "extends"
    clause has its base fields/symbols folded in, specializations applied,
    duplicate field names rejected, the "class" field pinned to an enum of
    the record's own name, and abstract base references replaced by the
    concrete extending types.
    """
    types = {t["name"]: t for t in items}
    n = []
    for t in items:
        t = copy.deepcopy(t)
        if "extends" in t:
            if "specialize" in t:
                spec = {sp["specializeFrom"]: sp["specializeTo"]
                        for sp in aslist(t["specialize"])}
            else:
                spec = {}
            exfields = []
            exsym = []
            for ex in aslist(t["extends"]):
                if ex not in types:
                    raise Exception(
                        "Extends %s in %s refers to invalid base type" % (
                            t["extends"], t["name"]))
                basetype = copy.deepcopy(types[ex])
                if t["type"] == "record":
                    if spec:
                        basetype["fields"] = replace_type(
                            basetype.get("fields", []), spec, loader, set())
                    for f in basetype.get("fields", []):
                        # remember which base each inherited field came from
                        if "inherited_from" not in f:
                            f["inherited_from"] = ex
                    exfields.extend(basetype.get("fields", []))
                elif t["type"] == "enum":
                    exsym.extend(basetype.get("symbols", []))
            if t["type"] == "record":
                exfields.extend(t.get("fields", []))
                t["fields"] = exfields
                fieldnames = set()
                for field in t["fields"]:
                    if field["name"] in fieldnames:
                        raise validate.ValidationException(
                            "Field name %s appears twice in %s" % (
                                field["name"], t["name"]))
                    else:
                        fieldnames.add(field["name"])
                # Pin the "class" field to a one-symbol enum naming this
                # record.  BUGFIX: this block used the undefined name `r`
                # (NameError at runtime); the record being built is `t`.
                for y in [x for x in t["fields"] if x["name"] == "class"]:
                    y["type"] = {
                        "type": "enum",
                        "symbols": [t["name"]],
                        "name": t["name"] + "_class",
                    }
                    y["doc"] = "Must be `%s` to indicate this is a %s object." % (
                        t["name"], t["name"])
            elif t["type"] == "enum":
                exsym.extend(t.get("symbols", []))
                # BUGFIX: was `t["symbol"]` — a key nothing reads — which
                # silently dropped inherited symbols; enums use "symbols"
                # everywhere else in this function.
                t["symbols"] = exsym
        types[t["name"]] = t
        n.append(t)
    ex_types = {t["name"]: t for t in n}
    extended_by = {}
    for t in n:
        if "extends" in t:
            for ex in aslist(t["extends"]):
                if ex_types[ex].get("abstract"):
                    add_dictlist(extended_by, ex, ex_types[t["name"]])
                    # NOTE(review): this second entry maps avro_name(ex) to
                    # the base type itself rather than the extending type —
                    # confirm that is intended.
                    add_dictlist(extended_by, avro_name(ex), ex_types[ex])
    for t in n:
        if "fields" in t:
            # substitute abstract base references with their concrete subtypes
            t["fields"] = replace_type(t["fields"], extended_by, loader, set())
    return n
def resolve_all(self, document, base_url, file_base=None):
    """Recursively resolve $import/$include/$graph directives and expand all
    identifier and URL fields of *document* against *base_url*.

    Returns a (document, metadata) tuple, where metadata carries the
    non-$graph keys of a $graph document (empty dict otherwise).  Raises
    validate.ValidationException annotated with the failing field or list
    position.
    """
    loader = self
    metadata = {}
    if file_base is None:
        file_base = base_url

    if isinstance(document, dict):
        # Handle $import and $include
        if ('$import' in document or '$include' in document):
            return self.resolve_ref(document, file_base)
    elif isinstance(document, list):
        pass
    else:
        return document, metadata

    newctx = None
    if isinstance(document, dict):
        # Handle $base, $profile, $namespaces, $schemas and $graph
        if "$base" in document:
            base_url = document["$base"]

        if "$profile" in document:
            if not newctx:
                newctx = SubLoader(self)
            prof = self.fetch(document["$profile"])
            newctx.add_namespaces(document.get("$namespaces", {}),
                                  document["$profile"])
            newctx.add_schemas(document.get("$schemas", []),
                               document["$profile"])

        if "$namespaces" in document:
            if not newctx:
                newctx = SubLoader(self)
            newctx.add_namespaces(document["$namespaces"])

        if "$schemas" in document:
            if not newctx:
                newctx = SubLoader(self)
            newctx.add_schemas(document["$schemas"], file_base)

        if newctx:
            loader = newctx

        if "$graph" in document:
            metadata = {k: v for k, v in document.items() if k != "$graph"}
            document = document["$graph"]
            metadata, _ = loader.resolve_all(metadata, base_url, file_base)

    if isinstance(document, dict):
        for identifer in loader.identity_links:
            if identifer in document:
                if isinstance(document[identifer], basestring):
                    document[identifer] = loader.expand_url(
                        document[identifer], base_url, scoped=True)
                    if (document[identifer] not in loader.idx
                            or isinstance(loader.idx[document[identifer]], basestring)):
                        loader.idx[document[identifer]] = document
                    # subsequent fields are scoped to this identifier
                    base_url = document[identifer]
                elif isinstance(document[identifer], list):
                    for n, v in enumerate(document[identifer]):
                        document[identifer][n] = loader.expand_url(
                            document[identifer][n], base_url, scoped=True)
                        if document[identifer][n] not in loader.idx:
                            loader.idx[document[identifer][n]] = document[identifer][n]

        # Normalize keys to vocabulary terms.  BUGFIX: iterate a snapshot of
        # the keys — the original iterated the live dict while inserting and
        # deleting keys, which raises "dictionary changed size during
        # iteration".
        for d in list(document.keys()):
            d2 = loader.expand_url(d, "", scoped=False, vocab_term=True)
            if d != d2:
                document[d2] = document[d]
                del document[d]

        for d in loader.url_fields:
            if d in document:
                if isinstance(document[d], basestring):
                    document[d] = loader.expand_url(
                        document[d], base_url, scoped=False,
                        vocab_term=(d in loader.vocab_fields))
                elif isinstance(document[d], list):
                    document[d] = [
                        loader.expand_url(
                            url, base_url, scoped=False,
                            vocab_term=(d in loader.vocab_fields))
                        if isinstance(url, basestring) else url
                        for url in document[d]]

        try:
            for key, val in document.items():
                document[key], _ = loader.resolve_all(
                    val, base_url, file_base)
        except validate.ValidationException as v:
            _logger.debug("loader is %s", id(loader))
            raise validate.ValidationException(
                "(%s) (%s) Validation error in field %s:\n%s" % (
                    id(loader), file_base, key, validate.indent(str(v))))

    elif isinstance(document, list):
        i = 0
        try:
            while i < len(document):
                val = document[i]
                if isinstance(val, dict) and "$import" in val:
                    # a list-valued $import is spliced in place
                    l, _ = loader.resolve_ref(val, file_base)
                    if isinstance(l, list):
                        del document[i]
                        for item in aslist(l):
                            document.insert(i, item)
                            i += 1
                    else:
                        document[i] = l
                        i += 1
                else:
                    document[i], _ = loader.resolve_all(
                        val, base_url, file_base)
                    i += 1
        except validate.ValidationException as v:
            raise validate.ValidationException(
                "(%s) (%s) Validation error in position %i:\n%s" % (
                    id(loader), file_base, i, validate.indent(str(v))))

    for identifer in loader.identity_links:
        if identifer in metadata:
            if isinstance(metadata[identifer], basestring):
                metadata[identifer] = loader.expand_url(
                    metadata[identifer], base_url, scoped=True)
                loader.idx[metadata[identifer]] = document

    return document, metadata
def resolve_ref(self, ref, base_url=None):
    """Resolve *ref* (a URL string or directive dict) to (object, metadata).

    Dict refs carry "$import" (load and fully resolve another document),
    "$include" (return raw text) or an identifier field.  Resolved objects
    are cached in self.idx; a resolved "$graph" document is unwrapped with
    its remaining keys returned as metadata.  Raises ValueError on
    malformed refs, validate.ValidationException for missing fragments and
    RuntimeError when the URL never lands in the index.

    BUGFIX: removed the vestigial `merge` local — it was initialized to
    None and never reassigned, so its `if merge:` branch was unreachable.
    """
    base_url = base_url or 'file://%s/' % os.path.abspath('.')
    obj = None
    inc = False
    # If `ref` is a dict, look for special directives.
    if isinstance(ref, dict):
        obj = ref
        if "$import" in ref:
            if len(obj) == 1:
                ref = obj["$import"]
                obj = None  # cleared so the structured-load branch runs below
            else:
                raise ValueError("'$import' must be the only field in %s" % (str(obj)))
        elif "$include" in obj:
            if len(obj) == 1:
                ref = obj["$include"]
                inc = True
                obj = None
            else:
                raise ValueError(
                    "'$include' must be the only field in %s" % (str(obj)))
        else:
            ref = None
            for identifier in self.identifiers:
                if identifier in obj:
                    ref = obj[identifier]
                    break
            if not ref:
                raise ValueError(
                    "Object `%s` does not have identifier field in %s" % (obj, self.identifiers))
    if not isinstance(ref, basestring):
        raise ValueError("Must be string: `%s`" % str(ref))
    url = self.expand_url(ref, base_url, scoped=(obj is not None))
    # Has this reference been loaded already?
    if url in self.idx:
        return self.idx[url], {}
    # "$include" directive means load raw text
    if inc:
        return self.fetch_text(url), {}
    if obj:
        # in-place object: stamp every identifier with the resolved url
        for identifier in self.identifiers:
            obj[identifier] = url
        doc_url = url
    else:
        # Load structured document
        doc_url, frg = urlparse.urldefrag(url)
        if doc_url in self.idx:
            # containing document already loaded but `url` not indexed,
            # so the fragment cannot exist in it
            raise validate.ValidationException(
                "Reference `#%s` not found in file `%s`." % (frg, doc_url))
        obj = self.fetch(doc_url)
    # Recursively expand urls and resolve directives
    obj, metadata = self.resolve_all(obj, doc_url)
    # Requested reference should be in the index now, otherwise it's a bad reference
    if url is not None:
        if url in self.idx:
            obj = self.idx[url]
        else:
            raise RuntimeError(
                "Reference `%s` is not in the index. Index contains:\n %s" % (url, "\n ".join(self.idx)))
    if "$graph" in obj:
        metadata = {k: v for k, v in obj.items() if k != "$graph"}
        obj = obj["$graph"]
        return obj, metadata
    else:
        return obj, metadata