def _validate_member(self, member, classes, instanceof):
    """Check that `member` is an attribute of one of `classes`.

    The check has two tiers.  First the bare field name of `member` (as
    returned by `self._field_name_from_uri`) is compared with the bare
    field names of every attribute listed for `classes` in
    `self.schema_def.attributes_by_class`.  Only when that name is known
    is the full `member` value compared with the listed attributes.

    :param member: the attribute being validated
    :param classes: keys of `self.schema_def.attributes_by_class` whose
        attribute lists `member` may belong to
    :param instanceof: the class the member was found on; only used to
        build the error message
    :returns: `None` when `member` itself is listed; a `ValidationWarning`
        tagged `ValidationResult.WARNING` when only the field name matches
        and the member's namespace is in `self.allowed_namespaces`; a
        `ValidationWarning` tagged `ValidationResult.ERROR` when the field
        name is not known at all
    """
    log.info("Validating member %s" % member)

    # Look every attribute list up eagerly so that an unknown class still
    # fails before any comparison is attempted.
    # TODO - these lists are still walked once per member; precompute the
    # stripped names once if this shows up in profiles.
    attributes_per_class = [self.schema_def.attributes_by_class[cl]
                            for cl in classes]
    member_name = self._field_name_from_uri(member)

    # Lazy scan instead of copying each list, rewriting it in place via
    # list.index() and flattening the copies with sum(..., []).
    name_is_known = any(
        self._field_name_from_uri(attr) == member_name
        for attributes in attributes_per_class
        for attr in attributes
    )

    if not name_is_known:
        log.info("failure")
        err = _error("{0} - invalid member of {1}",
                     member_name,
                     self._field_name_from_uri(instanceof),
                     doc_lines=self.doc_lines)
        return ValidationWarning(ValidationResult.ERROR, err['err'],
                                 err['line'], err['num'])

    if any(member in attributes for attributes in attributes_per_class):
        log.info("success")
        return None

    namespace = self._namespace_from_uri(member)
    if namespace in self.allowed_namespaces:
        log.info("warning - unofficially allowed namespace")
        err = _error("Unoficially allowed namespace {0}",
                     namespace,
                     doc_lines=self.doc_lines)
        return ValidationWarning(ValidationResult.WARNING, err['err'],
                                 err['line'], err['num'])

    # NOTE(review): a member whose field name is known but whose full value
    # is not listed, and whose namespace is not allowed, yields no result
    # at all -- confirm that this silent pass is intended.
    return None
def check_key(self, key, doc_lines):
    """Validate one parsely-page field name and, for URL fields, its value.

    :param key: field name, checked for membership in `self.stdref`
    :param doc_lines: document lines, forwarded to `_error`
    :returns: the `_error` result when `key` is not in `self.stdref`, or
        when `key` is "link" or "image_url" and `self.url_validator`
        rejects `self.parsely_page[key]`; `None` otherwise
    """
    if key in self.stdref:
        holds_url = key in ("link", "image_url")
        if holds_url and not self.url_validator(self.parsely_page[key]):
            return _error("{0} - invalid url for field '{1}'",
                          self.parsely_page[key], key,
                          doc_lines=doc_lines)
        return None

    return _error("{0} - invalid parsely-page field", key,
                  doc_lines=doc_lines)
def check_key(self, key):
    """Validate one parsely-page field name and, for URL fields, its value.

    :param key: field name, checked for membership in `self.stdref`
    :returns: `None` when the key is acceptable; otherwise a
        `ValidationWarning` tagged `ValidationResult.ERROR`, built from the
        `_error` result.  A key is rejected when it is not in
        `self.stdref`, or when it is "link" or "image_url" and
        `self.url_validator` rejects `self.data[key]`.
    """
    if key in self.stdref:
        if key not in ("link", "image_url"):
            return None
        value = self.data[key]
        if self.url_validator(value):
            return None
        err = _error("{0} - invalid url for field '{1}'", value, key,
                     doc_lines=self.doc_lines)
    else:
        err = _error("{0} - invalid parsely-page field", key,
                     doc_lines=self.doc_lines)

    return ValidationWarning(ValidationResult.ERROR, err['err'],
                             err['line'], err['num'])
def _validate_member(self, member, classes, instanceof):
    """Report `member` as invalid unless one of `classes` lists it.

    :param member: the attribute being validated
    :param classes: keys of `self.schema_def.attributes_by_class`
    :param instanceof: the class the member was found on; only used to
        build the error message
    :returns: `None` when `member` appears in the attribute list of any
        class in `classes`, otherwise the `_error` result
    """
    log.info("Validating member %s" % member)

    attribute_lists = [self.schema_def.attributes_by_class[cl]
                       for cl in classes]
    declared = []
    for attributes in attribute_lists:
        # Plain list concatenation, mirroring sum(attribute_lists, []).
        declared = declared + attributes

    if member in declared:
        return None
    return _error("{0} - invalid member of {1}",
                  self._field_name_from_uri(member),
                  self._field_name_from_uri(instanceof),
                  doc_lines=self.doc_lines)
def validate(self, text, doc_lines):
    """Parse the parsely-page content of a document.

    The value returned by `self._get_parselypage(doc_lines)` is stored on
    `self.parsely_page`.

    :param text: not used by the code visible here
    :param doc_lines: document lines, handed to `self._get_parselypage`
        and, on failure, to `_error`
    :returns: a one-element list holding the `_error` result when
        `self._get_parselypage` raises `IndexError` or `ValueError`
    """
    # NOTE(review): `errors` and `ret` are initialised but not used in the
    # code visible here; kept because the function may continue elsewhere.
    errors = []
    ret = {'ontology': [], 'ont_name': [], 'errors': []}
    try:
        self.parsely_page = self._get_parselypage(doc_lines)
    except (IndexError, ValueError):
        # The parentheses matter: `except IndexError, ValueError:` catches
        # only IndexError and binds the instance to the name `ValueError`,
        # letting a real ValueError escape.
        return [_error("Failed to parse parsely-page content",
                       doc_lines=doc_lines)]
def _validate_class(self, cl): if cl not in self.schema_def.attributes_by_class.keys(): search_string = str(cl) err = _error("{0} - invalid class", self._field_name_from_uri(cl), search_string=search_string, doc_lines=self.doc_lines) return ValidationWarning(ValidationResult.ERROR, err['err'], err['line'], err['num'])
def validate_class(self, cl):
    """Check that class `cl` is found in the ontology.

    :param cl: class identifier, looked up in `self.attribs_by_class`
    :returns: `None` when `cl` is a key of `self.attribs_by_class`,
        otherwise the `_error` result
    """
    # Direct mapping membership: `in mapping.keys()` builds and scans a
    # list on Python 2.
    if cl in self.attribs_by_class:
        return None

    # NOTE(review): the original author flagged this choice of search
    # string as "fishy"; `str(cl)` is now only evaluated for microdata
    # instead of being computed and then thrown away.
    if self.impl == 'microdata':
        search_string = str(cl)
    else:
        search_string = self.field_name(cl)

    return _error("{0} - invalid class",
                  self.field_name(cl),
                  search_string=search_string,
                  doc_lines=self.doc_lines)
def _validate_class(self, cl): if cl not in self.schema_def.attributes_by_class.keys(): search_string = str(cl) err = _error( "{0} - invalid class", self._field_name_from_uri(cl), search_string=search_string, doc_lines=self.doc_lines, ) return ValidationWarning(ValidationResult.ERROR, err["err"], err["line"], err["num"])
def validate_member(self, member, classes, instanceof):
    """Check that `member` is an attribute of one of `classes`.

    The bare field name of `member` is prefixed with each namespace key of
    `self.ns_ont` in turn; the member is valid as soon as one of the
    resulting `rt.URIRef` values is found among the attributes listed for
    `classes` in `self.attribs_by_class`.

    :param member: the attribute being validated
    :param classes: keys of `self.attribs_by_class`
    :param instanceof: the class the member was found on; only used to
        build the error message
    :returns: `None` when the member is valid, otherwise the `_error`
        result
    """
    # The field name and the attribute list are the same for every
    # namespace, so compute them once instead of once per iteration.
    field = self.field_name(member)
    known_attributes = [attr
                        for cl in classes
                        for attr in self.attribs_by_class[cl]]

    # any() stops at the first namespace that produces a known attribute.
    if any(rt.URIRef("%s%s" % (ns, field)) in known_attributes
           for ns in self.ns_ont):
        return None

    return _error("{0} - invalid member of {1}",
                  field,
                  self.field_name(instanceof),
                  doc_lines=self.doc_lines)
def _validate_member(self, member, classes, instanceof):
    """Check that `member` is an attribute of one of `classes`.

    The check has two tiers.  First the bare field name of `member` (as
    returned by `self._field_name_from_uri`) is compared with the bare
    field names of every attribute listed for `classes` in
    `self.schema_def.attributes_by_class`.  Only when that name is known
    is the full `member` value compared with the listed attributes.

    :param member: the attribute being validated
    :param classes: keys of `self.schema_def.attributes_by_class` whose
        attribute lists `member` may belong to
    :param instanceof: the class the member was found on; only used to
        build the error message
    :returns: `None` when `member` itself is listed; a `ValidationWarning`
        tagged `ValidationResult.WARNING` when only the field name matches
        and the member's namespace is in `self.allowed_namespaces`; a
        `ValidationWarning` tagged `ValidationResult.ERROR` when the field
        name is not known at all
    """
    log.info("Validating member %s" % member)

    # Look every attribute list up eagerly so that an unknown class still
    # fails before any comparison is attempted.
    # TODO - these lists are still walked once per member; precompute the
    # stripped names once if this shows up in profiles.
    attributes_per_class = [self.schema_def.attributes_by_class[cl]
                            for cl in classes]
    member_name = self._field_name_from_uri(member)

    # Lazy scan instead of copying each list, rewriting it in place via
    # list.index() and flattening the copies with sum(..., []).
    name_is_known = any(
        self._field_name_from_uri(attr) == member_name
        for attributes in attributes_per_class
        for attr in attributes
    )

    if not name_is_known:
        log.info("failure")
        err = _error("{0} - invalid member of {1}",
                     member_name,
                     self._field_name_from_uri(instanceof),
                     doc_lines=self.doc_lines)
        return ValidationWarning(ValidationResult.ERROR, err['err'],
                                 err['line'], err['num'])

    if any(member in attributes for attributes in attributes_per_class):
        log.info("success")
        return None

    namespace = self._namespace_from_uri(member)
    if namespace in self.allowed_namespaces:
        log.info("warning - unofficially allowed namespace")
        err = _error("Unoficially allowed namespace {0}",
                     namespace,
                     doc_lines=self.doc_lines)
        return ValidationWarning(ValidationResult.WARNING, err['err'],
                                 err['line'], err['num'])

    # NOTE(review): a member whose field name is known but whose full value
    # is not listed, and whose namespace is not allowed, yields no result
    # at all -- confirm that this silent pass is intended.
    return None
"""return error if `member` is not a member of any class in `classes`""" valid = False for ns in self.ns_ont.keys(): name = "%s%s" % (ns, self.field_name(member)) if rt.URIRef(name) in sum([self.attribs_by_class[cl] for cl in classes], []): valid = True if not valid: return _error("{0} - invalid member of {1}", self.field_name(member), self.field_name(instanceof), doc_lines=self.doc_lines) return def validate_duplication(self, (subj, pred), cl): """returns error if we've already seen the member `pred` on `subj`""" if (subj,pred) in self.attributes: return _error("{0} - duplicated member of {1}", self.field_name(pred), self.field_name(cl), doc_lines=self.doc_lines) ################################################################# # HELPERS / UTILITIES # ################################################################# def get_filetype(self, source): """returns the function that can be used to parse the given document into an rdflib.Graph""" name, ext = os.path.splitext(source) if ext in ['.ttl']: # these should each belong to their respective validator def _parse_func(s): return rdflib.Graph().parse(s, format='n3') else: def _parse_func(s): return pyRdfa().graph_from_source(s)
err = _error("Unoficially allowed namespace {0}", self._namespace_from_uri(member), doc_lines=self.doc_lines) return ValidationWarning(ValidationResult.WARNING, err['err'], err['line'], err['num']) else: log.info("failure") err = _error("{0} - invalid member of {1}", self._field_name_from_uri(member), self._field_name_from_uri(instanceof), doc_lines=self.doc_lines) return ValidationWarning(ValidationResult.ERROR, err['err'], err['line'], err['num']) def _validate_duplication(self, (subj, pred), cl): """returns error if we've already seen the member `pred` on `subj`""" log.info("Validating duplication of member %s" % pred) if (subj, pred) in self.checked_attributes: log.info("failure") err = _error("{0} - duplicated member of {1}", self._field_name_from_uri(pred), self._field_name_from_uri(cl), doc_lines=self.doc_lines) return ValidationWarning(ValidationResult.WARNING, err['err'], err['line'], err['num']) log.info("success") def _superclasses_for_subject(self, graph, typeof): """helper, returns a list of all superclasses of a given class""" # TODO - this might be replacing a fairly simple graph API query where # it doesn't need to classes = [] superclass = typeof while True: found = False for (p, o) in self.schema_def.ontology[superclass]: if self.schema_def.lexicon['subclass'] == str(p): found = True classes.append(o)
def error(self, lineno, errstring):
    """Forward `lineno` and `errstring` to the `_error` helper.

    Whatever `_error` returns is discarded, so this method always returns
    `None`.
    """
    _error(lineno, errstring)
else: log.info("failure") err = _error("{0} - invalid member of {1}", self._field_name_from_uri(member), self._field_name_from_uri(instanceof), doc_lines=self.doc_lines) return ValidationWarning(ValidationResult.ERROR, err['err'], err['line'], err['num']) def _validate_duplication(self, (subj, pred), cl): """returns error if we've already seen the member `pred` on `subj`""" log.info("Validating duplication of member %s" % pred) if (subj, pred) in self.checked_attributes: log.info("failure") err = _error("{0} - duplicated member of {1}", self._field_name_from_uri(pred), self._field_name_from_uri(cl), doc_lines=self.doc_lines) return ValidationWarning(ValidationResult.WARNING, err['err'], err['line'], err['num']) log.info("success") def _superclasses_for_subject(self, graph, typeof): """helper, returns a list of all superclasses of a given class""" # TODO - this might be replacing a fairly simple graph API query where # it doesn't need to classes = [] superclass = typeof while True: found = False for (p, o) in self.schema_def.ontology[superclass]: if self.schema_def.lexicon['subclass'] == str(p):
def _validate_class(self, cl): if cl not in self.schema_def.attributes_by_class.keys(): search_string = str(cl) return _error("{0} - invalid class", self._field_name_from_uri(cl), search_string=search_string, doc_lines=self.doc_lines)