def triples(self):
    """Generate RDF triples for this model graph.

    Emits the graph-level triples (type, name, abbreviation) first, then
    the triples of every resource blob except the Graph blob itself.
    External resources are collected, resolved in parallel (term lookup),
    and their triples are yielded last.
    """
    self.iri = rdflib.URIRef(f'https://apinatomy.org/uris/models/{self.id}')
    yield self.iri, rdf.type, readable.Graph
    yield self.iri, readable.name, rdflib.Literal(self.name)
    yield self.iri, readable.abbreviation, rdflib.Literal(self.abbreviation)
    pending_externals = []
    for rid, res in self.resources.items():
        if 'class' not in res:
            logd.warning(f'no class in\n{res!r} for {rid}')
            continue
        res_class = res['class']
        if res_class == 'Graph':
            continue
        instance = getattr(self, res_class)(res, self.context, self.label_suffix)
        if res_class == 'External':
            # defer lookup so all external terms can be fetched in one batch
            pending_externals.append(instance)
        else:
            yield from instance.triples()

    # resolve external terms in parallel before yielding their triples
    Async()(deferred(lambda x: x._term)(e) for e in pending_externals)
    for external in pending_externals:
        yield from external.triples()
def mkval(cell):
    """Convert a spreadsheet cell into an AutoId via its hyperlink.

    Cells without a hyperlink are logged as unhandled and returned as the
    raw cell value.
    """
    link = cell.hyperlink
    if link is None:
        logd.warning(f'unhandled value {cell.value}')
        return cell.value
    return AutoId(link)
def protocol_url_or_doi(self, value):
    """Yield triples describing a protocol identified by *value*.

    *value* may be an idlib.Pio, an idlib.Doi, or a raw string/identifier
    that can be coerced to a Pio.  Dois are dereferenced to their Pio and
    linked via TEMP.dereferencesTo / TEMP.hasDoi; the Pio itself is typed
    as sparc.Protocol and, when protocol data is retrievable, annotated
    with its label and step count.
    """
    #_, s = self.c.protocol_url_or_doi(value)
    #yield s, rdf.type, owl.NamedIndividual
    #yield s, rdf.type, sparc.Protocol
    log.debug(value)
    if not isinstance(value, idlib.Pio):
        if isinstance(value, idlib.Doi):
            try:
                # t stays None if triples_gen fails before producing anything
                t = None
                for t in value.triples_gen:
                    yield t
            except idlib.exc.RemoteError as e:
                if t is None:
                    # we already logged this error during id dereferencing
                    return
            # t is the last triple yielded; its subject is the doi node
            ds, _, _ = t
            try:
                pioid = value.dereference(asType=idlib.Pio)
                s = self.c.l(pioid)
                yield ds, TEMP.dereferencesTo, s
                yield s, TEMP.hasDoi, ds
            except idlib.exc.MalformedIdentifierError as e:
                log.warning(e)
            # doi branch is complete either way; pio triples come from
            # the dereferenced id's own record, not this call
            return
        else:
            try:
                pioid = idlib.Pio( value )  # FIXME :/ should be handled in Pio directly probably?
            except idlib.exc.MalformedIdentifierError as e:
                logd.warning(e)
                return
    else:
        pioid = value

    try:
        # prefer the canonical integer api uri for the subject when available
        pioid_int = pioid.uri_api_int
        s = self.c.l(pioid_int)
        yield from pioid_int.triples_gen
        # FIXME needs to be a pipeline so that we can export errors
        try:
            data = pioid.data()
        except (OntId.BadCurieError, idlib.exc.MalformedIdentifierError) as e:
            loge.error(e)  # FIXME export errors ...
            data = None
    except idlib.exc.RemoteError as e:
        # FIXME sandbox violation
        loge.exception(e)
        # fall back to the original id as subject; no data retrievable
        s = self.c.l(pioid)
        data = None

    yield s, rdf.type, sparc.Protocol
    if data:
        yield s, rdfs.label, rdflib.Literal(pioid.label)
        nsteps = len(data['steps'])
        yield s, TEMP.protocolHasNumberOfSteps, rdflib.Literal(nsteps)
def mkval(cell):
    """Resolve a technique cell to an ontology term via its hyperlink.

    TEMP-prefixed identifiers are logged (they lack a proper curie) but
    still converted.  Cells without a hyperlink are logged as unhandled
    and returned as the raw cell value.
    """
    link = cell.hyperlink
    if link is None:
        logd.warning(f'unhandled technique {cell.value}')
        return cell.value
    ont_id = OntId(link)
    if ont_id.prefix == 'TEMP':
        logd.warning(f'{cell.value} -> {ont_id!r}')
    return ont_id.asTerm()
def triples(self):
    """Generate RDF triples for this graph and all of its resources.

    Yields the graph typing triple first, then delegates to the class
    named in each resource blob.  Blobs without a 'class' key and the
    Graph blob itself are skipped with a warning.
    """
    self.iri = rdflib.URIRef(
        f'https://apinatomy.org/uris/models/{self.id}')
    yield self.iri, rdf.type, readable.Graph
    # rid instead of id: avoid shadowing the builtin
    for rid, blob in self.resources.items():
        if 'class' not in blob:
            # include the resource id so the offending blob can be located,
            # consistent with the other triples() implementation
            logd.warning(f'no class in\n{blob!r} for {rid}')
            continue
        elif blob['class'] == 'Graph':
            log.warning('Graph is in resources itself')
            continue
        yield from getattr(self, blob['class'])(blob, self.context).triples()
def mkval(cell):
    """Coerce a cell to a Pio (preferred) or Doi identifier.

    The hyperlink is tried first; when absent, the cell value is used as
    the candidate instead (if truthy).  If neither identifier class can
    parse the candidate, warn and return the raw cell value.
    """
    value = cell.value
    candidate = cell.hyperlink
    if candidate is None and value:
        candidate = value
    if candidate is not None:
        for id_class in (idlib.Pio, idlib.Doi):
            try:
                return id_class(candidate)
            except idlib.exc.IdlibError:
                continue
    logd.warning(f'unhandled value {cell.value}')
    return value
def validate_path_json_metadata(cls, path_meta_blob):
    """Validate per-file metadata records in *path_meta_blob*['data'].

    Assigns each record a 'status' of 'banned', 'known', or 'unknown'
    based on its basename and mimetype, accumulating any errors via
    HasErrors and embedding them back into the blob at the end.
    """
    from sparcur.core import HasErrors  # FIXME
    he = HasErrors(pipeline_stage=cls.__name__ + '.validate_path_json_metadata')
    # mimetype -> status lookup table; suffixes currently unused here
    mimetypes, suffixes = cls._file_type_status_lookup()  # SIGH this overhead is 2 function calls and a branch
    for i, path_meta in enumerate(path_meta_blob['data']):
        if path_meta['basename'] in cls._banned_basenames:
            msg = f'illegal file detect {path_meta["basename"]}'
            dsrp = path_meta['dataset_relative_path']
            if he.addError(msg, path=dsrp, json_path=('data', i)):
                # addError returns truthy only the first time; avoid dup logs
                logd.error(msg)
            status = 'banned'
            path_meta['status'] = status
            continue

        if 'magic_mimetype' in path_meta and 'mimetype' in path_meta:
            # FIXME NOT clear whether magic_mimetype should be used by itself
            # usually magic and file extension together work, magic by itself
            # can give some completely bonkers results
            source = 'magic_mimetype'
            mimetype = path_meta['magic_mimetype']
            muggle_mimetype = path_meta['mimetype']
            if mimetype != muggle_mimetype:
                msg = f'mime types do not match {mimetype} != {muggle_mimetype}'
                dsrp = path_meta['dataset_relative_path']
                if he.addError(msg, path=dsrp, json_path=('data', i)):
                    log.error(msg)
        elif 'magic_mimetype' in path_meta:
            source = 'magic_mimetype'
            mimetype = path_meta['magic_mimetype']
        elif 'mimetype' in path_meta:
            source = 'mimetype'
            mimetype = path_meta['mimetype']
        else:
            # no mimetype info at all; source is never read in this case
            mimetype = None

        if mimetype is not None:
            try:
                status = mimetypes[mimetype]
                if status == 'banned':
                    msg = f'banned mimetype detected {mimetype}'
                    dsrp = path_meta['dataset_relative_path']
                    if he.addError(msg, path=dsrp, json_path=('data', i, source)):
                        logd.error(msg)
            except KeyError as e:
                # not in the table -> treat as known but record it once
                status = 'known'
                if mimetype not in cls._unclassified_mimes:
                    cls._unclassified_mimes.add(mimetype)
                    log.info(f'unclassified mimetype {mimetype}')
        else:
            status = 'unknown'
            dsrp = path_meta['dataset_relative_path']
            if isinstance(dsrp, str):
                if not dsrp:
                    msg = f'FIXME top level folder needs a mimetype!'
                else:
                    msg = f'unknown mimetype {path_meta["basename"]}'
            else:
                # dsrp is a path object; record the unknown suffix combo
                msg = f'unknown mimetype {"".join(dsrp.suffixes)}'
                cls._unknown_suffixes.add(tuple(dsrp.suffixes))

            if he.addError(msg, path=dsrp, json_path=('data', i)):
                logd.warning(msg)

        path_meta['status'] = status

    if he._errors_set:
        he.embedErrors(path_meta_blob)