def graph(self):
    """Lazily built OntGraph for this object's JSON-LD serialization.

    The graph is constructed once from ``self.asJsonLd()``, populated with
    curies and the ontology header, then cached on ``self._graph``.
    """
    try:
        return self._graph
    except AttributeError:
        pass  # first access: build and cache below

    graph = populateFromJsonLd(OntGraph(), self.asJsonLd())
    OntCuries.populate(graph)
    self.populateHeader(graph)
    self._graph = graph
    return self._graph
def export_protcur(self, dump_path, *hypothesis_groups, no_network=False):
    """Export protcur annotations to ``dump_path`` as JSON and TTL.

    Writes ``protcur.json`` and ``protcur.ttl`` under ``dump_path`` and
    updates the ``LATEST_PARTIAL`` / ``LATEST`` symlinks in its parent.
    Returns the exported blob (dict with @context, meta, prov, @graph).
    """
    # FIXME no_network passed in here is dumb
    #if (self.latest and  # FIXME NOTE this only points to the latest integrated release
        #self.latest_protcur_path.exists()):
        #blob_protcur = self.latest_protocols
    #else:
    pipeline = pipes.ProtcurPipeline(*hypothesis_groups, no_network=no_network)
    # FIXME NOTE this does not do the identifier expansion pass
    protcur = pipeline.data
    context = {
        **sc.base_context,
        **sc.protcur_context,
    }
    for f in ('meta', 'subjects', 'samples', 'contributors'):
        # guard membership before popping: these keys are not guaranteed to
        # be present in the merged context (matches the newer
        # export_protcur variant, which notes they are no longer included
        # in the context directly)
        if f in context:
            context.pop(f)  # FIXME HACK meta @graph for datasets

    ontology_header = {  # FIXME should probably not be added here since it is obscure ...
        '@id': 'https://cassava.ucsd.edu/sparc/ontologies/protcur.ttl',
        '@type': 'owl:Ontology',
    }
    protcur.append(ontology_header)

    blob_protcur = {  # FIXME this should not be defined here so confusing that it is not with the pipeline ...
        '@context': context,
        'meta': {'count': len(protcur)},  # FIXME adjust to structure
        'prov': {'timestamp_export_start': self.timestamp,
                 'export_system_identifier': Path.sysid,
                 'export_hostname': gethostname(),},
        '@graph': protcur,  # FIXME regularize elements ?
    }

    dump_path.mkdir(parents=True, exist_ok=True)
    # FIXME TODO make these latest paths accessible
    # probably by splitting protcur export out into
    # its own class
    latest_path = dump_path.parent / 'LATEST'
    latest_partial_path = dump_path.parent / 'LATEST_PARTIAL'
    fn = dump_path / 'protcur.json'
    with open(fn, 'wt') as f:
        json.dump(blob_protcur, f, sort_keys=True, indent=2, cls=JEncode)

    # partial link first: json is written but ttl is not yet
    symlink_latest(dump_path, latest_partial_path)
    g = populateFromJsonLd(OntGraph(), fn).write(fn.with_suffix('.ttl'))
    symlink_latest(dump_path, latest_path)
    return blob_protcur
def export_other_formats(self, dump_path, filepath_json, blob_ir, blob_export_json, *rest):
    """Run the non-JSON exports (jsonld, identifier metadata, rdf, protcur,
    xml, disco) for an already-exported curation run.

    ``rest`` unpacks to (summary, previous_latest, previous_latest_datasets).
    Ordering matters: protcur export must run after rdf export (see FIXME).
    """
    summary, previous_latest, previous_latest_datasets = rest
    dataset_blobs = blob_ir['datasets']

    # jsonld
    blob_export_jsonld = self.export_jsonld(filepath_json, blob_export_json)

    # identifier metadata
    blob_id_met = self.export_identifier_metadata(dump_path, previous_latest, dataset_blobs)
    teim = self.export_identifier_rdf(dump_path, blob_id_met)

    # rdf
    teds = self.export_rdf(dump_path, previous_latest_datasets, dataset_blobs)
    tes = ex.TriplesExportSummary(blob_ir, teds=teds + [teim])

    # protcur
    # FIXME running after because rdf export side effects anno sync
    blob_protcur = self.export_protcur(dump_path, 'sparc-curation')  # FIXME  # handle orthogonally
    blob_protcur_path = dump_path / 'protcur.json'  # FIXME SIGH
    populateFromJsonLd(tes.graph, blob_protcur_path)  # this makes me so happy

    with open(filepath_json.with_suffix('.ttl'), 'wb') as f:
        f.write(tes.ttl)

    # protocol  # handled orthogonally ??
    #blob_protocol = self.export_protocols(dump_path, dataset_blobs, blob_protcur)

    # xml
    self.export_xml(filepath_json, dataset_blobs)

    # disco
    self.export_disco(filepath_json, dataset_blobs, teds)
def main():
    """Build ``dandi.ttl`` from the ``*.yaml`` term files under the cwd.

    Loads every yaml file into one graph, writes the raw version, then
    expands curie-valued Literal objects of the schema/rdfs/rdf linking
    predicates into full IRIs and writes the final version.
    """
    dandi_terms_path = aug.LocalPath.cwd()
    g = OntGraph()
    # plain loop instead of a throwaway list comprehension: this is
    # executed purely for its side effect on g
    for p in dandi_terms_path.rglob('*.yaml'):
        populateFromJsonLd(g, path_yaml(p))

    g.write('dandi-raw.ttl')

    # collect triples whose objects may be curie Literals rather than IRIs
    remove = [(s, p, o)
              for p in (schema.domainIncludes,
                        schema.rangeIncludes,
                        rdfs.subClassOf,
                        rdf.type)
              for s, o in g[:p:]]
    # expand Literal curies via the namespace manager; pass non-Literals through
    add = [(s, p, (g.namespace_manager.expand(o.toPython())
                   if isinstance(o, rdflib.Literal)
                   else o))
           for s, p, o in remove]
    for t in remove:
        g.remove(t)
    for t in add:
        g.add(t)

    # TODO ontology metadata header section
    g.write('dandi.ttl')
def export_protcur(self, dump_path, *hypothesis_groups,
                   rerun_protcur_export=False,
                   # FIXME no_network passed in here is dumb
                   no_network=False,
                   # FIXME direct= is a hack
                   direct=False):
    """Export protcur annotations to ``dump_path`` as JSON and TTL.

    When not called directly and the export base differs from the protcur
    export base, delegates to a fresh ExportProtcur instance and returns
    ``(blob, export)``.  Otherwise returns the blob alone, short-circuiting
    to the latest export when no newer annotations exist and a rerun was
    not requested.

    Raises ValueError when no annotations have been fetched.
    """
    if not direct and self.export_base != self.export_protcur_base:
        # workaround to set the correct export base path
        delegate_args = {**self._args}
        delegate_args['export_base'] = self.export_protcur_base
        export = ExportProtcur(**delegate_args)
        blob = export.export_protcur(export.dump_path,
                                     *hypothesis_groups,
                                     no_network=no_network)
        return blob, export

    pipeline = pipes.ProtcurPipeline(*hypothesis_groups, no_network=no_network)
    annos = pipeline.load()
    if not annos:
        msg = ('No annos. Did you remember to run\n'
               'python -m sparcur.simple.fetch_annotations')
        raise ValueError(msg)

    if self.latest_export_path.exists():
        # FIXME this only points to the latest integrated release
        # which is not what we want, we need the latest protcur to be independent
        #self.latest and
        previous_blob = self.latest_export
        started_at = previous_blob['prov']['timestamp_export_start']
        last_updated = max(a.updated for a in annos).replace('+00:00', 'Z')
        have_new_annos = started_at < last_updated  # <= is pretty much impossible
        if not have_new_annos and not rerun_protcur_export:
            return previous_blob

    # FIXME NOTE this does not do the identifier expansion pass
    protcur = pipeline._make_blob(annos=annos)
    context = {
        **sc.base_context,
        **sc.protcur_context,
    }
    # subjects samples and contributors no longer included in context directly
    for key in ('meta', 'subjects', 'samples', 'contributors'):
        if key in context:
            context.pop(key)  # FIXME HACK meta @graph for datasets

    ontology_header = {  # FIXME should probably not be added here since it is obscure ...
        '@id': 'https://cassava.ucsd.edu/sparc/ontologies/protcur.ttl',
        '@type': 'owl:Ontology',
    }
    protcur.append(ontology_header)

    blob_protcur = {  # FIXME this should not be defined here so confusing that it is not with the pipeline ...
        '@context': context,
        'meta': {'count': len(protcur)},  # FIXME adjust to structure
        'prov': {'timestamp_export_start': self.timestamp,
                 'export_system_identifier': Path.sysid,
                 'export_hostname': gethostname(),},
        '@graph': protcur,  # FIXME regularize elements ?
    }

    dump_path.mkdir(parents=True, exist_ok=True)
    # FIXME TODO make these latest paths accessible
    # probably by splitting protcur export out into
    # its own class
    latest_path = dump_path.parent / 'LATEST'
    latest_partial_path = dump_path.parent / 'LATEST_PARTIAL'
    json_path = dump_path / 'protcur.json'
    with open(json_path, 'wt') as f:
        json.dump(blob_protcur, f, sort_keys=True, indent=2, cls=JEncode)

    symlink_latest(dump_path, latest_partial_path)
    g = populateFromJsonLd(OntGraph(), json_path).write(json_path.with_suffix('.ttl'))
    symlink_latest(dump_path, latest_path)
    return blob_protcur