def format(self, value):
    """Serialize *value* by delegating to ``utils.isoformat``."""
    return utils.isoformat(value)
def timestamp(ts):
    """Wrap *ts* as an ``xsd:dateTime`` RDF literal.

    NOTE(review): calls bare ``isoformat`` while the method above uses
    ``utils.isoformat`` — presumably imported directly elsewhere in the
    file; confirm against the import block.
    """
    iso_string = isoformat(ts)
    return Literal(iso_string, datatype=XSD.dateTime)
def describe_dataset(data, created_at):
    """Build a VoID description of the dataset and return it as Turtle.

    Starts from the ``void-stub.ttl`` template next to this module, then
    fills in, per class partition, the number of matching entities; per
    linkset, the number of matching triples; and dataset-level metadata
    (modification time, provenance link, total triple count, and the
    contributors recorded in merged patch requests).

    :param data: the dataset as a JSON-LD-serializable mapping; its
        ``@context['@base']`` is used to build the provenance URL.
    :param created_at: timestamp recorded as ``dcterms:modified``.
    :return: the description graph serialized in Turtle format.
    """
    db_cursor = database.get_db().cursor()
    # Everyone who created or updated a merged patch (skipping the
    # initial patch, id 1) is listed as a contributor below.
    contributor_rows = db_cursor.execute('''
        SELECT DISTINCT created_by, updated_by
        FROM patch_request
        WHERE merged = 1 AND id > 1''').fetchall()

    stub_path = os.path.join(os.path.dirname(__file__), 'void-stub.ttl')
    with open(stub_path) as stub_file:
        description_g = Graph().parse(file=stub_file, format='turtle')

    ns = Namespace(description_g.value(
        predicate=RDF.type, object=VOID.DatasetDescription))
    dataset_g = Graph().parse(data=json.dumps(data), format='json-ld')

    def describe(p, o):
        # Attach a statement about the dataset resource itself.
        description_g.add((ns.d, p, o))

    # Count entities of each partition's class within our namespace.
    for partition in description_g.objects(
            subject=ns.d, predicate=VOID.classPartition):
        cls_uri = description_g.value(
            subject=partition, predicate=VOID['class'])
        matches = dataset_g.query('''
            SELECT DISTINCT ?s
            WHERE {
              ?s a <%s> .
              FILTER (STRSTARTS(STR(?s), "%s"))
            }''' % (cls_uri, ns))
        description_g.add((
            partition,
            VOID.entities,
            Literal(len(matches), datatype=XSD.integer)))

    # Count outbound links into each linkset's target URI space.
    for linkset in description_g.subjects(
            predicate=RDF.type, object=VOID.Linkset):
        target = description_g.value(
            subject=linkset, predicate=VOID.objectsTarget)
        link_predicate = description_g.value(
            subject=linkset, predicate=VOID.linkPredicate)
        uri_space = description_g.value(
            subject=target, predicate=VOID.uriSpace).value
        links = dataset_g.query('''
            SELECT ?s ?p ?o
            WHERE {
              ?s <%s> ?o .
              FILTER (STRSTARTS(STR(?o), "%s")) .
            }''' % (link_predicate, uri_space))
        description_g.add((
            linkset,
            VOID.triples,
            Literal(len(links), datatype=XSD.integer)))

    describe(
        DCTERMS.modified,
        Literal(utils.isoformat(created_at), datatype=XSD.dateTime))
    describe(
        DCTERMS.provenance,
        URIRef(utils.absolute_url(
            data['@context']['@base'], 'history') + '#changes'))
    describe(
        VOID.triples,
        Literal(len(dataset_g), datatype=XSD.integer))
    for row in contributor_rows:
        describe(DCTERMS.contributor, URIRef(row['created_by']))
        if row['updated_by']:
            describe(DCTERMS.contributor, URIRef(row['updated_by']))

    return description_g.serialize(format='turtle')
def history():
    """Return the change history of the dataset as a JSON-LD string.

    Reads all merged patch requests from the database and builds a PROV
    graph: each patch becomes a change activity with start/end times,
    the dataset versions it used and generated, per-entity versions for
    created/updated entities (updates also get ``prov:wasRevisionOf``),
    invalidations for removed entities, and qualified associations for
    the submitting/updating/merging agents. The changes are collected
    into an RDF list at ``#changelog``, and the serialized JSON-LD is
    post-processed so the changelog node sorts first in ``history``.
    """
    graph = Graph()
    changelog = Collection(graph, URIRef("#changelog"))
    db_cursor = database.get_db().cursor()

    merged_patches = db_cursor.execute(
        """
        SELECT id, created_at, created_by, updated_by, merged_at,
               merged_by, applied_to, resulted_in, created_entities,
               updated_entities, removed_entities
        FROM patch_request
        WHERE merged = 1
        ORDER BY id ASC
        """
    ).fetchall()

    for row in merged_patches:
        change = URIRef("#change-{}".format(row["id"]))
        patch = URIRef("#patch-{}".format(row["id"]))
        graph.add((patch, FOAF.page, PERIODO[
            identifier.prefix(url_for("patch", id=row["id"]))]))
        graph.add((change, PROV.startedAtTime, Literal(
            utils.isoformat(row["created_at"]), datatype=XSD.dateTime)))
        graph.add((change, PROV.endedAtTime, Literal(
            utils.isoformat(row["merged_at"]), datatype=XSD.dateTime)))

        # Link the before/after dataset versions to the abstract dataset.
        dataset = PERIODO[identifier.prefix(url_for("abstract_dataset"))]
        version_in = PERIODO[identifier.prefix(
            url_for("abstract_dataset", version=row["applied_to"]))]
        version_out = PERIODO[identifier.prefix(
            url_for("abstract_dataset", version=row["resulted_in"]))]
        graph.add((version_in, PROV.specializationOf, dataset))
        graph.add((version_out, PROV.specializationOf, dataset))
        graph.add((change, PROV.used, version_in))
        graph.add((change, PROV.used, patch))
        graph.add((change, PROV.generated, version_out))

        def add_entity_version(entity_id):
            # Record that this change generated a new version of entity_id.
            entity = PERIODO[entity_id]
            entity_version = PERIODO[
                entity_id + "?version={}".format(row["resulted_in"])]
            graph.add((entity_version, PROV.specializationOf, entity))
            graph.add((change, PROV.generated, entity_version))
            return entity_version

        for entity_id in json.loads(row["created_entities"]):
            add_entity_version(entity_id)
        for entity_id in json.loads(row["updated_entities"]):
            new_version = add_entity_version(entity_id)
            prior_version = PERIODO[
                entity_id + "?version={}".format(row["applied_to"])]
            graph.add((new_version, PROV.wasRevisionOf, prior_version))
        for entity_id in json.loads(row["removed_entities"]):
            graph.add((change, PROV.invalidated, PERIODO[entity_id]))

        # Qualified associations for each agent role; the bootstrap
        # loader is not a real agent and is skipped.
        roles = (("created_by", "submitted"),
                 ("updated_by", "updated"),
                 ("merged_by", "merged"))
        for field, term in roles:
            if row[field] == "initial-data-loader":
                continue
            agent = URIRef(row[field])
            association = URIRef("#patch-{}-{}".format(row["id"], term))
            graph.add((change, PROV.wasAssociatedWith, agent))
            graph.add((change, PROV.qualifiedAssociation, association))
            graph.add((association, PROV.agent, agent))
            graph.add((association, PROV.hadRole, PERIODO[
                identifier.prefix(url_for("vocab") + "#" + term)]))

        changelog.append(change)

    def ordering(o):
        # The changelog node must sort before every "#..." id.
        return " " if o["@id"] == "#changelog" else o["@id"]

    # NOTE(review): .decode() implies serialize() returns bytes here —
    # this is rdflib-version-dependent; confirm against the pinned version.
    jsonld = json.loads(
        graph.serialize(format="json-ld", context=CONTEXT).decode("utf-8"))
    jsonld["history"] = sorted(jsonld["history"], key=ordering)
    return json.dumps(jsonld, sort_keys=True)