def __init__(self, default_ns, namespaces=None):
    """Specify a default namespace and associated sub namespaces.

    :param default_ns: URI used as the document's default namespace.
    :param namespaces: optional iterable of sub-namespace names to register
        under the default namespace. Defaults to none.

    Fix: the original used a mutable default argument (``namespaces=[]``),
    which is shared across all calls that omit the parameter.
    """
    self.document = prov.ProvDocument()
    self.default_ns = default_ns
    self.document.set_default_namespace(self.default_ns)
    # Preserve the original aliasing behavior when a list is passed in.
    self.namespaces = namespaces if namespaces is not None else []
    self.subspaces = {}
    for namespace in self.namespaces:
        # add_namespace is provided by the enclosing class (not visible here).
        self.subspaces[namespace] = self.add_namespace(
            self.default_ns, namespace)
def compatible_operators(draw, args):
    """Draw a FakeOperator wired to a mocked session.

    The session's PROV document uses the CCFE default namespace, and the
    operator's ARGUMENT_TYPES is drawn to be compatible with *args*.
    """
    provenance = prov.ProvDocument()
    provenance.set_default_namespace("https://ccfe.ukaea.uk/")
    mock_session = MagicMock(
        prov=provenance,
        agent=provenance.agent("session_agent"),
        session=provenance.activity("session_activity"),
    )
    result = FakeOperator(mock_session)
    result.ARGUMENT_TYPES = draw(compatible_datatype_lists(args))
    return result
def instantiate_template(prov_doc, instance_dict):
    '''
    Instantiate a prov template based on a dictionary setting for
    the prov template variables.

    Supported:
        entity and attribute var: matching; multiple entity expansion

    Unsupported by now:
        linked entities; multiple attribute expansion

    To Do: Handle core template expansion rules as described in
    https://ieeexplore.ieee.org/document/7909036/ and maybe add additional
    expansion/composition rules for templates useful to compose ENES
    community workflow templates.

    Args:
        prov_doc (ProvDocument): input prov document template
        instance_dict (dict): match dictionary
    '''
    global GLOBAL_UUID_DEF_NS
    # If the template itself declares the vargen-UUID namespace, prefer it
    # over the module-level default.
    for namespace in prov_doc.namespaces:
        if namespace.prefix == GLOBAL_UUID_DEF_NS_PREFIX:
            GLOBAL_UUID_DEF_NS = namespace
    # Expand the top-level records, then each bundle under its matched id.
    new_doc = set_namespaces(prov_doc.namespaces, prov.ProvDocument())
    new_doc = add_records(prov_doc, new_doc, instance_dict)
    for bundle in list(prov_doc.bundles):
        bundle_id = match(bundle.identifier, instance_dict, True)
        expanded = new_doc.bundle(bundle_id)
        add_records(bundle, expanded, instance_dict)
    return new_doc
def test_serialization_example_7(self):
    """Serialize example 7 (a basic activity) and compare against the
    reference XML file."""
    doc = prov.ProvDocument()
    doc.add_namespace(*EX_NS)
    attributes = [
        (prov.PROV_TYPE, prov.Literal("ex:edit", prov.XSD_QNAME)),
        ("ex:host", "server.example.org"),
    ]
    doc.activity("ex:a1", "2011-11-16T16:05:00", "2011-11-16T16:06:00",
                 attributes)
    with io.BytesIO() as buffer:
        doc.serialize(format='xml', destination=buffer)
        compare_xml(os.path.join(DATA_PATH, "example_07.xml"), buffer)
def export_provo(trial: Trial, args, extension):
    """Export the provenance of *trial* as a PROV-O document.

    Sections (definitions, deployment, execution) are emitted into bundles
    according to the flags on *args*; each bundle is then linked to the
    trial collection and its generating activity, and the document is
    persisted via persist_document.
    """
    document = provo.ProvDocument()
    document.set_default_namespace(args.defaultns)

    print_msg("Exporting provenance of trial {} in PROV-O format"
              .format(trial.id), force=True)

    document.collection(
        "trial{}Prov".format(trial.id),
        [(provo.PROV_LABEL, "provenance collected by noworfklow")])
    basic_info.export(trial, document)

    def _bundle(existing, section):
        # Reuse an already-created bundle for this section, otherwise open
        # a new one; the default namespace is (re)applied either way, as in
        # the original per-branch code.
        if existing is None:
            existing = document.bundle(
                "trial{}{}Prov".format(trial.id, section))
        existing.set_default_namespace(args.defaultns)
        return existing

    bundle_def = bundle_dep = bundle_exec = None

    if args.function_defs:
        bundle_def = _bundle(bundle_def, "Definition")
        function_defs.export(trial, bundle_def)
    if args.modules:
        bundle_dep = _bundle(bundle_dep, "Deployment")
        module_deps.export(trial, bundle_dep)
    if args.environment:
        bundle_dep = _bundle(bundle_dep, "Deployment")
        environment_attrs.export(trial, bundle_dep)
    if args.function_activations:
        bundle_exec = _bundle(bundle_exec, "Execution")
        function_activations.export(trial, bundle_exec, args.recursion_depth)
    if args.file_accesses:
        bundle_exec = _bundle(bundle_exec, "Execution")
        file_accesses.export(trial, bundle_exec)

    # Link every bundle that was actually created.
    for bundle in (bundle_def, bundle_dep, bundle_exec):
        if bundle:
            document.hadMember("trial{}Prov".format(trial.id),
                               bundle.identifier)
            document.wasGeneratedBy(bundle.identifier,
                                    "trial{}Execution".format(trial.id), None)

    persist_document(document, args.file, args.format, extension,
                     args.hide_elem_attr, args.hide_rel_attr, args.graph_dir)
def deserialize(self, stream, rdf_format='trig', **kwargs):
    """
    Deserialize from the `PROV-O <https://www.w3.org/TR/prov-o/>`_
    representation to a :class:`~prov.model.ProvDocument` instance.

    :param stream: Input data.
    :param rdf_format: The RDF format of the input data, default: TRiG.
    """
    # Forward all caller kwargs, overriding/setting the RDF format.
    parse_args = dict(kwargs, format=rdf_format)
    graph = ConjunctiveGraph()
    graph.parse(stream, **parse_args)
    result = pm.ProvDocument()
    self.document = result
    self.decode_document(graph, result)
    return result
def test_serialization_example_6(self):
    """Serialize example 6 (a simple entity description) and compare
    against the reference XML file."""
    doc = prov.ProvDocument()
    ex = doc.add_namespace(*EX_NS)
    doc.add_namespace(*EX_TR)
    doc.entity(
        "tr:WD-prov-dm-20111215",
        ((prov.PROV_TYPE, ex["Document"]), ("ex:version", "2")))
    with io.BytesIO() as buffer:
        doc.serialize(format='xml', destination=buffer)
        compare_xml(os.path.join(DATA_PATH, "example_06.xml"), buffer)
def test_deserialization_example_6(self):
    """Deserialize example 6 (a simple entity description) and compare
    against a document built in code."""
    parsed = prov.ProvDocument.deserialize(
        source=os.path.join(DATA_PATH, "example_06.xml"), format="xml")
    expected = prov.ProvDocument()
    ex = expected.add_namespace(*EX_NS)
    expected.add_namespace(*EX_TR)
    expected.entity(
        "tr:WD-prov-dm-20111215",
        ((prov.PROV_TYPE, ex["Document"]), ("ex:version", "2")))
    self.assertEqual(parsed, expected)
def test_deserialization_example_7(self):
    """Deserialize example 7 (a simple activity description) and compare
    against a document built in code."""
    parsed = prov.ProvDocument.deserialize(
        source=os.path.join(DATA_PATH, "example_07.xml"), format="xml")
    expected = prov.ProvDocument()
    ex = Namespace(*EX_NS)
    expected.add_namespace(ex)
    expected.activity(
        "ex:a1",
        "2011-11-16T16:05:00",
        "2011-11-16T16:06:00",
        [(prov.PROV_TYPE, QualifiedName(ex, "edit")),
         ("ex:host", "server.example.org")])
    self.assertEqual(parsed, expected)
def instantiate_template(prov_doc, instance_dict):
    '''
    Instantiate a prov template based on a dictionary
    setting for the prov template variables

    Supported:
        entity and attribute var: matching
        multiple entity expansion

    Unsupported by now:
        linked entities
        multiple attribute expansion

    To Do: Handle core template expansion rules as described in
        https://ieeexplore.ieee.org/document/7909036/
        and maybe add additional expansion/composition rules for
        templates useful to compose ENES community workflow templates

    Args:
        prov_doc (ProvDocument): input prov document template
        instance_dict (dict): match dictionary

    Returns:
        ProvDocument: the instantiated document
    '''
    # Fix: the original used Python 2 print statements
    # (e.g. ``print "iterating bundles"``), which are a SyntaxError in
    # Python 3; converted to print() calls with identical output.
    new_doc = set_namespaces(prov_doc.namespaces, prov.ProvDocument())
    new_doc = add_records(prov_doc, new_doc, instance_dict)
    blist = list(prov_doc.bundles)
    print("iterating bundles")
    # Expand each bundle of the template under its matched identifier.
    for bundle in blist:
        id1 = match(bundle.identifier, instance_dict, True)
        print(id1)
        print("---")
        new_bundle = new_doc.bundle(id1)
        new_bundle = add_records(bundle, new_bundle, instance_dict)
    return new_doc
def export_diff(diff: DiffModel, args, extension):
    """Export the comparison provenance of two trials in PROV-O format.

    The optional sections (modules, environment, file accesses) are
    included according to the flags on *args*, then the document is
    persisted via persist_document.
    """
    document = provo.ProvDocument()
    document.set_default_namespace(args.defaultns)
    print_msg(
        "Exporting comparison provenance of trials {} and {} in PROV-O format".
        format(diff.trial1.id, diff.trial2.id),
        force=True)
    basic_info.diff(diff, document)
    # Optional sections, mirroring the command-line flags.
    sections = (
        (args.modules, module_deps),
        (args.environment, environment_attrs),
        (args.file_accesses, file_accesses),
    )
    for enabled, exporter in sections:
        if enabled:
            exporter.diff(diff, document)
    persist_document(document, args.file, args.format, extension,
                     args.hide_elem_attr, args.hide_rel_attr, args.graph_dir)
def __init__(self, attributes=None, empty_graph=False, uuid=None):
    """
    Default constructor, creates document and adds Project activity to
    graph with optional attributes.

    :param attributes: optional dictionary of attributes to add
    :param empty_graph: if set to True, creates empty graph with no
        namespaces besides Prov defaults
    :param uuid: if uuid is not None then use supplied uuid for project
        instead of generating one (for reading nidm docs)
    """
    if empty_graph:
        self.graph = pm.ProvDocument()
    else:
        self.graph = Constants.NIDMDocument(namespaces=Constants.namespaces)

    # Use the supplied UUID when re-loading an existing document,
    # otherwise mint a fresh one.
    self._uuid = getUUID() if uuid is None else uuid

    # Fix: the parent-constructor call was duplicated verbatim in both
    # branches of the uuid check; a single call after setting self._uuid
    # is equivalent.
    super(Project, self).__init__(
        self.graph,
        pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI),
                         self.get_uuid()),
        attributes)

    # add record to graph
    self.graph._add_record(self)
    # create empty sessions list
    self._sessions = []

    # prov toolbox doesn't like 2 attributes with PROV_TYPE in 1
    # add_attributes call so split them...
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
def gen_prov_file():
    """Creates provenance file for the data schema.

    Builds a small PROV document (entity, activity, agent and their
    relations) and prints its PROV-N serialization.
    """
    doc = prov.ProvDocument()
    doc.set_default_namespace('http://dapsi-example.org/')
    doc.add_namespace('ex', 'http://dapsi-example.org/')
    schema_entity = doc.entity('e2', (
        (prov.PROV_TYPE, "File"),
        ('ex:path', "/amazon.sql"),
        ('ex:creator', "Miguel-Angel Sicilia"),
        ('ex:content', "Amazon user profile schema"),
    ))
    edit_activity = doc.activity('a1', datetime.datetime.now(), None,
                                 {prov.PROV_TYPE: "edit"})
    # References can be qnames or ProvRecord objects themselves
    doc.wasGeneratedBy(schema_entity, edit_activity, None,
                       {'ex:fct': "run_transformer"})
    doc.wasAssociatedWith('a1', 'ag2', None, None,
                          {prov.PROV_ROLE: "author"})
    doc.agent('ag2', {
        prov.PROV_TYPE: 'prov:Person',
        'ex:name': "Miguel-Angel Sicilia"
    })
    print(doc.get_provn())
def test_deserialization_example_04_and_05(self):
    """
    Example 4 and 5 have a different type specification. They use an
    xsi:type as an attribute on an entity. This can be read but if written
    again it will become an XML child element. This is semantically
    identical but cannot be tested with a round trip.
    """
    # Example 4: a single prov:type child plus an xsi:type attribute on
    # the entity element.
    xml_string = """
    <prov:document
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:xsd="http://www.w3.org/2001/XMLSchema"
        xmlns:prov="http://www.w3.org/ns/prov#"
        xmlns:ex="http://example.com/ns/ex#"
        xmlns:tr="http://example.com/ns/tr#">
      <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
        <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
      </prov:entity>
    </prov:document>
    """
    with io.StringIO() as xml:
        xml.write(xml_string)
        xml.seek(0, 0)
        actual_document = prov.ProvDocument.deserialize(source=xml,
                                                        format="xml")
    expected_document = prov.ProvDocument()
    ex_ns = Namespace(*EX_NS)
    expected_document.add_namespace(ex_ns)
    expected_document.add_namespace(*EX_TR)
    # The xsi:type attribute is mapped to a proper PROV attribute.
    expected_document.entity(
        "tr:WD-prov-dm-20111215",
        (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Plan"]),
        ),
    )
    self.assertEqual(actual_document, expected_document, "example_04")
    # Example 5: same entity, but the inferred prov:Plan and prov:Entity
    # types are also spelled out as prov:type children.
    xml_string = """
    <prov:document
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:xsd="http://www.w3.org/2001/XMLSchema"
        xmlns:prov="http://www.w3.org/ns/prov#"
        xmlns:ex="http://example.com/ns/ex#"
        xmlns:tr="http://example.com/ns/tr#">
      <prov:entity prov:id="tr:WD-prov-dm-20111215" xsi:type="prov:Plan">
        <prov:type xsi:type="xsd:QName">ex:Workflow</prov:type>
        <prov:type xsi:type="xsd:QName">prov:Plan</prov:type> <!-- inferred -->
        <prov:type xsi:type="xsd:QName">prov:Entity</prov:type> <!-- inferred -->
      </prov:entity>
    </prov:document>
    """
    with io.StringIO() as xml:
        xml.write(xml_string)
        xml.seek(0, 0)
        actual_document = prov.ProvDocument.deserialize(source=xml,
                                                        format="xml")
    expected_document = prov.ProvDocument()
    expected_document.add_namespace(*EX_NS)
    expected_document.add_namespace(*EX_TR)
    # The xsi:type attribute is mapped to a proper PROV attribute.
    # NOTE: ex_ns is reused from the example-4 section above.
    expected_document.entity(
        "tr:WD-prov-dm-20111215",
        (
            (prov.PROV_TYPE, QualifiedName(ex_ns, "Workflow")),
            (prov.PROV_TYPE, PROV["Entity"]),
            (prov.PROV_TYPE, PROV["Plan"]),
        ),
    )
    self.assertEqual(actual_document, expected_document, "example_05")
def test_assign_dataset_provenance(args, operator, dataset, replace_equilib):
    """End-to-end check of Operator.assign_provenance on a dataset.

    Builds a PROV document with a mocked session, pre-assigns entities to
    the operator arguments and dataset variables, runs assign_provenance,
    and then verifies every PROV record type the call should create:
    generation, communication, association, attribution, derivation,
    usage, and membership.
    """
    doc = prov.ProvDocument()
    doc.set_default_namespace("https://ccfe.ukaea.uk/")
    session = MagicMock(
        prov=doc,
        agent=doc.agent("session_agent"),
        session=doc.activity("session_activity"),
    )
    operator._session = session
    # Replace each argument's provenance with a known entity so we can
    # trace derivations/usages back to them later.
    for i, arg in enumerate(args):
        if "provenance" in arg.attrs:
            arg.attrs["provenance"] = doc.entity(f"arg{i}")
    contents_prov = {}
    equilib = MagicMock(provenance=doc.entity("equilibrium"),
                        _session=session)
    # Seed the dataset variables with known entities; optionally attach
    # the mocked equilibrium per the replace_equilib fixture.
    for key, var in dataset.data_vars.items():
        if "provenance" in var.attrs:
            var.attrs["provenance"] = var.attrs[
                "partial_provenance"] = doc.entity(f"variable_{key}")
        del var.indica.equilibrium
        if replace_equilib[key]:
            var.indica.equilibrium = equilib
        if "provenance" in var.attrs:
            contents_prov[key] = var.attrs["provenance"]
    operator.validate_arguments(*args)
    # Bracket the call so the recorded generation time can be checked.
    before = datetime.now()
    operator.assign_provenance(dataset)
    after = datetime.now()
    entity = dataset.attrs["provenance"]
    assert "Dataset" in entity.get_attribute(prov.PROV_TYPE)
    # Exactly one generation record for the dataset entity.
    generated_candidates = list(
        filter(
            lambda x: {entity.identifier} == x.get_attribute("prov:entity"),
            doc.get_records(prov.ProvGeneration),
        ))
    assert len(generated_candidates) == 1
    generated = generated_candidates[0]
    assert {entity.identifier} == generated.get_attribute("prov:entity")
    activity_id = next(iter(generated.get_attribute("prov:activity")))
    end_time = next(iter(generated.get_attribute("prov:time")))
    assert before < end_time < after
    # The generating activity must be informed by the session activity.
    comms = list(
        filter(
            lambda x: x.get_attribute("prov:informed") == {activity_id},
            doc.get_records(prov.ProvCommunication),
        ))
    assert len(comms) == 1
    informed = comms[0]
    assert {session.session.identifier
            } == informed.get_attribute("prov:informant")
    # Association: the activity is associated with exactly the session
    # agent and the operator agent (removal checks no duplicates/extras).
    expected_agents = [session.agent.identifier, operator.agent.identifier]
    for a in filter(
            lambda a: a.get_attribute("prov:activity") == {activity_id},
            doc.get_records(prov.ProvAssociation),
    ):
        agent_id = next(iter(a.get_attribute("prov:agent")))
        assert agent_id in expected_agents
        expected_agents.remove(agent_id)
    assert len(expected_agents) == 0
    # Attribution: the dataset entity is attributed to the same two agents.
    expected_agents = [session.agent.identifier, operator.agent.identifier]
    for a in filter(
            lambda x: {entity.identifier} == x.get_attribute("prov:entity"),
            doc.get_records(prov.ProvAttribution),
    ):
        agent_id = next(iter(a.get_attribute("prov:agent")))
        assert agent_id in expected_agents
        expected_agents.remove(agent_id)
    assert len(expected_agents) == 0
    # Derivation: the dataset derives from exactly the argument entities.
    data = [
        arg.attrs["provenance"].identifier for arg in args
        if "provenance" in arg.attrs
    ]
    data2 = list(data)
    for d in filter(
            lambda d: {entity.identifier} == d.get_attribute(
                "prov:generatedEntity"),
            doc.get_records(prov.ProvDerivation),
    ):
        used_id = next(iter(d.get_attribute("prov:usedEntity")))
        assert used_id in data
        data.remove(used_id)
    assert len(data) == 0
    # Usage: the activity used exactly the same argument entities.
    data = data2
    for u in doc.get_records(prov.ProvUsage):
        assert {activity_id} == u.get_attribute("prov:activity")
        entity_id = next(iter(u.get_attribute("prov:entity")))
        assert entity_id in data
        data.remove(entity_id)
    assert len(data) == 0
    # Variables that had provenance keep it; the rest must have gained it.
    for key, var in dataset.data_vars.items():
        if key in contents_prov:
            assert contents_prov[key] == var.attrs["provenance"]
        else:
            assert "provenance" in var.attrs
            contents_prov[key] = var.attrs["provenance"]
    # Check provenance of dataset is collection of provenance of contents
    contents = [c.identifier for c in contents_prov.values()]
    for e in filter(
            lambda e: {entity.identifier} == e.get_attribute("prov:collection"
                                                             ),
            doc.get_records(prov.ProvMembership),
    ):
        contents_id = next(iter(e.get_attribute("prov:entity")))
        assert contents_id in contents
        contents.remove(contents_id)
    assert len(contents) == 0
def __init__(self):
    """Create the PROV document and register all namespaces used in the
    graph (foaf, dcterms, nipype, niiri)."""
    self.g = pm.ProvDocument()
    for namespace in (foaf, dcterms, nipype_ns, niiri):
        self.g.add_namespace(namespace)
#outval='"'+outval+'"^^xsd:datetime' #prepare data for bindings file if ID not in bindfile_dict: bindfile_dict[ID] = dict() bindfile_dict[ID]["value"] = list() bindfile_dict[ID]["type"] = bindmap[col]["type"] if not bindmap[col]["uniqueOnly"] or outval not in bindfile_dict[ ID]["value"]: bindfile_dict[ID]["value"].append(outval) tmpl_NS = prov.Namespace("tmpl", "http://openprovenance.org/tmpl#") bindDoc = prov.ProvDocument() bindDoc.add_namespace("tmpl", "http://openprovenance.org/tmpl#") bindDoc.add_namespace("var", "http://openprovenance.org/var#") #make bindings file, use prov library for this for ID in bindfile_dict: cnt1 = 0 attrs = dict() for a in bindfile_dict[ID]["value"]: #print ID #print a if bindfile_dict[ID]["type"] == "attr": cnt2 = 0 if isinstance(a, list): for b in a: attr = prov.QualifiedName( tmpl_NS, "2dvalue_" + str(cnt1) + "_" + str(cnt2))
class Provenance:
    """Holds the shared PROV document pre-registered with the domain
    namespaces (art, auction, transport, gallery).

    NOTE(review): the document is built once at class-definition time and
    is therefore shared by every user of this class — confirm intentional.
    """

    document = prov.ProvDocument()
    _register = document.add_namespace
    _register('art', 'http://art.com/')
    _register('auction', 'http://auction.com/')
    _register('transport', 'http://transport.com/')
    _register('gallery', 'http://gallery.com/')
    # Drop the helper so the class namespace matches the original exactly.
    del _register
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 20 17:45:33 2017

@author: Mehmet Suzen
"""
import prov.model as prov

document = prov.ProvDocument()

#
# narciprov ontology
#
document.add_namespace('narciprov', 'http://www.purl.org/narci/prov#')

#
# Activities
#
for _activity_id in (
        'narciprov:CraniotomyProcedure',
        'narciprov:TwoPhotonImaging',
        'narciprov:VisualStimulation',
        'narciprov:RegionClassification',
        'narciprov:TimeSeriesExtract'):
    document.activity(_activity_id)

#
# Entities
#
for _entity_id in (
        'narciprov:Subject',
        'narciprov:CraniotomyProtocol',
        'narciprov:Craniotomy',
        'narciprov:AnaesthesiaProtocol'):
    document.entity(_entity_id)
#print col + " " + bindmap[col]["varname"] + " " + outval bind_dict["var:" + bindmap[col]["varname"]] = outval #outval=row[col] if bindmap[col]["val"] == "literal": outval = '"' + outval + '"' if bindmap[col]["type"] == "attr": outstatement = outstatement + "\ttmpl:2dvalue_0_0 " + str( outval) + " .\n" else: outstatement = outstatement + "\ttmpl:value_0 " + str( outval) + " .\n" rtemplate = rtemplate + outstatement bind_dicts.append(bind_dict) print(rtemplate) provtemplate = prov.ProvDocument() res = provtemplate.deserialize(source="excelProvTemplate.rdf", format="rdf", rdf_format="xml") print(bind_dicts[0]) print(res.serialize(format="rdf")) exp = provconv.instantiate_template(res, bind_dicts[0]) outfile = open("excelProvTemplate_exp.provn", "w") outfile.write(exp.serialize(format="provn")) outfile.close()
def __init__(self, state, filename=None, handle=None, debug=False):
    """Open a rearrangements file for reading or writing.

    :param state: truthy to open for writing, falsy for reading.
    :param filename: path to the tab-separated data file; a companion
        ``<filename>.meta.json`` metadata file is used alongside it.
    :param handle: an already-open file object, used when *filename* is
        not given (its ``.name`` locates the metadata file).
    :param debug: enable warning output (True/False only for now).
    """
    if not filename and not handle:
        # Fix: the original had an unreachable ``return None`` after this
        # call; sys.exit raises SystemExit, so the return never executed.
        sys.exit(
            "Error: filename or handle must be provided to RearrangementsFile\n"
        )

    # set logging level. for now, just True/False to issue warnings.
    self.debug = debug

    # define fields from the rearrangements spec.
    self.mandatoryFieldNames = []
    self.optionalSpecFieldNames = []
    self.additionalFieldNames = []
    self._inputFieldNames = []
    for f in rearrangements['fields']:
        if f['mandatory']:
            self.mandatoryFieldNames.append(f['name'])
        else:
            self.optionalSpecFieldNames.append(f['name'])

    # writing or reading
    self.writableState = state
    if state:
        # writing: open data + metadata files, start a fresh PROV document.
        if filename:
            self.dataFile = open(filename, 'w')
            self.metaFile = open(filename + '.meta.json', 'w')
        else:
            self.dataFile = handle
            self.metaFile = open(handle.name + '.meta.json', 'w')
        self.metadata = model.ProvDocument()
        self.metadata.set_default_namespace('http://airr-community.org/')
        self.wroteMetadata = False
        self.dictWriter = None
    else:
        # reading: the metadata file is optional.
        if filename:
            self.dataFile = open(filename, 'r')
            try:
                self.metaFile = open(filename + '.meta.json', 'r')
            except IOError:
                self.metaFile = None
        else:
            self.dataFile = handle
            try:
                self.metaFile = open(handle.name + '.meta.json', 'r')
            except IOError:
                self.metaFile = None
        self.wroteMetadata = None

        # read metadata
        self.metadata = None
        if self.metaFile:
            text = self.metaFile.read()
            self.metaFile.close()
            # NOTE(review): deserialize is called with None for the stream
            # argument and the content as text — confirm this matches the
            # prov model API version in use.
            self.metadata = model.ProvDocument.deserialize(
                None, text, 'json')

        # data reader, collect field names beyond the mandatory ones.
        self.dictReader = csv.DictReader(self.dataFile, dialect='excel-tab')
        self._inputFieldNames = self.dictReader.fieldnames
        for f in self._inputFieldNames:
            if f in self.mandatoryFieldNames:
                continue
            if f not in self.additionalFieldNames:
                self.additionalFieldNames.append(f)