def __init__(self, project, uuid=None, attributes=None, add_default_type=True):
    """
    Default constructor, creates a session activity and links to project object

    :param project: a project object
    :param uuid: optional uuid...used mostly for reading in existing NIDM document
    :param attributes: optional dictionary of attributes to add qname:value
    :param add_default_type: if True, adds the NIDM_SESSION PROV_TYPE attribute
    :return: none
    """
    # use supplied uuid when reading an existing document, else generate one;
    # the parent constructor call is identical in both cases so build the
    # identifier once from this object's uuid
    self._uuid = getUUID() if uuid is None else uuid
    #execute default parent class constructor
    super(Session, self).__init__(
        project.graph,
        pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI), self.get_uuid()),
        attributes)
    project.graph._add_record(self)
    if add_default_type:
        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_SESSION})
    #carry graph object around
    self.graph = project.graph
    #register this session with the project
    project.add_sessions(self)
    #list of acquisitions associated with this session
    self._acquisitions = []
def __init__(self, acquisition, attributes=None, uuid=None):
    """
    Default constructor, creates an acquisition object and links it to an
    acquisition activity object.

    :param acquisition: a Aquisition activity object
    :param attributes: optional attributes to add to entity
    :param uuid: optional uuid...used mostly for reading in existing NIDM document
    :return: none
    """
    # build the entity identifier from the caller-supplied uuid or a fresh one
    entity_id = pm.QualifiedName(
        pm.Namespace("nidm", Constants.NIDM),
        getUUID() if uuid is None else uuid)
    #execute default parent class constructor
    super(AcquisitionObject, self).__init__(acquisition.graph, entity_id, attributes)
    acquisition.graph._add_record(self)
    #carry graph object around
    self.graph = acquisition.graph
    #create link to acquisition activity
    acquisition.add_acquisition_object(self)
def __init__(self, session, attributes=None, uuid=None):
    """
    Default constructor, creates a session activity and links to project object

    :param session: a session object
    :param uuid: optional uuid...used mostly for reading in existing NIDM document
    :param attributes: optional dictionary of attributes to add qname:value
    """
    # identifier comes from the supplied uuid, or a freshly generated one
    activity_id = pm.QualifiedName(
        pm.Namespace("niiri", Constants.NIIRI),
        getUUID() if uuid is None else uuid)
    #execute default parent class constructor
    super(Acquisition, self).__init__(session.graph, activity_id, attributes)
    session.graph._add_record(self)
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY})
    #list to store acquisition objects associated with this activity
    self._acquisition_objects = []
    #carry graph object around
    self.graph = session.graph
    #add acquisition to session
    session.add_acquisition(self)
def __init__(self, attributes=None, empty_graph=False, uuid=None):
    """
    Default constructor, creates document and adds Project activity to graph with optional attributes

    :param attributes: optional dictionary of attributes to add
    :param empty_graph: if set to True, creates empty graph with no namespaces besides Prov defaults
    :param uuid: if uuid is not None then use supplied uuid for project instead of generating one (for reading nidm docs)
    """
    # choose the backing document: bare Prov document, or NIDM document
    # pre-loaded with the default namespaces
    if empty_graph:
        self.graph = pm.ProvDocument()
    else:
        self.graph = Constants.NIDMDocument(namespaces=Constants.namespaces)
    # supplied uuid (reading existing docs) or a freshly generated one
    project_id = getUUID() if uuid is None else uuid
    #execute default parent class constructor
    super(Project, self).__init__(
        self.graph,
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), project_id),
        attributes)
    #add record to graph
    self.graph._add_record(self)
    #create empty sessions list
    self._sessions = []
    #prov toolbox doesn't like 2 attributes with PROV_TYPE in 1 add_attributes call so split them...
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
def instantiate_template(prov_doc,instance_dict):
    global GLOBAL_UUID_DEF_NS
    '''
    Instantiate a prov template based on a dictionary setting for
    the prov template variables

    Supported:
        entity and attribute var: matching
        multiple entity expansion
    Unsupported by now:
        linked entities
        multiple attribute expansion
    To Do: Handle core template expansion rules as described in
        https://ieeexplore.ieee.org/document/7909036/
        and maybe add additional expansion/composition rules for
        templates useful to compose ENES community workflow templates

    Args:
        prov_doc (ProvDocument): input prov document template
        instance_dict (dict): match dictionary
    Returns:
        ProvDocument: new document with variables substituted and bundles expanded
    '''
    #instance dict override: replace tmpl:startTime and tmpl:endTime with prov:startTime and prov:endTime
    instance_dict["tmpl:startTime"]=prov.QualifiedName(prov.Namespace("prov", "http://www.w3.org/ns/prov#"),"startTime")
    instance_dict["tmpl:endTime"]=prov.QualifiedName(prov.Namespace("prov", "http://www.w3.org/ns/prov#"),"endTime")
    instance_dict["tmpl:time"]=prov.QualifiedName(prov.Namespace("prov", "http://www.w3.org/ns/prov#"), "time")
    #CHECK FOR NAMESPACE FOR VARGEN UUID
    # NOTE: mutates module-level GLOBAL_UUID_DEF_NS when the template declares
    # a namespace with the vargen-uuid prefix
    for ns in prov_doc.namespaces:
        if ns.prefix==GLOBAL_UUID_DEF_NS_PREFIX:
            #uuid namespace defined in template? Use this one
            GLOBAL_UUID_DEF_NS=ns
    # copy namespaces into a fresh document, then expand top-level records
    new_doc = set_namespaces(prov_doc.namespaces,prov.ProvDocument())
    new_doc = add_records(prov_doc,new_doc,instance_dict)
    blist = list(prov_doc.bundles)
    # expand each bundle: substitute its identifier, then its records
    for bundle in blist:
        id1=match(bundle.identifier, instance_dict, True)
        new_bundle = new_doc.bundle(id1)
        new_bundle = add_records(bundle, new_bundle,instance_dict)
    return new_doc
def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj): """ Cycles through triples for a particular subject and adds them to the nidm_obj :param rdf_graph: RDF graph object :param subject_uri: URI of subject to query for additional metadata :param namespaces: Namespaces in NIDM document :param nidm_obj: NIDM object to add metadata :return: None """ #Cycle through remaining metadata and add attributes for predicate, objects in rdf_graph.predicate_objects(subject=subject_uri): #if find qualified association if predicate == URIRef(Constants.PROV['qualifiedAssociation']): #need to get associated prov:Agent uri, add person information to graph for agent in rdf_graph.objects( subject=subject_uri, predicate=Constants.PROV['wasAssociatedWith']): #add person to graph and also add all metadata person = nidm_obj.add_person(uuid=agent) #now add metadata for person add_metadata_for_subject(rdf_graph=rdf_graph, subject_uri=agent, namespaces=namespaces, nidm_obj=person) #get role information for bnode in rdf_graph.objects( subject=subject_uri, predicate=Constants.PROV['qualifiedAssociation']): #for bnode, query for object which is role? How? #term.BNode.__dict__() #create temporary resource for this bnode r = Resource(rdf_graph, bnode) #get the object for this bnode with predicate Constants.PROV['hadRole'] for r_obj in r.objects(predicate=Constants.PROV['hadRole']): #create qualified names for objects obj_nm, obj_term = split_uri(r_obj._identifier) for uris in namespaces: if uris.uri == URIRef(obj_nm): #create qualified association in graph nidm_obj.add_qualified_association( person=person, role=pm.QualifiedName(uris, obj_term)) else: if validators.url(objects): #create qualified names for objects obj_nm, obj_term = split_uri(objects) for uris in namespaces: if uris.uri == URIRef(obj_nm): #prefix = uris.prefix nidm_obj.add_attributes( {predicate: pm.QualifiedName(uris, obj_term)}) else: nidm_obj.add_attributes( {predicate: get_RDFliteral_type(objects)})
def __init__(self, parentDoc=None, attributes=None):
    """
    Default constructor, creates document and adds Project activity to graph with optional attributes

    :param parentDoc: optional ProvDocument
    :param attributes: optional dictionary of attributes to add
    """
    #set graph document: caller-supplied document or the shared default graph
    self.graph = parentDoc if parentDoc else Constants.p_graph
    #execute default parent class constructor
    super(Project, self).__init__(
        self.graph,
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID()),
        attributes)
    self.graph._add_record(self)
    #create empty sessions list
    self._sessions = []
    #prov toolbox doesn't like 2 attributes with PROV_TYPE in 1 add_attributes call so split them...
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT_TYPE})
def decode_rdf_representation(self, literal, graph):
    """
    Convert an rdflib term into its prov-python representation.

    :param literal: rdflib term (RDFLiteral, URIRef, or a plain Python value)
    :param graph: rdflib graph used to resolve namespace prefixes for URIRefs
    :return: pm.Literal / pm.QualifiedName / parsed datetime, or the input
             value unchanged for simple Python types
    """
    if isinstance(literal, RDFLiteral):
        value = literal.value if literal.value is not None else literal
        datatype = literal.datatype if hasattr(literal, 'datatype') else None
        langtag = literal.language if hasattr(literal, 'language') else None
        if datatype and 'XMLLiteral' in datatype:
            # keep the original rdflib Literal for XML content
            value = literal
        if datatype and 'base64Binary' in datatype:
            value = base64.standard_b64encode(value)
        if datatype == XSD['QName']:
            return pm.Literal(literal, datatype=XSD_QNAME)
        if datatype == XSD['dateTime']:
            return dateutil.parser.parse(literal)
        else:
            # The literal of standard Python types is not converted here
            # It will be automatically converted when added to a record by _auto_literal_conversion()
            # (use pm.Literal explicitly, consistent with the QName case above,
            # rather than relying on whatever bare "Literal" is bound to)
            return pm.Literal(value, self.valid_identifier(datatype), langtag)
    elif isinstance(literal, URIRef):
        rval = self.valid_identifier(literal)
        if rval is None:
            # unknown URI: register its namespace and build a QualifiedName
            prefix, iri, _ = graph.namespace_manager.compute_qname(literal)
            ns = self.document.add_namespace(prefix, iri)
            rval = pm.QualifiedName(ns, literal.replace(ns.uri, ''))
        return rval
    else:
        # simple type, just return it
        return literal
def add_acquisition(self, acquisition):
    """
    Adds an acquisition to this session's acquisitions list and links it
    in the graph via dct:isPartOf.

    :param acquisition: object of type "Acquisition" from nidm API
    :return: none
    """
    # append is the idiomatic single-item form of extend([...])
    self._acquisitions.append(acquisition)
    #create links in graph
    acquisition.add_attributes({
        pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf'):
        self
    })
def __init__(self, project, attributes=None, uuid=None):
    """
    Default contructor, creates a derivative activity

    :param project: NIDM project object this derivative activity belongs to
    :param uuid: optional uuid...used mostly for reading in existing NIDM document
    :param attributes: optional dictionary of attributes to add qname:value
    """
    if uuid is None:
        self._uuid = getUUID()
        #execute default parent class constructor
        super(Derivative,self).__init__(project.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes)
    else:
        self._uuid = uuid
        # NOTE(review): unlike the uuid=None branch this uses pm.Identifier
        # rather than a niiri QualifiedName — confirm this asymmetry is intended
        super(Derivative,self).__init__(project.graph, pm.Identifier(uuid),attributes)
    project.graph._add_record(self)
    #list to store derivative objects associated with this activity
    self._derivative_objects=[]
    #carry graph object around
    self.graph = project.graph
    #register this derivative activity with the project
    project.add_derivatives(self)
def __init__(self, project, attributes=None, uuid=None, add_default_type=True):
    """
    Default contructor, creates an acquisition object and links to acquisition activity object

    :param project: NIDM project to add data element entity to.
    :param attributes: optional attributes to add to entity
    :param uuid: optional uuid...used mostly for reading in existing NIDM document
    :param add_default_type: if True, adds the NIDM_DATAELEMENT PROV_TYPE attribute
    :return: none
    """
    if uuid is None:
        #execute default parent class constructor
        super(DataElement,self).__init__(project.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),getUUID()),attributes)
    else:
        # NOTE(review): uses pm.Identifier here rather than a niiri
        # QualifiedName as in the uuid=None branch — confirm intended
        super(DataElement,self).__init__(project.graph,pm.Identifier(uuid),attributes)
    project.graph._add_record(self)
    if add_default_type:
        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_DATAELEMENT})
    #register this data element with the project
    project.add_dataelements(self)
    #carry graph object around
    self.graph = project.graph
    #list to store derivative objects associated with this entity
    self._derivative_objects=[]
def __init__(self, attributes=None, empty_graph=False, uuid=None, add_default_type=True):
    """
    Default constructor, creates document and adds Project activity to graph with optional attributes

    :param attributes: optional dictionary of attributes to add
    :param empty_graph: if set to True, creates empty graph with no namespaces besides Prov defaults
    :param uuid: if uuid is not None then use supplied uuid for project instead of generating one (for reading nidm docs)
    :param add_default_type: if True, adds the NIDM_PROJECT PROV_TYPE attribute
    """
    if empty_graph:
        self.graph = Constants.NIDMDocument(namespaces=None)
    else:
        self.graph = Constants.NIDMDocument(namespaces=Constants.namespaces)
    # use supplied uuid when reading an existing document, else generate one;
    # the parent constructor call is identical in both cases
    self._uuid = getUUID() if uuid is None else uuid
    #execute default parent class constructor
    super(Project, self).__init__(
        self.graph,
        pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI), self.get_uuid()),
        attributes)
    #add record to graph
    self.graph._add_record(self)
    #create empty sessions list
    self._sessions = []
    #create empty derivatives list
    self._derivatives = []
    # create empty data elements list
    self._dataelements = []
    if add_default_type:
        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
def match(eid, mdict, node, numEntries=1):
    '''
    helper function to match strings based on dictionary

    Args:
        eid (string): input string
        mdict (dict): match dictionary
        node (bool): True when eid appears in a node (declaration) position,
            which enables auto-generation of uuids for vargen variables
        numEntries (int): number of instances to generate for a vargen variable
    Returns:
        meid: same as input or matching value for eid key in mdict
    '''
    adr = eid
    # normalize a QualifiedName to its "prefix:localpart" string form for lookup
    if isinstance(adr, prov.QualifiedName):
        lp = adr.localpart
        ns = adr.namespace.prefix
        adr = ns + ":" + lp
    #override: vargen found in entity declaration position: create a uuid
    #not optimal, need ability to provide custom namespace
    # FIX NAMESPACE FOR UUID!!!!!!!!
    if node and "vargen:" in str(adr) and str(adr)[:7] == "vargen:":
        ret = None
        for e in range(0, numEntries):
            uid = str(uuid.uuid4())
            if adr not in mdict:
                # first generated instance: store a scalar QualifiedName
                ret = prov.QualifiedName(GLOBAL_UUID_NS, uid)
                mdict[adr] = ret
            else:
                # subsequent instances: promote the stored value (and the
                # return value) to lists on first promotion, then append
                if not isinstance(mdict[adr], list):
                    tmp = list()
                    tmp.append(mdict[adr])
                    mdict[adr] = tmp
                    tmp2 = list()
                    tmp2.append(ret)
                    ret = tmp2
                qn = prov.QualifiedName(GLOBAL_UUID_NS, uid)
                mdict[adr].append(qn)
                ret.append(qn)
        return ret
    # plain lookup: substitute when bound, otherwise return input unchanged
    if adr in mdict:
        madr = mdict[adr]
    else:
        madr = eid
    return madr
def test_GetProjectInstruments():
    """
    Build a small NIDM document with two assessment acquisitions, serialize it
    to turtle, and verify Query.GetProjectInstruments returns both assessment
    types for the project uuid.
    """
    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    proj_uuid = "_123456gpi"
    project = Project(uuid=proj_uuid, attributes=kwargs)
    session = Session(project)
    acq = AssessmentAcquisition(session)
    # first instrument: North American Adult Reading Test
    kwargs = {
        pm.PROV_TYPE:
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM),
                         "NorthAmericanAdultReadingTest")
    }
    acq_obj = AssessmentObject(acq, attributes=kwargs)
    acq2 = AssessmentAcquisition(session)
    # second instrument: Positive and Negative Syndrome Scale
    kwargs = {
        pm.PROV_TYPE:
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM),
                         "PositiveAndNegativeSyndromeScale")
    }
    acq_obj2 = AssessmentObject(acq2, attributes=kwargs)
    #save a turtle file
    with open("test_gpi.ttl", 'w') as f:
        f.write(project.serializeTurtle())
    assessment_list = Query.GetProjectInstruments(["test_gpi.ttl"], proj_uuid)
    # remove the temporary file before asserting so it is cleaned up either way
    remove("test_gpi.ttl")
    assert Constants.NIDM + "NorthAmericanAdultReadingTest" in [
        str(x) for x in assessment_list['assessment_type'].to_list()
    ]
    assert Constants.NIDM + "PositiveAndNegativeSyndromeScale" in [
        str(x) for x in assessment_list['assessment_type'].to_list()
    ]
def add_sessions(self, session):
    """
    Adds session to project, creating links and adding reference to sessions list

    :param session: object of type "Session" from nidm API
    :return true if session object added to project, false if session object is already in project
    """
    # guard: don't register the same session twice
    if session in self._sessions:
        return False
    #add session to self.sessions list
    self._sessions.extend([session])
    #create links in graph
    session.add_attributes({
        pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf'): self
    })
    return True
def setEntry(rec, regNS):
    """
    interpret value provided via v3 bindings, check if qualified name or value,
    handle datatypes accordingly

    Args:
        rec : a key value pair read from v3 bindings file
        regNS: the namespaces read from the context section of the v3 bindings file
    Returns:
        "prov-ified" value, value as-is as fallback

    #keys: @id   (for quali)
    #      @type (for value)
    #      @value (for value)
    """
    out=rec
    try:
        if "@id" in rec:
            toks=rec["@id"].split(":")
            # a qualified name must be exactly "prefix:localpart"
            if len(toks) > 2:
                # NOTE(review): this specific exception is swallowed by the
                # bare except below and re-raised as the generic
                # "Error parsing" BindingFileException — confirm intended
                raise BindingFileException(
                    "Invalid Qualified Name " + rec["@id"] +
                    " found in V3 Json Binding " + repr(rec))
            # resolve the prefix against the registered namespaces
            for ns in regNS:
                if ns.prefix==toks[0]:
                    out=prov.QualifiedName(ns, toks[1])
        if "@value" in rec:
            if "@type" in rec:
                dt=rec["@type"]
                # NOTE(review): basestring exists only on Python 2; on Python 3
                # this raises NameError, which the bare except converts into a
                # BindingFileException — confirm the intended interpreter
                if isinstance(rec["@type"], basestring):
                    dt=xsd_datype_to_prov_datatype(dt, regNS)
                out=prov.Literal(rec["@value"], datatype=dt)
            else:
                out=rec["@value"]
    except:
        # NOTE(review): bare except discards the original error and traceback
        raise BindingFileException("Error parsing " + repr(rec))
    return out
def add_derivatives(self, derivative):
    """
    Adds derivatives to project, creating links and adding reference to derivatives list

    :param derivative: object of type "Derivative" from nidm API
    :return true if derivative object added to project, false if derivative object is already in project
    """
    # guard: don't register the same derivative twice
    if derivative in self._derivatives:
        return False
    # add derivative to self._derivatives list
    self._derivatives.extend([derivative])
    # create links in graph
    derivative.add_attributes({
        pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf'): self
    })
    return True
def __init__(self, project, attributes=None):
    """
    Default constructor, creates a session activity and links to project object

    :param project: a project object
    :return: none
    """
    # freshly generated identifier in the nidm namespace
    session_id = pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID())
    #execute default parent class constructor
    super(Session, self).__init__(project.graph, session_id, attributes)
    project.graph._add_record(self)
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_SESSION})
    #carry graph object around
    self.graph = project.graph
    #list of acquisitions associated with this session
    self._acquisitions = []
def decode_rdf_representation(self, literal, graph):
    """
    Convert an rdflib term into its prov-python representation.

    :param literal: rdflib term (RDFLiteral, URIRef, or a plain Python value)
    :param graph: rdflib graph used to resolve namespace prefixes for URIRefs
    :return: pm.Literal / pm.QualifiedName / parsed datetime, or the input
             value unchanged for simple Python types
    """
    if isinstance(literal, RDFLiteral):
        value = literal.value if literal.value is not None else literal
        datatype = literal.datatype if hasattr(literal, "datatype") else None
        langtag = literal.language if hasattr(literal, "language") else None
        if datatype and "XMLLiteral" in datatype:
            # keep the original rdflib Literal for XML content
            value = literal
        if datatype and "base64Binary" in datatype:
            value = base64.standard_b64encode(value)
        if datatype == XSD["QName"]:
            return pm.Literal(literal, datatype=XSD_QNAME)
        if datatype == XSD["dateTime"]:
            return dateutil.parser.parse(literal)
        if datatype == XSD["gYear"]:
            # store just the year component, keeping the gYear datatype
            return pm.Literal(
                dateutil.parser.parse(literal).year,
                datatype=self.valid_identifier(datatype),
            )
        if datatype == XSD["gYearMonth"]:
            # normalize to "YYYY-MM", keeping the gYearMonth datatype
            parsed_info = dateutil.parser.parse(literal)
            return pm.Literal(
                "{0}-{1:02d}".format(parsed_info.year, parsed_info.month),
                datatype=self.valid_identifier(datatype),
            )
        else:
            # The literal of standard Python types is not converted here
            # It will be automatically converted when added to a record by
            # _auto_literal_conversion()
            return pm.Literal(value, self.valid_identifier(datatype), langtag)
    elif isinstance(literal, URIRef):
        rval = self.valid_identifier(literal)
        if rval is None:
            # unknown URI: register its namespace and build a QualifiedName
            prefix, iri, _ = graph.namespace_manager.compute_qname(literal)
            ns = self.document.add_namespace(prefix, iri)
            rval = pm.QualifiedName(ns, literal.replace(ns.uri, ""))
        return rval
    else:
        # simple type, just return it
        return literal
def __init__(self, acquisition, attributes=None):
    """
    Default constructor, creates an acquisition object and links it to an
    acquisition activity object.

    :param acquisition: a Aquisition activity object
    :param attributes: optional attributes to add to entity
    :return: none
    """
    # freshly generated identifier in the nidm namespace
    entity_id = pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID())
    #execute default parent class constructor
    super(AcquisitionObject, self).__init__(acquisition.graph, entity_id, attributes)
    acquisition.graph._add_record(self)
    #carry graph object around
    self.graph = acquisition.graph
    #create link to acquisition activity
    acquisition.add_acquisition_object(self)
def __init__(self, derivative,attributes=None, uuid=None):
    """
    Default contructor, creates an derivative object and links to derivative activity object

    :param derivative: a Derivative activity object
    :param attributes: optional attributes to add to entity
    :param uuid: optional uuid...used mostly for reading in existing NIDM document
    :return: none
    """
    if uuid is None:
        #execute default parent class constructor
        super(DerivativeObject,self).__init__(derivative.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),getUUID()),attributes)
    else:
        # NOTE(review): uses pm.Identifier here rather than a niiri
        # QualifiedName as in the uuid=None branch — confirm intended
        super(DerivativeObject,self).__init__(derivative.graph, pm.Identifier(uuid),attributes)
    derivative.graph._add_record(self)
    #carry graph object around
    self.graph = derivative.graph
    #create link to derivative activity
    derivative.add_derivative_object(self)
def __init__(self, session, attributes=None):
    """
    Default constructor, creates a session activity and links to project object

    :param session: a session object
    """
    # freshly generated identifier in the nidm namespace
    activity_id = pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID())
    #execute default parent class constructor
    super(Acquisition, self).__init__(session.graph, activity_id, attributes)
    session.graph._add_record(self)
    self.add_attributes(
        {pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY})
    # link this acquisition into its parent session
    self.add_attributes({str("dct:isPartOf"): session})
    #list to store acquisition objects associated with this activity
    self._acquisition_objects = []
    #carry graph object around
    self.graph = session.graph
def checkLinked(nodes, instance_dict):
    """
    Identify groups of tmpl:linked variables among the template records.

    :param nodes: list of template variable records (with .identifier and .attributes)
    :param instance_dict: lookup table with substitutes from bindings
    :return: dict with keys "nodes" (records sorted so each link group is
             contiguous), "numInstances" (bound-instance count per variable)
             and "linkedGroups" (one {identifier: rank} dict per group)
    """
    # qualified name used to recognize tmpl:linked attributes
    tmpl_linked_qn = prov.QualifiedName(
        prov.Namespace("tmpl", "http://openprovenance.org/tmpl#"), "linked")
    #make tmpl:linked sweep and determine order
    # ASSUMPTION: Each entity can only be link to one "ancestor" entity,
    # one ancestor entity can be linked to by multiple "successor" entities
    # NO CYCLES!
    # -> This implies: There is only one root and the network of linked rels is a directed acyclic graph
    linkedDict = dict()
    linkedGroups = list()
    for rec in nodes:
        eid = rec.identifier
        for attr in rec.attributes:
            if tmpl_linked_qn == attr[0]:
                linkedDict[eid] = attr[1]
    # split linked variables into roots (only linked TO) and dependents
    dependents = []
    roots = []
    intermediates = []
    for id in linkedDict:
        if id not in dependents:
            dependents.append(id)
    for id in linkedDict:
        if linkedDict[id] not in dependents:
            roots.append(linkedDict[id])
        else:
            intermediates.append(linkedDict[id])

    def dfs_levels(node, links, level):
        # recursive depth-first search assigning a rank (level) to every
        # variable reachable from node via tmpl:linked edges
        lower = dict()
        for k in [k for k, v in links.items() if v == node]:
            ret = dfs_levels(k, links, level + 1)
            if ret != None:
                lower.update(ret)
        myval = {node: level}
        lower.update(myval)
        return (lower)

    numInstances = dict()
    combRoot = dict()
    # traverse from root
    offset = 0
    for r in roots:
        retval = dfs_levels(r, linkedDict, offset)
        #get max rank
        maxr = max(retval.values())
        # we need to check how many entries we have
        maxEntries = 0
        for rec in nodes:
            if rec.identifier in retval:
                eid = rec.identifier
                neid = match(eid, instance_dict, False)
                #assume single instance bound to this node
                length = 0
                if not isinstance(neid, list):
                    length = 1
                    if neid == eid:
                        # no match: if unassigned var or vargen variable, assume length 0
                        length = 0
                if length > maxEntries:
                    maxEntries = length
                if isinstance(neid, list):
                    # list is assigned to node, now all lengths must be equal
                    length = len(neid)
                    if length != maxEntries:
                        if maxEntries > 0:
                            raise IncorrectNumberOfBindingsForGroupVariable(
                                "Linked entities must have same number of bound instances!"
                            )
                        maxEntries = length
        for n in retval:
            numInstances[n] = maxEntries
        combRoot.update(retval)
        linkedGroups.append(retval)
        offset = maxr + 1
    # any record not in a link group forms its own singleton group
    for rec in nodes:
        if rec.identifier not in combRoot:
            combRoot[rec.identifier] = offset
            linkedGroups.append({rec.identifier: offset})
            eid = rec.identifier
            neid = match(eid._str, instance_dict, False)
            if isinstance(neid, list):
                numInstances[eid] = len(neid)
            else:
                numInstances[eid] = 1
    #need to remember number of instances for each var
    # when multiple link groups rank accordingly
    #try reorder nodes based on tmpl:linked hierarchy
    fnc = lambda x: combRoot[x.identifier]
    nodes_sorted = sorted(nodes, key=fnc)
    return {
        "nodes": nodes_sorted,
        "numInstances": numInstances,
        "linkedGroups": linkedGroups
    }
def set_rel(new_entity, rel, idents, expAttr, linkedRelAttrs, otherAttrs):
    '''
    helper function to add specific relations according to relation type
    implements cartesian expansion only (no "linked" restrictions) by now

    Args:
        new_entity: Bundle or ProvDoc the expanded relations are added to
        rel: relation record from the template to be expanded
        idents: optional identifier(s) for the relation; may be a list, a
            var:/vargen: QualifiedName, or None
        expAttr: ordered dict of formal attribute values for the relation
        linkedRelAttrs: attribute names grouped by tmpl:linked group
        otherAttrs: list of (k, v) tuples of non-formal attributes
    Returns:
        None - instantiates relations in new_entity via make_rel
    '''
    cnt = 0
    attrlists = []
    indexlists = []
    #create groups: collect attribute values per link group, remembering each
    #attribute's original position in expAttr so order can be restored later
    for g in linkedRelAttrs:
        alist = []
        ilist = []
        cnt = 0
        for a in expAttr:
            if a in g:
                alist.append(expAttr[a])
                ilist.append(cnt)
            cnt += 1
        attrlists.append(alist)
        indexlists.append(ilist)
    # within a group, instances combine positionally (zip); across groups we
    # take the cartesian product
    outLists = []
    for a in attrlists:
        outLists.append(zip(*a))
    #taken from http://code.activestate.com/recipes/577932-flatten-arraytuple/
    flatten = lambda arr: reduce(
        lambda x, y: ((isinstance(y, (list, tuple)) or x.append(y)) and x.extend(
            flatten(y))) or x, arr, [])
    idx = flatten(indexlists)
    # materialize the product: it is iterated below and, when idents is a
    # list, len() is taken — itertools.product has no __len__, so the
    # original len(relList) raised TypeError
    relList = list(itertools.product(*outLists))
    #check identifier
    # if var: namespace: unbound variable, ignore
    # if vargen: namespace : create uuid for each ele
    # if not var and not vargen: if same number of idents as elements: iterate, else: fail
    getIdent = False
    makeUUID = False
    if idents:
        if isinstance(idents, list):
            if len(idents) != len(relList):
                raise IncorrectNumberOfBindingsForStatementVariableException(
                    "Wrong number of idents for expanded rel " + repr(rel))
            getIdent = True
        elif "vargen:" in idents._str and idents._str[:7] == "vargen:":
            #make uuid for each
            makeUUID = True
        elif "var:" in idents._str and idents._str[:4] == "var:":
            #unbound var identifier: drop it
            idents = None
    cnt = 0
    for element in relList:
        # restore the original expAttr ordering of the combined values
        out = flatten(element)
        outordered = [out[i] for i in idx]
        if getIdent:
            make_rel(new_entity, rel, idents[cnt], outordered, otherAttrs)
        elif makeUUID:
            make_rel(new_entity, rel,
                     prov.QualifiedName(GLOBAL_UUID_NS, str(uuid.uuid4())),
                     outordered, otherAttrs)
        else:
            make_rel(new_entity, rel, idents, outordered, otherAttrs)
        cnt += 1
def checkLinked(nodes, instance_dict):
    """
    This function identifies groups of linked variables in the current template

    Arguments:
        nodes: List of all variables in the template
        instance_dict: Lookup table with substitutes from bindings
    Returns:
        dict with following keys:
        "nodes" : template variables sorted so that each link group forms a
            contiguous sequence ordered by "direction" of tmpl:linked, eg for
            "var b tmpl:linked to var a
             var c tmpl:linked to var b
             var e tmpl:linked to var d"
            we get the order var a, var b, var c, var d, var e
        "numInstances": the number of instances assigned to each variable in "nodes"
            if vargen vars are linked to regular instantiated vars we create the
            same number of auto generated instances, they thus get the same
            number assigned
        "linkedGroups": list of lists each containing the variables belonging to
            the same link group, ex. above: [[a,b,c], [d,e]]
    """
    """#we need that for lookup"""
    tmpl_linked_qn=prov.QualifiedName(prov.Namespace("tmpl", "http://openprovenance.org/tmpl#"), "linked")
    """
    #make tmpl:linked sweep and determine order
    # we essentially create a graph containing all "tmpl:linked" ties and the involved nodes
    # ASSUMPTION: Each entity can only be link to one "ancestor" entity,
    # one ancestor entity can be linked to by multiple "successor" entities
    # NO CYCLES!
    # -> This implies: There is only one root in each link group and
    #    the network of linked rels is a directed acyclic graph
    """
    linkedDict=dict()
    linkedGroups=list()
    for rec in nodes:
        eid = rec.identifier
        for attr in rec.attributes:
            if tmpl_linked_qn == attr[0]:
                linkedDict[eid]=attr[1]
    """# determine order, which of the variables is a "root", i.e only linked to by other vars"""
    dependents=[]
    roots=[]
    intermediates=[]
    for id in linkedDict:
        if id not in dependents:
            dependents.append(id)
    for id in linkedDict:
        if linkedDict[id] not in dependents:
            roots.append(linkedDict[id])
        else:
            intermediates.append(linkedDict[id])

    def dfs_levels(node, links, level):
        """
        #helper function
        #recursive depth first search to determine order of linked vars
        """
        lower=dict()
        for k in [k for k,v in links.items() if v == node]:
            ret=dfs_levels(k, links, level+1)
            if ret!=None:
                lower.update(ret)
        myval={node : level}
        lower.update(myval)
        return(lower)

    numInstances=dict()
    combRoot=dict()
    #traverse from root
    offset=0
    for r in roots:
        retval=dfs_levels(r, linkedDict, offset)
        #get max rank
        maxr=max(retval.values())
        # we need to check how many entries we have
        maxEntries=0
        for rec in nodes:
            if rec.identifier in retval:
                eid = rec.identifier
                neid = match(eid,instance_dict, False)
                #assume single instance bound to this node
                length=0
                if not isinstance(neid, list):
                    length=1
                    if neid==eid:
                        #no match: if unassigned var or vargen variable, assume length 0
                        length=0
                if length>maxEntries:
                    maxEntries=length
                if isinstance(neid,list):
                    #list is assigned to node, now all lengths must be equal
                    length=len(neid)
                    if length!=maxEntries:
                        if maxEntries>0:
                            raise IncorrectNumberOfBindingsForGroupVariable("Linked entities must have same number of bound instances!")
                        maxEntries=length
        for n in retval:
            numInstances[n]=maxEntries
        combRoot.update(retval)
        linkedGroups.append(retval)
        offset=maxr+1
    # any variable not in a link group forms its own singleton group
    for rec in nodes:
        if rec.identifier not in combRoot:
            combRoot[rec.identifier]=offset
            linkedGroups.append({rec.identifier : offset})
            eid=rec.identifier
            neid = match(eid._str,instance_dict, False)
            if isinstance(neid, list):
                numInstances[eid]=len(neid)
            else:
                numInstances[eid]=1
    #need to remember number of instances for each var
    # when multiple link groups rank accordingly
    #try reorder nodes based on tmpl:linked hierarchy
    fnc=lambda x: combRoot[x.identifier]
    nodes_sorted=sorted(nodes, key=fnc)
    return { "nodes" : nodes_sorted, "numInstances" : numInstances, "linkedGroups" : linkedGroups}
def set_rel(new_entity,rel,idents, expAttr, linkedRelAttrs, otherAttrs):
    '''
    Helper function to add specific relations according to relation type.
    Performs "tmpl:linked"-aware expansion and passes relation-specific as
    well as generic attributes on to "make_rel", which instantiates each
    expanded relation in the passed container.

    The expansion concept: group-level variables assigned to the formal
    relation attributes are sorted into groups based on whether they are
    declared as linked or not.  Variables in the same link group are
    combined position-wise (no cartesian expansion inside a group); distinct
    groups are expanded against each other with a cartesian product.
    Example wasAssociatedWith(id; a, ag, pl):
      1) a linked w. ag linked w. pl -> all three must have the same number
         of instances, combined by position;
      2) two linked, one free -> numInstances(linked pair) x
         numInstances(unlinked var) expansions;
      3) none linked -> numInstances(a) x numInstances(ag) x
         numInstances(pl) expansions.

    Args:
        new_entity: Bundle or ProvDoc to be populated via template expansion.
        rel: relation from the template to be substituted/expanded.
        idents: optional identifier(s) for the relation; may be None, a
            single qualified name (a "var:" name is dropped; a "vargen:"
            name mints a fresh UUID per expansion) or a list providing one
            identifier per expanded relation.
        expAttr: ordered dict of formal attributes, in the order expected
            by "make_rel" for this relation type.
        linkedRelAttrs: formal attributes grouped by link group; drives
            which attributes expand together.
        otherAttrs: mapping of optional non-formal attributes for the
            relation; list-valued entries must supply one value per
            expansion.

    Returns:
        None - calls "make_rel" once per expanded relation (side effect).

    Raises:
        IncorrectNumberOfBindingsForStatementVariableException: if a list
            of identifiers does not match the number of expansions.
        IncorrectNumberOfBindingsForStatementVariable: if an optional
            attribute has an incompatible number of bound values.
    '''
    #check identifier
    # if var: namespace: unbound variable, ignore
    # if vargen: namespace: create uuid for each element
    # if neither: a list of idents must match the expansion count
    #
    # BUGFIX: the original code validated len(idents) against len(relList)
    # here, but relList is only computed further below (NameError for any
    # list-valued idents). The check is now deferred until relList exists.
    getIdent=False
    makeUUID=False
    if idents:
        if isinstance(idents, list):
            getIdent=True
        elif idents._str[:7]=="vargen:":
            #make uuid for each expanded relation
            makeUUID=True
        elif idents._str[:4]=="var:":
            #unbound variable identifier: ignore it
            idents=None

    #create efficient data structures for expansion with tmpl:linked constraints
    #separate attribute values by group, but remember their original order in ilist
    attrlists=[]
    indexlists=[]
    attrVisited=[]
    for g in linkedRelAttrs:
        alist=[]
        ilist=[]
        cnt=0
        for a in expAttr:
            if a in g:
                attrVisited.append(a)
                alist.append(expAttr[a])
                ilist.append(cnt)
            cnt+=1
        attrlists.append(alist)
        indexlists.append(ilist)

    #Some of the variables were not present in the linked groups:
    #they form singleton groups of their own.
    if len(attrVisited)!=len(expAttr):
        cnt=0
        for a in expAttr:
            if a not in attrVisited:
                attrlists.append([expAttr[a]])
                indexlists.append([cnt])
                attrVisited.append(a)
            cnt+=1

    outLists=[]
    # concatenate values position-wise within each link group
    for a in attrlists:
        outLists.append(zip(*a))

    #taken from http://code.activestate.com/recipes/577932-flatten-arraytuple/
    flatten = lambda arr: reduce(lambda x, y: ((isinstance(y, (list, tuple)) or x.append(y)) and x.extend(flatten(y))) or x, arr, [])

    #we need this info to maintain the order of formal attributes
    idx=flatten(indexlists)
    log.debug("OUTLISTS")
    log.debug(outLists)

    #create cartesian product of grouped variables; only attributes that are
    #not in the same link group get expanded against each other
    relList=list(itertools.product(*outLists))
    log.debug(relList)

    #deferred identifier-count validation (see BUGFIX note above)
    if getIdent and len(idents) != len(relList):
        raise IncorrectNumberOfBindingsForStatementVariableException("Wrong number of idents for expanded rel " + repr(rel))

    cnt=0
    #iterate over cartesian product
    for element in relList:
        log.debug(repr(element))
        log.debug(repr(otherAttrs))
        # CONSTRUCT OTHER ATTRS FOR THIS REL:
        # scalar attributes are shared across expansions, list-valued
        # attributes must provide one entry per expansion
        rel_other_attrs=dict()
        for k in otherAttrs:
            log.debug(repr(otherAttrs[k]))
            oa_eles=1
            if isinstance(otherAttrs[k], list):
                oa_eles=len(otherAttrs[k])
            if oa_eles!=1 and oa_eles < len(relList):
                raise IncorrectNumberOfBindingsForStatementVariable("Attribute " + str(k) + " has incorrect number of bindings.")
            if oa_eles==1:
                kval=otherAttrs[k]
            else:
                kval=otherAttrs[k][cnt]
            rel_other_attrs[k]=kval

        #flatten the grouped values and restore the original ordering of
        #formal attributes (this fixed the former assoc-order bug)
        out=flatten(element)
        outordered=[x for _,x in sorted(zip(idx, out))]

        #create expanded relation
        if getIdent:
            make_rel(new_entity, rel, idents[cnt], outordered, rel_other_attrs)
        elif makeUUID:
            make_rel(new_entity, rel, prov.QualifiedName(GLOBAL_UUID_DEF_NS, str(uuid.uuid4())), outordered, rel_other_attrs)
        else:
            make_rel(new_entity, rel, idents, outordered, rel_other_attrs)
        cnt+=1
#bind_dicts=[] bind_dict = dict() for index, row in data.iterrows(): rtemplate = template for col in data.columns.values: if col in bindmap: outstatement = "var:" + bindmap[col][ "varname"] + " a prov:Entity ;\n" #print repr(row) outval = str(row[col].encode('utf8', 'replace')) if bindmap[col]["val"] == "iri": outval = prov.QualifiedName(prov.Namespace(outNSpref, outNS), urllib.quote(outval)) #print col + " " + bindmap[col]["varname"] + " " + outval ID = "var:" + bindmap[col]["varname"] if ID not in bind_dict: bind_dict[ID] = outval else: if not isinstance(bind_dict[ID], list): tmp = list() tmp.append(bind_dict[ID]) bind_dict[ID] = tmp if outval not in bind_dict[ID]: bind_dict[ID].append(outval) #outval=row[col] if bindmap[col]["val"] == "literal": outval = '"' + outval + '"'
outNS = "http://example.com#" #bind_dicts=[] bind_dict = dict() bindfile_dict = dict() for index, row in data.iterrows(): rtemplate = template for col in data.columns.values: if col in bindmap: outval = row[col] if bindmap[col]["val"] == "iri": outval = prov.QualifiedName( prov.Namespace(outNSpref, outNS), urllib.quote(str(outval.encode('utf8', 'replace')))) #print col + " " + bindmap[col]["varname"] + " " + outval ID = "var:" + bindmap[col]["varname"] #ID = prov.QualifiedName(prov.Namespace("var", "http://openprovenance.org/var#"), urllib.quote(bindmap[col]["varname"])) #prepare data for bindings dict if ID not in bind_dict: bind_dict[ID] = outval else: if not isinstance(bind_dict[ID], list): tmp = list() tmp.append(bind_dict[ID]) bind_dict[ID] = tmp
bind_dicts = [] for index, row in data.iterrows(): rtemplate = template bind_dict = {} for col in data.columns.values: if col in bindmap: outstatement = "var:" + bindmap[col][ "varname"] + " a prov:Entity ;\n" #print repr(row) outval = str(row[col].encode('utf8', 'replace')) if bindmap[col]["val"] == "iri": outval = prov.QualifiedName(prov.Namespace(outNSpref, outNS), urllib.quote(outval)) #print col + " " + bindmap[col]["varname"] + " " + outval bind_dict["var:" + bindmap[col]["varname"]] = outval #outval=row[col] if bindmap[col]["val"] == "literal": outval = '"' + outval + '"' if bindmap[col]["type"] == "attr": outstatement = outstatement + "\ttmpl:2dvalue_0_0 " + str( outval) + " .\n" else: outstatement = outstatement + "\ttmpl:value_0 " + str( outval) + " .\n" rtemplate = rtemplate + outstatement
bindfile_dict=dict() for index, row in data.iterrows(): rtemplate=template for col in data.columns.values: if col in bindmap: outval=row[col] if bindmap[col]["val"]=="float": outval=float(outval) outiri=None if bindmap[col]["val"]=="iri": outiri=prov.QualifiedName(prov.Namespace(outNSpref,outNS), urllib.quote(str(outval.encode('utf8', 'replace')))) outval=outNSpref+":"+urllib.quote(str(outval.encode('utf8', 'replace'))) ID = "var:"+bindmap[col]["varname"] #prepare data for bindings dict if ID not in bind_dict: bind_dict[ID]=outval else: if not isinstance(bind_dict[ID], list): tmp=list() tmp.append(bind_dict[ID]) bind_dict[ID]=tmp if not bindmap[col]["uniqueOnly"] or outval not in bind_dict[ID]: bind_dict[ID].append(outval)