def __init__(self, attributes=None, empty_graph=False, uuid=None):
    """
    Default constructor: creates the document and adds a Project activity
    to the graph with optional attributes.

    :param attributes: optional dictionary of attributes to add
    :param empty_graph: if set to True, creates an empty ProvDocument with no
        namespaces besides the PROV defaults
    :param uuid: if uuid is not None then use the supplied uuid for the project
        instead of generating one (for reading existing NIDM docs)
    """
    if (empty_graph):
        self.graph = pm.ProvDocument()
    else:
        # NIDMDocument pre-registers the standard NIDM namespaces
        self.graph = Constants.NIDMDocument(
            namespaces=Constants.namespaces)

    if uuid is None:
        #execute default parent class constructor with a freshly generated uuid
        super(Project, self).__init__(
            self.graph,
            pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM),
                             getUUID()), attributes)
    else:
        #execute default parent class constructor with the supplied uuid
        super(Project, self).__init__(
            self.graph,
            pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), uuid),
            attributes)

    #add record to graph
    self.graph._add_record(self)

    #create empty sessions list
    self._sessions = []

    #prov toolbox doesn't like 2 attributes with PROV_TYPE in 1 add_attributes call so split them...
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
def instantiate_template(prov_doc,instance_dict):
    global GLOBAL_UUID_DEF_NS
    '''
    Instantiate a prov template based on a dictionary setting for
    the prov template variables.

    Supported:
        entity and attribute var: matching
        multiple entity expansion

    Unsupported by now:
        linked entities
        multiple attribute expansion

    To Do: Handle core template expansion rules as described in
        https://ieeexplore.ieee.org/document/7909036/ and maybe add
        additional expansion/composition rules for templates useful to
        compose ENES community workflow templates.

    Args:
        prov_doc (ProvDocument): input prov document template
        instance_dict (dict): match dictionary

    Returns:
        ProvDocument: new document with all template variables substituted
    '''
    # instance dict override: map tmpl:startTime / tmpl:endTime / tmpl:time to
    # the corresponding prov: qualified names so they expand as real PROV times
    instance_dict["tmpl:startTime"]=prov.QualifiedName(prov.Namespace("prov", "http://www.w3.org/ns/prov#"),"startTime")
    instance_dict["tmpl:endTime"]=prov.QualifiedName(prov.Namespace("prov", "http://www.w3.org/ns/prov#"),"endTime")
    instance_dict["tmpl:time"]=prov.QualifiedName(prov.Namespace("prov", "http://www.w3.org/ns/prov#"), "time")

    # CHECK FOR NAMESPACE FOR VARGEN UUID: if the template declares its own
    # namespace with the expected prefix, use it for generated identifiers
    # (mutates the module-level default)
    for ns in prov_doc.namespaces:
        if ns.prefix==GLOBAL_UUID_DEF_NS_PREFIX:
            #uuid namespace defined in template? Use this one
            GLOBAL_UUID_DEF_NS=ns

    # copy namespaces into a fresh document, then expand top-level records
    new_doc = set_namespaces(prov_doc.namespaces,prov.ProvDocument())
    new_doc = add_records(prov_doc,new_doc,instance_dict)

    # expand each bundle; the bundle identifier itself may be a template var
    blist = list(prov_doc.bundles)
    for bundle in blist:
        id1=match(bundle.identifier, instance_dict, True)
        new_bundle = new_doc.bundle(id1)
        new_bundle = add_records(bundle, new_bundle,instance_dict)

    return new_doc
def __init__(self, session, attributes=None, uuid=None):
    """
    Default constructor: creates an acquisition activity and links it to the
    given session.

    :param session: a session object
    :param attributes: optional dictionary of attributes to add (qname:value)
    :param uuid: optional uuid...used mostly for reading in existing NIDM documents
    """
    if uuid is None:
        #execute default parent class constructor with a freshly generated uuid
        super(Acquisition,self).__init__(session.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),getUUID()),attributes)
    else:
        super(Acquisition,self).__init__(session.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),uuid),attributes)

    session.graph._add_record(self)

    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY})
    #self.add_attributes({pm.QualifiedName(pm.Namespace("dct",Constants.DCT),'isPartOf'):self})

    #list to store acquisition objects associated with this activity
    self._acquisition_objects=[]
    #carry graph object around
    self.graph = session.graph
    #add acquisition to session (session creates the isPartOf link in the graph)
    session.add_acquisition(self)
def __init__(self, acquisition, attributes=None, uuid=None):
    """
    Default constructor: creates an acquisition object (entity) and links it to
    its acquisition activity.

    :param acquisition: an Acquisition activity object
    :param attributes: optional attributes to add to entity
    :param uuid: optional uuid...used mostly for reading in existing NIDM documents
    :return: none
    """
    if uuid is None:
        #execute default parent class constructor with a freshly generated uuid
        super(AcquisitionObject, self).__init__(
            acquisition.graph,
            pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM),
                             getUUID()), attributes)
    else:
        super(AcquisitionObject, self).__init__(
            acquisition.graph,
            pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), uuid),
            attributes)
    acquisition.graph._add_record(self)

    #carry graph object around
    self.graph = acquisition.graph
    #create link to acquisition activity
    acquisition.add_acquisition_object(self)
def __init__(self, project, uuid=None, attributes=None, add_default_type=True):
    """
    Create a session activity and link it to the given project.

    :param project: a project object
    :param uuid: optional uuid (used mostly when reading existing NIDM documents)
    :param attributes: optional dictionary of attributes to add
    :param add_default_type: when True, tag this activity with the NIDM Session type
    :return: none
    """
    # use the supplied uuid when given, otherwise generate a fresh one
    self._uuid = getUUID() if uuid is None else uuid

    # initialize the underlying prov activity with a niiri-qualified identifier
    session_id = pm.QualifiedName(
        pm.Namespace("niiri", Constants.NIIRI), self.get_uuid())
    super(Session, self).__init__(project.graph, session_id, attributes)

    project.graph._add_record(self)
    if add_default_type:
        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_SESSION})

    # keep a handle on the shared graph and register with the project
    self.graph = project.graph
    project.add_sessions(self)

    # acquisitions associated with this session
    self._acquisitions = []
def __init__(self, parentDoc=None, attributes=None):
    """
    Create the document and add a Project activity to the graph.

    :param parentDoc: optional ProvDocument to build on; when absent, the
        shared Constants.p_graph document is used
    :param attributes: optional dictionary of attributes to add
    """
    # use the caller-supplied document when present, otherwise the shared one
    self.graph = parentDoc if parentDoc else Constants.p_graph

    # initialize the underlying prov record with a fresh nidm-qualified uuid
    super(Project, self).__init__(
        self.graph,
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID()),
        attributes)
    self.graph._add_record(self)

    # sessions attached to this project
    self._sessions = []

    # prov toolbox doesn't like 2 attributes with PROV_TYPE in one
    # add_attributes call, so add them one at a time
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT_TYPE})
def __init__(self, project, attributes=None, uuid=None):
    """
    Default constructor: creates a derivative activity and links it to the
    given project.

    :param project: a project object
    :param attributes: optional dictionary of attributes to add (qname:value)
    :param uuid: optional uuid...used mostly for reading in existing NIDM documents
    """
    if uuid is None:
        self._uuid = getUUID()
        #execute default parent class constructor with a freshly generated uuid
        super(Derivative,self).__init__(project.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),self.get_uuid()),attributes)
    else:
        self._uuid = uuid
        # NOTE(review): this branch uses a plain pm.Identifier while the branch
        # above uses a niiri-qualified name — confirm the asymmetry is intended
        super(Derivative,self).__init__(project.graph, pm.Identifier(uuid),attributes)
    project.graph._add_record(self)

    #list to store derivative objects associated with this activity
    self._derivative_objects=[]
    #carry graph object around
    self.graph = project.graph
    #register this derivative with the project
    project.add_derivatives(self)
def add_acquisition(self, acquisition):
    """
    Add an acquisition activity to this session and create the
    dct:isPartOf link in the graph.

    :param acquisition: an Acquisition object to attach to this session
    :return: none
    """
    # append is the idiomatic single-element add (was extend([acquisition]))
    self._acquisitions.append(acquisition)
    #create links in graph
    acquisition.add_attributes({
        pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf'):
        self
    })
def read_binding_v3(v3_dict):
    """
    Convert a PROV template v3 bindings dict into the internal bindings format.

    Args:
        v3_dict: v3 bindings json dict
    Returns:
        dict with keys "binddict" (internal bindings dict) and "namespaces"
        (set of prov.Namespace built from the optional "context" section)
    """
    # collect declared namespaces from the optional "context" section
    namespaces = {prov.Namespace(prefix, url)
                  for prefix, url in v3_dict.get("context", {}).items()}

    # convert both variable sections, prefixing each key with its section name
    bindings_dict = dict()
    for section in ("var", "vargen"):
        for name, records in v3_dict.get(section, {}).items():
            bindings_dict[section + ":" + name] = [
                setEntry(rec, namespaces) for rec in records
            ]

    return ({"binddict": bindings_dict, "namespaces": namespaces})
def __init__(self, project, attributes=None, uuid=None, add_default_type=True):
    """
    Default constructor: creates a data element entity and links it to the
    given project.

    :param project: NIDM project to add data element entity to
    :param attributes: optional attributes to add to entity
    :param uuid: optional uuid...used mostly for reading in existing NIDM documents
    :param add_default_type: when True, tag the entity with the NIDM DataElement type
    :return: none
    """
    if uuid is None:
        #execute default parent class constructor with a freshly generated uuid
        super(DataElement,self).__init__(project.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),getUUID()),attributes)
    else:
        # NOTE(review): this branch uses a plain pm.Identifier while the branch
        # above uses a niiri-qualified name — confirm the asymmetry is intended
        super(DataElement,self).__init__(project.graph,pm.Identifier(uuid),attributes)
    project.graph._add_record(self)

    if add_default_type:
        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_DATAELEMENT})

    #register this entity with the project and carry the graph object around
    project.add_dataelements(self)
    self.graph = project.graph

    #list to store derivative objects associated with this entity
    self._derivative_objects=[]
def __init__(self, attributes=None, empty_graph=False, uuid=None, add_default_type=True):
    """
    Default constructor: creates the document and adds a Project activity to
    the graph with optional attributes.

    :param attributes: optional dictionary of attributes to add
    :param empty_graph: if set to True, creates a NIDMDocument with no extra
        namespaces besides the PROV defaults
    :param uuid: if uuid is not None then use the supplied uuid for the project
        instead of generating one (for reading existing NIDM docs)
    :param add_default_type: when True, tag this activity with the NIDM Project type
    """
    if (empty_graph):
        self.graph = Constants.NIDMDocument(namespaces=None)
    else:
        # pre-register the standard NIDM namespaces
        self.graph = Constants.NIDMDocument(
            namespaces=Constants.namespaces)

    if uuid is None:
        self._uuid = getUUID()

        #execute default parent class constructor with the generated uuid
        super(Project, self).__init__(
            self.graph,
            pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI),
                             self.get_uuid()), attributes)
    else:
        self._uuid = uuid

        #execute default parent class constructor with the supplied uuid
        super(Project, self).__init__(
            self.graph,
            pm.QualifiedName(pm.Namespace("niiri", Constants.NIIRI),
                             self.get_uuid()), attributes)

    #add record to graph
    self.graph._add_record(self)

    #create empty sessions list
    self._sessions = []
    #create empty derivatives list
    self._derivatives = []
    # create empty data elements list
    self._dataelements = []

    if add_default_type:
        self.add_attributes({pm.PROV_TYPE: Constants.NIDM_PROJECT})
def test_GetProjectInstruments():
    """
    Build a small NIDM project with two assessment acquisitions, serialize it
    to turtle, and check that GetProjectInstruments reports both instrument
    types. The temporary turtle file is always removed, even on failure.
    """
    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    proj_uuid = "_123456gpi"
    project = Project(uuid=proj_uuid, attributes=kwargs)

    session = Session(project)

    # first assessment: North American Adult Reading Test
    acq = AssessmentAcquisition(session)
    kwargs = {
        pm.PROV_TYPE:
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM),
                         "NorthAmericanAdultReadingTest")
    }
    acq_obj = AssessmentObject(acq, attributes=kwargs)

    # second assessment: Positive and Negative Syndrome Scale
    acq2 = AssessmentAcquisition(session)
    kwargs = {
        pm.PROV_TYPE:
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM),
                         "PositiveAndNegativeSyndromeScale")
    }
    acq_obj2 = AssessmentObject(acq2, attributes=kwargs)

    #save a turtle file
    with open("test_gpi.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    # remove the temp file even if the query or the assertions below raise
    try:
        assessment_list = Query.GetProjectInstruments(["test_gpi.ttl"],
                                                      proj_uuid)
    finally:
        remove("test_gpi.ttl")

    # compute the string list once instead of once per assertion
    found_types = [
        str(x) for x in assessment_list['assessment_type'].to_list()
    ]
    assert Constants.NIDM + "NorthAmericanAdultReadingTest" in found_types
    assert Constants.NIDM + "PositiveAndNegativeSyndromeScale" in found_types
def add_sessions(self, session):
    """
    Add a session to the project, creating graph links and adding a reference
    to the sessions list.

    :param session: object of type "Session" from the nidm API
    :return: True if the session was added, False if it was already present
    """
    # guard clause: nothing to do for sessions already registered
    if session in self._sessions:
        return False

    # append is the idiomatic single-element add (was extend([session]))
    self._sessions.append(session)

    #create dct:isPartOf link in graph
    session.add_attributes({pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf'): self})
    return True
def add_derivatives(self, derivative):
    """
    Add a derivative to the project, creating graph links and adding a
    reference to the derivatives list.

    :param derivative: object of type "Derivative" from the nidm API
    :return: True if the derivative was added, False if it was already present
    """
    # guard clause: nothing to do for derivatives already registered
    if derivative in self._derivatives:
        return False

    # append is the idiomatic single-element add (was extend([derivative]))
    self._derivatives.append(derivative)

    #create dct:isPartOf link in graph
    derivative.add_attributes({
        pm.QualifiedName(pm.Namespace("dct", Constants.DCT), 'isPartOf'):
        self
    })
    return True
def __init__(self, project, attributes=None):
    """
    Create a session activity and link it to the given project.

    :param project: a project object
    :param attributes: optional dictionary of attributes to add
    :return: none
    """
    # initialize the underlying prov activity with a fresh nidm-qualified uuid
    session_id = pm.QualifiedName(
        pm.Namespace("nidm", Constants.NIDM), getUUID())
    super(Session, self).__init__(project.graph, session_id, attributes)

    project.graph._add_record(self)
    self.add_attributes({pm.PROV_TYPE: Constants.NIDM_SESSION})

    # keep a handle on the shared graph
    self.graph = project.graph

    # acquisitions associated with this session
    self._acquisitions = []
def __init__(self, acquisition, attributes=None):
    """
    Create an acquisition object (entity) and link it to its acquisition
    activity.

    :param acquisition: an Acquisition activity object
    :param attributes: optional attributes to add to entity
    :return: none
    """
    # initialize the underlying prov entity with a fresh nidm-qualified uuid
    entity_id = pm.QualifiedName(
        pm.Namespace("nidm", Constants.NIDM), getUUID())
    super(AcquisitionObject, self).__init__(
        acquisition.graph, entity_id, attributes)

    acquisition.graph._add_record(self)

    # keep a handle on the shared graph
    self.graph = acquisition.graph
    # register this entity with its acquisition activity
    acquisition.add_acquisition_object(self)
def __init__(self, derivative,attributes=None, uuid=None):
    """
    Default constructor: creates a derivative object (entity) and links it to
    its derivative activity.

    :param derivative: a Derivative activity object
    :param attributes: optional attributes to add to entity
    :param uuid: optional uuid...used mostly for reading in existing NIDM documents
    :return: none
    """
    if uuid is None:
        #execute default parent class constructor with a freshly generated uuid
        super(DerivativeObject,self).__init__(derivative.graph, pm.QualifiedName(pm.Namespace("niiri",Constants.NIIRI),getUUID()),attributes)
    else:
        # NOTE(review): this branch uses a plain pm.Identifier while the branch
        # above uses a niiri-qualified name — confirm the asymmetry is intended
        super(DerivativeObject,self).__init__(derivative.graph, pm.Identifier(uuid),attributes)
    derivative.graph._add_record(self)

    #carry graph object around
    self.graph = derivative.graph
    #create link to derivative activity
    derivative.add_derivative_object(self)
def __init__(self, session, attributes=None):
    """
    Default constructor: creates an acquisition activity and links it to the
    given session.

    :param session: a session object
    :param attributes: optional dictionary of attributes to add
    """
    #execute default parent class constructor with a fresh nidm-qualified uuid
    super(Acquisition, self).__init__(
        session.graph,
        pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), getUUID()),
        attributes)
    session.graph._add_record(self)

    self.add_attributes(
        {pm.PROV_TYPE: Constants.NIDM_ACQUISITION_ACTIVITY})
    # redundant str() wrapper around a string literal removed — same value
    self.add_attributes({"dct:isPartOf": session})

    #list to store acquisition objects associated with this activity
    self._acquisition_objects = []

    #carry graph object around
    self.graph = session.graph
bind_dicts = [] for index, row in data.iterrows(): rtemplate = template bind_dict = {} for col in data.columns.values: if col in bindmap: outstatement = "var:" + bindmap[col][ "varname"] + " a prov:Entity ;\n" #print repr(row) outval = str(row[col].encode('utf8', 'replace')) if bindmap[col]["val"] == "iri": outval = prov.QualifiedName(prov.Namespace(outNSpref, outNS), urllib.quote(outval)) #print col + " " + bindmap[col]["varname"] + " " + outval bind_dict["var:" + bindmap[col]["varname"]] = outval #outval=row[col] if bindmap[col]["val"] == "literal": outval = '"' + outval + '"' if bindmap[col]["type"] == "attr": outstatement = outstatement + "\ttmpl:2dvalue_0_0 " + str( outval) + " .\n" else: outstatement = outstatement + "\ttmpl:value_0 " + str( outval) + " .\n" rtemplate = rtemplate + outstatement
def checkLinked(nodes, instance_dict):
    """
    Group template variables that are tied together via tmpl:linked and order
    them so each link group forms a contiguous, rank-ordered sequence.

    :param nodes: list of template variable records
    :param instance_dict: lookup table with substitutes from the bindings
    :return: dict with keys "nodes" (records sorted by link-group rank),
        "numInstances" (number of bound instances per variable identifier) and
        "linkedGroups" (one {identifier: rank} dict per link group)
    """
    # qualified name of tmpl:linked — needed for the attribute lookup below
    tmpl_linked_qn = prov.QualifiedName(
        prov.Namespace("tmpl", "http://openprovenance.org/tmpl#"), "linked")
    #make tmpl:linked sweep and determine order
    # ASSUMPTION: Each entity can only be link to one "ancestor" entity,
    # one ancestor entity can be linked to by multiple "successor" entities
    # NO CYCLES!
    # -> This implies: There is only one root and the network of linked rels is a directed acyclic graph
    linkedDict = dict()
    linkedGroups = list()
    for rec in nodes:
        eid = rec.identifier
        for attr in rec.attributes:
            if tmpl_linked_qn == attr[0]:
                linkedDict[eid] = attr[1]

    # split the link graph into roots (only linked TO) and dependents
    dependents = []
    roots = []
    intermediates = []
    for id in linkedDict:
        if id not in dependents:
            dependents.append(id)
    for id in linkedDict:
        if linkedDict[id] not in dependents:
            roots.append(linkedDict[id])
        else:
            intermediates.append(linkedDict[id])

    def dfs_levels(node, links, level):
        # recursive depth-first search assigning a rank (level) to every
        # variable reachable from `node` through tmpl:linked edges
        lower = dict()
        for k in [k for k, v in links.items() if v == node]:
            ret = dfs_levels(k, links, level + 1)
            if ret != None:
                lower.update(ret)
        myval = {node: level}
        lower.update(myval)
        return (lower)

    numInstances = dict()
    combRoot = dict()
    # traverse from each root; `offset` keeps ranks of distinct groups disjoint
    offset = 0
    for r in roots:
        retval = dfs_levels(r, linkedDict, offset)
        #get max rank
        maxr = max(retval.values())
        # we need to check how many entries we have
        maxEntries = 0
        for rec in nodes:
            if rec.identifier in retval:
                eid = rec.identifier
                neid = match(eid, instance_dict, False)
                #assume single instance bound to this node
                length = 0
                if not isinstance(neid, list):
                    length = 1
                    if neid == eid:
                        # no match: if unassigned var or vargen variable, assume length 0
                        length = 0
                    if length > maxEntries:
                        maxEntries = length
                if isinstance(neid, list):
                    # list is assigned to node, now all lengths must be equal
                    length = len(neid)
                    if length != maxEntries:
                        if maxEntries > 0:
                            raise IncorrectNumberOfBindingsForGroupVariable(
                                "Linked entities must have same number of bound instances!"
                            )
                        maxEntries = length
        # every variable in this link group gets the same instance count
        for n in retval:
            numInstances[n] = maxEntries
        combRoot.update(retval)
        linkedGroups.append(retval)
        offset = maxr + 1

    # variables without tmpl:linked ties each form their own singleton group
    for rec in nodes:
        if rec.identifier not in combRoot:
            combRoot[rec.identifier] = offset
            linkedGroups.append({rec.identifier: offset})
            eid = rec.identifier
            neid = match(eid._str, instance_dict, False)
            if isinstance(neid, list):
                numInstances[eid] = len(neid)
            else:
                numInstances[eid] = 1

    #need to remember number of instances for each var
    # when multiple link groups rank accordingly
    #try reorder nodes based on tmpl:linked hierarchy
    fnc = lambda x: combRoot[x.identifier]
    nodes_sorted = sorted(nodes, key=fnc)
    return {
        "nodes": nodes_sorted,
        "numInstances": numInstances,
        "linkedGroups": linkedGroups
    }
- make_binding(prov_doc,entity_dict, attr_dict): result: generate a PROV binding document based on an empty input document (with namespaces assigned) as well as variable settings for entities and attributes (python dictionaries) ''' import prov.model as prov import prov as provbase import six import itertools import uuid import sys import collections GLOBAL_UUID_NS = prov.Namespace("ex_uuid", "http://example.com/uuid#") class UnknownRelationException(Exception): pass class BindingFileException(Exception): pass class UnboundMandatoryVariableException(Exception): pass class IncorrectNumberOfBindingsForGroupVariable(Exception):
#!/usr/bin/env python from xml.dom.minidom import parse import xml.dom.minidom import urllib2 import prov.model as prov from uuid import uuid1 dcterms = prov.Namespace("dcterms", "http://purl.org/dc/terms/") xsd = prov.Namespace("xsd", "http://www.w3.org/2001/XMLSchema#") cml = prov.Namespace("cml", "http://www.connectomics.org/cff-2/") nidm = prov.Namespace("nidm", "http://purl.org/nidash/nidm#") # uuid method get_id = lambda: uuid1().hex def cff2provn(filename): """Parse cml xml file and return a prov bundle object""" #filename = "/Users/fariba/Desktop/UCI/freesurfer/scripts/meta-MC-SCA-023_tp1.cml" tree = xml.dom.minidom.parse(filename) collections = tree.documentElement g = prov.ProvBundle() g.add_namespace(xsd) g.add_namespace(dcterms) g.add_namespace(cml) url_entity = g.entity(cml[get_id()]) url_entity.add_extra_attributes({ prov.PROV['type']:
from copy import deepcopy
from pickle import dumps
import os
import getpass
import platform
from uuid import uuid1
import simplejson as json
import numpy as np
import prov.model as pm

from .. import get_info, logging, __version__
from .filemanip import (md5, hashlib, hash_infile)

# module-level logger for interface provenance messages
iflogger = logging.getLogger('interface')

# namespaces used when building provenance records
foaf = pm.Namespace("foaf", "http://xmlns.com/foaf/0.1/")
dcterms = pm.Namespace("dcterms", "http://purl.org/dc/terms/")
nipype_ns = pm.Namespace("nipype", "http://nipy.org/nipype/terms/")
niiri = pm.Namespace("niiri", "http://iri.nidash.org/")
crypto = pm.Namespace("crypto", ("http://id.loc.gov/vocabulary/preservation/"
                                 "cryptographicHashFunctions/"))

# generate a fresh niiri-qualified identifier on each call
get_id = lambda: niiri[uuid1().hex]

# environment variables whose values are captured into provenance records
PROV_ENVVARS = [
    'PATH', 'FSLDIR', 'FREESURFER_HOME', 'ANTSPATH', 'CAMINOPATH',
    'CLASSPATH', 'LD_LIBRARY_PATH', 'DYLD_LIBRARY_PATH', 'FIX_VERTEX_AREA',
    'FSF_OUTPUT_FORMAT', 'FSLCONFDIR', 'FSLOUTPUTTYPE', 'LOGNAME', 'USER',
    'MKL_NUM_THREADS', 'OMP_NUM_THREADS'
]
def checkLinked(nodes, instance_dict):
    """
    Identify groups of linked variables in the current template.

    Arguments:
        nodes: list of all variables in the template
        instance_dict: lookup table with substitutes from bindings

    Returns:
        dict with the following keys:
        "nodes": template variables sorted so that each link group forms a
            contiguous sequence ordered by "direction" of tmpl:linked, e.g. for
                var b tmpl:linked to var a
                var c tmpl:linked to var b
                var e tmpl:linked to var d
            we get the order: var a, var b, var c, var d, var e
        "numInstances": the number of instances assigned to each variable in
            "nodes"; if vargen vars are linked to regular instantiated vars we
            create the same number of auto generated instances, so they get
            the same number assigned
        "linkedGroups": list of lists, each containing the variables belonging
            to the same link group; for the example above: [[a,b,c], [d,e]]
    """
    # we need the qualified name of tmpl:linked for the attribute lookup
    tmpl_linked_qn=prov.QualifiedName(prov.Namespace("tmpl", "http://openprovenance.org/tmpl#"), "linked")

    # make tmpl:linked sweep and determine order: we essentially create a
    # graph containing all "tmpl:linked" ties and the involved nodes.
    # ASSUMPTION: Each entity can only be linked to one "ancestor" entity,
    # one ancestor entity can be linked to by multiple "successor" entities,
    # NO CYCLES!
    # -> This implies: there is only one root in each link group and the
    #    network of linked rels is a directed acyclic graph
    linkedDict=dict()
    linkedGroups=list()
    for rec in nodes:
        eid = rec.identifier
        for attr in rec.attributes:
            if tmpl_linked_qn == attr[0]:
                linkedDict[eid]=attr[1]

    # determine order: which of the variables is a "root", i.e. only linked
    # to by other vars
    dependents=[]
    roots=[]
    intermediates=[]
    for id in linkedDict:
        if id not in dependents:
            dependents.append(id)
    for id in linkedDict:
        if linkedDict[id] not in dependents:
            roots.append(linkedDict[id])
        else:
            intermediates.append(linkedDict[id])

    def dfs_levels(node, links, level):
        """
        Helper: recursive depth first search to determine the order (rank)
        of linked vars reachable from `node`.
        """
        lower=dict()
        for k in [k for k,v in links.items() if v == node]:
            ret=dfs_levels(k, links, level+1)
            if ret!=None:
                lower.update(ret)
        myval={node : level}
        lower.update(myval)
        return(lower)

    numInstances=dict()
    combRoot=dict()
    #traverse from root; `offset` keeps ranks of distinct groups disjoint
    offset=0
    for r in roots:
        retval=dfs_levels(r, linkedDict, offset)
        #get max rank
        maxr=max(retval.values())
        # we need to check how many entries we have
        maxEntries=0
        for rec in nodes:
            if rec.identifier in retval:
                eid = rec.identifier
                neid = match(eid,instance_dict, False)
                #assume single instance bound to this node
                length=0
                if not isinstance(neid, list):
                    length=1
                    if neid==eid:
                        #no match: if unassigned var or vargen variable, assume length 0
                        length=0
                    if length>maxEntries:
                        maxEntries=length
                if isinstance(neid,list):
                    #list is assigned to node, now all lengths must be equal
                    length=len(neid)
                    if length!=maxEntries:
                        if maxEntries>0:
                            raise IncorrectNumberOfBindingsForGroupVariable("Linked entities must have same number of bound instances!")
                        maxEntries=length
        # every variable in this link group gets the same instance count
        for n in retval:
            numInstances[n]=maxEntries
        combRoot.update(retval)
        linkedGroups.append(retval)
        offset=maxr+1

    # variables without tmpl:linked ties each form their own singleton group
    for rec in nodes:
        if rec.identifier not in combRoot:
            combRoot[rec.identifier]=offset
            linkedGroups.append({rec.identifier : offset})
            eid=rec.identifier
            neid = match(eid._str,instance_dict, False)
            if isinstance(neid, list):
                numInstances[eid]=len(neid)
            else:
                numInstances[eid]=1

    #need to remember number of instances for each var
    # when multiple link groups rank accordingly
    #reorder nodes based on tmpl:linked hierarchy
    fnc=lambda x: combRoot[x.identifier]
    nodes_sorted=sorted(nodes, key=fnc)
    return { "nodes" : nodes_sorted, "numInstances" : numInstances, "linkedGroups" : linkedGroups}
def add_seg_data(nidmdoc, measure, header, json_map, png_file=None, output_file=None, root_act=None, nidm_graph=None): ''' WIP: this function creates a NIDM file of brain volume data and if user supplied a NIDM-E file it will add brain volumes to the NIDM-E file for the matching subject ID :param nidmdoc: :param measure: :param header: :param json_map: :param png_file: :param root_act: :param nidm_graph: :return: ''' niiri = prov.Namespace("niiri", "http://iri.nidash.org/") #this function can be used for both creating a brainvolumes NIDM file from scratch or adding brain volumes to #existing NIDM file. The following logic basically determines which route to take... #if an existing NIDM graph is passed as a parameter then add to existing file if nidm_graph is None: first_row = True #for each of the header items create a dictionary where namespaces are freesurfer #software_activity = nidmdoc.graph.activity(QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID()),other_attributes={Constants.NIDM_PROJECT_DESCRIPTION:"Freesurfer segmentation statistics"}) software_activity = nidmdoc.graph.activity( niiri[getUUID()], other_attributes={ Constants.NIDM_PROJECT_DESCRIPTION: "Freesurfer segmentation statistics" }) for key, value in header.items(): software_activity.add_attributes({ QualifiedName(provNamespace("fs", Constants.FREESURFER), key): value }) #create software agent and associate with software activity #software_agent = nidmdoc.graph.agent(QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID()),other_attributes={ software_agent = nidmdoc.graph.agent( niiri[getUUID()], other_attributes={ QualifiedName( provNamespace( "Neuroimaging_Analysis_Software", Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE), ""): Constants.FREESURFER, prov.PROV_TYPE: prov.PROV["SoftwareAgent"] }) #create qualified association with brain volume computation activity nidmdoc.graph.association( activity=software_activity, agent=software_agent, other_attributes={ PROV_ROLE: 
Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE }) nidmdoc.graph.wasAssociatedWith(activity=software_activity, agent=software_agent) #print(nidmdoc.serializeTurtle()) with open('measure.json', 'w') as fp: json.dump(measure, fp) with open('json_map.json', 'w') as fp: json.dump(json_map, fp) #datum_entity=nidmdoc.graph.entity(QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID()),other_attributes={ datum_entity = nidmdoc.graph.entity( niiri[getUUID()], other_attributes={ prov.PROV_TYPE: QualifiedName( provNamespace("nidm", "http://purl.org/nidash/nidm#"), "FSStatsCollection") }) nidmdoc.graph.wasGeneratedBy(software_activity, datum_entity) #iterate over measure dictionary where measures are the lines in the FS stats files which start with '# Measure' and #the whole table at the bottom of the FS stats file that starts with '# ColHeaders for measures in measure: #check if we have a CDE mapping for the anatomical structure referenced in the FS stats file if measures["structure"] in json_map['Anatomy']: #for the various fields in the FS stats file row starting with '# Measure'... 
for items in measures["items"]: # if the if items['name'] in json_map['Measures'].keys(): if not json_map['Anatomy'][ measures["structure"]]['label']: continue #region_entity=nidmdoc.graph.entity(QualifiedName(provNamespace("niiri",Constants.NIIRI),getUUID()),other_attributes={prov.PROV_TYPE: region_entity = nidmdoc.graph.entity( niiri[getUUID()], other_attributes={ prov.PROV_TYPE: QualifiedName( provNamespace( "measurement_datum", "http://uri.interlex.org/base/ilx_0738269#" ), "") }) #construct the custom CDEs to describe measurements of the various brain regions region_entity.add_attributes({ QualifiedName( provNamespace( "isAbout", "http://uri.interlex.org/ilx_0381385#"), ""): json_map['Anatomy'][ measures["structure"]]['isAbout'], QualifiedName( provNamespace( "hasLaterality", "http://uri.interlex.org/ilx_0381387#"), ""): json_map['Anatomy'][ measures["structure"]]['hasLaterality'], Constants.NIDM_PROJECT_DESCRIPTION: json_map['Anatomy'][measures["structure"]] ['definition'], QualifiedName( provNamespace( "isMeasureOf", "http://uri.interlex.org/ilx_0381389#"), ""): QualifiedName( provNamespace( "GrayMatter", "http://uri.interlex.org/ilx_0104768#"), ""), QualifiedName( provNamespace( "rdfs", "http://www.w3.org/2000/01/rdf-schema#"), "label"): json_map['Anatomy'][measures["structure"]]['label'] }) #QualifiedName(provNamespace("hasUnit","http://uri.interlex.org/ilx_0381384#"),""):json_map['Anatomy'][measures["structure"]]['units'], #print("%s:%s" %(key,value)) region_entity.add_attributes({ QualifiedName( provNamespace( "hasMeasurementType", "http://uri.interlex.org/ilx_0381388#"), ""): json_map['Measures'][items['name']]["measureOf"], QualifiedName( provNamespace( "hasDatumType", "http://uri.interlex.org/ilx_0738262#"), ""): json_map['Measures'][items['name']]["datumType"] }) datum_entity.add_attributes( {region_entity.identifier: items['value']})
import uuid
import sys
import collections
import logging

# Module-level logger: INFO and above are emitted to stderr.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
handler = logging.StreamHandler(stream=sys.stderr)
log.addHandler(handler)

# Prefix searched for among the namespaces of a passed prov template in
# order to identify the custom namespace dedicated to vargen identifiers.
# Until a template overrides it, the standard "urn:uuid:" namespace is used.
GLOBAL_UUID_DEF_NS_PREFIX = "uuid"
GLOBAL_UUID_DEF_NS = prov.Namespace(GLOBAL_UUID_DEF_NS_PREFIX, "urn:uuid:")


class UnknownRelationException(Exception):
    """Error signalled for an unknown relation."""


class BindingFileException(Exception):
    """Error signalled for a problem with a binding file."""


class UnboundMandatoryVariableException(Exception):
    """Error signalled when a mandatory variable is left unbound."""


class IncorrectNumberOfBindingsForGroupVariable(Exception):
    """Error signalled for a wrong binding count on a group variable."""


class IncorrectNumberOfBindingsForStatementVariable(Exception):
    """Error signalled for a wrong binding count on a statement variable."""
outNS = "http://example.com#" #bind_dicts=[] bind_dict = dict() bindfile_dict = dict() for index, row in data.iterrows(): rtemplate = template for col in data.columns.values: if col in bindmap: outval = row[col] if bindmap[col]["val"] == "iri": outval = prov.QualifiedName( prov.Namespace(outNSpref, outNS), urllib.quote(str(outval.encode('utf8', 'replace')))) #print col + " " + bindmap[col]["varname"] + " " + outval ID = "var:" + bindmap[col]["varname"] #ID = prov.QualifiedName(prov.Namespace("var", "http://openprovenance.org/var#"), urllib.quote(bindmap[col]["varname"])) #prepare data for bindings dict if ID not in bind_dict: bind_dict[ID] = outval else: if not isinstance(bind_dict[ID], list): tmp = list() tmp.append(bind_dict[ID]) bind_dict[ID] = tmp
# standard library from pickle import dumps from datetime import datetime as dt import hashlib import os import pwd from socket import getfqdn import uuid import prov.model as prov import rdflib import sys # create namespace references to terms used foaf = prov.Namespace("foaf", "http://xmlns.com/foaf/0.1/") dcterms = prov.Namespace("dcterms", "http://purl.org/dc/terms/") fs = prov.Namespace("fs", "http://www.incf.org/ns/nidash/fs#") nidm = prov.Namespace("nidm", "http://www.incf.org/ns/nidash/nidm#") niiri = prov.Namespace("niiri", "http://iri.nidash.org/") obo = prov.Namespace("obo", "http://purl.obolibrary.org/obo/") nif = prov.Namespace("nif", "http://neurolex.org/wiki/") crypto = prov.Namespace("crypto", "http://www.w3.org/2000/10/swap/crypto#") crypto = prov.Namespace("crypto", ("http://id.loc.gov/vocabulary/preservation/" "cryptographicHashFunctions/")) def add_seg_data(nidmdoc, measure, header, tableinfo, json_map, png_file=None, output_file=None, root_act=None, nidm_graph=None): '''
# Pair up the previously-collected `codes` elements: each consecutive
# (prefix, uri) element pair becomes one namespace entry.
# NOTE(review): `xrange`/`iteritems` below mean this chunk is Python 2 only.
code_block = [codes[pos:pos + 2] for pos in xrange(0, len(codes), 2)]
# Map namespace prefix (trailing character stripped — presumably a ':';
# confirm against the source document) to its URI text.
ns_map = {code[0].text[:-1]: code[1].text for code in code_block}


# Access namespace objects as attributes
class AttrDict(dict):
    # A dict whose keys are also readable as attributes (d.key == d['key']),
    # achieved by aliasing the instance __dict__ to the dict itself.
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self


# create a prov bundle to store the graph
bundle = prov.ProvBundle()
# add namespaces to the bundle
for k, v in ns_map.iteritems():
    ns = prov.Namespace(k, v)
    bundle.add_namespace(ns)
# NOTE(review): reaches into the private `_namespaces` attribute of the
# bundle — may break across prov library versions.
ns = AttrDict(bundle._namespaces)

# Core Metadata
# Each property maps to its requirement level in three contexts:
# property: [summary, version, distribution]
core_metadata = {ns.rdf['type']: ["MUST", "MUST", "MUST"],
                 ns.dct['title']: ["MUST", "MUST", "MUST"],
                 ns.dct['alternative']: ["MAY", "MAY", "MAY"],
                 ns.dct['description']: ["MUST", "MUST", "MUST"],
                 ns.dct['created']: ["NEVER", "SHOULD", "SHOULD"],
                 ns.pav['createdOn']: ["NEVER", "MAY", "MAY"],
                 ns.pav['authoredOn']: ["NEVER", "MAY", "MAY"],
                 ns.pav['curatedOn']: ["NEVER", "MAY", "MAY"],
                 ns.dct['creator']: ["NEVER", "MUST", "MUST"],
__author__ = 'satra'

import hashlib
import os
from uuid import uuid1

# PROV API library
import prov.model as prov
import rdflib

# create namespace references to terms used
foaf = prov.Namespace("foaf", "http://xmlns.com/foaf/0.1/")
dcterms = prov.Namespace("dcterms", "http://purl.org/dc/terms/")
fs = prov.Namespace("fs", "http://freesurfer.net/fswiki/terms/")
nidm = prov.Namespace("nidm", "http://nidm.nidash.org/terms/")
niiri = prov.Namespace("niiri", "http://iri.nidash.org/")
obo = prov.Namespace("obo", "http://purl.obolibrary.org/obo/")
nif = prov.Namespace("nif", "http://neurolex.org/wiki/")
crypto = prov.Namespace("crypto", "http://www.w3.org/2000/10/swap/crypto#")

# Mint a fresh identifier in the niiri namespace from a new UUID1 hex string.
get_id = lambda: niiri[uuid1().hex]


def hash_infile(afile, crypto=hashlib.md5, chunk_len=8192):
    """ Computes hash of a file using 'crypto' module"""
    # NOTE(review): `hex` shadows the builtin of the same name, and the
    # `crypto` parameter shadows the module-level prov namespace above.
    hex = None
    if os.path.isfile(afile):
        crypto_obj = crypto()
        # NOTE(review): `file(...)` is Python 2 only; would need open() on
        # Python 3, ideally as a `with` block so the handle is closed.
        fp = file(afile, 'rb')
        # Read and hash the file in fixed-size chunks.
        while True:
            data = fp.read(chunk_len)