Example #1
File: Core.py Project: mih/PyNIDM
    def addAttributes(self, id, attributes):
        """
        Adds generic attributes in bulk to object [id] and inserts them into the graph.

        :param id: subject identifier/URI
        :param attributes: dictionary with prefix:term keys and attribute values,
            e.g. {"ncit:age": 15, "ncit:gender": "M", Constants.NIDM_FAMILY_NAME: "Keator"}
        :raises TypeError: if a namespace prefix does not exist in the graph
        """
        # iterate through attributes
        for key in attributes.keys():
            # the key is either a Constants reference already mapped to a URL or uses the
            # prefix:term form; either way its prefix must already be registered in the graph
            if not self.checkNamespacePrefix(key.split(':')[0]):
                raise TypeError("Namespace prefix " + key +
                                " not in graph, use addAttributesWithNamespaces or manually add!")
            # figure out the datatype of the literal
            datatype = self.getDataType(attributes[key])
            if datatype is not None:
                id.add_attributes({key: pm.Literal(attributes[key], datatype=datatype)})
            else:
                id.add_attributes({key: pm.Literal(attributes[key])})
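
To make the prefix:term pattern concrete, here is a minimal sketch that uses prov.model directly rather than PyNIDM's Core class; the ncit URI and identifiers are illustrative only. Registering the prefix on the document up front is what the checkNamespacePrefix test above requires.

# Hedged sketch: plain prov.model, not the PyNIDM Core API; names and URIs are illustrative.
import prov.model as pm

doc = pm.ProvDocument()
doc.add_namespace('ncit', 'http://ncitt.ncit.nih.gov/')

subject = doc.entity('ncit:subject_01')
subject.add_attributes({
    'ncit:age': pm.Literal(15, datatype=pm.XSD['integer']),
    'ncit:gender': pm.Literal('M'),
})
print(doc.get_provn())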
Example #2
File: Core.py Project: mih/PyNIDM
    def addLiteralAttribute(self, namespace_prefix, term, object, namespace_uri=None):
        """
        Adds a generic literal and inserts it into the graph.

        :param namespace_prefix: namespace prefix
        :param term: predicate term to associate with the tuple
        :param object: literal to add as the object of the tuple
        :param namespace_uri: if namespace_prefix is not already registered in the graph,
            use this optional argument to supply its URI
        :return: none
        """
        # figure out the datatype of the literal
        datatype = self.getDataType(object)
        # if the namespace prefix is not yet in the graph, register it along with its URI
        if not self.checkNamespacePrefix(namespace_prefix):
            if namespace_uri is None:
                raise TypeError("Namespace_uri argument must be defined for new namespaces")
            else:
                self.addNamespace(namespace_prefix, namespace_uri)

        # if the predicate namespace is still undefined, report the error
        try:
            if datatype is not None:
                self.add_attributes({str(namespace_prefix + ':' + term): pm.Literal(object, datatype=datatype)})
            else:
                self.add_attributes({str(namespace_prefix + ':' + term): pm.Literal(object)})
        except KeyError as e:
            print("\nPredicate namespace identifier \" %s \" not found! \n" % (str(e).split("'")[1]))
            print("Use the addNamespace method to add the namespace before adding a literal attribute \n")
            print("No attribute has been added \n")
Example #3
File: Core.py Project: mih/PyNIDM
    def addAttributesWithNamespaces(self, id, attributes):
        """
        Adds generic attributes in bulk to object [id] and inserts them into the graph.

        :param id: subject identifier/URI
        :param attributes: list of dictionaries with keys uri, prefix, term, and value,
            e.g. [ {"uri": "http://ncitt.ncit.nih.gov/", "prefix": "ncit", "term": "age", "value": 15},
                   {"uri": "http://ncitt.ncit.nih.gov/", "prefix": "ncit", "term": "gender", "value": "M"} ]
        :raises TypeError: if a namespace prefix already exists in the graph but with a different URI
        """
        # iterate through the list of attributes
        for tuple in attributes:
            # check if the namespace prefix already exists in the graph
            if self.checkNamespacePrefix(tuple['prefix']):
                # if the existing prefix maps to the same namespace URI, reuse it; otherwise raise an error
                if self.namespaces[tuple['prefix']] != tuple['uri']:
                    raise TypeError("Namespace prefix: " + tuple['prefix'] +
                                    " already exists in document with a different URI")
            else:
                # register the new prefix/URI pair in the graph
                self.addNamespace(tuple['prefix'], tuple['uri'])

            # figure out the datatype of the literal
            datatype = self.getDataType(tuple['value'])
            if datatype is not None:
                id.add_attributes({self.namespaces[tuple['prefix']][tuple['term']]: pm.Literal(tuple['value'], datatype=datatype)})
            else:
                id.add_attributes({self.namespaces[tuple['prefix']][tuple['term']]: pm.Literal(tuple['value'])})
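
A hedged usage sketch mirroring the docstring above; acq and record are hypothetical stand-ins for a Core-derived PyNIDM object and the prov record being annotated.

# Hypothetical call; the dictionaries follow the docstring's shape exactly.
acq.addAttributesWithNamespaces(record, [
    {"uri": "http://ncitt.ncit.nih.gov/", "prefix": "ncit", "term": "age", "value": 15},
    {"uri": "http://ncitt.ncit.nih.gov/", "prefix": "ncit", "term": "gender", "value": "M"},
])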
Example #4
    def decode_rdf_representation(self, literal, graph):
        if isinstance(literal, RDFLiteral):
            value = literal.value if literal.value is not None else literal
            datatype = literal.datatype if hasattr(literal, 'datatype') else None
            langtag = literal.language if hasattr(literal, 'language') else None
            if datatype and 'XMLLiteral' in datatype:
                value = literal
            if datatype and 'base64Binary' in datatype:
                value = base64.standard_b64encode(value)
            if datatype == XSD['QName']:
                return pm.Literal(literal, datatype=XSD_QNAME)
            if datatype == XSD['dateTime']:
                return dateutil.parser.parse(literal)
            if datatype == XSD['gYear']:
                return pm.Literal(dateutil.parser.parse(literal).year,
                                  datatype=self.valid_identifier(datatype))
            if datatype == XSD['gYearMonth']:
                parsed_info = dateutil.parser.parse(literal)
                return pm.Literal('{0}-{1:02d}'.format(parsed_info.year, parsed_info.month),
                                  datatype=self.valid_identifier(datatype))
            else:
                # The literal of standard Python types is not converted here
                # It will be automatically converted when added to a record by
                # _auto_literal_conversion()
                return pm.Literal(value, self.valid_identifier(datatype), langtag)
        elif isinstance(literal, URIRef):
            rval = self.valid_identifier(literal)
            if rval is None:
                prefix, iri, _ = graph.namespace_manager.compute_qname(literal)
                ns = self.document.add_namespace(prefix, iri)
                rval = pm.QualifiedName(ns, literal.replace(ns.uri, ''))
            return rval
        else:
            # simple type, just return it
            return literal
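
For orientation, a hedged sketch of the direction this method converts in, using rdflib and prov directly outside the serializer class (so the valid_identifier and namespace bookkeeping are not shown):

# Sketch only: rdflib typed literals and the prov-side values they decode to.
import dateutil.parser
import prov.model as pm
from rdflib import Literal as RDFLiteral
from rdflib.namespace import XSD as RDF_XSD

dt_lit = RDFLiteral("2011-11-16T16:05:00", datatype=RDF_XSD.dateTime)
decoded_dt = dateutil.parser.parse(str(dt_lit))                    # xsd:dateTime -> Python datetime

ym_lit = RDFLiteral("2011-11", datatype=RDF_XSD.gYearMonth)
parsed = dateutil.parser.parse(str(ym_lit))
decoded_ym = pm.Literal('{0}-{1:02d}'.format(parsed.year, parsed.month),
                        datatype=pm.XSD['gYearMonth'])             # xsd:gYearMonth -> prov Literal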
Example #5
def prov_encode(graph, value, create_container=True):
    if isinstance(value, (list, tuple)) and create_container:
        value = list(value)
        if len(value) == 0:
            encoded_literal = safe_encode(value)
            attr = {pm.PROV['value']: encoded_literal}
            eid = get_attr_id(attr)
            return graph.entity(eid, attr)

        if len(value) == 1:
            return prov_encode(graph, value[0])

        entities = []
        for item in value:
            item_entity = prov_encode(graph, item)
            entities.append(item_entity)
            if isinstance(item, (list, tuple)):
                continue

            item_entity_val = list(item_entity.value)[0]
            is_str = isinstance(item_entity_val, str)
            if not is_str or (is_str and 'file://' not in item_entity_val):
                return prov_encode(graph, value, create_container=False)

        eid = get_id()
        entity = graph.collection(identifier=eid)
        for item_entity in entities:
            graph.hadMember(eid, item_entity)

        return entity
    else:
        encoded_literal = safe_encode(value)
        attr = {pm.PROV['value']: encoded_literal}
        if isinstance(value, str) and os.path.exists(value):
            attr.update({pm.PROV['location']: encoded_literal})
            if not os.path.isdir(value):
                sha512 = hash_infile(value, crypto=hashlib.sha512)
                attr.update({
                    crypto['sha512']:
                    pm.Literal(sha512, pm.XSD['string'])
                })
                eid = get_attr_id(
                    attr, skip=[pm.PROV['location'], pm.PROV['value']])
            else:
                eid = get_attr_id(attr, skip=[pm.PROV['location']])
        else:
            eid = get_attr_id(attr)
        entity = graph.entity(eid, attr)
    return entity
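
A hedged usage sketch, assuming the module-level helpers used above (safe_encode, get_attr_id, get_id, hash_infile, crypto) are in scope as in nipype's provenance module; the paths are illustrative.

import prov.model as pm

graph = pm.ProvDocument()
# A scalar value becomes a single entity carrying a prov:value literal.
e1 = prov_encode(graph, 42)
# A list of existing file paths becomes a collection whose members are per-file entities
# (each with prov:location and a sha512 attribute); any other list falls back to a single
# JSON-encoded literal via the create_container=False branch.
e2 = prov_encode(graph, ["/tmp/a.nii.gz", "/tmp/b.nii.gz"])
print(e1.identifier, e2.identifier)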
Example #6
    def test_serialization_example_7(self):
        """
        Test the serialization of example 7 which is a basic activity.
        """
        document = prov.ProvDocument()
        document.add_namespace(*EX_NS)

        document.activity(
            "ex:a1", "2011-11-16T16:05:00", "2011-11-16T16:06:00",
            [(prov.PROV_TYPE, prov.Literal("ex:edit", prov.XSD_QNAME)),
             ("ex:host", "server.example.org")])

        with io.BytesIO() as actual:
            document.serialize(format='xml', destination=actual)
            compare_xml(os.path.join(DATA_PATH, "example_07.xml"), actual)
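
The same record can be built outside the test harness; a minimal sketch, assuming EX_NS is a (prefix, uri) pair such as ('ex', 'http://example.org/'), printing PROV-N and XML instead of comparing against the reference file:

import prov.model as prov

document = prov.ProvDocument()
document.add_namespace('ex', 'http://example.org/')   # assumed value of EX_NS
document.activity(
    "ex:a1", "2011-11-16T16:05:00", "2011-11-16T16:06:00",
    [(prov.PROV_TYPE, prov.Literal("ex:edit", prov.XSD_QNAME)),
     ("ex:host", "server.example.org")])
print(document.get_provn())
print(document.serialize(format='xml'))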
Example #7
def prov_encode(graph, value, create_container=True):
    if isinstance(value, list) and create_container:
        if len(value) == 0:
            encoded_literal = safe_encode(value)
            attr = {pm.PROV['value']: encoded_literal}
            id = get_attr_id(attr)
            entity = graph.entity(id, attr)
        elif len(value) > 1:
            try:
                entities = []
                for item in value:
                    item_entity = prov_encode(graph, item)
                    entities.append(item_entity)
                    if isinstance(item, list):
                        continue
                    if not isinstance(
                            list(item_entity.value)[0], string_types):
                        raise ValueError('Not a string literal')
                    if 'file://' not in list(item_entity.value)[0]:
                        raise ValueError('No file found')
                id = get_id()
                entity = graph.collection(identifier=id)
                for item_entity in entities:
                    graph.hadMember(id, item_entity)
            except ValueError as e:
                iflogger.debug(e)
                entity = prov_encode(graph, value, create_container=False)
        else:
            entity = prov_encode(graph, value[0])
    else:
        encoded_literal = safe_encode(value)
        attr = {pm.PROV['value']: encoded_literal}
        if isinstance(value, string_types) and os.path.exists(value):
            attr.update({pm.PROV['location']: encoded_literal})
            if not os.path.isdir(value):
                sha512 = hash_infile(value, crypto=hashlib.sha512)
                attr.update(
                    {crypto['sha512']: pm.Literal(sha512, pm.XSD['string'])})
                id = get_attr_id(attr,
                                 skip=[pm.PROV['location'], pm.PROV['value']])
            else:
                id = get_attr_id(attr, skip=[pm.PROV['location']])
        else:
            id = get_attr_id(attr)
        entity = graph.entity(id, attr)
    return entity
Example #8
def setEntry(rec, regNS):
    """
    Interpret a value provided via v3 bindings: check whether it is a qualified
    name or a (typed) value, and handle datatypes accordingly.

    Args:
        rec: a key/value pair read from the v3 bindings file
        regNS: the namespaces read from the context section of the v3 bindings file
    Returns:
        the "prov-ified" value, or the value as-is as a fallback

    Expected keys: "@id" (qualified-name form), or "@value" with an optional
    "@type" (typed-value form).
    """

    out = rec
    try:
        if "@id" in rec:
            toks = rec["@id"].split(":")
            if len(toks) > 2:
                raise BindingFileException("Invalid Qualified Name " + rec["@id"] +
                                           " found in V3 Json Binding " + repr(rec))
            for ns in regNS:
                if ns.prefix == toks[0]:
                    out = prov.QualifiedName(ns, toks[1])
        if "@value" in rec:
            if "@type" in rec:
                dt = rec["@type"]
                if isinstance(rec["@type"], basestring):
                    dt = xsd_datype_to_prov_datatype(dt, regNS)
                out = prov.Literal(rec["@value"], datatype=dt)
            else:
                out = rec["@value"]
    except:
        raise BindingFileException("Error parsing " + repr(rec))
    return out
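
Hedged examples of the two record shapes setEntry distinguishes; regNS is assumed to be an iterable of prov.Namespace objects, and setEntry plus its helper xsd_datype_to_prov_datatype are assumed to be importable.

import prov.model as prov

regNS = [prov.Namespace("ex", "http://example.org/")]

# Qualified-name form: resolved against the registered namespaces.
setEntry({"@id": "ex:thing"}, regNS)                      # -> prov.QualifiedName(ex, "thing")

# Typed-value form: wrapped in a prov.Literal with the mapped datatype.
setEntry({"@value": "42", "@type": "xsd:integer"}, regNS)

# Plain value form: returned as-is.
setEntry({"@value": "just text"}, regNS)                  # -> "just text"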
Example #9
def safe_encode(x, as_literal=True):
    """Encodes a python value for prov
"""
    if x is None:
        value = "Unknown"
        if as_literal:
            return prov.Literal(value, prov.XSD['string'])
        else:
            return value
    try:
        if isinstance(x, (str, unicode)):
            if os.path.exists(x):
                value = 'file://%s%s' % (getfqdn(), x)
                if not as_literal:
                    return value
                try:
                    return prov.URIRef(value)
                except AttributeError:
                    return prov.Literal(value, prov.XSD['anyURI'])
            else:
                if len(x) > max_text_len:
                    value = x[:max_text_len - 13] + '...Clipped...'
                else:
                    value = x
                if not as_literal:
                    return value
                return prov.Literal(value, prov.XSD['string'])
        if isinstance(x, (int,)):
            if not as_literal:
                return x
            return prov.Literal(int(x), prov.XSD['integer'])
        if isinstance(x, (float,)):
            if not as_literal:
                return x
            return prov.Literal(x, prov.XSD['float'])
        if not as_literal:
            return dumps(x)
        return prov.Literal(dumps(x), nidm['pickle'])
    except TypeError as e:
        value = "Could not encode: " + str(e)
        if not as_literal:
            return value
        return prov.Literal(value, prov.XSD['string'])
Example #10
def cff2provn(filename):
    """Parse cml xml file and return a prov bundle object"""
    #filename = "/Users/fariba/Desktop/UCI/freesurfer/scripts/meta-MC-SCA-023_tp1.cml"
    tree = xml.dom.minidom.parse(filename)
    collections = tree.documentElement

    g = prov.ProvBundle()
    g.add_namespace(xsd)
    g.add_namespace(dcterms)
    g.add_namespace(cml)

    url_entity = g.entity(cml[get_id()])
    url_entity.add_extra_attributes({
        prov.PROV['type']:
        nidm['nidm:ConnectomeFileFormat'],
        prov.PROV['location']:
        prov.Literal(filename, prov.XSD['string'])
    })

    cml_collection = g.collection(cml[get_id()])
    cml_collection.add_extra_attributes({
        prov.PROV['type']: cml['connectome'],
        prov.PROV['label']: filename
    })
    g.wasDerivedFrom(cml_collection, url_entity)

    # get species, subject_name, and subject_timepoint
    species = tree.getElementsByTagName('cml:species')[0].toxml()
    species = species.replace('<cml:species>',
                              '').replace('</cml:species>', '')

    tp = ''
    sub = ''
    tags = collections.getElementsByTagName("cml:tag")
    for t in tags:
        if t.attributes['key'].value == 'subject_name':
            sub = t.toxml()
        if t.attributes['key'].value == 'subject_timepoint':
            tp = t.toxml()
    sub = sub.replace('<cml:tag key="subject_name">',
                      '').replace('</cml:tag>', '')
    tp = tp.replace('<cml:tag key="subject_timepoint">',
                    '').replace('</cml:tag>', '')

    cml_meta = g.entity(cml[get_id()])
    cml_meta.add_extra_attributes({
        prov.PROV['type']: cml['connectome-meta'],
        cml['species']: species,
        cml['timepoint']: tp,
        cml['subject_name']: sub
    })
    g.hadMember(cml_collection, cml_meta)

    volumes = collections.getElementsByTagName("cml:connectome-volume")
    c = 0
    for v in volumes:
        c = c + 1
        dtype = v.getAttribute('dtype')
        src = v.getAttribute('src')
        name = v.getAttribute('name')
        fileformat = v.getAttribute('fileformat')
        cml_volume = g.entity(cml[get_id()])
        cml_volume.add_extra_attributes({
            prov.PROV['type']:
            cml['connectome-volume'],
            cml['dtype']:
            dtype,
            cml['src']:
            src,
            cml['name']:
            name,
            cml['fileformat']:
            fileformat
        })
        g.hadMember(cml_collection, cml_volume)

    tracks = collections.getElementsByTagName("cml:connectome-track")
    c = 0
    for t in tracks:
        c = c + 1
        dtype = t.getAttribute('dtype')
        src = t.getAttribute('src')
        name = t.getAttribute('name')
        fileformat = t.getAttribute('fileformat')
        cml_track = g.entity(cml[get_id()])
        cml_track.add_extra_attributes({
            prov.PROV['type']:
            cml['connectome-track'],
            cml['dtype']:
            dtype,
            cml['src']:
            src,
            cml['name']:
            name,
            cml['fileformat']:
            fileformat
        })
        g.hadMember(cml_collection, cml_track)

    networks = collections.getElementsByTagName("cml:connectome-network")
    c = 0
    for n in networks:
        c = c + 1
        dtype = n.getAttribute('dtype')
        src = n.getAttribute('src')
        name = n.getAttribute('name')
        fileformat = n.getAttribute('fileformat')
        cml_network = g.entity(cml[get_id()])
        cml_network.add_extra_attributes({
            prov.PROV['type']:
            cml['connectome-network'],
            cml['dtype']:
            dtype,
            cml['src']:
            src,
            cml['name']:
            name,
            cml['fileformat']:
            fileformat
        })
        g.hadMember(cml_collection, cml_network)

    surfaces = collections.getElementsByTagName("cml:connectome-surface")
    c = 0
    for s in surfaces:
        c = c + 1
        dtype = s.getAttribute('dtype')
        src = s.getAttribute('src')
        name = s.getAttribute('name')
        fileformat = s.getAttribute('fileformat')
        cml_surface = g.entity(cml[get_id()])
        cml_surface.add_extra_attributes({
            prov.PROV['type']:
            cml['connectome-surface'],
            cml['dtype']:
            dtype,
            cml['src']:
            src,
            cml['name']:
            name,
            cml['fileformat']:
            fileformat
        })
        g.hadMember(cml_collection, cml_surface)

    data = collections.getElementsByTagName("cml:connectome-data")
    c = 0
    for d in data:
        c = c + 1
        dtype = d.getAttribute('dtype')
        src = d.getAttribute('src')
        name = d.getAttribute('name')
        fileformat = d.getAttribute('fileformat')
        cml_data = g.entity(cml[get_id()])
        cml_data.add_extra_attributes({
            prov.PROV['type']: cml['connectome-data'],
            cml['dtype']: dtype,
            cml['src']: src,
            cml['name']: name,
            cml['fileformat']: fileformat
        })
        g.hadMember(cml_collection, cml_data)

    return g
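
A hedged usage sketch; the filename is illustrative, and the module-level namespaces (cml, nidm, dcterms, xsd) and the get_id helper are assumed to be defined as in the original script.

# Hypothetical path to a connectome (.cml) file.
bundle = cff2provn("meta-connectome_tp1.cml")
print(bundle.get_provn())   # inspect the generated provenance records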
Example #11
    def add_results(self, results, keep_provenance=False):
        if keep_provenance and results.provenance:
            self.g = deepcopy(results.provenance)
            return self.g
        runtime = results.runtime
        interface = results.interface
        inputs = results.inputs
        outputs = results.outputs
        classname = interface.__name__
        modulepath = "{0}.{1}".format(interface.__module__, interface.__name__)
        activitytype = ''.join([i.capitalize() for i in modulepath.split('.')])

        a0_attrs = {
            nipype_ns['module']: interface.__module__,
            nipype_ns["interface"]: classname,
            pm.PROV["type"]: nipype_ns[activitytype],
            pm.PROV["label"]: classname,
            nipype_ns['duration']: safe_encode(runtime.duration),
            nipype_ns['workingDirectory']: safe_encode(runtime.cwd),
            nipype_ns['returnCode']: safe_encode(runtime.returncode),
            nipype_ns['platform']: safe_encode(runtime.platform),
            nipype_ns['version']: safe_encode(runtime.version),
        }
        a0_attrs[foaf["host"]] = pm.Literal(runtime.hostname, pm.XSD['anyURI'])

        try:
            a0_attrs.update(
                {nipype_ns['command']: safe_encode(runtime.cmdline)})
            a0_attrs.update(
                {nipype_ns['commandPath']: safe_encode(runtime.command_path)})
            a0_attrs.update(
                {nipype_ns['dependencies']: safe_encode(runtime.dependencies)})
        except AttributeError:
            pass
        a0 = self.g.activity(get_id(), runtime.startTime, runtime.endTime,
                             a0_attrs)
        # environment
        id = get_id()
        env_collection = self.g.collection(id)
        env_collection.add_attributes({
            pm.PROV['type']: nipype_ns['Environment'],
            pm.PROV['label']: "Environment"
        })
        self.g.used(a0, id)
        # write environment entities
        for idx, (key, val) in enumerate(sorted(runtime.environ.items())):
            if key not in PROV_ENVVARS:
                continue
            in_attr = {
                pm.PROV["label"]: key,
                nipype_ns["environmentVariable"]: key,
                pm.PROV["value"]: safe_encode(val)
            }
            id = get_attr_id(in_attr)
            self.g.entity(id, in_attr)
            self.g.hadMember(env_collection, id)
        # write input entities
        if inputs:
            id = get_id()
            input_collection = self.g.collection(id)
            input_collection.add_attributes({
                pm.PROV['type']:
                nipype_ns['Inputs'],
                pm.PROV['label']:
                "Inputs"
            })
            # write input entities
            for idx, (key, val) in enumerate(sorted(inputs.items())):
                in_entity = prov_encode(self.g, val).identifier
                self.g.hadMember(input_collection, in_entity)
                used_attr = {pm.PROV["label"]: key, nipype_ns["inPort"]: key}
                self.g.used(activity=a0,
                            entity=in_entity,
                            other_attributes=used_attr)
        # write output entities
        if outputs:
            id = get_id()
            output_collection = self.g.collection(id)
            if not isinstance(outputs, dict):
                outputs = outputs.get_traitsfree()
            output_collection.add_attributes({
                pm.PROV['type']:
                nipype_ns['Outputs'],
                pm.PROV['label']:
                "Outputs"
            })
            self.g.wasGeneratedBy(output_collection, a0)
            # write output entities
            for idx, (key, val) in enumerate(sorted(outputs.items())):
                out_entity = prov_encode(self.g, val).identifier
                self.g.hadMember(output_collection, out_entity)
                gen_attr = {pm.PROV["label"]: key, nipype_ns["outPort"]: key}
                self.g.generation(out_entity,
                                  activity=a0,
                                  other_attributes=gen_attr)
        # write runtime entities
        id = get_id()
        runtime_collection = self.g.collection(id)
        runtime_collection.add_attributes({
            pm.PROV['type']: nipype_ns['Runtime'],
            pm.PROV['label']: "RuntimeInfo"
        })
        self.g.wasGeneratedBy(runtime_collection, a0)
        for key, value in sorted(runtime.items()):
            if not value:
                continue
            if key not in ['stdout', 'stderr', 'merged']:
                continue
            attr = {pm.PROV["label"]: key, nipype_ns[key]: safe_encode(value)}
            id = get_id()
            self.g.entity(id, attr)
            self.g.hadMember(runtime_collection, id)

        # create agents
        user_attr = {
            pm.PROV["type"]: pm.PROV["Person"],
            pm.PROV["label"]: getpass.getuser(),
            foaf["name"]: safe_encode(getpass.getuser())
        }
        user_agent = self.g.agent(get_attr_id(user_attr), user_attr)
        agent_attr = {
            pm.PROV["type"]: pm.PROV["SoftwareAgent"],
            pm.PROV["label"]: "Nipype",
            foaf["name"]: safe_encode("Nipype"),
            nipype_ns["version"]: __version__
        }
        for key, value in list(get_info().items()):
            agent_attr.update({nipype_ns[key]: safe_encode(value)})
        software_agent = self.g.agent(get_attr_id(agent_attr), agent_attr)
        self.g.wasAssociatedWith(
            a0, user_agent, None, None,
            {pm.PROV["hadRole"]: nipype_ns["LoggedInUser"]})
        self.g.wasAssociatedWith(a0, software_agent)
        return self.g
Example #12
def safe_encode(x, as_literal=True):
    """
    Encodes a python value for prov
    """
    if x is None:
        value = "Unknown"
        if as_literal:
            return pm.Literal(value, pm.XSD['string'])
        else:
            return value

    if isinstance(x, (str, bytes)):
        if isinstance(x, bytes):
            x = str(x, 'utf-8')
        if os.path.exists(x):
            if not os.path.isabs(x):
                x = os.path.abspath(x)
            value = 'file://{}{}'.format(platform.node().lower(), x)
            if not as_literal:
                return value
            try:
                return pm.URIRef(value)
            except AttributeError:
                return pm.Literal(value, pm.XSD['anyURI'])
        else:
            value = x
            if len(x) > max_text_len:
                cliptxt = '...Clipped...'
                value = x[:max_text_len - len(cliptxt)] + cliptxt

            if not as_literal:
                return value

            return pm.Literal(value, pm.XSD['string'])
    if isinstance(x, int):
        if not as_literal:
            return x
        return pm.Literal(int(x), pm.XSD['integer'])
    if isinstance(x, float):
        if not as_literal:
            return x
        return pm.Literal(x, pm.XSD['float'])
    if isinstance(x, dict):
        outdict = {}
        for key, value in list(x.items()):
            encoded_value = safe_encode(value, as_literal=False)
            if isinstance(encoded_value, pm.Literal):
                outdict[key] = encoded_value.json_representation()
            else:
                outdict[key] = encoded_value

        try:
            jsonstr = json.dumps(outdict)
        except UnicodeDecodeError as excp:
            jsonstr = "Could not encode dictionary. {}".format(excp)
            iflogger.warn('Prov: %s', jsonstr)

        if not as_literal:
            return jsonstr
        return pm.Literal(jsonstr, pm.XSD['string'])
    if isinstance(x, (list, tuple)):
        x = list(x)
        is_object = False
        try:
            nptype = np.array(x).dtype
            is_object = nptype == np.dtype(object)
        except ValueError:
            is_object = True

        # If the array contains an heterogeneous mixture of data types
        # they should be encoded sequentially
        if is_object:
            outlist = []
            for value in x:
                encoded_value = safe_encode(value, as_literal=False)
                if isinstance(encoded_value, pm.Literal):
                    outlist.append(encoded_value.json_representation())
                else:
                    outlist.append(encoded_value)
            x = outlist

        try:
            jsonstr = json.dumps(x)
        except UnicodeDecodeError as excp:
            jsonstr = "Could not encode list/tuple. {}".format(excp)
            iflogger.warn('Prov: %s', jsonstr)

        if not as_literal:
            return jsonstr
        return pm.Literal(jsonstr, pm.XSD['string'])

    # If is a literal, and as_literal do nothing.
    # else bring back to json.
    if isinstance(x, pm.Literal):
        if as_literal:
            return x
        return dumps(x.json_representation())

    jsonstr = None
    ltype = pm.XSD['string']
    try:
        jsonstr = json.dumps(x.__dict__)
    except AttributeError:
        pass

    if jsonstr is None:
        try:
            jsonstr = dumps(x)
            ltype = nipype_ns['pickle']
        except TypeError as excp:
            jsonstr = 'Could not encode object. {}'.format(excp)

    if not as_literal:
        return jsonstr
    return pm.Literal(jsonstr, ltype)
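
A hedged tour of what this version of safe_encode returns for common inputs, assuming nipype's module-level names (max_text_len, nipype_ns, iflogger) are in scope.

safe_encode(None)                  # pm.Literal("Unknown", pm.XSD['string'])
safe_encode(3)                     # pm.Literal(3, pm.XSD['integer'])
safe_encode(2.5)                   # pm.Literal(2.5, pm.XSD['float'])
safe_encode({"a": 1})              # dict -> JSON string literal (xsd:string)
safe_encode([1, "x", None])        # mixed-type list -> items encoded, then a JSON string literal
safe_encode("/etc/hosts")          # existing path -> file://<host>/etc/hosts as an anyURI literal
safe_encode(3, as_literal=False)   # plain Python value, no prov Literal wrapper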
Example #13
def safe_encode(x, as_literal=True):
    """Encodes a python value for prov
    """
    if x is None:
        value = "Unknown"
        if as_literal:
            return pm.Literal(value, pm.XSD['string'])
        else:
            return value
    try:
        if isinstance(x, (str, unicode)):
            if os.path.exists(x):
                value = 'file://%s%s' % (getfqdn(), x)
                if not as_literal:
                    return value
                try:
                    return pm.URIRef(value)
                except AttributeError:
                    return pm.Literal(value, pm.XSD['anyURI'])
            else:
                if len(x) > max_text_len:
                    value = x[:max_text_len - 13] + '...Clipped...'
                else:
                    value = x
                if not as_literal:
                    return value
                return pm.Literal(value, pm.XSD['string'])
        if isinstance(x, (int, )):
            if not as_literal:
                return x
            return pm.Literal(int(x), pm.XSD['integer'])
        if isinstance(x, (float, )):
            if not as_literal:
                return x
            return pm.Literal(x, pm.XSD['float'])
        if isinstance(x, dict):
            outdict = {}
            for key, value in x.items():
                encoded_value = safe_encode(value, as_literal=False)
                if isinstance(encoded_value, (pm.Literal, )):
                    outdict[key] = encoded_value.json_representation()
                else:
                    outdict[key] = encoded_value
            if not as_literal:
                return json.dumps(outdict)
            return pm.Literal(json.dumps(outdict), pm.XSD['string'])
        if isinstance(x, list):
            try:
                nptype = np.array(x).dtype
                if nptype == np.dtype(object):
                    raise ValueError('dtype object')
            except ValueError as e:
                outlist = []
                for value in x:
                    encoded_value = safe_encode(value, as_literal=False)
                    if isinstance(encoded_value, (pm.Literal, )):
                        outlist.append(encoded_value.json_representation())
                    else:
                        outlist.append(encoded_value)
            else:
                outlist = x
            if not as_literal:
                return json.dumps(outlist)
            return pm.Literal(json.dumps(outlist), pm.XSD['string'])
        if not as_literal:
            return dumps(x)
        return pm.Literal(dumps(x), nipype_ns['pickle'])
    except TypeError as e:
        iflogger.info(e)
        value = "Could not encode: " + str(e)
        if not as_literal:
            return value
        return pm.Literal(value, pm.XSD['string'])
Example #14
                    else:
                        outlist.append(encoded_value)
            else:
                outlist = x
            if not as_literal:
                return json.dumps(outlist)
            return pm.Literal(json.dumps(outlist), pm.XSD['string'])
        if not as_literal:
            return dumps(x)
        return pm.Literal(dumps(x), nipype_ns['pickle'])
    except TypeError as e:
        iflogger.info(e)
        value = "Could not encode: " + str(e)
        if not as_literal:
            return value
        return pm.Literal(value, pm.XSD['string'])


def prov_encode(graph, value, create_container=True):
    if isinstance(value, list) and create_container:
        if len(value) == 0:
            encoded_literal = safe_encode(value)
            attr = {pm.PROV['value']: encoded_literal}
            id = get_attr_id(attr)
            entity = graph.entity(id, attr)
        elif len(value) > 1:
            try:
                entities = []
                for item in value:
                    item_entity = prov_encode(graph, item)
                    entities.append(item_entity)
Example #15
    def add_results(self, results):
        if results.provenance:
            try:
                self.g.add_bundle(results.provenance)
            except pm.ProvException:
                self.g.add_bundle(results.provenance, get_id())
            return self.g
        runtime = results.runtime
        interface = results.interface
        inputs = results.inputs
        outputs = results.outputs
        classname = interface.__name__

        a0_attrs = {
            nipype_ns['module']: interface.__module__,
            nipype_ns["interface"]: classname,
            pm.PROV["label"]: classname,
            nipype_ns['duration']: safe_encode(runtime.duration),
            nipype_ns['working_directory']: safe_encode(runtime.cwd),
            nipype_ns['return_code']: safe_encode(runtime.returncode),
            nipype_ns['platform']: safe_encode(runtime.platform),
            nipype_ns['version']: safe_encode(runtime.version),
        }
        try:
            a0_attrs[foaf["host"]] = pm.URIRef(runtime.hostname)
        except AttributeError:
            a0_attrs[foaf["host"]] = pm.Literal(runtime.hostname,
                                                pm.XSD['anyURI'])

        try:
            a0_attrs.update(
                {nipype_ns['command']: safe_encode(runtime.cmdline)})
            a0_attrs.update(
                {nipype_ns['command_path']: safe_encode(runtime.command_path)})
            a0_attrs.update(
                {nipype_ns['dependencies']: safe_encode(runtime.dependencies)})
        except AttributeError:
            pass
        a0 = self.g.activity(get_id(), runtime.startTime, runtime.endTime,
                             a0_attrs)
        # environment
        id = get_id()
        env_collection = self.g.collection(id)
        env_collection.add_extra_attributes({
            pm.PROV['type']:
            nipype_ns['environment'],
            pm.PROV['label']:
            "Environment"
        })
        self.g.used(a0, id)
        # write environment entities
        for idx, (key, val) in enumerate(sorted(runtime.environ.items())):
            if key not in [
                    'PATH', 'FSLDIR', 'FREESURFER_HOME', 'ANTSPATH',
                    'CAMINOPATH', 'CLASSPATH', 'LD_LIBRARY_PATH',
                    'DYLD_LIBRARY_PATH', 'FIX_VERTEX_AREA',
                    'FSF_OUTPUT_FORMAT', 'FSLCONFDIR', 'FSLOUTPUTTYPE',
                    'LOGNAME', 'USER', 'MKL_NUM_THREADS', 'OMP_NUM_THREADS'
            ]:
                continue
            in_attr = {
                pm.PROV["label"]: key,
                nipype_ns["environment_variable"]: key,
                pm.PROV["value"]: safe_encode(val)
            }
            id = get_attr_id(in_attr)
            self.g.entity(id, in_attr)
            self.g.hadMember(env_collection, id)
        # write input entities
        if inputs:
            id = get_id()
            input_collection = self.g.collection(id)
            input_collection.add_extra_attributes({
                pm.PROV['type']:
                nipype_ns['inputs'],
                pm.PROV['label']:
                "Inputs"
            })
            # write input entities
            for idx, (key, val) in enumerate(sorted(inputs.items())):
                in_entity = prov_encode(self.g, val).get_identifier()
                self.g.hadMember(input_collection, in_entity)
                used_attr = {pm.PROV["label"]: key, nipype_ns["in_port"]: key}
                self.g.used(activity=a0,
                            entity=in_entity,
                            other_attributes=used_attr)
        # write output entities
        if outputs:
            id = get_id()
            output_collection = self.g.collection(id)
            if not isinstance(outputs, dict):
                outputs = outputs.get_traitsfree()
            output_collection.add_extra_attributes({
                pm.PROV['type']:
                nipype_ns['outputs'],
                pm.PROV['label']:
                "Outputs"
            })
            self.g.wasGeneratedBy(output_collection, a0)
            # write output entities
            for idx, (key, val) in enumerate(sorted(outputs.items())):
                out_entity = prov_encode(self.g, val).get_identifier()
                self.g.hadMember(output_collection, out_entity)
                gen_attr = {pm.PROV["label"]: key, nipype_ns["out_port"]: key}
                self.g.generation(out_entity,
                                  activity=a0,
                                  other_attributes=gen_attr)
        # write runtime entities
        id = get_id()
        runtime_collection = self.g.collection(id)
        runtime_collection.add_extra_attributes({
            pm.PROV['type']:
            nipype_ns['runtime'],
            pm.PROV['label']:
            "RuntimeInfo"
        })
        self.g.wasGeneratedBy(runtime_collection, a0)
        for key, value in sorted(runtime.items()):
            if not value:
                continue
            if key not in ['stdout', 'stderr', 'merged']:
                continue
            attr = {pm.PROV["label"]: key, nipype_ns[key]: safe_encode(value)}
            id = get_id()
            self.g.entity(id, attr)
            self.g.hadMember(runtime_collection, id)

        # create agents
        user_attr = {
            pm.PROV["type"]: pm.PROV["Person"],
            pm.PROV["label"]: pwd.getpwuid(os.geteuid()).pw_name,
            foaf["name"]: safe_encode(pwd.getpwuid(os.geteuid()).pw_name)
        }
        user_agent = self.g.agent(get_attr_id(user_attr), user_attr)
        agent_attr = {
            pm.PROV["type"]: pm.PROV["SoftwareAgent"],
            pm.PROV["label"]: "Nipype",
            foaf["name"]: safe_encode("Nipype")
        }
        for key, value in get_info().items():
            agent_attr.update({nipype_ns[key]: safe_encode(value)})
        software_agent = self.g.agent(get_attr_id(agent_attr), agent_attr)
        self.g.wasAssociatedWith(
            a0, user_agent, None, None,
            {pm.PROV["hadRole"]: nipype_ns["LoggedInUser"]})
        self.g.wasAssociatedWith(a0, software_agent)
        return self.g
Example #16
def parse_stats(g, fs_stat_file, entity_uri):
    """Convert stats file to a nidm object
"""

    header, tableinfo, measures = read_stats(fs_stat_file)

    get_id = lambda : niiri[uuid.uuid1().hex]
    a0 = g.activity(get_id(), startTime=dt.isoformat(dt.utcnow()))
    user_agent = g.agent(get_id(),
                         {prov.PROV["type"]: prov.PROV["Person"],
                          prov.PROV["label"]: pwd.getpwuid(os.geteuid()).pw_name,
                          foaf["name"]: pwd.getpwuid(os.geteuid()).pw_name})
    g.wasAssociatedWith(a0, user_agent, None, None,
                        {prov.PROV["Role"]: "LoggedInUser"})
    stat_collection = g.collection(get_id())
    stat_collection.add_extra_attributes({prov.PROV['type']: fs['FreeSurferStatsCollection']})
    # header elements
    statheader_collection = g.entity(get_id())
    attributes = {prov.PROV['type']: fs['StatFileHeader']}
    for key, value in header.items():
        attributes[fs[key.replace('.c', '-c')]] = value
    statheader_collection.add_extra_attributes(attributes)
    # measures
    struct_info = {}
    measure_list = []
    measure_graph = rdflib.ConjunctiveGraph()
    measure_graph.namespace_manager.bind('fs', fs.get_uri())
    measure_graph.namespace_manager.bind('nidm', nidm.get_uri())
    unknown_units = set(('unitless', 'NA'))
    for measure in measures:
        obj_attr = []
        struct_uri = fs[measure['structure'].replace('.', '-')]
        if measure['source'] == 'Header':
            measure_name = measure['name']
            if measure_name not in measure_list:
                measure_list.append(measure_name)
                measure_uri = fs[measure_name].rdf_representation()
                measure_graph.add((measure_uri,
                                   rdflib.RDF['type'],
                                   fs['Measure'].rdf_representation()))
                measure_graph.add((measure_uri,
                                   rdflib.RDFS['label'],
                                   rdflib.Literal(measure['description'])))
                measure_graph.add((measure_uri,
                                   nidm['unitsLabel'].rdf_representation(),
                                   rdflib.Literal(measure['units'])))
            obj_attr.append((nidm["anatomicalAnnotation"], struct_uri))
            if str(measure['units']) in unknown_units and \
                    '.' not in measure['value']:
                valref = prov.Literal(int(measure['value']), prov.XSD['integer'])
            else:
                valref = prov.Literal(float(measure['value']), prov.XSD['float'])
            obj_attr.append((fs[measure_name], valref))
        elif measure['source'] == 'Table':
            obj_attr.append((nidm["anatomicalAnnotation"], struct_uri))
            for column_info in measure['items']:
                measure_name = column_info['name']
                if column_info['units'] in unknown_units and \
                   '.' not in column_info['value']:
                    valref = prov.Literal(int(column_info['value']),
                                          prov.XSD['integer'])
                else:
                    valref = prov.Literal(float(column_info['value']),
                                          prov.XSD['float'])
                obj_attr.append((fs[measure_name], valref))
                if measure_name not in measure_list:
                    measure_list.append(measure_name)
                    measure_uri = fs[measure_name].rdf_representation()
                    measure_graph.add((measure_uri,
                                       rdflib.RDF['type'],
                                       fs['Measure'].rdf_representation()))
                    measure_graph.add((measure_uri,
                                       rdflib.RDFS['label'],
                                       rdflib.Literal(column_info['description'])))
                    measure_graph.add((measure_uri,
                                       nidm['unitsLabel'].rdf_representation(),
                                       rdflib.Literal(column_info['units'])))
        id = get_id()
        if struct_uri in struct_info:
            euri = struct_info[struct_uri]
            euri.add_extra_attributes(obj_attr)
        else:
            euri = g.entity(id, obj_attr)
            struct_info[struct_uri] = euri
        g.hadMember(stat_collection, id)
    g.hadMember(stat_collection, statheader_collection)
    g.derivation(stat_collection, entity_uri)
    g.wasGeneratedBy(stat_collection, a0)
    return g, measure_graph
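
The integer-versus-float decision used twice above could be factored into a small helper; a sketch (the helper name is illustrative, not part of the original module):

def stat_literal(value, units, unknown_units=('unitless', 'NA')):
    """Mirror the checks above: unitless whole-number strings become xsd:integer
    literals, everything else becomes an xsd:float literal."""
    if str(units) in unknown_units and '.' not in value:
        return prov.Literal(int(value), prov.XSD['integer'])
    return prov.Literal(float(value), prov.XSD['float'])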