Esempio n. 1
0
def safe_encode(x, as_literal=True):
    """Encode a Python value for PROV.

    Parameters
    ----------
    x : object
        Value to encode. ``None``, strings, ints and floats are handled
        explicitly; anything else is serialised with ``dumps``.
    as_literal : bool, optional
        When true (default) the result is wrapped in a ``prov`` object
        (``prov.Literal`` or ``prov.URIRef``); when false the plain Python
        value is returned instead.

    Returns
    -------
    object
        * ``None`` -> ``"Unknown"``.
        * A string naming an existing path -> ``file://<fqdn><path>``.
        * Any other string -> the string itself, shortened to
          ``max_text_len`` characters (ending in ``...Clipped...``) when it
          is longer than that.
        * ``int`` / ``float`` -> the number.
        * Anything else -> ``dumps(x)``.
        * If a ``TypeError`` is raised along the way, a
          ``"Could not encode: ..."`` message is returned instead.
    """
    if x is None:
        value = "Unknown"
        if as_literal:
            return prov.Literal(value, prov.XSD['string'])
        return value

    # ``unicode`` only exists on Python 2; referencing it unguarded raises
    # NameError on Python 3 for every non-None input.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    try:
        if isinstance(x, string_types):
            if os.path.exists(x):
                value = 'file://%s%s' % (getfqdn(), x)
                if not as_literal:
                    return value
                try:
                    return prov.URIRef(value)
                except AttributeError:
                    # ``prov`` has no URIRef attribute: fall back to a
                    # literal typed as xsd:anyURI.
                    return prov.Literal(value, prov.XSD['anyURI'])

            clip_marker = '...Clipped...'
            if len(x) > max_text_len:
                # The marker must be a string: concatenating a list here
                # raised TypeError, so long strings were never clipped.
                value = x[:max_text_len - len(clip_marker)] + clip_marker
            else:
                value = x
            if not as_literal:
                return value
            return prov.Literal(value, prov.XSD['string'])
        if isinstance(x, int):
            if not as_literal:
                return x
            return prov.Literal(int(x), prov.XSD['integer'])
        if isinstance(x, float):
            if not as_literal:
                return x
            return prov.Literal(x, prov.XSD['float'])
        # NOTE(review): ``dumps`` is imported elsewhere in the file; the
        # ``nidm['pickle']`` datatype suggests a pickle serialiser - confirm.
        if not as_literal:
            return dumps(x)
        return prov.Literal(dumps(x), nidm['pickle'])
    except TypeError as e:
        value = "Could not encode: " + str(e)
        if not as_literal:
            return value
        return prov.Literal(value, prov.XSD['string'])
def create_entity(graph, fs_subject_id, filepath, hostname):
    """Create a PROV entity for one file of a FreeSurfer directory.

    The entity carries the file name as label, its path relative to the
    subject directory, a ``file://`` location URL built from ``hostname``
    and ``filepath``, MD5 and SHA-512 digests, and one ``nidm:tag`` per
    directory component and per dot-separated part of the file name.
    Entries of ``fs_file_map`` whose key occurs in the file name contribute
    an extra tag (when not already present) plus their associated
    attributes or PROV types.

    Returns whatever ``graph.entity`` returns for the new entity.
    """
    # identify FreeSurfer terms based on directory and file names
    filename = os.path.split(filepath)[1]
    relpath = filepath.split(fs_subject_id)[1].lstrip(os.path.sep)
    path_parts = relpath.split('/')
    fstypes = path_parts[:-1]
    additional_types = path_parts[-1].split('.')

    file_md5_hash = hash_infile(filepath, crypto=hashlib.md5)
    file_sha512_hash = hash_infile(filepath, crypto=hashlib.sha512)
    if file_md5_hash is None:
        print('Empty file: %s' % filepath)

    url = "file://%s%s" % (hostname, filepath)
    obj_attr = [
        (prov.PROV["label"], filename),
        (fs["relative_path"], "%s" % relpath),
        (prov.PROV["location"], prov.URIRef(url)),
        (crypto["md5"], "%s" % file_md5_hash),
        (crypto["sha512"], "%s" % file_sha512_hash),
    ]

    # directory components first, then the pieces of the file name
    known_tags = fstypes + additional_types
    for tag in known_tags:
        obj_attr.append((nidm["tag"], tag))

    for key, uris in fs_file_map:
        if key not in filename:
            continue
        bare_key = key.strip('.')
        if bare_key not in known_tags:
            obj_attr.append((nidm["tag"], bare_key))
        for uri in uris:
            if isinstance(uri, tuple):
                # explicit (predicate, value) pair
                obj_attr.append((uri[0], uri[1]))
            else:
                obj_attr.append((prov.PROV["type"], uri))

    entity_id = uuid.uuid1().hex
    return graph.entity(niiri[entity_id], obj_attr)
Esempio n. 3
0
def safe_encode(x, as_literal=True):
    """
    Encodes a python value for prov

    Parameters
    ----------
    x : object
        Value to encode.
    as_literal : bool, optional
        When true (default) the result is wrapped in a ``pm`` object
        (``pm.Literal`` or ``pm.URIRef``); when false a plain Python value
        is returned.

    Returns
    -------
    object
        * ``None`` -> ``"Unknown"``.
        * ``str`` / ``bytes`` (bytes are decoded as UTF-8): an existing
          path becomes ``file://<node><absolute path>``; any other text is
          returned as is, clipped to ``max_text_len`` characters.
        * ``int`` / ``float`` -> the number.
        * ``dict`` / ``list`` / ``tuple`` -> a JSON string; members are
          encoded recursively where needed.
        * ``pm.Literal`` -> itself, or ``dumps`` of its JSON representation
          when ``as_literal`` is false.
        * Anything else -> JSON of ``x.__dict__`` when possible, otherwise
          ``dumps(x)`` (typed ``nipype_ns['pickle']``), otherwise a
          ``"Could not encode object. ..."`` message.
    """
    if x is None:
        value = "Unknown"
        if as_literal:
            return pm.Literal(value, pm.XSD['string'])
        return value

    if isinstance(x, (str, bytes)):
        if isinstance(x, bytes):
            x = str(x, 'utf-8')
        if os.path.exists(x):
            # The previous guard compared x[0] with os.pathsep (the PATH
            # list separator), so it was true for every ordinary path;
            # normalising unconditionally keeps that behaviour and also
            # covers relative names that begin with the separator char.
            x = os.path.abspath(x)
            value = 'file://{}{}'.format(platform.node().lower(), x)
            if not as_literal:
                return value
            try:
                return pm.URIRef(value)
            except AttributeError:
                # ``pm`` has no URIRef attribute: use an anyURI literal.
                return pm.Literal(value, pm.XSD['anyURI'])
        else:
            value = x
            if len(x) > max_text_len:
                cliptxt = '...Clipped...'
                value = x[:max_text_len - len(cliptxt)] + cliptxt

            if not as_literal:
                return value

            return pm.Literal(value, pm.XSD['string'])
    if isinstance(x, int):
        if not as_literal:
            return x
        return pm.Literal(int(x), pm.XSD['integer'])
    if isinstance(x, float):
        if not as_literal:
            return x
        return pm.Literal(x, pm.XSD['float'])
    if isinstance(x, dict):
        outdict = {}
        for key, value in x.items():
            encoded_value = safe_encode(value, as_literal=False)
            if isinstance(encoded_value, pm.Literal):
                outdict[key] = encoded_value.json_representation()
            else:
                outdict[key] = encoded_value

        try:
            jsonstr = json.dumps(outdict)
        except (UnicodeDecodeError, TypeError) as excp:
            # json.dumps raises TypeError for unsupported keys/values;
            # report it rather than letting it escape a "safe" encoder.
            jsonstr = "Could not encode dictionary. {}".format(excp)
            iflogger.warning('Prov: %s', jsonstr)

        if not as_literal:
            return jsonstr
        return pm.Literal(jsonstr, pm.XSD['string'])
    if isinstance(x, (list, tuple)):
        x = list(x)
        is_object = False
        try:
            nptype = np.array(x).dtype
            is_object = nptype == np.dtype(object)
        except ValueError:
            is_object = True

        # If the array contains an heterogeneous mixture of data types
        # they should be encoded sequentially
        if is_object:
            outlist = []
            for value in x:
                encoded_value = safe_encode(value, as_literal=False)
                if isinstance(encoded_value, pm.Literal):
                    outlist.append(encoded_value.json_representation())
                else:
                    outlist.append(encoded_value)
            x = outlist

        try:
            jsonstr = json.dumps(x)
        except (UnicodeDecodeError, TypeError) as excp:
            # e.g. homogeneous lists of values json cannot serialise
            jsonstr = "Could not encode list/tuple. {}".format(excp)
            iflogger.warning('Prov: %s', jsonstr)

        if not as_literal:
            return jsonstr
        return pm.Literal(jsonstr, pm.XSD['string'])

    # If is a literal, and as_literal do nothing.
    # else bring back to json.
    if isinstance(x, pm.Literal):
        if as_literal:
            return x
        return dumps(x.json_representation())

    jsonstr = None
    ltype = pm.XSD['string']
    try:
        jsonstr = json.dumps(x.__dict__)
    except (AttributeError, TypeError):
        # No __dict__, or its contents are not JSON-serialisable:
        # fall through to the ``dumps`` fallback below.
        pass

    if jsonstr is None:
        try:
            # NOTE(review): ``dumps`` comes from elsewhere in the file; the
            # 'pickle' datatype suggests a pickle serialiser - confirm.
            jsonstr = dumps(x)
            ltype = nipype_ns['pickle']
        except TypeError as excp:
            jsonstr = 'Could not encode object. {}'.format(excp)

    if not as_literal:
        return jsonstr
    return pm.Literal(jsonstr, ltype)
Esempio n. 4
0
    def add_results(self, results):
        """Record one interface execution in the provenance graph ``self.g``.

        When ``results.provenance`` is truthy it is added to the graph as a
        bundle and nothing else is recorded. Otherwise the method creates:

        * an activity describing the run (module, interface, duration,
          working directory, return code, platform, version, host and,
          when available, command line details);
        * an ``Environment`` collection holding selected environment
          variables, marked as used by the activity;
        * ``Inputs`` / ``Outputs`` collections (only when the respective
          mapping is non-empty) whose members are produced by
          ``prov_encode``;
        * a ``RuntimeInfo`` collection with the non-empty ``stdout``,
          ``stderr`` and ``merged`` runtime fields;
        * a person agent for the current user and a software agent for
          Nipype, both associated with the activity.

        Parameters
        ----------
        results : object
            Must expose ``provenance``, ``runtime``, ``interface``,
            ``inputs`` and ``outputs`` attributes.

        Returns
        -------
        The graph ``self.g``.
        """
        if results.provenance:
            try:
                self.g.add_bundle(results.provenance)
            except pm.ProvException:
                # Retry with an explicit, freshly generated identifier.
                self.g.add_bundle(results.provenance, get_id())
            return self.g
        runtime = results.runtime
        interface = results.interface
        inputs = results.inputs
        outputs = results.outputs
        classname = interface.__name__
        modulepath = "{0}.{1}".format(interface.__module__, interface.__name__)
        activitytype = ''.join([i.capitalize() for i in modulepath.split('.')])

        a0_attrs = {
            nipype_ns['module']: interface.__module__,
            nipype_ns["interface"]: classname,
            pm.PROV["type"]: nipype_ns[activitytype],
            pm.PROV["label"]: classname,
            nipype_ns['duration']: safe_encode(runtime.duration),
            nipype_ns['workingDirectory']: safe_encode(runtime.cwd),
            nipype_ns['returnCode']: safe_encode(runtime.returncode),
            nipype_ns['platform']: safe_encode(runtime.platform),
            nipype_ns['version']: safe_encode(runtime.version),
        }
        try:
            a0_attrs[foaf["host"]] = pm.URIRef(runtime.hostname)
        except AttributeError:
            a0_attrs[foaf["host"]] = pm.Literal(runtime.hostname,
                                                pm.XSD['anyURI'])

        # Command details are optional: stop at the first attribute the
        # runtime does not provide, keeping whatever was recorded before it.
        try:
            a0_attrs[nipype_ns['command']] = safe_encode(runtime.cmdline)
            a0_attrs[nipype_ns['commandPath']] = safe_encode(
                runtime.command_path)
            a0_attrs[nipype_ns['dependencies']] = safe_encode(
                runtime.dependencies)
        except AttributeError:
            pass
        a0 = self.g.activity(get_id(), runtime.startTime, runtime.endTime,
                             a0_attrs)

        # environment
        env_collection_id = get_id()
        env_collection = self.g.collection(env_collection_id)
        env_collection.add_extra_attributes({
            pm.PROV['type']: nipype_ns['Environment'],
            pm.PROV['label']: "Environment"
        })
        self.g.used(a0, env_collection_id)
        # write environment entities (only this fixed set of variables)
        recorded_env_vars = (
            'PATH', 'FSLDIR', 'FREESURFER_HOME', 'ANTSPATH',
            'CAMINOPATH', 'CLASSPATH', 'LD_LIBRARY_PATH',
            'DYLD_LIBRARY_PATH', 'FIX_VERTEX_AREA',
            'FSF_OUTPUT_FORMAT', 'FSLCONFDIR', 'FSLOUTPUTTYPE',
            'LOGNAME', 'USER', 'MKL_NUM_THREADS', 'OMP_NUM_THREADS',
        )
        for key, val in sorted(runtime.environ.items()):
            if key not in recorded_env_vars:
                continue
            in_attr = {
                pm.PROV["label"]: key,
                nipype_ns["environmentVariable"]: key,
                pm.PROV["value"]: safe_encode(val)
            }
            env_entity_id = get_attr_id(in_attr)
            self.g.entity(env_entity_id, in_attr)
            self.g.hadMember(env_collection, env_entity_id)

        # write input entities
        if inputs:
            input_collection = self.g.collection(get_id())
            input_collection.add_extra_attributes({
                pm.PROV['type']: nipype_ns['Inputs'],
                pm.PROV['label']: "Inputs"
            })
            for key, val in sorted(inputs.items()):
                in_entity = prov_encode(self.g, val).get_identifier()
                self.g.hadMember(input_collection, in_entity)
                used_attr = {pm.PROV["label"]: key, nipype_ns["inPort"]: key}
                self.g.used(activity=a0,
                            entity=in_entity,
                            other_attributes=used_attr)

        # write output entities
        if outputs:
            output_collection = self.g.collection(get_id())
            if not isinstance(outputs, dict):
                outputs = outputs.get_traitsfree()
            output_collection.add_extra_attributes({
                pm.PROV['type']: nipype_ns['Outputs'],
                pm.PROV['label']: "Outputs"
            })
            self.g.wasGeneratedBy(output_collection, a0)
            for key, val in sorted(outputs.items()):
                out_entity = prov_encode(self.g, val).get_identifier()
                self.g.hadMember(output_collection, out_entity)
                gen_attr = {pm.PROV["label"]: key, nipype_ns["outPort"]: key}
                self.g.generation(out_entity,
                                  activity=a0,
                                  other_attributes=gen_attr)

        # write runtime entities
        runtime_collection = self.g.collection(get_id())
        runtime_collection.add_extra_attributes({
            pm.PROV['type']: nipype_ns['Runtime'],
            pm.PROV['label']: "RuntimeInfo"
        })
        self.g.wasGeneratedBy(runtime_collection, a0)
        for key, value in sorted(runtime.items()):
            if not value or key not in ('stdout', 'stderr', 'merged'):
                continue
            attr = {pm.PROV["label"]: key, nipype_ns[key]: safe_encode(value)}
            # Create the entity and register the membership under the SAME
            # identifier; two separate get_id() calls left the collection
            # pointing at an id that no entity carried.
            runtime_entity_id = get_id()
            self.g.entity(runtime_entity_id, attr)
            self.g.hadMember(runtime_collection, runtime_entity_id)

        # create agents
        user_attr = {
            pm.PROV["type"]: pm.PROV["Person"],
            pm.PROV["label"]: getpass.getuser(),
            foaf["name"]: safe_encode(getpass.getuser())
        }
        user_agent = self.g.agent(get_attr_id(user_attr), user_attr)
        agent_attr = {
            pm.PROV["type"]: pm.PROV["SoftwareAgent"],
            pm.PROV["label"]: "Nipype",
            foaf["name"]: safe_encode("Nipype")
        }
        for key, value in get_info().items():
            agent_attr[nipype_ns[key]] = safe_encode(value)
        software_agent = self.g.agent(get_attr_id(agent_attr), agent_attr)
        self.g.wasAssociatedWith(
            a0, user_agent, None, None,
            {pm.PROV["hadRole"]: nipype_ns["LoggedInUser"]})
        self.g.wasAssociatedWith(a0, software_agent)
        return self.g
Esempio n. 5
0
def safe_encode(x, as_literal=True):
    """Encodes a python value for prov

    Parameters
    ----------
    x : object
        Value to encode.
    as_literal : bool, optional
        When true (default) the result is wrapped in a ``pm`` object
        (``pm.Literal`` or ``pm.URIRef``); when false a plain Python value
        is returned.

    Returns
    -------
    object
        * ``None`` -> ``"Unknown"``.
        * A string naming an existing path -> ``file://<fqdn><path>``.
        * Any other string -> the string, clipped to ``max_text_len``
          characters (ending in ``...Clipped...``) when longer.
        * ``int`` / ``float`` -> the number.
        * ``dict`` / ``list`` -> a JSON string; dict values, and list items
          when the list does not form a plain (non-object) numpy array, are
          encoded recursively.
        * Anything else -> ``dumps(x)``.
        * If a ``TypeError`` is raised along the way, a
          ``"Could not encode: ..."`` message is returned instead.
    """
    if x is None:
        value = "Unknown"
        if as_literal:
            return pm.Literal(value, pm.XSD['string'])
        return value

    # ``unicode`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    try:
        if isinstance(x, string_types):
            if os.path.exists(x):
                value = 'file://%s%s' % (getfqdn(), x)
                if not as_literal:
                    return value
                try:
                    return pm.URIRef(value)
                except AttributeError:
                    # ``pm`` has no URIRef attribute: use an anyURI literal.
                    return pm.Literal(value, pm.XSD['anyURI'])

            clip_marker = '...Clipped...'
            if len(x) > max_text_len:
                # The marker must be a string: concatenating a list here
                # raised TypeError, so long strings were never clipped.
                value = x[:max_text_len - len(clip_marker)] + clip_marker
            else:
                value = x
            if not as_literal:
                return value
            return pm.Literal(value, pm.XSD['string'])
        if isinstance(x, int):
            if not as_literal:
                return x
            return pm.Literal(int(x), pm.XSD['integer'])
        if isinstance(x, float):
            if not as_literal:
                return x
            return pm.Literal(x, pm.XSD['float'])
        if isinstance(x, dict):
            outdict = {}
            for key, value in x.items():
                encoded_value = safe_encode(value, as_literal=False)
                if isinstance(encoded_value, pm.Literal):
                    outdict[key] = encoded_value.json_representation()
                else:
                    outdict[key] = encoded_value
            if not as_literal:
                return json.dumps(outdict)
            return pm.Literal(json.dumps(outdict), pm.XSD['string'])
        if isinstance(x, list):
            # Lists that numpy cannot turn into a plain typed array (it
            # raises ValueError or yields dtype=object) are encoded item
            # by item; other lists are dumped as they are.
            try:
                needs_itemwise = np.array(x).dtype == np.dtype(object)
            except ValueError:
                needs_itemwise = True
            if needs_itemwise:
                outlist = []
                for value in x:
                    encoded_value = safe_encode(value, as_literal=False)
                    if isinstance(encoded_value, pm.Literal):
                        outlist.append(encoded_value.json_representation())
                    else:
                        outlist.append(encoded_value)
            else:
                outlist = x
            if not as_literal:
                return json.dumps(outlist)
            return pm.Literal(json.dumps(outlist), pm.XSD['string'])
        # NOTE(review): ``dumps`` comes from elsewhere in the file; the
        # 'pickle' datatype suggests a pickle serialiser - confirm.
        if not as_literal:
            return dumps(x)
        return pm.Literal(dumps(x), nipype_ns['pickle'])
    except TypeError as e:
        # The ``try`` above had no handler at all (a syntax error); report
        # encoding failures as a readable message instead.
        value = "Could not encode: " + str(e)
        if not as_literal:
            return value
        return pm.Literal(value, pm.XSD['string'])
def encode_fs_directory(g, basedir, project_id, subject_id, n_items=100000):
    """ Convert a FreeSurfer directory to a PROV graph
"""
    # directory collection/catalog
    collection_hash = uuid.uuid1().hex
    fsdir_collection = g.collection(niiri[collection_hash])
    fsdir_collection.add_extra_attributes({
        prov.PROV['type']:
        fs['SubjectDirectory'],
        nidm['tag']:
        project_id,
        fs['subjectID']:
        subject_id
    })
    directory_id = g.entity(niiri[uuid.uuid1().hex])
    hostname = getfqdn()
    url = "file://%s%s" % (hostname, os.path.abspath(basedir))
    directory_id.add_extra_attributes(
        {prov.PROV['location']: prov.URIRef(url)})
    g.wasDerivedFrom(fsdir_collection, directory_id)

    a0 = g.activity(niiri[uuid.uuid1().hex],
                    startTime=dt.isoformat(dt.utcnow()))
    user_agent = g.agent(
        niiri[uuid.uuid1().hex], {
            prov.PROV["type"]: prov.PROV["Person"],
            prov.PROV["label"]: pwd.getpwuid(os.geteuid()).pw_name,
            foaf["name"]: pwd.getpwuid(os.geteuid()).pw_name
        })
    g.wasAssociatedWith(a0, user_agent, None, None,
                        {prov.PROV["Role"]: "LoggedInUser"})
    g.wasGeneratedBy(fsdir_collection, a0)

    i = 0
    for dirpath, dirnames, filenames in os.walk(os.path.realpath(basedir)):
        for filename in sorted(filenames):
            if filename.startswith('.'):
                continue
            i += 1
            if i > n_items:
                break
            file2encode = os.path.realpath(os.path.join(dirpath, filename))
            if not os.path.isfile(file2encode):
                print "%s not a file" % file2encode
                continue
            ignore_key_found = False
            for key in ignore_list:
                if key in file2encode:
                    ignore_key_found = True
                    continue
            if ignore_key_found:
                continue
            try:
                entity = create_entity(g, subject_id, file2encode, hostname)
                g.hadMember(fsdir_collection, entity.get_identifier())
                rdf_g = entity.rdf().serialize(format='turtle')
                '''
query = """
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX fs: <http://www.incf.org/ns/nidash/fs#>
PREFIX crypto: <http://www.w3.org/2000/10/swap/crypto#>
PREFIX nidm: <http://www.incf.org/ns/nidash/nidm#>
select ?e ?relpath ?path where
{?e fs:fileType fs:StatisticFile;
fs:relativePath ?relpath;
prov:atLocation ?path .
FILTER NOT EXISTS {
?e nidm:tag "curv" .
}
}
"""
results = rdf_g.query(query)
'''
                if 'StatisticFile' in rdf_g and 'curv' not in rdf_g:
                    g, measure_graph = parse_stats(g, file2encode, entity)
                    if os.path.exists('fsterms.ttl'):
                        measure_graph.parse('fsterms.ttl', format='turtle')
                    measure_graph.serialize('fsterms.ttl', format='turtle')
            except IOError, e:
                print e