def to_graph(subject_specific_dir, project_id, output_dir, new_id=None):
    """Encode one FreeSurfer subject directory as PROV and write it to disk.

    The subject id is the last path component of ``subject_specific_dir``.
    Two files are written into ``output_dir``:

    * ``<subject>_<project>.provn`` -- the PROV-N text of the graph
    * ``<subject>_<project>.ttl``   -- the graph serialized as Turtle

    When ``new_id`` is given, every occurrence of the original subject id in
    the PROV-N text is replaced by ``new_id``, both output files are named
    after ``new_id``, and a ``nidm:sameSubjectAs`` triple linking the new id
    to the old one is merged into ``mapper.ttl`` in the current working
    directory.

    NOTE(review): the Turtle file is serialized from the graph object, which
    is not touched by the textual replacement, so it still carries the
    original subject id -- confirm whether that is intended.

    :param subject_specific_dir: path to the subject's directory
    :param project_id: project identifier, passed to ``encode_fs_directory``
        and used in the output file names
    :param output_dir: directory that receives the ``.provn``/``.ttl`` files
    :param new_id: optional replacement subject id
    :return: tuple ``(graph, old_id)`` where ``old_id`` is the subject id
        derived from the directory name
    """
    # Absolute path of the subject's directory; its last component is the
    # subject id.
    basedir = os.path.abspath(subject_specific_dir)
    subject_id = basedir.rstrip(os.path.sep).split(os.path.sep)[-1]

    graph = prov.ProvBundle()
    for namespace in (foaf, dcterms, fs, nidm, niiri, obo, nif, crypto):
        graph.add_namespace(namespace)

    graph = encode_fs_directory(graph, basedir, project_id, subject_id)
    provn = graph.get_provn()
    old_id = subject_id
    if new_id:
        provn = provn.replace(subject_id, new_id)
        subject_id = new_id
    filename = os.path.join(output_dir,
                            '%s_%s.provn' % (subject_id, project_id))
    with open(filename, 'wt') as fp:
        # provn is one string: write() emits it in a single call, whereas
        # writelines() would iterate over it character by character.
        fp.write(provn)
    filename_ttl = os.path.join(output_dir,
                                '%s_%s.ttl' % (subject_id, project_id))
    graph.rdf().serialize(filename_ttl, format='turtle')
    if new_id:
        map_graph = rdflib.Graph()
        map_graph.namespace_manager.bind('fs', fs.get_uri())
        map_graph.namespace_manager.bind('nidm', nidm.get_uri())
        map_graph.add((fs[new_id].rdf_representation(),
                       nidm['sameSubjectAs'].rdf_representation(),
                       fs[old_id].rdf_representation()))
        # Merge with mappings recorded by earlier runs before rewriting.
        if os.path.exists('mapper.ttl'):
            map_graph.parse('mapper.ttl', format='turtle')
        map_graph.serialize('mapper.ttl', format='turtle')
    return graph, old_id
Beispiel #2
0
def cff2provn(filename):
    """Parse a connectome (cml) XML file and return a prov bundle object.

    The returned bundle contains:

    * an entity for the file itself (``prov:type`` and ``prov:location``),
    * a ``cml:connectome`` collection derived from that entity,
    * one ``cml:connectome-meta`` member holding species, subject name and
      subject timepoint,
    * one member entity per ``cml:connectome-volume``, ``-track``,
      ``-network``, ``-surface`` and ``-data`` element, each carrying that
      element's ``dtype``, ``src``, ``name`` and ``fileformat`` attributes.

    :param filename: path of the cml XML file to parse
    :return: the populated ``prov.ProvBundle``
    :raises IndexError: if the document has no ``cml:species`` element
    """
    tree = xml.dom.minidom.parse(filename)
    collections = tree.documentElement

    g = prov.ProvBundle()
    g.add_namespace(xsd)
    g.add_namespace(dcterms)
    g.add_namespace(cml)

    url_entity = g.entity(cml[get_id()])
    url_entity.add_extra_attributes({
        prov.PROV['type']:
        nidm['nidm:ConnectomeFileFormat'],
        prov.PROV['location']:
        prov.Literal(filename, prov.XSD['String'])
    })

    cml_collection = g.collection(cml[get_id()])
    cml_collection.add_extra_attributes({
        prov.PROV['type']: cml['connectome'],
        prov.PROV['label']: filename
    })
    g.wasDerivedFrom(cml_collection, url_entity)

    # get species, subject_name, and subject_timepoint
    # The element text is recovered by stripping the surrounding tags from
    # the serialized XML.
    species = tree.getElementsByTagName('cml:species')[0].toxml()
    species = species.replace('<cml:species>',
                              '').replace('</cml:species>', '')

    tp = ''
    sub = ''
    for t in collections.getElementsByTagName("cml:tag"):
        key = t.attributes['key'].value
        if key == 'subject_name':
            sub = t.toxml()
        if key == 'subject_timepoint':
            tp = t.toxml()
    sub = sub.replace('<cml:tag key="subject_name">',
                      '').replace('</cml:tag>', '')
    tp = tp.replace('<cml:tag key="subject_timepoint">',
                    '').replace('</cml:tag>', '')

    cml_meta = g.entity(cml[get_id()])
    cml_meta.add_extra_attributes({
        prov.PROV['type']: cml['connectome-meta'],
        cml['species']: species,
        cml['timepoint']: tp,
        cml['subject_name']: sub
    })
    g.hadMember(cml_collection, cml_meta)

    # Every connectome member kind is encoded the same way, so handle them
    # with one loop. Each element's own 'fileformat' attribute is read here;
    # reusing a value from a previously processed element kind would attach
    # the wrong format to connectome-data entities (or fail with NameError
    # when no earlier element exists).
    member_kinds = (
        'connectome-volume',
        'connectome-track',
        'connectome-network',
        'connectome-surface',
        'connectome-data',
    )
    for kind in member_kinds:
        for element in collections.getElementsByTagName('cml:' + kind):
            member = g.entity(cml[get_id()])
            member.add_extra_attributes({
                prov.PROV['type']: cml[kind],
                cml['dtype']: element.getAttribute('dtype'),
                cml['src']: element.getAttribute('src'),
                cml['name']: element.getAttribute('name'),
                cml['fileformat']: element.getAttribute('fileformat')
            })
            g.hadMember(cml_collection, member)

    return g
Beispiel #3
0
 def __init__(self):
     """Create this object's PROV bundle and register its namespaces."""
     self.g = pm.ProvBundle(identifier=get_id())
     # Same registration order as before: foaf, dcterms, nipype_ns, niiri.
     for namespace in (foaf, dcterms, nipype_ns, niiri):
         self.g.add_namespace(namespace)
Beispiel #4
0
# Download the HCLS dataset-description overview page.
r = requests.get('https://raw.githubusercontent.com/joejimbo/HCLSDatasetDescriptions/master/Overview.html')
# Stop here with a clear HTTP error rather than failing later with an
# AttributeError when no <table> can be found in an error response.
r.raise_for_status()
soup = BeautifulSoup(r.text)
# Collect every <code> element below the first <table> of the page.
table = soup.find('table')
codes = table.findChildren('code')
# Group consecutive <code> elements into chunks of two
# (range() behaves the same as xrange() here and also exists on Python 3).
code_block = [codes[pos:pos + 2] for pos in range(0, len(codes), 2)]
# Map the first element's text, minus its last character, to the second
# element's text. NOTE(review): presumably "prefix:" -> namespace URI, since
# the pairs are later fed to prov.Namespace -- confirm against the page.
ns_map = {code[0].text[:-1]: code[1].text for code in code_block}


# Access namespace objects as attributes
class AttrDict(dict):
    """A dict whose entries can also be read and written as attributes."""

    def __init__(self, *items, **named):
        super(AttrDict, self).__init__(*items, **named)
        # Use the mapping itself as the instance namespace so that
        # ``obj.key`` and ``obj['key']`` refer to the same entry.
        self.__dict__ = self

# create a prov bundle to store the graph
bundle = prov.ProvBundle()

# add namespaces to the bundle
# (.items() rather than .iteritems(): same iteration on Python 2, and it
# also exists on Python 3)
for k, v in ns_map.items():
    ns = prov.Namespace(k, v)
    bundle.add_namespace(ns)

# Re-bind ``ns`` to an attribute-style copy of the bundle's namespace mapping
# so that a namespace can be looked up as ``ns.<prefix>``.
ns = AttrDict(bundle._namespaces)

# Core Metadata
# property: [summary, version, distribution]
core_metadata = {ns.rdf['type']: ["MUST", "MUST", "MUST"],
                 ns.dct['title']: ["MUST", "MUST", "MUST"],
                 ns.dct['alternative']: ["MAY", "MAY", "MAY"],
                 ns.dct['description']: ["MUST", "MUST", "MUST"],
                 ns.dct['created']: ["NEVER", "SHOULD", "SHOULD"],