def to_graph(subject_specific_dir, project_id, output_dir, new_id=None):
    """Encode a FreeSurfer subject directory as PROV and write it to disk.

    The subject id is taken from the last path component of
    ``subject_specific_dir``.  Two files are written into ``output_dir``:
    ``<subject>_<project>.provn`` (PROV-N text) and
    ``<subject>_<project>.ttl`` (Turtle).

    Parameters
    ----------
    subject_specific_dir : str
        Path to a single subject's directory.
    project_id : str
        Project identifier; forwarded to ``encode_fs_directory`` and used in
        the output file names.
    output_dir : str
        Directory that receives the ``.provn`` and ``.ttl`` files.
    new_id : str, optional
        Replacement subject id.  When given, every occurrence of the original
        id in the PROV-N text is replaced by it, the output files are named
        after it, and a ``new_id nidm:sameSubjectAs old_id`` triple is merged
        into ``mapper.ttl`` in the current working directory.

    Returns
    -------
    tuple
        ``(graph, old_id)`` -- the bundle returned by ``encode_fs_directory``
        and the subject id derived from the directory name.
    """
    # location of FreeSurfer $SUBJECTS_DIR
    basedir = os.path.abspath(subject_specific_dir)
    # abspath() already drops any trailing separator, so the basename is the
    # subject directory name.
    subject_id = os.path.basename(basedir)

    graph = prov.ProvBundle()
    for namespace in (foaf, dcterms, fs, nidm, niiri, obo, nif, crypto):
        graph.add_namespace(namespace)
    graph = encode_fs_directory(graph, basedir, project_id, subject_id)

    provn = graph.get_provn()
    old_id = subject_id
    if new_id:
        provn = provn.replace(subject_id, new_id)
        subject_id = new_id

    filename = os.path.join(output_dir,
                            '%s_%s.provn' % (subject_id, project_id))
    with open(filename, 'wt') as fp:
        # provn is one string: write() emits it in a single call, whereas
        # writelines() would iterate over it character by character.
        fp.write(provn)

    # NOTE(review): the Turtle file is serialized from the graph itself, so
    # the new_id substitution applied to the PROV-N text above is presumably
    # not reflected in it -- confirm whether that is intended.
    filename_ttl = os.path.join(output_dir,
                                '%s_%s.ttl' % (subject_id, project_id))
    graph.rdf().serialize(filename_ttl, format='turtle')

    if new_id:
        map_graph = rdflib.Graph()
        map_graph.namespace_manager.bind('fs', fs.get_uri())
        map_graph.namespace_manager.bind('nidm', nidm.get_uri())
        map_graph.add((fs[new_id].rdf_representation(),
                       nidm['sameSubjectAs'].rdf_representation(),
                       fs[old_id].rdf_representation()))
        # Load any existing mapping file into the same graph before writing
        # it back out, so the file is rewritten together with the new triple.
        if os.path.exists('mapper.ttl'):
            map_graph.parse('mapper.ttl', format='turtle')
        map_graph.serialize('mapper.ttl', format='turtle')
    return graph, old_id
def _add_cml_members(g, cml_collection, root, kind):
    """Add one entity per ``cml:<kind>`` element under *root* to the collection.

    Each entity is typed ``cml[kind]`` and carries the element's ``dtype``,
    ``src``, ``name`` and ``fileformat`` attributes.
    """
    for element in root.getElementsByTagName("cml:" + kind):
        member = g.entity(cml[get_id()])
        member.add_extra_attributes({
            prov.PROV['type']: cml[kind],
            cml['dtype']: element.getAttribute('dtype'),
            cml['src']: element.getAttribute('src'),
            cml['name']: element.getAttribute('name'),
            # Read per element so a value from a previously processed
            # element is never carried over.
            cml['fileformat']: element.getAttribute('fileformat')
        })
        g.hadMember(cml_collection, member)


def cff2provn(filename):
    """Parse cml xml file and return a prov bundle object

    The bundle contains an entity for the file itself, a ``cml:connectome``
    collection derived from it, a ``cml:connectome-meta`` member holding the
    species, subject name and subject timepoint, and one member entity for
    every ``cml:connectome-volume``, ``-track``, ``-network``, ``-surface``
    and ``-data`` element, in that order.

    Parameters
    ----------
    filename : str
        Path of the cml xml file to parse.

    Returns
    -------
    The populated ``prov.ProvBundle``.

    Raises
    ------
    IndexError
        If the document contains no ``cml:species`` element.
    KeyError
        If a ``cml:tag`` element has no ``key`` attribute.
    """
    tree = xml.dom.minidom.parse(filename)
    collections = tree.documentElement

    g = prov.ProvBundle()
    g.add_namespace(xsd)
    g.add_namespace(dcterms)
    g.add_namespace(cml)

    url_entity = g.entity(cml[get_id()])
    # NOTE(review): the 'nidm:' prefix inside the local name and the
    # capitalised XSD['String'] look unusual -- confirm they are intended.
    url_entity.add_extra_attributes({
        prov.PROV['type']: nidm['nidm:ConnectomeFileFormat'],
        prov.PROV['location']: prov.Literal(filename, prov.XSD['String'])
    })

    cml_collection = g.collection(cml[get_id()])
    cml_collection.add_extra_attributes({
        prov.PROV['type']: cml['connectome'],
        prov.PROV['label']: filename
    })
    g.wasDerivedFrom(cml_collection, url_entity)

    # get species, subject_name, and subject_timepoint
    # The values are recovered by serializing the element and stripping the
    # exact opening/closing tag text.
    species = tree.getElementsByTagName('cml:species')[0].toxml()
    species = species.replace('<cml:species>', '').replace('</cml:species>', '')

    tp = ''
    sub = ''
    for t in collections.getElementsByTagName("cml:tag"):
        key = t.attributes['key'].value
        if key == 'subject_name':
            sub = t.toxml()
        elif key == 'subject_timepoint':
            tp = t.toxml()
    sub = sub.replace('<cml:tag key="subject_name">', '').replace('</cml:tag>', '')
    tp = tp.replace('<cml:tag key="subject_timepoint">', '').replace('</cml:tag>', '')

    cml_meta = g.entity(cml[get_id()])
    cml_meta.add_extra_attributes({
        prov.PROV['type']: cml['connectome-meta'],
        cml['species']: species,
        cml['timepoint']: tp,
        cml['subject_name']: sub
    })
    g.hadMember(cml_collection, cml_meta)

    # Every connectome element type is encoded the same way; the order here
    # fixes the order in which entities (and their ids) are created.
    for kind in ('connectome-volume', 'connectome-track',
                 'connectome-network', 'connectome-surface',
                 'connectome-data'):
        _add_cml_members(g, cml_collection, collections, kind)

    return g
def __init__(self):
    """Create the provenance bundle and register its namespaces.

    The bundle is stored on ``self.g`` and is given an identifier obtained
    from ``get_id()``.
    """
    self.g = bundle = pm.ProvBundle(identifier=get_id())
    # Registration order: foaf, dcterms, nipype_ns, niiri.
    for namespace in (foaf, dcterms, nipype_ns, niiri):
        bundle.add_namespace(namespace)
r = requests.get('https://raw.githubusercontent.com/joejimbo/HCLSDatasetDescriptions/master/Overview.html') soup = BeautifulSoup(r.text) table = soup.find('table') codes = table.findChildren('code') code_block = [codes[pos:pos + 2] for pos in xrange(0, len(codes), 2)] ns_map = {code[0].text[:-1]: code[1].text for code in code_block} # Access namespace objects as attributes class AttrDict(dict): def __init__(self, *args, **kwargs): super(AttrDict, self).__init__(*args, **kwargs) self.__dict__ = self # create a prov bundle to store the graph bundle = prov.ProvBundle() # add namespaces to the bundle for k, v in ns_map.iteritems(): ns = prov.Namespace(k, v) bundle.add_namespace(ns) ns = AttrDict(bundle._namespaces) # Core Metadata # property: [summary, version, distribution] core_metadata = {ns.rdf['type']: ["MUST", "MUST", "MUST"], ns.dct['title']: ["MUST", "MUST", "MUST"], ns.dct['alternative']: ["MAY", "MAY", "MAY"], ns.dct['description']: ["MUST", "MUST", "MUST"], ns.dct['created']: ["NEVER", "SHOULD", "SHOULD"],