def RadiomicsRDF(featureVector, patientID, myStructUID, ROI):
    """Convert a PyRadiomics feature vector into an RDF graph.

    Parameters
    ----------
    featureVector : mapping
        PyRadiomics output: ``diagnostics_*`` entries plus
        ``<imagetype>_<featureclass>_<featurename>`` entries.
    patientID : str
        Patient identifier, used to mint the patient URI.
    myStructUID : str
        RTSTRUCT UID, used to mint scan / volume / feature URIs.
    ROI : str
        Name of the region of interest.

    Returns
    -------
    rdflib.Graph
        One feature-level subgraph per radiomic feature, linked to the
        patient, scan, image volume and software metadata.

    Reads ``ParamsSettings/ORAW_UniversalTemplate.yaml`` and
    ``RadiomicsOntology/ORAW_RO_Table.csv`` relative to the current
    working directory.
    """
    #&&&&&&&&&&&&&&&&&&& reading ORAW universal template yaml file &&&&&&&&&&&&&&&&&&&&&&
    with open(os.path.join(os.getcwd(), 'ParamsSettings',
                           'ORAW_UniversalTemplate.yaml')) as data:
        try:
            Utemplate = yaml.safe_load(data)
        except yaml.YAMLError as exc:
            # NOTE(review): on a parse error Utemplate stays unbound and the
            # template updates below raise NameError; consider re-raising.
            print(exc)

    # ----------------------------------For PyRadiomics ---------------------------------
    # Mapping O-RAW settings onto the universal template. Some values are
    # already present in the yaml; the ones below are refreshed per run.
    Utemplate['General']['Software']['name'] = 'PyRadiomics'
    Utemplate['General']['Software']['version'] = featureVector[
        'diagnostics_Versions_PyRadiomics']
    Utemplate['General']['Software']['programminglanguage'] = 'Python'
    Utemplate['ImageProcessing']['Processing'] = featureVector[
        'diagnostics_Configuration_Settings']['resampledPixelSpacing']
    Utemplate['ROISegmentation']['ROIType'] = ROI
    Utemplate['Interpolation']['ImageInterplationMethod'] = featureVector[
        'diagnostics_Configuration_Settings']['interpolator']
    Utemplate['ROIResegmentation']['ResegmentRange'] = featureVector[
        'diagnostics_Configuration_Settings']['resegmentRange']
    Utemplate['ROIResegmentation']['ResegmentMode'] = featureVector[
        'diagnostics_Configuration_Settings']['resegmentMode']
    # ----------------------------------------------------- ---------------------------------
    # def ToRDF(featureVector,exportDir,patientID,myStructUID,ROI,export_format,export_name):
    graph = Graph()  # Create a rdflib graph object

    # Namespaces used in O-RAW
    ro = Namespace('http://www.radiomics.org/RO/')
    roo = Namespace('http://www.cancerdata.org/roo/')
    # NOTE(review): 'obolibary' looks like a typo for 'obolibrary', but the
    # IRI is part of emitted data — confirm before changing it.
    IAO = Namespace('http://purl.obolibary.org/obo/IAO_')
    SWO = Namespace('http://www.ebi.ac.uk/swo/SWO_')
    NCIT = Namespace('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#')
    # Adding namespace to graph space
    graph.bind('ro', ro)
    graph.bind('roo', roo)
    graph.bind('IAO', IAO)
    graph.bind('SWO', SWO)
    graph.bind('NCIT', NCIT)

    # ------------------------- URI of related entities -----------------
    # ^^^^^^^^^^^^^^^^^^^^^^^^^ Level-1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    patient_uri = URIRef(NCIT + 'C16960')
    has_pacs_study = URIRef(roo + '100284')  # patient has_pacs_study scan
    scan_uri = URIRef(NCIT + 'C17999')
    converted_to = URIRef(ro + '0310')  # scan converted_to image_volume
    has_imaging_modality = URIRef(
        ro + 'P02928312341')  # scan has_imaging_modality CT, PET, MR
    image_volume_uri = URIRef(ro + '0271')
    is_part_of = URIRef(ro + '0298')  # image_volume is_part_of image_space
    has_processing = URIRef(ro + 'P00080')  # image_volume has_processing method
    has_voxel_dimension = URIRef(
        ro + 'P00118')  # image_volume has_voxel_dimension voxel_size
    has_voxel_dimensionx = URIRef(ro + 'P00118')  # per-axis variants (unused here)
    has_voxel_dimensiony = URIRef(ro + 'P00123')
    has_voxel_dimensionz = URIRef(ro + 'P00149')
    image_space_uri = URIRef(ro + '0225')
    # ROImask_uri = URIRef(roo + '0272') # ROImask is_part_of image_space
    is_label_of = URIRef(ro + 'P00190')  # GTV/... is_label_of ROImask
    has_label = URIRef(ro + 'P00051')
    has_segmentation_method = URIRef(ro + 'P00092')
    # GTV_uri = URIRef(roo + '100006')
    used_to_compute = URIRef(
        ro + '0296')  # image_space used_to_compute RadiomicsFeature
    # tempral
    has_property = URIRef(roo + '100212')
    # ^^^^^^^^^^^^^^^^^^^^^^^^^ Level-2 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    mm_uri = URIRef(ro + 'I0020')
    mm2_uri = URIRef(ro + 'I0027')
    mm3_uri = URIRef(ro + 'I0011')
    has_value = URIRef(ro + '010191')  # RadiomicsFeature has_value
    has_unit = URIRef(ro + '010198')  # RadiomicsFeature has_unit
    # ^^^^^^^^^^^^^^^^^^^^^^^^^ Level-3 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    computed_using = URIRef(
        ro + 'P00002')  # RadiomicsFeature computed_using calculationrun_space
    calculationrun_space_uri = URIRef(ro + '0297')
    # run_on = URIRef(ro + '00000002') # calclulationrun run_on datetime
    at_date_time = URIRef(roo + '100041')
    performed_by = URIRef(
        ro + '0283')  # calculationrun_space performed_by softwareproperties_uri
    softwareproperties_uri = URIRef(
        ro + '010215')  # software has_label literal(SoftwareProperties)
    has_programming_language = URIRef(
        ro + '0010195')  # software has_programming_language programminglanguage
    # programminglanguage_uri = URIRef(IAO + '0000025')
    # python_uri = URIRef(SWO + '000018')
    has_version = URIRef(ro + '0010192')  # software has_version
    # version_uri = URIRef(ro + '010166')
    # ^^^^^^^^^^^^^^^^^^^^^^^^^ Level-4 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    featureparameterspace_uri = URIRef(ro + '001000')
    defined_by = URIRef(ro + 'P000009')  # featureparameterspace defined_by settings
    # filterproperties_uri = URIRef(roo + '0255') # has_value wavelet right/not?
    # aggregationparameters = URIRef(roo + '0218')
    # discretizationparameters = URIRef(roo + '0214')
    # featureSpecificparameters = URIRef(roo + '0215')
    # interpolationparameters = URIRef(roo + '0217')
    # reSegmentationparameters = URIRef(roo + '0216')

    # -------------- localhost URIs ---------------------------
    localhost_patient = 'http://localhost/data/patient_'
    localhost_scan = 'http://localhost/data/scan_'
    localhost_imagevolume = 'http://localhost/data/imagevolume_'
    localhost_imagespace = 'http://localhost/data/imagespace_'
    localhost_ROI = 'http://localhost/data/ROI_'
    localhost_feature = 'http://localhost/data/feature_'
    localhost_featureparameter = 'http://localhost/data/localhost_featureparameter_'
    #-----------------------
    localhost_mm = 'http://localhost/data/mm'
    localhost_mm2 = 'http://localhost/data/mm2'
    localhost_mm3 = 'http://localhost/data/mm3'

    # ---------------------- info from yaml file ------------------
    # O-RAW_config:
    # export_format: rdf
    # export_name: ORAW_RDF_test
    # General
    RDF_General_ImageAcquisition_ImagingModality = Literal(
        Utemplate['General']['ImageAcquisition']['ImagingModality'])
    RDF_General_VolumetricAnalysis = Literal(
        Utemplate['General']['VolumetricAnalysis'])
    RDF_General_WorkflowStructure = Literal(
        Utemplate['General']['WorkflowStructure'])
    RDF_General_Software_name = Literal(
        Utemplate['General']['Software']['name'])
    RDF_General_Software_version = Literal(
        Utemplate['General']['Software']['version'])
    RDF_General_Software_programminglanguage = Literal(
        Utemplate['General']['Software']['programminglanguage'])
    # ImageProcessing
    RDF_ImageProcessing_Conversion = Literal(
        Utemplate['ImageProcessing']['Conversion'])
    RDF_ImageProcessing_Processing = Literal(
        Utemplate['ImageProcessing']['Processing'])
    # ROISegmentation
    RDF_ROISegmentation_SegmentationMethod = Literal(
        Utemplate['ROISegmentation']['SegmentationMethod'])
    RDF_ROISegmentation_ROIType = Literal(
        Utemplate['ROISegmentation']['ROIType'])
    # Interpolation
    RDF_Interpolation_VoxelDimensions = Literal(
        Utemplate['Interpolation']['VoxelDimensions'])
    RDF_Interpolation_ImageInterplationMethod = Literal(
        Utemplate['Interpolation']['ImageInterplationMethod'])
    RDF_Interpolation_IntensityRounding = Literal(
        Utemplate['Interpolation']['IntensityRounding'])
    RDF_Interpolation_ROIInterplationMethod = Literal(
        Utemplate['Interpolation']['ROIInterplationMethod'])
    RDF_Interpolation_ROIPartialVolume = Literal(
        Utemplate['Interpolation']['ROIPartialVolume'])
    # ROIResegmentation
    RDF_ROIResegmentation_ResegmentRange = Literal(
        Utemplate['ROIResegmentation']['ResegmentRange'])
    RDF_ROIResegmentation_ResegmentMode = Literal(
        Utemplate['ROIResegmentation']['ResegmentMode'])
    # ImageDiscretization
    # RDF_ImageDiscretization_DiscretizationMethod = Literal(Utemplate['ImageDiscretization']['DiscretizationMethod'])
    # RDF_ImageDiscretization_DiscretizationParameters = Literal(Utemplate['ImageDiscretization']['DiscretizationParameters'])

    #------------------------RDF entities--------------------------------------------------
    # ROI names may contain spaces/special characters, hence the quoting.
    RDF_patid = URIRef(localhost_patient + patientID)
    RDF_scan = URIRef(localhost_scan + myStructUID)
    RDF_imagevolume = URIRef(localhost_imagevolume + myStructUID + '_' +
                             urllib.parse.quote(RDF_ROISegmentation_ROIType))
    RDF_imagespace = URIRef(localhost_imagespace + myStructUID + '_' +
                            urllib.parse.quote(RDF_ROISegmentation_ROIType))
    RDF_featureparameter = URIRef(
        localhost_featureparameter + myStructUID + '_' +
        urllib.parse.quote(RDF_ROISegmentation_ROIType))
    RDF_ROI = URIRef(localhost_ROI + myStructUID + '_' +
                     urllib.parse.quote(RDF_ROISegmentation_ROIType))
    RDF_Datetime = Literal(
        datetime.now().strftime("%Y-%m-%d"))  # run at_date_time
    RDF_mm = URIRef(localhost_mm)
    RDF_mm2 = URIRef(localhost_mm2)
    RDF_mm3 = URIRef(localhost_mm3)

    #------------------------read Radiomics Ontology Table---------------------------------
    # Column 0: PyRadiomics feature name; column 1: Radiomics Ontology code.
    df_RO = pd.read_csv(
        os.path.join(os.getcwd(), 'RadiomicsOntology', 'ORAW_RO_Table.csv'))

    # extract feature keys and values from featureVector computed by pyradiomics
    f_key = list(featureVector.keys())
    f_value = list(featureVector.values())
    # remove entries with general info from pyradiomics results
    f_index = []
    for i in range(len(f_key)):
        if 'diagnostics' not in f_key[i]:  # filter out 'general_info' from featureVector
            f_index.append(i)
    radiomics_key = []
    radiomics_value = []
    for j in f_index:
        radiomics_key.append(f_key[j])
        radiomics_value.append(f_value[j])

    #-----------------Adding elements to graph --------------------------------------------
    for i in range(len(radiomics_key)):
        # Literals describing the image filter used for this feature key.
        # BUG FIX: these were left unbound when no filter name matched
        # (NameError at the graph.add() calls below) and were silently
        # reused from a previous iteration otherwise; reset them per key.
        RDF_ImageFilterSpace = None
        RDF_ImageFilterMethod = None
        try:
            ImageFilterSpace = Utemplate['FeatureCalculation'][
                'FeatureParameter']['ImageFilterSpace']
            for j in range(len(ImageFilterSpace)):
                imagetype = ImageFilterSpace[j]
                if imagetype.lower() in radiomics_key[i]:
                    RDF_ImageFilterSpace = Literal(
                        Utemplate['FeatureCalculation']['FeatureParameter']
                        ['ImageFilterSpace'][j])
                    RDF_ImageFilterMethod = Literal(
                        Utemplate['FeatureCalculation']['FeatureParameter']
                        ['DiscretizationMethod'][j])
                    # RDF_ImageFilterDiscretizationParameters = Literal(Utemplate['FeatureCalculation']['FeatureParameter']['DiscretizationParameters'])
        except Exception:  # BUG FIX: was a bare `except:`
            print(
                'radiomic features do not match the used filter method, please check the Universal Template and Radiomics Table !!!'
            )

        # Strip the image-type prefix so the bare feature name can be looked
        # up in the Radiomics Ontology table.
        if 'original' in radiomics_key[i]:
            radiomics_feature = radiomics_key[i][9:]  # len('original_') == 9
        elif 'log' in radiomics_key[i]:
            radiomics_feature = radiomics_key[i][20:]  # 'log-sigma-x-x-mm-3D_'
        elif 'wavelet' in radiomics_key[i]:
            radiomics_feature = radiomics_key[i][12:]  # 'wavelet-XYZ_'
        else:
            # BUG FIX: was `radiomics_feature = radiomics_feature`, a
            # self-assignment that raised NameError on the first iteration
            # and silently reused a stale name afterwards.
            radiomics_feature = radiomics_key[i]
        ## --------------------------------------------------
        ind = pd.Index(df_RO.iloc[:, 0]).get_loc(radiomics_feature)
        tmp_ROcode = df_RO.iloc[:, 1][ind]
        tmp_uri = URIRef(tmp_ROcode)
        tmp_value = Literal(radiomics_value[i])  # radiomics_feature

        #---------------------------------RDF entity for feature
        RDF_feature = URIRef(localhost_feature + myStructUID + '_' +
                             urllib.parse.quote(RDF_ROISegmentation_ROIType) +
                             '_' + radiomics_key[i])
        RDF_featureparameterspace = URIRef(featureparameterspace_uri + '_' +
                                           radiomics_key[i])
        # ---------------------------------------------------- start adding
        # ------------ patient layer ---------------
        graph.add((RDF_patid, RDF.type, patient_uri))
        graph.add((RDF_patid, has_pacs_study, RDF_scan))
        # ------------ scan layer -----------------
        graph.add((RDF_scan, RDF.type, scan_uri))
        graph.add((RDF_scan, converted_to, RDF_imagevolume))
        graph.add((RDF_scan, has_imaging_modality,
                   RDF_General_ImageAcquisition_ImagingModality))
        # ------------ image_volume layer ---------
        graph.add((RDF_imagevolume, RDF.type, image_volume_uri))
        graph.add(
            (RDF_imagevolume, has_processing, RDF_ImageProcessing_Processing))
        graph.add(
            (RDF_imagevolume, has_property, RDF_ImageProcessing_Conversion))
        graph.add((RDF_imagevolume, has_voxel_dimension,
                   RDF_Interpolation_VoxelDimensions))
        graph.add((RDF_imagevolume, is_part_of, RDF_imagespace))
        # ------------ image_space layer ------------
        graph.add((RDF_imagespace, RDF.type, image_space_uri))
        graph.add((RDF_imagespace, used_to_compute, RDF_feature))
        # ------------ ROI mask layer ---------------
        graph.add((RDF_ROI, is_part_of, RDF_imagespace))
        graph.add((RDF_ROI, has_label, RDF_ROISegmentation_ROIType))
        graph.add((RDF_ROI, has_segmentation_method,
                   RDF_ROISegmentation_SegmentationMethod))
        graph.add(
            (RDF_ROI, has_property, RDF_Interpolation_ROIInterplationMethod))
        graph.add((RDF_ROI, has_property, RDF_Interpolation_ROIPartialVolume))
        # ------------ feature layer ----------------
        graph.add((RDF_feature, RDF.type, tmp_uri))
        graph.add((RDF_feature, has_value, tmp_value))
        # ------------ calculatin run layer ---------
        graph.add((RDF_feature, computed_using, calculationrun_space_uri))
        graph.add(
            (calculationrun_space_uri, performed_by, softwareproperties_uri))
        ### missing ontology of at_date_time --------
        graph.add((calculationrun_space_uri, at_date_time, RDF_Datetime))
        graph.add((softwareproperties_uri, has_programming_language,
                   RDF_General_Software_programminglanguage))
        graph.add((softwareproperties_uri, has_version,
                   RDF_General_Software_version))
        graph.add(
            (softwareproperties_uri, has_property, RDF_General_Software_name))
        # ------------feature parameter layer--------
        graph.add((RDF_feature, computed_using, RDF_featureparameterspace))
        graph.add(
            (RDF_featureparameterspace, defined_by, RDF_featureparameter))
        graph.add((RDF_featureparameterspace, defined_by,
                   RDF_ROIResegmentation_ResegmentRange))
        graph.add((RDF_featureparameterspace, defined_by,
                   RDF_ROIResegmentation_ResegmentMode))
        # graph.add((RDF_featureparameterspace,defined_by,RDF_ImageDiscretization_DiscretizationMethod))
        # graph.add((RDF_featureparameterspace,defined_by,RDF_ImageDiscretization_DiscretizationParameters))
        graph.add((RDF_featureparameterspace, defined_by,
                   RDF_Interpolation_ImageInterplationMethod))
        graph.add((RDF_featureparameterspace, defined_by,
                   RDF_Interpolation_IntensityRounding))
        # Only link filter metadata when a filter actually matched this key
        # (see BUG FIX above).
        if RDF_ImageFilterSpace is not None:
            graph.add(
                (RDF_featureparameterspace, defined_by, RDF_ImageFilterSpace))
        if RDF_ImageFilterMethod is not None:
            graph.add(
                (RDF_featureparameterspace, defined_by, RDF_ImageFilterMethod))
        # graph.add((RDF_featureparameterspace,defined_by,RDF_ImageFilterDiscretizationParameters))

        # ----------- add unit to feature, if it has ------------------
        if tmp_ROcode == 'www.radiomics.org/RO/RNU0':
            graph.add((RDF_feature, has_unit, RDF_mm3))
            graph.add((RDF_mm3, RDF.type, mm3_uri))
        if tmp_ROcode == 'www.radiomics.org/RO/C0JK':
            graph.add((RDF_feature, has_unit, RDF_mm2))
            graph.add((RDF_mm2, RDF.type, mm2_uri))
        # if tmp_ROcode == 'original_shape_LeastAxis':
        #     graph.add((RDF_feature,has_unit,RDF_mm))
        #     graph.add((RDF_mm,RDF.type,mm_uri))
        # if tmp_ROcode == 'original_shape_MajorAxis':
        #     graph.add((RDF_feature,has_unit,RDF_mm))
        #     graph.add((RDF_mm,RDF.type,mm_uri))
        if tmp_ROcode == 'www.radiomics.org/RO/2150':
            graph.add((RDF_feature, has_unit, RDF_mm))
            graph.add((RDF_mm, RDF.type, mm_uri))
        if tmp_ROcode == 'www.radiomics.org/RO/2140':
            graph.add((RDF_feature, has_unit, RDF_mm))
            graph.add((RDF_mm, RDF.type, mm_uri))
        if tmp_ROcode == 'www.radiomics.org/RO/2130':
            graph.add((RDF_feature, has_unit, RDF_mm))
            graph.add((RDF_mm, RDF.type, mm_uri))
        if tmp_ROcode == 'www.radiomics.org/RO/L0JK':
            graph.add((RDF_feature, has_unit, RDF_mm))
            graph.add((RDF_mm, RDF.type, mm_uri))
        # if tmp_ROcode == 'original_shape_MinorAxis':
        #     graph.add((RDF_feature,has_unit,RDF_mm))
        #     graph.add((RDF_mm,RDF.type,mm_uri))
    return graph
# Module setup for the "poitlab" analysis script: load the post corpus into
# an rdflib graph, configure colored console logging, and prepare a NetworkX
# graph for person co-occurrence analysis.
from lxml import html
from rdflib.namespace import Namespace
from rdflib import Graph, BNode, RDF, RDFS, URIRef, Literal, XSD
import hashlib
import os.path
import pickle
from SPARQLWrapper import SPARQLWrapper, JSON
import csv
import coloredlogs, logging
import networkx as nx
from networkx.readwrite import json_graph
import sys
from itertools import combinations

# Prefix -> namespace map used when building/querying the RDF data.
NAMESPACES = {
    'schema': Namespace('http://schema.org/'),
    'dcterms': Namespace('http://purl.org/dc/terms/'),
    'wdt': Namespace('http://www.wikidata.org/prop/direct/'),
    'wd': Namespace('http://www.wikidata.org/entity/')
}

# Load the corpus at import time from a path relative to the working
# directory. NOTE(review): Graph.load() is deprecated in newer rdflib in
# favour of Graph.parse() — confirm the rdflib version pinned here.
g = Graph()
g.load("./data/poit.rdf")

# Set up colored logging!
logger = logging.getLogger("poitlab")
coloredlogs.install(level='DEBUG')

# Create a graph of co-occurring people in posts
# (undirected; presumably nodes are people and edges mark co-occurrence —
# the population code is not visible in this chunk).
copeople = nx.Graph()
from py4s import FourStore from rdflib.namespace import Namespace, RDF, RDFS from rdflib.term import URIRef, Literal, BNode from rdflib.graph import Graph XSD = Namespace("http://www.w3.org/2001/XMLSchema#") DC = Namespace("http://purl.org/dc/terms/") SCV = Namespace("http://purl.org/NET/scovo#") DOAP = Namespace("http://usefulinc.com/ns/doap#") FOAF = Namespace("http://xmlns.com/foaf/0.1/") OWL = Namespace("http://www.w3.org/2002/07/owl#") SKOS = Namespace("http://www.w3.org/2004/02/skos/core#") initNs = {"dc": DC, "rdf": RDF, "rdfs": RDFS, "scv": SCV, "doap": DOAP} def packages(cursor): q = "SELECT DISTINCT ?pkg WHERE { ?pkg a scv:Dataset }" g = Graph(identifier="http://semantic.ckan.net/packages") for pkg, in cursor.execute(q, initNs=initNs, soft_limit=-1): print pkg g.add((pkg, RDF.type, SCV["Dataset"])) cursor.delete_model(g.identifier) cursor.add_model(g) def tags(cursor): q = "SELECT DISTINCT ?tag WHERE { ?s doap:category ?tag }" g = Graph(identifier="http://semantic.ckan.net/tags") for tag, in cursor.execute(q, initNs=initNs, soft_limit=-1): print tag
class Eldis(object): database = 'eldis' token = 'c2ee7827-83de-4c99-b336-dbe73d340874' RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#") RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#") OWL = Namespace("http://www.w3.org/2002/07/owl#") DC = Namespace("http://purl.org/dc/elements/1.1/") DCTERMS = Namespace("http://purl.org/dc/terms/") DBPEDIA = Namespace("http://dbpedia.org/ontology/") DBPROP = Namespace("http://dbpedia.org/property/") DBRES = Namespace("http://dbpedia.org/resource/") FAO = Namespace( "http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/") IATI = Namespace("http://tools.aidinfolabs.org/linked-iati/def/iati-1.01#") FOAF = Namespace("http://xmlns.com/foaf/0.1/") SKOS = Namespace("http://www.w3.org/2004/02/skos/core#") BIBO = Namespace("http://purl.org/ontology/bibo/") BASE = Namespace("http://linked-development.org/" + database + "/") def __init__(self, out_dir='/home/eldis/', data_url=None, loop=1): self.graph = Graph() self.graph.namespace_manager.bind('owl', self.OWL, override=False) self.graph.namespace_manager.bind('dc', self.DC, override=False) self.graph.namespace_manager.bind('dcterms', self.DCTERMS, override=False) self.graph.namespace_manager.bind('dbpedia', self.DBPEDIA, override=False) self.graph.namespace_manager.bind('dbprop', self.DBPROP, override=False) self.graph.namespace_manager.bind('dbres', self.DBRES, override=False) self.graph.namespace_manager.bind('fao', self.FAO, override=False) self.graph.namespace_manager.bind('iati', self.IATI, override=False) self.graph.namespace_manager.bind('foaf', self.FOAF, override=False) self.graph.namespace_manager.bind('skos', self.SKOS, override=False) self.graph.namespace_manager.bind('bibo', self.BIBO, override=False) self.graph.namespace_manager.bind('base', self.BASE, override=False) self.out_dir = out_dir if data_url: self.data_url = data_url else: contfile = open(outdir + 'nexturl', 'r') data_url = contfile.readline() contfile.close() self.loop = loop def 
dbpedia_url(self, string): string = string[0].upper() + string[1:].lower() string = string.replace(" ", "_") return string def fetch_data(self, data_url): req = urllib2.Request(data_url) req.add_header('Accept', 'application/json') req.add_header('Token-Guid', self.token) try: resp = urllib2.urlopen(req) content = json.loads(resp.read()) except Exception as inst: print inst print "ERROR fetching" + data_url return content # Replace [ and ] if they occur in the path, query or fragment def fix_iri(self, url): urlobj = urlparse(url) path = urlobj.path.replace('[', "%5B").replace(']', "%5D") query = urlobj.query.replace('[', "%5B").replace(']', "%5D") fragment = urlobj.fragment.replace('[', "%5B").replace(']', "%5D") return url.replace(urlobj.path, path).replace(urlobj.query, query).replace( urlobj.fragment, fragment) def build_graph(self): """ ok this is fun. using rdflib 3.4, none of these commands self.graph.remove((None,None,None)) self.grpah.close() self.graph = None self.graph = Graph() free the ram used, they all make empty graphs, so if we iterate over reading in files to graphs our memory usage spirals. on 2013/04/12 the memory usage for http://api.ids.ac.uk/openapi/"+eldis.database+"/get_all/documents/full in 1000 record chunks was 1.5G, if that memory is not available then the process is KILLED I cannot find a way to free this from inside python have looked at gc module, I suspect this may lie in some underlieing code. the current fix will to to write out to a file either a follow up url or 'finished', and take this as the input, and run a loop from outside this code to spawn a series of python processes so the memory is always freed when the process ends. file names have a datestamp in them because virtuoso by default does not import the same file twice. So without this updates will not be read. 
""" date = datetime.date.today().isoformat() print "Reading " + self.data_url content = self.fetch_data(self.data_url) try: for document in content['results']: uri = self.BASE['output/' + document['object_id'] + '/'] self.graph.add( (uri, self.DCTERMS['title'], Literal(document['title']))) try: self.graph.add((uri, self.DCTERMS['abstract'], Literal(document['description']))) except: pass self.graph.add( (uri, self.DCTERMS['type'], self.DCTERMS['Text'])) self.graph.add((uri, self.RDF['type'], self.BIBO['Article'])) self.graph.add((uri, self.DCTERMS['identifier'], URIRef(document['metadata_url']))) self.graph.add( (uri, self.DCTERMS['date'], Literal(document['publication_date'].replace(' ', 'T')))) self.graph.add((uri, self.DCTERMS['language'], Literal(document['language_name']))) self.graph.add((uri, self.RDFS['seeAlso'], URIRef(document['website_url'].replace( 'display&', 'display?')))) for author in document['author']: self.graph.add( (uri, self.DCTERMS['creator'], Literal(author))) try: for publisher in document['publisher_array']['Publisher']: puburi = self.BASE['organisation/' + publisher['object_id'] + '/'] self.graph.add( (uri, self.DCTERMS['publisher'], puburi)) self.graph.add((puburi, self.DCTERMS['title'], Literal(publisher['object_name']))) self.graph.add((puburi, self.FOAF['name'], Literal(publisher['object_name']))) self.graph.add((puburi, self.RDF['type'], self.DBPEDIA['Organisation'])) self.graph.add((puburi, self.RDF['type'], self.FAO['organization'])) self.graph.add((puburi, self.RDF['type'], self.FOAF['organization'])) # We could follow this URL to get more information on the organisation... self.graph.add((puburi, self.RDFS['seeAlso'], publisher['metadata_url'])) except: #This could be improved. Bridge and Eldis appear to differ on publisher values self.graph.add((uri, self.DCTERMS['publisher'], Literal(document['publisher']))) #ELDIS / BRIDGE Regions do not map onto FAO regions effectively. We could model containments in future... 
try: for region in document['category_region_array']['Region']: regionuri = self.BASE['regions/' + region['object_id'] + '/'] self.graph.add( (uri, self.DCTERMS['coverage'], regionuri)) self.graph.add((regionuri, self.RDFS['label'], Literal(region['object_name']))) except: pass try: for country in document['country_focus_array']['Country']: countryuri = self.BASE['countries/' + country['object_id'] + '/'] self.graph.add( (uri, self.DCTERMS['coverage'], countryuri)) self.graph.add((countryuri, self.RDFS['label'], Literal(country['object_name']))) self.graph.add( (countryuri, self.FAO['codeISO2'], Literal(country['iso_two_letter_code']))) self.graph.add((countryuri, self.RDFS['seeAlso'], URIRef(country['metadata_url']))) self.graph.add((countryuri, self.OWL['sameAs'], self.DBRES[country['object_name']])) self.graph.add((countryuri, self.OWL['sameAs'], self.FAO[country['object_name']])) except: pass try: for category in document['category_theme_array']['theme']: themeuri = self.BASE['themes/' + category['object_id'] + '/'] self.graph.add( (uri, self.DCTERMS['subject'], themeuri)) self.graph.add( (themeuri, self.RDF['type'], self.SKOS['Concept'])) self.graph.add((themeuri, self.RDFS['label'], Literal(category['object_name']))) self.graph.add((themeuri, self.RDFS['seeAlso'], URIRef(category['metadata_url']))) self.graph.add( (themeuri, self.OWL['sameAs'], self.dbpedia_url( self.DBRES[category['object_name']]))) except: pass try: for document_url in document['urls']: self.graph.add( (uri, self.BIBO['uri'], fix_iri(document_url))) except: pass rdf = open( self.out_dir + 'rdf/' + self.database + '-' + date + '-' + str(self.loop) + '.rdf', 'w') rdf.write(self.graph.serialize()) rdf.close() self.graph.remove((None, None, None)) contfile = open(self.out_dir + 'nexturl', 'w') try: if (content['metadata']['next_page']): contfile.write(content['metadata']['next_page']) print str( int(content['metadata']['total_results']) - int(content['metadata']['start_offset']) ) + " records 
remaining" #self.build_graph(content['metadata']['next_page'],n+1) else: print "Build complete" except: contfile.write("No more pages") print "No more pages" contfile.close() except Exception as inst: print inst print "Failed to read " + self.data_url
# Demo script (Python 2): persist an rdflib graph in a Sleepycat
# (BerkeleyDB) store, add two triples, and print the serialized result.
default_graph_uri = "http://rdflib.net/rdfstore"
configString = "/var/tmp/rdfstore"  # NOTE(review): appears unused below

# Get the Sleepycat plugin.
# NOTE(review): this `store` instance is never passed to Graph() below —
# Graph(store="Sleepycat") creates its own store from the plugin name.
store = plugin.get('Sleepycat', Store)('rdfstore')

# Open previously created store, or create it if it doesn't exist yet
graph = Graph(store="Sleepycat", identifier=URIRef(default_graph_uri))
# mkdtemp() yields a fresh empty directory, so create=False will normally
# report NO_STORE and the create branch runs — TODO confirm intent.
path = mkdtemp()
rt = graph.open(path, create=False)
if rt == NO_STORE:
    # There is no underlying Sleepycat infrastructure, create it
    graph.open(path, create=True)
else:
    assert rt == VALID_STORE, "The underlying store is corrupt"

print "Triples in graph before add: ", len(graph)

# Now we'll add some triples to the graph & commit the changes
rdflib = Namespace('http://rdflib.net/test/')  # shadows the rdflib module name
graph.bind("test", "http://rdflib.net/test/")
graph.add((rdflib['pic:1'], rdflib['name'], Literal('Jane & Bob')))
graph.add((rdflib['pic:2'], rdflib['name'], Literal('Squirrel in Tree')))
graph.commit()

print "Triples in graph after add: ", len(graph)

# display the graph in RDF/XML
print graph.serialize()
from datetime import datetime from rdflib import Graph, BNode, RDF, URIRef, OWL, RDFS from rdflib.namespace import Namespace, NamespaceManager from modules.filesystem import read_file, write_file __author__ = "Sascha KAUFMANN" __copyright__ = "Copyright 2018, NIE-INE" __credits__ = [] __license__ = "3-Clause BSD License" __version__ = "0.0.3" __maintainer__ = "Sascha KAUFMANN" __email__ = "*****@*****.**" __status__ = "Production" _KNORA_NS = Namespace("http://www.knora.org/ontology/") _KBO_NS = Namespace("http://www.knora.org/ontology/knora-base") KBO_NS = Namespace("http://www.knora.org/ontology/knora-base#") _DIRSEP = os.sep _TIMESTAMP = datetime.now().strftime('%Y-%m-%d %H:%M:%S') def is_shortcode(code): """ check it the given code is a shortcode :param code: :return: """ re_code = re.compile('[0-9A-F]{4}|shared') try:
def query(): """Example query: Returns all the triples in the application graph in a JSON format.""" q = " SELECT *" q += " WHERE{" q += " GRAPH <http://mu.semte.ch/application> {" q += " ?s ?p ?o" q += " }" q += " }" return flask.jsonify(helpers.query(q)) ################## ## Vocabularies ## ################## mu = Namespace('http://mu.semte.ch/vocabularies/') mu_core = Namespace('http://mu.semte.ch/vocabularies/core/') mu_ext = Namespace('http://mu.semte.ch/vocabularies/ext/') graph = os.environ.get('MU_APPLICATION_GRAPH') SERVICE_RESOURCE_BASE = 'http://mu.semte.ch/services/' ####################### ## Start Application ## ####################### if __name__ == '__main__': __builtin__.app = app __builtin__.helpers = helpers __builtin__.sparql_escape = sparql_escape app_file = os.environ.get('APP_ENTRYPOINT') f = open('/app/__init__.py', 'w+')
res.serialize(), ) except: if test.syntax: raise testers: Dict[Node, Callable[[RDFTest], None]] = { RDFT.TestTurtlePositiveSyntax: turtle, RDFT.TestTurtleNegativeSyntax: turtle, RDFT.TestTurtleEval: turtle, RDFT.TestTurtleNegativeEval: turtle, } NAMESPACE = Namespace("http://www.w3.org/2013/TurtleTests/manifest.ttl#") EXPECTED_FAILURES: Dict[URIRef, str] = {} if os.name == "nt": for test in ["literal_with_LINE_FEED", "turtle-subm-15", "turtle-subm-16"]: EXPECTED_FAILURES[NAMESPACE[ test]] = "Issue with nt parser and line endings on windows" @pytest.mark.parametrize( "rdf_test_uri, type, rdf_test", read_manifest("test/w3c/turtle/manifest.ttl"), ) def test_manifest(rdf_test_uri: URIRef, type: Node, rdf_test: RDFTest): if rdf_test_uri in EXPECTED_FAILURES: pytest.xfail(EXPECTED_FAILURES[rdf_test_uri])
from reporter import Reporter from rdflib import Graph, ConjunctiveGraph, URIRef, Literal from rdflib.namespace import RDF, Namespace, RDFS import csv from support import find_local_line_id from datetime import datetime import glob import collections context_path = "https://w3id.org/oc/corpus/context.json" repok = Reporter(True, prefix="[fix_prov.py: INFO] ") reperr = Reporter(True, prefix="[fix_prov.py: ERROR] ") repok.new_article() reperr.new_article() context_json = {} PROV = Namespace("http://www.w3.org/ns/prov#") OCO = Namespace("https://w3id.org/oc/ontology/") DCTERMS = Namespace("http://purl.org/dc/terms/") CITO = Namespace("http://purl.org/spar/cito/") DATACITE = Namespace("http://purl.org/spar/datacite/") FRBR = Namespace("http://purl.org/vocab/frbr/core#") LITERAL = Namespace("http://www.essepuntato.it/2010/06/literalreification/") def load(file_p, tmp_dir=None): errors = "" current_graph = ConjunctiveGraph() if tmp_dir is not None: file_path = tmp_dir + os.sep + "tmp_rdf_file.rdf" shutil.copyfile(file_p, file_path)
****** :Description: Example1 Ejemplos de RDFLIB """ from rdflib.namespace import RDF, RDFS, Namespace, FOAF, OWL from rdflib import Graph, BNode, Literal from pprint import pformat __author__ = 'bejar' g = Graph() n = Namespace('http://ejemplo.org/') p1 = n.persona1 v = Literal(22) g.add((p1, FOAF.age, v)) # g.serialize('a.rdf') for a, b, c in g: print(a, b, c) for a, b in g[p1]: print(a, b) t = g.triples((None, FOAF.age, Literal(22)))
""" Notation 3 (N3) RDF graph serializer for RDFLib. """ from rdflib.graph import Graph from rdflib.namespace import Namespace, OWL from rdflib.plugins.serializers.turtle import (TurtleSerializer, SUBJECT, VERB, OBJECT) __all__ = ['N3Serializer'] SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#") class N3Serializer(TurtleSerializer): short_name = "n3" def __init__(self, store, parent=None): super(N3Serializer, self).__init__(store) self.keywords.update({ OWL.sameAs: '=', SWAP_LOG.implies: '=>' }) self.parent = parent def reset(self): super(N3Serializer, self).reset() self._stores = {} def subjectDone(self, subject): super(N3Serializer, self).subjectDone(subject)
import nose from pylons import config from rdflib import Graph, URIRef, Literal from rdflib.namespace import Namespace, RDF from ckanext.iaest.processors import (RDFParser, RDFParserException, RDFProfileException, DEFAULT_RDF_PROFILES, RDF_PROFILES_CONFIG_OPTION) from ckanext.iaest.profiles import RDFProfile DCT = Namespace("http://purl.org/dc/terms/") DCAT = Namespace("http://www.w3.org/ns/dcat#") eq_ = nose.tools.eq_ def _default_graph(): g = Graph() dataset1 = URIRef("http://example.org/datasets/1") g.add((dataset1, RDF.type, DCAT.Dataset)) g.add((dataset1, DCT.title, Literal('Test Dataset 1'))) distribution1_1 = URIRef("http://example.org/datasets/1/ds/1") g.add((distribution1_1, RDF.type, DCAT.Distribution)) distribution1_2 = URIRef("http://example.org/datasets/1/ds/2")
#!/usr/bin/python3 import odgi import rdflib import io from rdflib.namespace import RDF, RDFS, NamespaceManager, Namespace from rdflib.store import Store from rdflib.term import Literal from rdflib import Graph from rdflib import plugin from itertools import chain from spodgi.term import StepIriRef, NodeIriRef, StepBeginIriRef, StepEndIriRef VG = Namespace('http://biohackathon.org/resource/vg#') FALDO = Namespace('http://biohackathon.org/resource/faldo#') knownTypes = [ VG.Node, VG.Path, VG.Step, FALDO.Region, FALDO.ExactPosition, FALDO.Position ] knownPredicates = [ RDF.value, VG.rank, VG.position, VG.step, VG.path, VG.linksForwardToForward, VG.linksForwardToReverse, VG.linksReverseToForward, VG.linksReverseToReverse, VG.links, VG.reverseOfNode, VG.node, FALDO.begin, FALDO.end, FALDO.reference, FALDO.position ] nodeRelatedPredicates = [ VG.linksForwardToForward, VG.linksForwardToReverse, VG.linksReverseToForward, VG.linksReverseToReverse, VG.links, RDF.value ] stepAssociatedTypes = [
from rdflib.serializer import Serializer from rdflib.plugins.serializers.xmlwriter import XMLWriter from rdflib.term import URIRef, Literal, BNode from rdflib.namespace import Namespace from rdflib.graph import Graph, ConjunctiveGraph __all__ = ["TriXSerializer"] # TODO: Move this somewhere central TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/") XMLNS = Namespace("http://www.w3.org/XML/1998/namespace") class TriXSerializer(Serializer): def __init__(self, store): super(TriXSerializer, self).__init__(store) if not store.context_aware: raise Exception( "TriX serialization only makes sense for context-aware stores") def serialize(self, stream, base=None, encoding=None, **args): nm = self.store.namespace_manager self.writer = XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS}) self.writer.push(TRIXNS[u"TriX"]) # if base is given here, use that, if not and a base is set for the graph use that if base is None and self.store.base is not None:
# -*- encoding: utf-8 -*- import urllib import urllib2 from rdflib import Graph from rdflib.namespace import Namespace, FOAF, DC, XSD, RDF, RDFS from django.conf import settings BIBO = Namespace('http://purl.org/ontology/bibo/') DCTERMS = Namespace('http://purl.org/dc/terms/') GEONAMES = Namespace('http://www.geonames.org/ontology#') MUTO = Namespace('http://purl.org/muto/core#') PLACES = Namespace('http://purl.org/ontology/places#') SWRC = Namespace('http://swrc.ontoware.org/ontology#') SWRCFE = Namespace('http://www.morelab.deusto.es/ontologies/swrcfe#') def create_namespaced_graph(): graph = Graph() graph.bind('bibo', BIBO) graph.bind('dc', DC) graph.bind('dcterms', DCTERMS) graph.bind('foaf', FOAF) graph.bind('geonames', GEONAMES) graph.bind('muto', MUTO) graph.bind('places', PLACES) graph.bind('rdf', RDF) graph.bind('rdfs', RDFS)