Example #1
0
def RadiomicsRDF(featureVector, patientID, myStructUID, ROI):
    """Convert a PyRadiomics feature vector into an RDF graph.

    Reads the O-RAW universal template (``ParamsSettings/ORAW_UniversalTemplate.yaml``
    relative to the CWD), overlays the run metadata found in the
    ``diagnostics_*`` entries of *featureVector*, and emits one RDF feature
    node per radiomics feature, linked to patient / scan / image volume /
    image space / ROI / calculation-run / feature-parameter entities.

    Parameters
    ----------
    featureVector : dict
        PyRadiomics extraction result; ``diagnostics_*`` keys are run
        metadata, all remaining keys are feature values.
    patientID : str
        Identifier used to mint the patient URI.
    myStructUID : str
        Structure-set UID used to mint scan/image/ROI/feature URIs.
    ROI : str
        Name of the region of interest that was segmented.

    Returns
    -------
    rdflib.Graph
        Graph containing all generated triples.
    """
    # &&&&&&&&&& read the O-RAW universal template yaml file &&&&&&&&&&
    with open(
            os.path.join(os.getcwd(), 'ParamsSettings',
                         'ORAW_UniversalTemplate.yaml')) as data:
        try:
            Utemplate = yaml.safe_load(data)
        except yaml.YAMLError as exc:
            # NOTE(review): if parsing fails, Utemplate stays unbound and the
            # code below raises NameError; consider re-raising here instead.
            print(exc)

    # ---- map O-RAW / PyRadiomics run settings into the universal template ----
    Utemplate['General']['Software']['name'] = 'PyRadiomics'
    Utemplate['General']['Software']['version'] = featureVector[
        'diagnostics_Versions_PyRadiomics']
    Utemplate['General']['Software']['programminglanguage'] = 'Python'
    Utemplate['ImageProcessing']['Processing'] = featureVector[
        'diagnostics_Configuration_Settings']['resampledPixelSpacing']
    Utemplate['ROISegmentation']['ROIType'] = ROI
    Utemplate['Interpolation']['ImageInterplationMethod'] = featureVector[
        'diagnostics_Configuration_Settings']['interpolator']
    Utemplate['ROIResegmentation']['ResegmentRange'] = featureVector[
        'diagnostics_Configuration_Settings']['resegmentRange']
    Utemplate['ROIResegmentation']['ResegmentMode'] = featureVector[
        'diagnostics_Configuration_Settings']['resegmentMode']

    graph = Graph()  # rdflib graph that accumulates all triples

    # Namespaces used in O-RAW.
    ro = Namespace('http://www.radiomics.org/RO/')
    roo = Namespace('http://www.cancerdata.org/roo/')
    IAO = Namespace('http://purl.obolibary.org/obo/IAO_')
    SWO = Namespace('http://www.ebi.ac.uk/swo/SWO_')
    NCIT = Namespace('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#')
    graph.bind('ro', ro)
    graph.bind('roo', roo)
    graph.bind('IAO', IAO)
    graph.bind('SWO', SWO)
    graph.bind('NCIT', NCIT)

    # ---------------- URIs of related entities ----------------
    # Level 1: patient -> scan -> image volume -> image space -> ROI.
    patient_uri = URIRef(NCIT + 'C16960')
    has_pacs_study = URIRef(roo + '100284')  # patient has_pacs_study scan
    scan_uri = URIRef(NCIT + 'C17999')
    converted_to = URIRef(ro + '0310')  # scan converted_to image_volume
    has_imaging_modality = URIRef(ro + 'P02928312341')  # CT, PET, MR, ...
    image_volume_uri = URIRef(ro + '0271')
    is_part_of = URIRef(ro + '0298')  # image_volume is_part_of image_space
    has_processing = URIRef(ro + 'P00080')  # image_volume has_processing method
    has_voxel_dimension = URIRef(ro + 'P00118')  # image_volume -> voxel size
    image_space_uri = URIRef(ro + '0225')
    has_label = URIRef(ro + 'P00051')
    has_segmentation_method = URIRef(ro + 'P00092')
    used_to_compute = URIRef(ro + '0296')  # image_space -> RadiomicsFeature
    has_property = URIRef(roo + '100212')  # temporary generic predicate

    # Level 2: feature values and units.
    mm_uri = URIRef(ro + 'I0020')
    mm2_uri = URIRef(ro + 'I0027')
    mm3_uri = URIRef(ro + 'I0011')
    has_value = URIRef(ro + '010191')  # RadiomicsFeature has_value
    has_unit = URIRef(ro + '010198')  # RadiomicsFeature has_unit

    # Level 3: calculation run and software provenance.
    computed_using = URIRef(ro + 'P00002')  # feature computed_using run space
    calculationrun_space_uri = URIRef(ro + '0297')
    at_date_time = URIRef(roo + '100041')
    performed_by = URIRef(ro + '0283')  # run performed_by software
    softwareproperties_uri = URIRef(ro + '010215')
    has_programming_language = URIRef(ro + '0010195')
    has_version = URIRef(ro + '0010192')  # software has_version

    # Level 4: feature parameter space.
    featureparameterspace_uri = URIRef(ro + '001000')
    defined_by = URIRef(ro + 'P000009')  # parameter space defined_by settings

    # ---------------- localhost URI prefixes ----------------
    localhost_patient = 'http://localhost/data/patient_'
    localhost_scan = 'http://localhost/data/scan_'
    localhost_imagevolume = 'http://localhost/data/imagevolume_'
    localhost_imagespace = 'http://localhost/data/imagespace_'
    localhost_ROI = 'http://localhost/data/ROI_'
    localhost_feature = 'http://localhost/data/feature_'
    localhost_featureparameter = 'http://localhost/data/localhost_featureparameter_'
    localhost_mm = 'http://localhost/data/mm'
    localhost_mm2 = 'http://localhost/data/mm2'
    localhost_mm3 = 'http://localhost/data/mm3'

    # ---------------- literals taken from the (updated) template ----------------
    RDF_General_ImageAcquisition_ImagingModality = Literal(
        Utemplate['General']['ImageAcquisition']['ImagingModality'])
    RDF_General_Software_name = Literal(
        Utemplate['General']['Software']['name'])
    RDF_General_Software_version = Literal(
        Utemplate['General']['Software']['version'])
    RDF_General_Software_programminglanguage = Literal(
        Utemplate['General']['Software']['programminglanguage'])
    RDF_ImageProcessing_Conversion = Literal(
        Utemplate['ImageProcessing']['Conversion'])
    RDF_ImageProcessing_Processing = Literal(
        Utemplate['ImageProcessing']['Processing'])
    RDF_ROISegmentation_SegmentationMethod = Literal(
        Utemplate['ROISegmentation']['SegmentationMethod'])
    RDF_ROISegmentation_ROIType = Literal(
        Utemplate['ROISegmentation']['ROIType'])
    RDF_Interpolation_VoxelDimensions = Literal(
        Utemplate['Interpolation']['VoxelDimensions'])
    RDF_Interpolation_ImageInterplationMethod = Literal(
        Utemplate['Interpolation']['ImageInterplationMethod'])
    RDF_Interpolation_IntensityRounding = Literal(
        Utemplate['Interpolation']['IntensityRounding'])
    RDF_Interpolation_ROIInterplationMethod = Literal(
        Utemplate['Interpolation']['ROIInterplationMethod'])
    RDF_Interpolation_ROIPartialVolume = Literal(
        Utemplate['Interpolation']['ROIPartialVolume'])
    RDF_ROIResegmentation_ResegmentRange = Literal(
        Utemplate['ROIResegmentation']['ResegmentRange'])
    RDF_ROIResegmentation_ResegmentMode = Literal(
        Utemplate['ROIResegmentation']['ResegmentMode'])

    # ---------------- RDF entity nodes ----------------
    # URL-escape the ROI name once; it is reused in several URIs below.
    quoted_roi = urllib.parse.quote(RDF_ROISegmentation_ROIType)
    RDF_patid = URIRef(localhost_patient + patientID)
    RDF_scan = URIRef(localhost_scan + myStructUID)
    RDF_imagevolume = URIRef(localhost_imagevolume + myStructUID + '_' +
                             quoted_roi)
    RDF_imagespace = URIRef(localhost_imagespace + myStructUID + '_' +
                            quoted_roi)
    RDF_featureparameter = URIRef(localhost_featureparameter + myStructUID +
                                  '_' + quoted_roi)
    RDF_ROI = URIRef(localhost_ROI + myStructUID + '_' + quoted_roi)
    RDF_Datetime = Literal(
        datetime.now().strftime("%Y-%m-%d"))  # run at_date_time
    RDF_mm = URIRef(localhost_mm)
    RDF_mm2 = URIRef(localhost_mm2)
    RDF_mm3 = URIRef(localhost_mm3)

    # ---------------- Radiomics Ontology lookup table ----------------
    # First column: pyradiomics feature name; second column: RO code/URI.
    df_RO = pd.read_csv(
        os.path.join(os.getcwd(), 'RadiomicsOntology', 'ORAW_RO_Table.csv'))

    # Keep only real features; 'diagnostics_*' keys are run metadata.
    radiomics_key = [k for k in featureVector if 'diagnostics' not in k]
    radiomics_value = [featureVector[k] for k in radiomics_key]

    # RO code -> (unit node, unit class URI); loop-invariant, built once.
    unit_map = {
        'www.radiomics.org/RO/RNU0': (RDF_mm3, mm3_uri),  # volume -> mm^3
        'www.radiomics.org/RO/C0JK': (RDF_mm2, mm2_uri),  # surface -> mm^2
        'www.radiomics.org/RO/2150': (RDF_mm, mm_uri),
        'www.radiomics.org/RO/2140': (RDF_mm, mm_uri),
        'www.radiomics.org/RO/2130': (RDF_mm, mm_uri),
        'www.radiomics.org/RO/L0JK': (RDF_mm, mm_uri),
    }

    # Bug fix: these were previously unbound until the first matching key,
    # causing a NameError at the graph.add() calls below.  Initialising them
    # here keeps the original carry-over-from-last-match behaviour otherwise.
    RDF_ImageFilterSpace = None
    RDF_ImageFilterMethod = None

    # ---------------- add elements to the graph ----------------
    for key, value in zip(radiomics_key, radiomics_value):
        try:
            filter_space = Utemplate['FeatureCalculation'][
                'FeatureParameter']['ImageFilterSpace']
            for j, imagetype in enumerate(filter_space):
                if imagetype.lower() in key:
                    RDF_ImageFilterSpace = Literal(filter_space[j])
                    RDF_ImageFilterMethod = Literal(
                        Utemplate['FeatureCalculation']['FeatureParameter']
                        ['DiscretizationMethod'][j])
        except (KeyError, IndexError, TypeError, AttributeError):
            # Narrowed from a bare except: only lookup/shape errors are expected.
            print(
                'radiomic features do not match the used filter method, '
                'please check the Universal Template and Radiomics Table !!!'
            )

        # Strip the pyradiomics image-type prefix to obtain the bare feature name.
        if 'original' in key:
            radiomics_feature = key[9:]   # len('original_') == 9
        elif 'log' in key:
            radiomics_feature = key[20:]  # len('log-sigma-X-X-mm-3D_') == 20
        elif 'wavelet' in key:
            radiomics_feature = key[12:]  # len('wavelet-XYZ_') == 12
        else:
            # Bug fix: the original assigned radiomics_feature to itself here,
            # which raised NameError when the first key had no known prefix.
            radiomics_feature = key

        # Look up the Radiomics Ontology code for this feature name.
        ind = pd.Index(df_RO.iloc[:, 0]).get_loc(radiomics_feature)
        tmp_ROcode = df_RO.iloc[:, 1][ind]
        tmp_uri = URIRef(tmp_ROcode)
        tmp_value = Literal(value)
        # RDF entities specific to this feature.
        RDF_feature = URIRef(localhost_feature + myStructUID + '_' +
                             quoted_roi + '_' + key)
        RDF_featureparameterspace = URIRef(featureparameterspace_uri + '_' +
                                           key)

        # ------------ patient layer ---------------
        graph.add((RDF_patid, RDF.type, patient_uri))
        graph.add((RDF_patid, has_pacs_study, RDF_scan))
        # ------------ scan layer ------------------
        graph.add((RDF_scan, RDF.type, scan_uri))
        graph.add((RDF_scan, converted_to, RDF_imagevolume))
        graph.add((RDF_scan, has_imaging_modality,
                   RDF_General_ImageAcquisition_ImagingModality))
        # ------------ image_volume layer ----------
        graph.add((RDF_imagevolume, RDF.type, image_volume_uri))
        graph.add(
            (RDF_imagevolume, has_processing, RDF_ImageProcessing_Processing))
        graph.add(
            (RDF_imagevolume, has_property, RDF_ImageProcessing_Conversion))
        graph.add((RDF_imagevolume, has_voxel_dimension,
                   RDF_Interpolation_VoxelDimensions))
        graph.add((RDF_imagevolume, is_part_of, RDF_imagespace))
        # ------------ image_space layer -----------
        graph.add((RDF_imagespace, RDF.type, image_space_uri))
        graph.add((RDF_imagespace, used_to_compute, RDF_feature))
        # ------------ ROI mask layer --------------
        graph.add((RDF_ROI, is_part_of, RDF_imagespace))
        graph.add((RDF_ROI, has_label, RDF_ROISegmentation_ROIType))
        graph.add((RDF_ROI, has_segmentation_method,
                   RDF_ROISegmentation_SegmentationMethod))
        graph.add(
            (RDF_ROI, has_property, RDF_Interpolation_ROIInterplationMethod))
        graph.add((RDF_ROI, has_property, RDF_Interpolation_ROIPartialVolume))
        # ------------ feature layer ---------------
        graph.add((RDF_feature, RDF.type, tmp_uri))
        graph.add((RDF_feature, has_value, tmp_value))
        # ------------ calculation run layer -------
        graph.add((RDF_feature, computed_using, calculationrun_space_uri))
        graph.add(
            (calculationrun_space_uri, performed_by, softwareproperties_uri))
        # NOTE(review): proper ontology term for at_date_time still missing.
        graph.add((calculationrun_space_uri, at_date_time, RDF_Datetime))
        graph.add((softwareproperties_uri, has_programming_language,
                   RDF_General_Software_programminglanguage))
        graph.add((softwareproperties_uri, has_version,
                   RDF_General_Software_version))
        graph.add(
            (softwareproperties_uri, has_property, RDF_General_Software_name))
        # ------------ feature parameter layer -----
        graph.add((RDF_feature, computed_using, RDF_featureparameterspace))
        graph.add(
            (RDF_featureparameterspace, defined_by, RDF_featureparameter))
        graph.add((RDF_featureparameterspace, defined_by,
                   RDF_ROIResegmentation_ResegmentRange))
        graph.add((RDF_featureparameterspace, defined_by,
                   RDF_ROIResegmentation_ResegmentMode))
        graph.add((RDF_featureparameterspace, defined_by,
                   RDF_Interpolation_ImageInterplationMethod))
        graph.add((RDF_featureparameterspace, defined_by,
                   RDF_Interpolation_IntensityRounding))
        # Guarded: no filter literal has been resolved yet when no key matched.
        if RDF_ImageFilterSpace is not None:
            graph.add(
                (RDF_featureparameterspace, defined_by, RDF_ImageFilterSpace))
        if RDF_ImageFilterMethod is not None:
            graph.add(
                (RDF_featureparameterspace, defined_by, RDF_ImageFilterMethod))

        # ------------ unit of the feature, when it has one ------------
        if tmp_ROcode in unit_map:
            unit_node, unit_class = unit_map[tmp_ROcode]
            graph.add((RDF_feature, has_unit, unit_node))
            graph.add((unit_node, RDF.type, unit_class))
    return graph
Example #2
0
from lxml import html
from rdflib.namespace import Namespace
from rdflib import Graph, BNode, RDF, RDFS, URIRef, Literal, XSD
import hashlib
import os.path
import pickle
from SPARQLWrapper import SPARQLWrapper, JSON
import csv
import coloredlogs, logging
import networkx as nx
from networkx.readwrite import json_graph
import sys
from itertools import combinations

# Namespaces used when querying/annotating the POIT post graph.
NAMESPACES = {
    'schema': Namespace('http://schema.org/'),
    'dcterms': Namespace('http://purl.org/dc/terms/'),
    'wdt': Namespace('http://www.wikidata.org/prop/direct/'),
    'wd': Namespace('http://www.wikidata.org/entity/')
}

# Load the source RDF data from disk.
g = Graph()
# NOTE(review): Graph.load() was deprecated and later removed in rdflib 6;
# newer rdflib requires g.parse("./data/poit.rdf") — confirm pinned version.
g.load("./data/poit.rdf")

# Set up colored logging!
logger = logging.getLogger("poitlab")
coloredlogs.install(level='DEBUG')

# Create a graph of co-occurring people in posts
# (networkx graph, distinct from the rdflib graph 'g' above).
copeople = nx.Graph()
Example #3
0
from py4s import FourStore
from rdflib.namespace import Namespace, RDF, RDFS
from rdflib.term import URIRef, Literal, BNode
from rdflib.graph import Graph

# Vocabulary namespaces used by the semantic.ckan.net export helpers.
XSD = Namespace("http://www.w3.org/2001/XMLSchema#")
DC = Namespace("http://purl.org/dc/terms/")
SCV = Namespace("http://purl.org/NET/scovo#")
DOAP = Namespace("http://usefulinc.com/ns/doap#")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")

# Prefix bindings handed to cursor.execute() as initNs for SPARQL queries.
initNs = {"dc": DC, "rdf": RDF, "rdfs": RDFS, "scv": SCV, "doap": DOAP}


def packages(cursor):
    """Rebuild the 'packages' named graph in the 4store backend.

    Selects every scv:Dataset via SPARQL, re-adds the rdf:type triples
    into a fresh graph, then replaces the stored model wholesale.
    NOTE: Python 2 code (print statement).
    """
    q = "SELECT DISTINCT ?pkg WHERE { ?pkg a scv:Dataset }"
    g = Graph(identifier="http://semantic.ckan.net/packages")
    for pkg, in cursor.execute(q, initNs=initNs, soft_limit=-1):
        print pkg
        g.add((pkg, RDF.type, SCV["Dataset"]))
    # Delete-then-add replaces the previously stored model entirely.
    cursor.delete_model(g.identifier)
    cursor.add_model(g)


def tags(cursor):
    """Print every distinct doap:category tag found in the store.

    NOTE(review): compared with packages() this looks truncated by the
    paste — the graph 'g' is created but nothing is ever added to it or
    written back to the store.  Python 2 code (print statement).
    """
    q = "SELECT DISTINCT ?tag WHERE { ?s doap:category ?tag }"
    g = Graph(identifier="http://semantic.ckan.net/tags")
    for tag, in cursor.execute(q, initNs=initNs, soft_limit=-1):
        print tag
class Eldis(object):
    """Harvest documents from the IDS/Eldis API and serialise them as RDF.

    Each call to build_graph() fetches one page of API results, maps every
    document (plus its publishers, regions, countries and themes) onto
    DCTERMS/BIBO/FOAF/FAO/SKOS triples, writes the graph to a date-stamped
    .rdf file under out_dir, and records the next page URL (or a sentinel)
    in the 'nexturl' file so an external loop can spawn a fresh process per
    page (see the build_graph docstring for why).

    NOTE(review): Python 2 code (print statements, urllib2); urllib2, json,
    urlparse and datetime are used below but not imported in this excerpt.
    """

    # API identity: dataset name and access token sent with every request.
    database = 'eldis'
    token = 'c2ee7827-83de-4c99-b336-dbe73d340874'

    # RDF namespaces used when minting and linking resources.
    RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
    OWL = Namespace("http://www.w3.org/2002/07/owl#")
    DC = Namespace("http://purl.org/dc/elements/1.1/")
    DCTERMS = Namespace("http://purl.org/dc/terms/")
    DBPEDIA = Namespace("http://dbpedia.org/ontology/")
    DBPROP = Namespace("http://dbpedia.org/property/")
    DBRES = Namespace("http://dbpedia.org/resource/")
    FAO = Namespace(
        "http://www.fao.org/countryprofiles/geoinfo/geopolitical/resource/")
    IATI = Namespace("http://tools.aidinfolabs.org/linked-iati/def/iati-1.01#")
    FOAF = Namespace("http://xmlns.com/foaf/0.1/")
    SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
    # Base namespace for all resources minted by this harvester.
    BASE = Namespace("http://linked-development.org/" + database + "/")

    def __init__(self, out_dir='/home/eldis/', data_url=None, loop=1):
        """Create the graph, bind prefixes and pick the starting URL.

        out_dir  -- directory holding the 'nexturl' state file and rdf output
        data_url -- explicit API page to fetch; when None the URL is read
                    from the 'nexturl' file
        loop     -- iteration counter embedded in the output file name
        """
        self.graph = Graph()
        self.graph.namespace_manager.bind('owl', self.OWL, override=False)
        self.graph.namespace_manager.bind('dc', self.DC, override=False)
        self.graph.namespace_manager.bind('dcterms',
                                          self.DCTERMS,
                                          override=False)
        self.graph.namespace_manager.bind('dbpedia',
                                          self.DBPEDIA,
                                          override=False)
        self.graph.namespace_manager.bind('dbprop',
                                          self.DBPROP,
                                          override=False)
        self.graph.namespace_manager.bind('dbres', self.DBRES, override=False)
        self.graph.namespace_manager.bind('fao', self.FAO, override=False)
        self.graph.namespace_manager.bind('iati', self.IATI, override=False)
        self.graph.namespace_manager.bind('foaf', self.FOAF, override=False)
        self.graph.namespace_manager.bind('skos', self.SKOS, override=False)
        self.graph.namespace_manager.bind('bibo', self.BIBO, override=False)
        self.graph.namespace_manager.bind('base', self.BASE, override=False)
        self.out_dir = out_dir
        if data_url:
            self.data_url = data_url
        else:
            # NOTE(review): 'outdir' is undefined here (presumably meant
            # self.out_dir), and the URL read from the file is bound to the
            # local 'data_url' only, so self.data_url is never set on this
            # branch — confirm and fix upstream.
            contfile = open(outdir + 'nexturl', 'r')
            data_url = contfile.readline()
            contfile.close()
        self.loop = loop

    def dbpedia_url(self, string):
        """Normalise a name into DBpedia resource form ('First_rest')."""
        # Capitalise the first letter, lowercase the rest, spaces -> underscores.
        string = string[0].upper() + string[1:].lower()
        string = string.replace(" ", "_")
        return string

    def fetch_data(self, data_url):
        """GET data_url from the API and return the decoded JSON payload."""
        req = urllib2.Request(data_url)
        req.add_header('Accept', 'application/json')
        req.add_header('Token-Guid', self.token)  # API auth token
        try:
            resp = urllib2.urlopen(req)
            content = json.loads(resp.read())
        except Exception as inst:
            # NOTE(review): on failure 'content' stays unbound, so the
            # return below raises NameError instead of reporting cleanly.
            print inst
            print "ERROR fetching" + data_url
        return content

    # Replace [ and ] if they occur in the path, query or fragment
    def fix_iri(self, url):
        """Percent-encode '[' and ']' in a URL's path, query and fragment."""
        urlobj = urlparse(url)
        path = urlobj.path.replace('[', "%5B").replace(']', "%5D")
        query = urlobj.query.replace('[', "%5B").replace(']', "%5D")
        fragment = urlobj.fragment.replace('[', "%5B").replace(']', "%5D")
        # NOTE(review): str.replace on the whole URL can also rewrite text
        # outside the intended component when substrings repeat — verify.
        return url.replace(urlobj.path,
                           path).replace(urlobj.query, query).replace(
                               urlobj.fragment, fragment)

    def build_graph(self):
        """Fetch one page of documents, emit RDF, and record the next URL.

        Memory note (why one page per process): with rdflib 3.4 none of
        self.graph.remove((None,None,None)), self.graph.close(),
        self.graph = None, or self.graph = Graph() actually frees the RAM
        used — they all just produce empty graphs — so iterating over many
        pages in one process makes memory usage spiral.  On 2013/04/12,
        reading http://api.ids.ac.uk/openapi/<database>/get_all/documents/full
        in 1000-record chunks used 1.5 GB; if that memory is not available
        the process is KILLED.  No way was found to free it from inside
        Python (gc was tried); the suspicion is it lies in underlying code.

        The current fix is to write either a follow-up URL or a sentinel to
        a file, take that as the next input, and have an outer loop spawn a
        series of Python processes so memory is always freed on exit.

        File names carry a datestamp because Virtuoso by default does not
        import the same file twice; without it, updates would not be read.
        """
        date = datetime.date.today().isoformat()
        print "Reading " + self.data_url
        content = self.fetch_data(self.data_url)
        try:
            for document in content['results']:
                # Mint the document URI and attach core DCTERMS metadata.
                uri = self.BASE['output/' + document['object_id'] + '/']
                self.graph.add(
                    (uri, self.DCTERMS['title'], Literal(document['title'])))
                try:
                    self.graph.add((uri, self.DCTERMS['abstract'],
                                    Literal(document['description'])))
                except:
                    # Best-effort: documents without a description are skipped.
                    pass
                self.graph.add(
                    (uri, self.DCTERMS['type'], self.DCTERMS['Text']))
                self.graph.add((uri, self.RDF['type'], self.BIBO['Article']))
                self.graph.add((uri, self.DCTERMS['identifier'],
                                URIRef(document['metadata_url'])))
                # Space -> 'T' turns "YYYY-MM-DD hh:mm:ss" into ISO-like form.
                self.graph.add(
                    (uri, self.DCTERMS['date'],
                     Literal(document['publication_date'].replace(' ', 'T'))))
                self.graph.add((uri, self.DCTERMS['language'],
                                Literal(document['language_name'])))
                self.graph.add((uri, self.RDFS['seeAlso'],
                                URIRef(document['website_url'].replace(
                                    'display&', 'display?'))))

                for author in document['author']:
                    self.graph.add(
                        (uri, self.DCTERMS['creator'], Literal(author)))

                try:
                    # Publishers become organisation resources of their own.
                    for publisher in document['publisher_array']['Publisher']:
                        puburi = self.BASE['organisation/' +
                                           publisher['object_id'] + '/']
                        self.graph.add(
                            (uri, self.DCTERMS['publisher'], puburi))
                        self.graph.add((puburi, self.DCTERMS['title'],
                                        Literal(publisher['object_name'])))
                        self.graph.add((puburi, self.FOAF['name'],
                                        Literal(publisher['object_name'])))
                        self.graph.add((puburi, self.RDF['type'],
                                        self.DBPEDIA['Organisation']))
                        self.graph.add((puburi, self.RDF['type'],
                                        self.FAO['organization']))
                        self.graph.add((puburi, self.RDF['type'],
                                        self.FOAF['organization']))
                        # We could follow this URL to get more information on the organisation...
                        self.graph.add((puburi, self.RDFS['seeAlso'],
                                        publisher['metadata_url']))
                except:
                    #This could be improved. Bridge and Eldis appear to differ on publisher values
                    self.graph.add((uri, self.DCTERMS['publisher'],
                                    Literal(document['publisher'])))

                #ELDIS / BRIDGE Regions do not map onto FAO regions effectively. We could model containments in future...
                try:
                    for region in document['category_region_array']['Region']:
                        regionuri = self.BASE['regions/' +
                                              region['object_id'] + '/']
                        self.graph.add(
                            (uri, self.DCTERMS['coverage'], regionuri))
                        self.graph.add((regionuri, self.RDFS['label'],
                                        Literal(region['object_name'])))
                except:
                    pass

                try:
                    # Countries: coverage links plus ISO code and sameAs
                    # links into DBpedia and the FAO geopolitical ontology.
                    for country in document['country_focus_array']['Country']:
                        countryuri = self.BASE['countries/' +
                                               country['object_id'] + '/']
                        self.graph.add(
                            (uri, self.DCTERMS['coverage'], countryuri))
                        self.graph.add((countryuri, self.RDFS['label'],
                                        Literal(country['object_name'])))
                        self.graph.add(
                            (countryuri, self.FAO['codeISO2'],
                             Literal(country['iso_two_letter_code'])))
                        self.graph.add((countryuri, self.RDFS['seeAlso'],
                                        URIRef(country['metadata_url'])))
                        self.graph.add((countryuri, self.OWL['sameAs'],
                                        self.DBRES[country['object_name']]))
                        self.graph.add((countryuri, self.OWL['sameAs'],
                                        self.FAO[country['object_name']]))
                except:
                    pass

                try:
                    # Themes become SKOS concepts linked to DBpedia.
                    for category in document['category_theme_array']['theme']:
                        themeuri = self.BASE['themes/' +
                                             category['object_id'] + '/']
                        self.graph.add(
                            (uri, self.DCTERMS['subject'], themeuri))
                        self.graph.add(
                            (themeuri, self.RDF['type'], self.SKOS['Concept']))
                        self.graph.add((themeuri, self.RDFS['label'],
                                        Literal(category['object_name'])))
                        self.graph.add((themeuri, self.RDFS['seeAlso'],
                                        URIRef(category['metadata_url'])))
                        # NOTE(review): this passes a full URIRef through
                        # dbpedia_url(), which lowercases everything after the
                        # first character — likely not intended; verify.
                        self.graph.add(
                            (themeuri, self.OWL['sameAs'],
                             self.dbpedia_url(
                                 self.DBRES[category['object_name']])))
                except:
                    pass

                try:
                    for document_url in document['urls']:
                        # NOTE(review): unqualified fix_iri — presumably meant
                        # self.fix_iri; as written this raises NameError unless
                        # a module-level fix_iri exists (not in this excerpt).
                        self.graph.add(
                            (uri, self.BIBO['uri'], fix_iri(document_url)))
                except:
                    pass
            # Serialise this page's triples to a date- and loop-stamped file.
            rdf = open(
                self.out_dir + 'rdf/' + self.database + '-' + date + '-' +
                str(self.loop) + '.rdf', 'w')
            rdf.write(self.graph.serialize())
            rdf.close()

            # Empty the graph (see docstring: this does NOT free the RAM).
            self.graph.remove((None, None, None))

            # Persist the next page URL (or a sentinel) for the outer loop.
            contfile = open(self.out_dir + 'nexturl', 'w')
            try:
                if (content['metadata']['next_page']):
                    contfile.write(content['metadata']['next_page'])
                    print str(
                        int(content['metadata']['total_results']) -
                        int(content['metadata']['start_offset'])
                    ) + " records remaining"
                    #self.build_graph(content['metadata']['next_page'],n+1)
                else:
                    print "Build complete"
            except:
                contfile.write("No more pages")
                print "No more pages"
            contfile.close()
        except Exception as inst:
            print inst
            print "Failed to read " + self.data_url
Example #5
0
# Demo: open (or create) a Sleepycat-backed rdflib store, add triples,
# commit, and print the serialised graph.
# NOTE(review): Python 2 code (print statements); plugin, Store, Graph,
# URIRef, Namespace, Literal, mkdtemp, NO_STORE and VALID_STORE are used
# below but not imported in this excerpt.
default_graph_uri = "http://rdflib.net/rdfstore"
# NOTE(review): configString is never used below — the store is opened at
# the mkdtemp() path instead; verify which location is intended.
configString = "/var/tmp/rdfstore"

# Get the Sleepycat plugin.
store = plugin.get('Sleepycat', Store)('rdfstore')

# Open previously created store, or create it if it doesn't exist yet
graph = Graph(store="Sleepycat", identifier=URIRef(default_graph_uri))
path = mkdtemp()
rt = graph.open(path, create=False)
if rt == NO_STORE:
    # There is no underlying Sleepycat infrastructure, create it
    graph.open(path, create=True)
else:
    assert rt == VALID_STORE, "The underlying store is corrupt"

print "Triples in graph before add: ", len(graph)

# Now we'll add some triples to the graph & commit the changes
rdflib = Namespace('http://rdflib.net/test/')
graph.bind("test", "http://rdflib.net/test/")

graph.add((rdflib['pic:1'], rdflib['name'], Literal('Jane & Bob')))
graph.add((rdflib['pic:2'], rdflib['name'], Literal('Squirrel in Tree')))
graph.commit()

print "Triples in graph after add: ", len(graph)

# display the graph in RDF/XML
print graph.serialize()
Example #6
0
from datetime import datetime
from rdflib import Graph, BNode, RDF, URIRef, OWL, RDFS
from rdflib.namespace import Namespace, NamespaceManager

from modules.filesystem import read_file, write_file

__author__ = "Sascha KAUFMANN"
__copyright__ = "Copyright 2018, NIE-INE"
__credits__ = []
__license__ = "3-Clause BSD License"
__version__ = "0.0.3"
__maintainer__ = "Sascha KAUFMANN"
__email__ = "*****@*****.**"
__status__ = "Production"

# Knora ontology namespaces: base prefix, knora-base without and with the
# trailing '#' (the '#' form is the one used to build term IRIs).
_KNORA_NS = Namespace("http://www.knora.org/ontology/")
_KBO_NS = Namespace("http://www.knora.org/ontology/knora-base")
KBO_NS = Namespace("http://www.knora.org/ontology/knora-base#")
# Platform path separator and a run timestamp, captured once at import time.
_DIRSEP = os.sep
_TIMESTAMP = datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def is_shortcode(code):
    """
    check it the given code is a shortcode

    :param code:
    :return:
    """
    re_code = re.compile('[0-9A-F]{4}|shared')
    try:
Exemple #7
0
def query():
    """Run an example SPARQL query and return the result as JSON.

    Selects every (?s, ?p, ?o) triple stored in the
    <http://mu.semte.ch/application> graph and hands the raw query
    result to Flask's JSON serializer.
    """
    # Adjacent string literals concatenate at compile time into the
    # exact same single-line query string the original built with '+='.
    q = (" SELECT *"
         " WHERE{"
         "   GRAPH <http://mu.semte.ch/application> {"
         "     ?s ?p ?o"
         "   }"
         " }")
    return flask.jsonify(helpers.query(q))


##################
## Vocabularies ##
##################
# mu.semte.ch vocabulary namespaces (base, core, and extension terms).
mu = Namespace('http://mu.semte.ch/vocabularies/')
mu_core = Namespace('http://mu.semte.ch/vocabularies/core/')
mu_ext = Namespace('http://mu.semte.ch/vocabularies/ext/')

# Target application graph URI, taken from the environment; None if unset.
graph = os.environ.get('MU_APPLICATION_GRAPH')
SERVICE_RESOURCE_BASE = 'http://mu.semte.ch/services/'

#######################
## Start Application ##
#######################
if __name__ == '__main__':
    __builtin__.app = app
    __builtin__.helpers = helpers
    __builtin__.sparql_escape = sparql_escape
    app_file = os.environ.get('APP_ENTRYPOINT')
    f = open('/app/__init__.py', 'w+')
Exemple #8
0
                    res.serialize(),
                )

    except:
        if test.syntax:
            raise


# Dispatch table mapping each W3C Turtle test type to its runner function;
# all four test types are handled by the same `turtle` runner.
testers: Dict[Node, Callable[[RDFTest], None]] = {
    RDFT.TestTurtlePositiveSyntax: turtle,
    RDFT.TestTurtleNegativeSyntax: turtle,
    RDFT.TestTurtleEval: turtle,
    RDFT.TestTurtleNegativeEval: turtle,
}

NAMESPACE = Namespace("http://www.w3.org/2013/TurtleTests/manifest.ttl#")
# Test URI -> reason string; tests listed here are xfail'ed by test_manifest.
EXPECTED_FAILURES: Dict[URIRef, str] = {}

# On Windows, CRLF line-ending handling in the nt parser breaks these tests.
if os.name == "nt":
    for test in ["literal_with_LINE_FEED", "turtle-subm-15", "turtle-subm-16"]:
        EXPECTED_FAILURES[NAMESPACE[
            test]] = "Issue with nt parser and line endings on windows"

@pytest.mark.parametrize(
    "rdf_test_uri, type, rdf_test",
    read_manifest("test/w3c/turtle/manifest.ttl"),
)
def test_manifest(rdf_test_uri: URIRef, type: Node, rdf_test: RDFTest):
    if rdf_test_uri in EXPECTED_FAILURES:
        pytest.xfail(EXPECTED_FAILURES[rdf_test_uri])
from reporter import Reporter
from rdflib import Graph, ConjunctiveGraph, URIRef, Literal
from rdflib.namespace import RDF, Namespace, RDFS
import csv
from support import find_local_line_id
from datetime import datetime
import glob
import collections

# JSON-LD context used by the OpenCitations corpus.
context_path = "https://w3id.org/oc/corpus/context.json"
# Console reporters for normal and error output of this script.
repok = Reporter(True, prefix="[fix_prov.py: INFO] ")
reperr = Reporter(True, prefix="[fix_prov.py: ERROR] ")
repok.new_article()
reperr.new_article()
# Cache for the loaded JSON-LD context (filled elsewhere).
context_json = {}
# Ontology namespaces used when fixing provenance data.
PROV = Namespace("http://www.w3.org/ns/prov#")
OCO = Namespace("https://w3id.org/oc/ontology/")
DCTERMS = Namespace("http://purl.org/dc/terms/")
CITO = Namespace("http://purl.org/spar/cito/")
DATACITE = Namespace("http://purl.org/spar/datacite/")
FRBR = Namespace("http://purl.org/vocab/frbr/core#")
LITERAL = Namespace("http://www.essepuntato.it/2010/06/literalreification/")


def load(file_p, tmp_dir=None):
    errors = ""
    current_graph = ConjunctiveGraph()

    if tmp_dir is not None:
        file_path = tmp_dir + os.sep + "tmp_rdf_file.rdf"
        shutil.copyfile(file_p, file_path)
Exemple #10
0
******

:Description: Example1

Ejemplos de RDFLIB

"""

from rdflib.namespace import RDF, RDFS, Namespace, FOAF, OWL
from rdflib import Graph, BNode, Literal
from pprint import pformat
__author__ = 'bejar'

# Minimal rdflib walkthrough: build a one-triple graph and query it.
g = Graph()

n = Namespace('http://ejemplo.org/')

# One person resource with an integer age literal.
p1 = n.persona1
v = Literal(22)

g.add((p1, FOAF.age, v))

# Iterating the graph yields every (subject, predicate, object) triple.
for subj, pred, obj in g:
    print(subj, pred, obj)

# Slicing by subject yields (predicate, object) pairs for that subject.
for pred, obj in g[p1]:
    print(pred, obj)

# triples() returns an iterator over matches for a (s, p, o) pattern;
# None acts as a wildcard for the subject position here.
t = g.triples((None, FOAF.age, Literal(22)))
Exemple #11
0
"""
Notation 3 (N3) RDF graph serializer for RDFLib.
"""
from rdflib.graph import Graph
from rdflib.namespace import Namespace, OWL
from rdflib.plugins.serializers.turtle import (TurtleSerializer,
        SUBJECT, VERB, OBJECT)

__all__ = ['N3Serializer']

SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#")


class N3Serializer(TurtleSerializer):

    short_name = "n3"

    def __init__(self, store, parent=None):
        """Initialise the N3 serializer on top of the Turtle serializer.

        :param store: graph/store to serialize.
        :param parent: optional enclosing serializer (used for nested
            formula serialization).
        """
        super(N3Serializer, self).__init__(store)
        self.parent = parent
        # N3 has dedicated keyword shorthands for these two predicates.
        self.keywords.update({OWL.sameAs: '=', SWAP_LOG.implies: '=>'})

    def reset(self):
        """Reset serializer state and clear the per-run formula-store cache."""
        super(N3Serializer, self).reset()
        self._stores = {}

    def subjectDone(self, subject):
        # Mark *subject* as serialized via the Turtle base implementation.
        # NOTE(review): body may be truncated in this excerpt — verify upstream.
        super(N3Serializer, self).subjectDone(subject)
Exemple #12
0
import nose

from pylons import config

from rdflib import Graph, URIRef, Literal
from rdflib.namespace import Namespace, RDF

from ckanext.iaest.processors import (RDFParser, RDFParserException,
                                      RDFProfileException,
                                      DEFAULT_RDF_PROFILES,
                                      RDF_PROFILES_CONFIG_OPTION)

from ckanext.iaest.profiles import RDFProfile

# Dublin Core terms and W3C DCAT namespaces used by the parser tests.
DCT = Namespace("http://purl.org/dc/terms/")
DCAT = Namespace("http://www.w3.org/ns/dcat#")

# Short alias for nose's equality assertion helper.
eq_ = nose.tools.eq_


def _default_graph():

    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))
    g.add((dataset1, DCT.title, Literal('Test Dataset 1')))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    distribution1_2 = URIRef("http://example.org/datasets/1/ds/2")
Exemple #13
0
#!/usr/bin/python3
import odgi
import rdflib
import io
from rdflib.namespace import RDF, RDFS, NamespaceManager, Namespace
from rdflib.store import Store
from rdflib.term import Literal
from rdflib import Graph
from rdflib import plugin
from itertools import chain
from spodgi.term import StepIriRef, NodeIriRef, StepBeginIriRef, StepEndIriRef

# Variation-graph (vg) and FALDO position ontology namespaces.
VG = Namespace('http://biohackathon.org/resource/vg#')
FALDO = Namespace('http://biohackathon.org/resource/faldo#')

# RDF types this store knows how to answer queries about.
knownTypes = [
    VG.Node, VG.Path, VG.Step, FALDO.Region, FALDO.ExactPosition,
    FALDO.Position
]
# Predicates this store knows how to answer queries about.
knownPredicates = [
    RDF.value, VG.rank, VG.position, VG.step, VG.path,
    VG.linksForwardToForward, VG.linksForwardToReverse,
    VG.linksReverseToForward, VG.linksReverseToReverse, VG.links,
    VG.reverseOfNode, VG.node, FALDO.begin, FALDO.end, FALDO.reference,
    FALDO.position
]
# Subset of predicates whose subject/object is a graph node.
nodeRelatedPredicates = [
    VG.linksForwardToForward, VG.linksForwardToReverse,
    VG.linksReverseToForward, VG.linksReverseToReverse, VG.links, RDF.value
]
stepAssociatedTypes = [
Exemple #14
0
from rdflib.serializer import Serializer
from rdflib.plugins.serializers.xmlwriter import XMLWriter

from rdflib.term import URIRef, Literal, BNode
from rdflib.namespace import Namespace

from rdflib.graph import Graph, ConjunctiveGraph

# Public API of this module.
__all__ = ["TriXSerializer"]

# TODO: Move this somewhere central
# TriX document namespace and the XML namespace (for xml:base attributes).
TRIXNS = Namespace("http://www.w3.org/2004/03/trix/trix-1/")
XMLNS = Namespace("http://www.w3.org/XML/1998/namespace")


class TriXSerializer(Serializer):
    def __init__(self, store):
        """Create a TriX serializer for *store*.

        :param store: graph/store to serialize; must be context-aware,
            since TriX is a named-graph (context) serialization format.
        :raises Exception: if the store is not context-aware.
        """
        super(TriXSerializer, self).__init__(store)
        if not store.context_aware:
            raise Exception(
                "TriX serialization only makes sense for context-aware stores")

    def serialize(self, stream, base=None, encoding=None, **args):

        nm = self.store.namespace_manager

        self.writer = XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS})

        self.writer.push(TRIXNS[u"TriX"])
        # if base is given here, use that, if not and a base is set for the graph use that
        if base is None and self.store.base is not None:
Exemple #15
0
# -*- encoding: utf-8 -*-

import urllib
import urllib2

from rdflib import Graph
from rdflib.namespace import Namespace, FOAF, DC, XSD, RDF, RDFS

from django.conf import settings

# Ontology namespaces for bibliographic / research-metadata export.
BIBO = Namespace('http://purl.org/ontology/bibo/')
DCTERMS = Namespace('http://purl.org/dc/terms/')
GEONAMES = Namespace('http://www.geonames.org/ontology#')
MUTO = Namespace('http://purl.org/muto/core#')
PLACES = Namespace('http://purl.org/ontology/places#')
SWRC = Namespace('http://swrc.ontoware.org/ontology#')
# Local extension of the SWRC ontology (morelab.deusto.es).
SWRCFE = Namespace('http://www.morelab.deusto.es/ontologies/swrcfe#')


def create_namespaced_graph():
    graph = Graph()

    graph.bind('bibo', BIBO)
    graph.bind('dc', DC)
    graph.bind('dcterms', DCTERMS)
    graph.bind('foaf', FOAF)
    graph.bind('geonames', GEONAMES)
    graph.bind('muto', MUTO)
    graph.bind('places', PLACES)
    graph.bind('rdf', RDF)
    graph.bind('rdfs', RDFS)