Example #1
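# Assumes csv, urllib.parse (imported as `parse`), an rdflib Graph `graph`,
# the owl and gndo namespaces, and the generator_persons() helper shown in
# Example #14 are defined elsewhere in the script.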
data = csv.reader(
    open('C:\\Users\\Maral\\Desktop\\Assignments responsible persons_Maral.csv'))

for row in data:
    uri = row[0]
    print(uri)

    jlu = generator_persons(uri)
    print(jlu)

    if jlu != 'error':
        if row[3].strip() != 'None':
            graph.add((URIRef(jlu), owl.sameAs, URIRef(row[3])))

        if row[4].strip() != 'None':
            gnd = row[4].rsplit('/', 1)[1]
            graph.add((URIRef(jlu), owl.sameAs, URIRef(row[4])))
            graph.add((URIRef(jlu), gndo.gndIdentifier, Literal(gnd)))

        urisame = parse.unquote(uri)
        graph.add((URIRef(jlu), owl.sameAs, URIRef(urisame)))

graph.serialize(destination='Haskala.ttl', format="turtle")
Example #2
import csv
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import DCTERMS, RDF, RDFS, SKOS, XSD

input_file = csv.DictReader(open("test_sheet.csv"))

# a small one-triple graph is built per row inside the loop below

for row in input_file:

    # convert it from an OrderedDict to a regular dict
    row = dict(row)
    # print(row)
    #{'Subject Label': 'Pearl Wilmer Booker', 'Subject URI': 'None', 'Predicate Label': 'Daughter Of', 'Predicate URI': '', 'Predicate Symmetry': 'Asymmetric', 'Object Label': 'Mary Booker', 'Object URI': 'None'}
    # make a literal and add it
    # output_graph.add(  (URIRef(row['Subject URI']), RDFS.label, Literal(row['Subject Label'], lang='en')) )

    # build a one-triple graph for this row, with the object as a URI
    output_graph = Graph()
    output_graph.add((URIRef(row['Subject URI']), URIRef(row['Predicate URI']),
                      URIRef(row['Object URI'])))

    # serialize the single triple as N-Triples; older rdflib returns bytes
    triple = output_graph.serialize(format='nt')
    if isinstance(triple, bytes):
        triple = str(triple, 'utf-8')
    # drop the trailing " ." and append the named-graph context (the sheet is
    # assumed to also have a 'Context' column) to get an N-Quads-style line
    triple = triple.strip().rstrip('.').rstrip()
    triple = f"{triple} <{row['Context']}> ."
    print(triple)

# output_graph.serialize(destination='my_graph.nt', format='nt')
Example #3
def urijoin(item):
    # item is a (namespace base, local name) pair
    base, ext = item
    return URIRef(Namespace(base)[ext])
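# Illustrative usage: urijoin(("http://example.org/vocab#", "Person"))
# returns URIRef("http://example.org/vocab#Person").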
Example #4
def reports_rdf():
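    # Builds an RDF/XML export of SERIS reports: one seris:SERISReport node per
    # report with Dublin Core metadata, blank nodes for spatial/linguistic info
    # and topic annotations. Assumes flask, rdflib (Graph, Namespace, URIRef,
    # Literal, BNode, RDF, RDFS, DC, DCTERMS, SKOS, FOAF) and the local schema,
    # get_schema_items and get_reports helpers are imported at module level.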
    g = Graph()
    count = [0]
    properties = {}
    get_schema_items(schema.ReportSchema(), properties, count)
    get_schema_items(schema.SerisReviewSchema(), properties, count)
    export = get_reports()

    bibo = Namespace('http://uri.gbv.de/ontology/bibo/')
    nao = Namespace(
        'http://www.semanticdesktop.org/ontologies/2007/08/15/nao#')
    theme = Namespace('http://www.eea.europa.eu/themes/')
    bibtex = Namespace('http://purl.org/net/nknouf/ns/bibtex#')
    seris = Namespace(
        'http://forum.eionet.europa.eu/nrc-state-environment/seris/ontology/'
        'schema#')

    for entry in export:
        current_id = entry['report_id']
        current_uri = flask.url_for('views.report_view',
                                    report_id=current_id,
                                    _external=True)

        node = URIRef(current_uri)

        g.add((node, RDF.type, seris.SERISReport))
        g.add((node, DCTERMS.identifier, Literal(current_id)))

        for region in entry['header_region']:
            item = BNode()
            g.add((node, DCTERMS.spatial, item))
            g.add((item, RDF.type, DCTERMS.Location))
            g.add((item, RDFS.label, Literal('Region of report')))
            g.add((item, DCTERMS.subject, Literal(region)))

        for country in entry['header_country']:
            g.add((node, DC.coverage, Literal(country)))

        if 'header_subregion' in entry.keys():
            for subregion in entry['header_subregion']:
                item = BNode()
                g.add((node, DCTERMS.spatial, item))
                g.add((item, RDF.type, DCTERMS.Location))
                g.add((item, RDFS.label, Literal('Subregion of country')))
                g.add((item, DCTERMS.subject, Literal(subregion)))

        if entry['header_soer_cover']:
            g.add((node, DCTERMS.source, Literal(entry['header_soer_cover'])))

        if entry['details_original_name']:
            g.add((node, DC.title, Literal(entry['details_original_name'])))

        for language in entry['details_original_language']:
            g.add((node, DC.language, Literal(language)))

        lang_id = 0
        lang_field = 'details_translated_in_%s' % lang_id
        while lang_field in entry.keys():
            item = BNode()
            g.add((node, DCTERMS.language, item))
            g.add((item, RDF.type, DCTERMS.LinguisticSystem))
            g.add((item, RDFS.label,
                   Literal('Language in which the report was translated')))
            g.add((item, DCTERMS.subject, Literal(entry[lang_field])))
            lang_id += 1
            lang_field = 'details_translated_in_%s' % lang_id

        if entry['details_english_name']:
            g.add((node, DC.title,
                   Literal(entry['details_english_name'], lang="en")))

        if entry['details_publisher']:
            g.add((node, DC.publisher, Literal(entry['details_publisher'])))

        if entry['format_report_type']:
            g.add((node, DC.type, Literal(entry['format_report_type'])))

        if entry['format_date_of_publication']:
            g.add((node, DCTERMS.issued,
                   Literal(entry['format_date_of_publication'])))

        if entry['format_freq_of_pub']:
            item = BNode()
            g.add((node, DCTERMS.accrualPeriodicity, item))
            g.add((item, RDF.type, DCTERMS.Frequency))
            g.add((item, RDFS.label, Literal('Frequency of publication')))
            g.add((item, RDF.value, Literal(entry['format_freq_of_pub'])))

        if entry['format_date_of_last_update']:
            g.add((node, DCTERMS.modified,
                   Literal(entry['format_date_of_last_update'])))

        if entry['format_freq_of_upd']:
            item = BNode()
            g.add((node, DCTERMS.accrualPeriodicity, item))
            g.add((item, RDF.type, DCTERMS.Frequency))
            g.add((item, RDFS.label, Literal('Frequency of update')))
            g.add((item, RDF.value, Literal(entry['format_freq_of_upd'])))

        if entry['format_no_of_pages']:
            g.add((node, bibo.numPages, Literal(entry['format_no_of_pages'])))

        if entry['format_size']:
            item = BNode()
            g.add((node, DCTERMS.extent, item))
            g.add((item, RDF.type, DCTERMS.SizeOrDuration))
            g.add((item, RDFS.label, Literal('Size in MBytes')))
            g.add((item, RDF.value, Literal(entry['format_size'])))

        if entry['format_availability_paper_or_web']:
            g.add((node, DC['format'],
                   Literal(entry['format_availability_paper_or_web'])))

        if entry['format_availability_url']:
            g.add((node, bibtex.hasURL,
                   Literal(entry['format_availability_url'])))

        if entry['format_availability_registration_required']:
            g.add(
                (node, DC.rights,
                 Literal(entry['format_availability_registration_required'])))

        if entry['format_availability_costs']:
            g.add((node, RDFS.comment,
                   Literal('(cost) ' + entry['format_availability_costs'])))

        if 'links_target_audience' in entry.keys():
            for audience in entry['links_target_audience']:
                item = BNode()
                g.add((node, DCTERMS.audience, item))
                g.add((item, RDF.type, DCTERMS.AgentClass))
                g.add((item, RDFS.label, Literal('Target audience')))
                g.add((item, RDF.value, Literal(audience)))

        if entry['links_legal_reference']:
            item = BNode()
            g.add((node, DCTERMS.conformsTo, item))
            g.add((item, RDF.type, DCTERMS.Standard))
            g.add((item, RDFS.label, Literal('Legal reference')))
            g.add((item, RDF.value, Literal(entry['links_legal_reference'])))

        if entry['links_explanatory_text']:
            g.add((node, bibo.shortDescription,
                   Literal(entry['links_explanatory_text'])))

        topics = {
            'env_issues': [
                'air', 'biodiversity', 'chemicals', 'climate', 'human',
                'landuse', 'natural', 'noise', 'soil', 'waste', 'water',
                'other_issues'
            ],
            'sectors_and_activities': [
                'agriculture', 'energy', 'fishery', 'households', 'industry',
                'economy', 'tourism', 'transport'
            ],
            'across_env': ['technology', 'policy', 'scenarios'],
            'env_regions': ['coast_sea', 'regions', 'urban']
        }
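        # per-topic form fields follow the naming pattern
        # topics_<group>_<topic>_focus and topics_<group>_<topic>_indicators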

        for key in topics.keys():
            for topic in topics[key]:
                focus = 'topics_' + key + '_' + topic + '_focus'
                indicators = 'topics_' + key + '_' + topic + '_indicators'
                current_item = "http://www.eea.europa.eu/themes/%(topic)s" % {
                    "topic": topic
                }
                item = BNode()
                if (entry[focus] or entry[indicators]):
                    g.add((node, nao.hasTopic, item))
                    g.add((item, RDFS.label, Literal(topic)))
                    g.add((item, RDF.type, bibtex.Entry))
                    g.add((item, bibtex.hasURL, Literal(current_item)))
                    if entry[focus]:
                        g.add(
                            (item, seris.hasFocusValue, Literal(entry[focus])))
                    if entry[indicators]:
                        g.add((item, seris.hasIndicatorCount,
                               Literal(entry[indicators])))
            topic = 'topics_' + key + '_extra_topic_extra_topic_input'
            focus = 'topics_' + key + '_extra_topic_other_radio_focus'
            indicators = ('topics_' + key +
                          '_extra_topic_other_radio_indicators')
            if entry[topic]:
                item = BNode()
                g.add((node, nao.hasTopic, item))
                g.add((item, RDFS.label, Literal(entry[topic])))
                g.add((item, RDF.type, bibtex.Entry))
                if entry[focus]:
                    g.add((item, seris.hasFocusValue, Literal(entry[focus])))
                if entry[indicators]:
                    g.add((item, seris.hasIndicatorCount,
                           Literal(entry[indicators])))

        if entry['structure_indicator_based']:
            item = BNode()
            g.add((node, seris.structure, item))
            g.add((item, RDFS.label, Literal('indicator based')))
            if entry['structure_indicators_estimation']:
                g.add((item, RDF.value,
                       Literal(entry['structure_indicators_estimation'])))
            usage = ''
            if entry['structure_indicators_usage_to_assess_progress']:
                usage += entry['structure_indicators_usage_to_assess_progress']
                usage += ' to assess progress to target/threshold.'
            if entry['structure_indicators_usage_to_compare_countries']:
                usage += entry[
                    'structure_indicators_usage_to_compare_countries']
                usage += ' to compare with other countries/EU.'
            if entry['structure_indicators_usage_to_compare_subnational']:
                usage += entry[
                    'structure_indicators_usage_to_compare_subnational']
                usage += ' to compare at subnational level.'
            if entry['structure_indicators_usage_to_compare_eea']:
                usage += entry['structure_indicators_usage_to_compare_eea']
                usage += ' to relate with EEA/EU developments.'
            if entry['structure_indicators_usage_to_compare_global']:
                usage += entry['structure_indicators_usage_to_compare_global']
                usage += ' to relate to global developments.'
            if entry['structure_indicators_usage_to_evaluate']:
                usage += entry['structure_indicators_usage_to_evaluate']
                usage += ' to rank/evaluate.'
                if entry['structure_indicators_usage_evaluation_method']:
                    usage += ' evaluation method: '
                    usage += entry[
                        'structure_indicators_usage_evaluation_method']
            if usage:
                g.add((item, SKOS.scopeNote, Literal(usage)))

        if entry['structure_policy_recommendations']:
            g.add((node, seris.policyRecommendationsQuantifier,
                   Literal(entry['structure_policy_recommendations'])))

        if entry['structure_reference']:
            quantifier = entry['structure_reference']
            if quantifier[0] == 'N':
                quantifier = 'No'

            text = '[%s] DPSIR framework used' % quantifier
            g.add((node, DCTERMS.references, Literal(text)))

        if entry['short_description']:
            g.add((node, DCTERMS.description,
                   Literal(entry['short_description'])))

        if entry['table_of_contents']:
            g.add((node, DCTERMS.tableOfContents,
                   Literal(entry['table_of_contents'])))

    g.bind("dcterms", DCTERMS)
    g.bind("dc", DC)
    g.bind("bibo", bibo)
    g.bind("foaf", FOAF)
    g.bind("nao", nao)
    g.bind("theme", theme)
    g.bind("bibtex", bibtex)
    g.bind("skos", SKOS)
    g.bind("rdfs", RDFS)
    g.bind("seris", seris)

    return flask.Response(g.serialize(format='xml'), mimetype='text/xml')
Example #5
annotationItemCntr = 1
annotationBodyCntr = 1
annotationEvidenceCntr = 1

annotatedCache = {
}  # indexes annotation ids so that multiple bodies can be attached
currentAnnotation = annotationItemCntr
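# annotationSetCntr is assumed to be initialized earlier in the script (e.g. to 1)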

currentAnnotSet = 'ohdsi-eu-spc-annotation-set-%s' % annotationSetCntr
annotationSetCntr += 1
graph.add((poc[currentAnnotSet], RDF.type, oa["DataAnnotation"]
           ))  # TODO: find out what is being used for collections in OA
graph.add(
    (poc[currentAnnotSet], oa["annotatedAt"], Literal(datetime.date.today())))
graph.add((poc[currentAnnotSet], oa["annotatedBy"],
           URIRef(u"http://www.pitt.edu/~rdb20/triads-lab.xml#TRIADS")))

outf = codecs.open(OUTPUT_FILE, "w", "utf8")
s = graph.serialize(format="n3", encoding="utf8", errors="replace")
outf.write(s)

# DEBUG
cntr = 0

inf = open(DATA_FILE, 'r')
buf = inf.read()
inf.close()
lines = buf.split("\n")
it = [unicode(x.strip(), 'utf-8', 'replace').split("\t")
      for x in lines[1:]]  # skip header
for elt in it:
    pass  # loop body not included in this excerpt
# import glob

# input_files = glob.glob("./split_*.ttl")
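# Assumes rdflib's ConjunctiveGraph/URIRef/RDF, a schema namespace, pandas as pd,
# tqdm, os.path (as path), an input `filename` and the mass_eval() helper are
# defined earlier in the script.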

KG = ConjunctiveGraph()
KG.parse(filename, format="turtle")

# index of biotools {IDs: rdf KG}
index = {}
for s, p, o in KG.triples((None, RDF.type, schema.SoftwareApplication)):
    index[str(s)] = None

for bio_tools_Id in tqdm(index.keys()):
    sub_graph = ConjunctiveGraph()
    for s, p, o in KG.triples((URIRef(bio_tools_Id), None, None)):
        sub_graph.add((s, p, o))
    index[bio_tools_Id] = sub_graph

# for each index, FAIR evaluation of all entries
df = pd.DataFrame()
df_time = pd.DataFrame()

evals, exec_time = mass_eval(index)
df = pd.concat([df, pd.DataFrame.from_records(evals)])
df_time = pd.concat([df_time, pd.DataFrame.from_records(exec_time)])

head, tail = path.split(filename)
df.to_csv("../results/biotools_all/FC_results_" + tail + ".csv")
df_time.to_csv("../results/biotools_all/exec_time_" + tail + ".csv")
def get_RDF(bio_tools_Id):
    sub_graph = ConjunctiveGraph()
    for s, p, o in KG.triples((URIRef(bio_tools_Id), None, None)):
        sub_graph.add((s, p, o))
    print(sub_graph.serialize(format="turtle"))
def createLine(line):  #
    singleLine = URIRef(
        'http://data.linkedevents.org/transit/london/subwayRoute/' +
        Literal(line).replace(" ", ""))
    return singleLine
def createAddress(addressGUID):
    singleAddress = URIRef(
        ('http://data.linkedevents.org/location/%s/address') % addressGUID)
    return singleAddress
Example #10
def context_id(self, uri, context_id=None):
    """Build a context URI of the form <uri>#context for the given uri."""
    uri = uri.split("#", 1)[0]
    if context_id is None:
        context_id = "#context"
    return URIRef(context_id, base=uri)
def createStationGeom(stopGUID):  #createTubeSGraph
    stationGeom = URIRef(createStation(stopGUID) + '/geometry')
    return stationGeom
Example #12
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, FOAF, OWL, XSD, DC, DCTERMS
import json
from linking import link
import os
path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
data = json.load(open(path + "/DatiPerElaborazione/Monument.geojson", "r"))
g = Graph()

cmo = Namespace("http://www.comune.milano.it/ontology/")
schema = Namespace("https://schema.org/")
g.bind("cmo", cmo)
g.bind("schema", schema)

for element in data:
    uri = element["URI"]
    g.add([URIRef(uri), RDF.type, schema.TouristAttraction])
    g.add([URIRef(uri), RDFS.label, Literal(element["nome"], lang='it')])
    g.add(
        [URIRef(uri), cmo.touristAttractionAuthor,
         Literal(element["autore"])])
    g.add([
        URIRef(uri), cmo.touristAttractionDefinition,
        Literal(element["definizione"], lang='it')
    ])
    g.add([URIRef(uri), schema.description, Literal(element["abstract"])])
    g.add([
        URIRef(uri), cmo.latitude,
        Literal(element["lat"], datatype=XSD.float)
    ])
    g.add([
        URIRef(uri), cmo.longitude,
Example #13
File: text2rdfgraph.py  Project: saoruy/DCC
def createrdf(filepath, text_dir, year, conference, platform, entity_map, consolidatedGraph):
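    # Assumes yaml, spacy, nltk, rdflib's URIRef/RDF/Literal and the local
    # getabstract() helper are imported at module level.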

    config = yaml.safe_load(open('../../conf/conf.yaml'))
    model_dir = config['MODEL_PATH']

    triple_list = []

    dcc_namespace = "https://github.com/deepcurator/DCC/"

    # print(row['paper_title'],row['paper_link'],row['conference'], row['year'], row['Platform'])
    filename = filepath.split('/')[-1]
    if filename.endswith('.pdf'):
        filename = filename.split('.pdf')[0]
    elif filename.endswith('.html'):
        filename = filename.split('.html')[0]
    

    ## filename will act as a unique URI to connect all the three graphs
    filesubject = dcc_namespace + filename
    # consolidatedGraph
    consolidatedGraph.add((URIRef(filesubject),RDF.type,URIRef(dcc_namespace + "Publication")))
    triple_list.append(filename + " isa " + "Publication")
    year = Literal(year)
    conference = Literal(conference)
    platform = Literal(platform)

    consolidatedGraph.add((URIRef(filesubject),URIRef(dcc_namespace + "yearOfPublication"),year ))
    consolidatedGraph.add((URIRef(filesubject),URIRef(dcc_namespace + "conferenceSeries"),conference ))
    consolidatedGraph.add((URIRef(filesubject),URIRef(dcc_namespace + "platform"),platform ))

    # Just the triple list
    triple_list.append(filename + " year_of_publication " + str(year))
    triple_list.append(filename + " conference_series " + str(conference))
    triple_list.append(filename + " platform " + str(platform))

    textfilename = text_dir + filename + ".txt"
    #load the spacy nlp model
    nlp = spacy.load(model_dir)
    sents = nltk.sent_tokenize(getabstract(textfilename))
    entity_dict = {}
    for sentence in sents:
        ner_tagged = nlp(sentence)
        tagged_entities = ner_tagged.ents   
        for entity in tagged_entities:
            # print(entity.text, entity.label_)
            if entity.text not in entity_dict:
                entity_dict[entity.text] = entity.label_

    for entitytext, entitylabel in entity_dict.items():
        entitytext = entitytext.replace(" ",'_')
        if(entitytext in entity_map):
            csovalue = entity_map[entitytext]
            str_value = str(csovalue)
            if("cso" in str_value):
                consolidatedGraph.add((URIRef(filesubject + "_" + entitytext),URIRef(dcc_namespace + "hasCSOEquivalent"),csovalue))
            # print("CSO label found for entity text : " + entitytext  + " : and value is " + entity_map[entitytext])
        # print(entitytext)
        # print(filesubject + "_" + entitytext)
        consolidatedGraph.add((URIRef(filesubject + "_" + entitytext),RDF.type,URIRef(dcc_namespace + entitylabel)))
        consolidatedGraph.add((URIRef(filesubject),URIRef(dcc_namespace + "hasEntity"),URIRef(filesubject + "_" + entitytext)))
        textLiteral = Literal(entitytext)
        consolidatedGraph.add((URIRef(filesubject + "_" + entitytext),URIRef(dcc_namespace + 'hasText'),textLiteral))

        triple_list.append(entitytext + " isa " + entitylabel)
        # triple_list.append(filename + " has entity " + )


    print("Done with file " + filename)
    return(filename, triple_list)
Example #14
def generator_persons(newuri):
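    # Assumes urllib2, BeautifulSoup, urllib.parse (as `parse`), rdflib's
    # URIRef/Literal/RDF, an rdflib Graph `graph`, and the foaf/skos/jl
    # namespaces are set up at module level.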

    preflabel = ''
    altlabel = ''
    viaf = ''
    uriname = ''
    same = ''
    pb = ''
    pd = ''
    dd = ''
    db = ''

    personpage = urllib2.urlopen(newuri)

    soup = BeautifulSoup(personpage)

    personnode = soup.findAll('div', attrs={"class": "node node-person"})
    print(personnode)

    if not personnode:
        return 'error'

    name = str(personnode[0].find('h1').string)

    if '-' in name:
        gernamefield = name.rsplit('-', 1)[0].strip()
        preflabel = gernamefield
        altlabel = name.rsplit('-', 1)[1].strip()
    else:
        preflabel = name.strip()

    uriname = preflabel.replace(' ', '_')
    uriname = uriname.replace(',', '')
    uriname = uriname.replace('"', '')
    uriname = uriname.replace('\'', '')

    print(uriname)

    jluri = 'http://data.judaicalink.org/data/haskala/' + uriname
    graph.add((URIRef(jluri), RDF.type, foaf.Person))
    graph.add((URIRef(jluri), skos.prefLabel, (Literal(preflabel))))
    if altlabel != '':
        graph.add((URIRef(jluri), skos.altLabel, (Literal(altlabel))))

    book = soup.findAll('div', attrs={"class": "book-title"})
    print(book)
    if book != []:
        for i in range(0, len(book)):
            bookuri = 'https://www.haskala-library.net/' + book[i].find(
                'a').get('href')
            bookuri = parse.unquote(bookuri)
            print(bookuri)
            graph.add((URIRef(jluri), jl.hasPublication, (URIRef(bookuri))))
    else:
        bookuri = ''

    return (jluri)
Example #15
def CreateBIDSParticipantFile(nidm_graph, output_file, participant_fields):
    '''
    Creates participant file based on requested fields

    :param nidm_graph:
    :param output_file:
    :param participant_fields:
    :return:
    '''
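    # Assumes pandas as pd, validators, urllib, json, os.path's join/splitext,
    # the Constants and BIDS_Constants modules, and the GetDataElementMetadata
    # and write_json_mapping_file helpers are imported at module level.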

    print("Creating participants.json file...")
    fields = ["participant_id"]
    #fields.extend(participant_fields)
    participants = pd.DataFrame(columns=fields, index=[1])
    participants_json = {}

    #for each Constants.NIDM_SUBJECTID in NIDM file
    row_index = 1
    for subj_uri, subj_id in nidm_graph.subject_objects(
            predicate=URIRef(Constants.NIDM_SUBJECTID.uri)):

        #adding subject ID to data list to append to participants data frame
        participants.loc[row_index, 'participant_id'] = subj_id

        #for each of the fields in the participants list
        for fields in participant_fields:
            #if field identifier isn't a proper URI then do a fuzzy search on the graph, else an explicit search for the URL
            if (validators.url(fields)):
                #then this is a valid URI so simply query nidm_project document for it
                for subj, obj in nidm_graph.subject_objects(predicate=URIRef(
                        BIDS_Constants.participants[fields].uri)):
                    #add row to the pandas data frame
                    #data.append(obj)
                    participants.loc[
                        row_index,
                        BIDS_Constants.participants[fields].uri] = obj

                    # find Data Element and add metadata to participants_json dictionary

            else:
                #text matching task, remove basepart of URIs and try to fuzzy match the field in the part_fields parameter string
                #to the "term" part of a qname URI...this part let's a user simply ask for "age" for example without knowing the
                #complete URI....hopefully
                #
                #This needs to be a more complex query:
                #   Step(1): For subj_uri query for prov:Activity that were prov:wasAttributedTo subj_uri
                #   Step(2): Query for prov:Entity that were prov:wasGeneratedBy uris from Step(1)
                #   Step(3): For each metadata triple in objects whose subject is uris from Step(2), fuzzy match predicate after
                #   removing base of uri to "fields" in participants list, then add these to data list for appending to pandas
                match_ratio = {}
                #
                #Steps(1):(3)

                query = """
                    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                    PREFIX prov: <http://www.w3.org/ns/prov#>
                    PREFIX onli: <http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#>
                    PREFIX sio: <http://semanticscience.org/ontology/sio.owl#>
                    PREFIX niiri: <http://iri.nidash.org/>

                SELECT DISTINCT ?pred ?value
                    WHERE {
                        ?asses_activity prov:qualifiedAssociation ?_blank .
                        ?_blank rdf:type prov:Association ;
                            prov:agent <%s> ;
                            prov:hadRole sio:Subject .

                        ?entities prov:wasGeneratedBy ?asses_activity ;
                            rdf:type onli:assessment-instrument ;
                            ?pred ?value .
                        FILTER (regex(str(?pred) ,"%s","i" ))
                    }""" % (subj_uri, fields)
                #print(query)
                qres = nidm_graph.query(query)

                for row in qres:
                    #use last field in URIs for short column name and add full URI to sidecar participants.json file
                    url_parts = urllib.parse.urlsplit(row[0], scheme='#')

                    if url_parts.fragment == '':
                        #do some parsing of the path URL because this particular one has no fragments
                        url_parts = urllib.parse.urlparse(row[0])
                        path_parts = url_parts[2].rpartition('/')
                        short_name = path_parts[2]
                    else:
                        short_name = url_parts.fragment

                    # find Data Element and add metadata to participants_json dictionary
                    if 'de' not in locals():
                        de = GetDataElementMetadata(nidm_graph, short_name)
                    else:
                        de.update(
                            GetDataElementMetadata(nidm_graph, short_name))

                    participants.loc[row_index, str(short_name)] = str(row[1])
                    #data.append(str(row[1]))

        #add row to participants DataFrame
        #participants=participants.append(pd.DataFrame(data))
        row_index = row_index + 1

    #save participants.tsv file
    participants.to_csv(output_file + ".tsv", sep='\t', index=False)
    #save participants.json file
    with open(output_file + ".json", 'w') as f:
        json.dump(participants_json, f, sort_keys=True, indent=2)

    # save participant sidecar file
    write_json_mapping_file(de, join(splitext(output_file)[0] + ".json"), True)

    return participants, participants_json
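
# Assumes an rdflib Graph `graph`, the djr/owl/gnd/djo/dcterms/rdfs/schema
# namespaces, csv, RDF, URIRef, Literal and BNode are defined earlier in the
# script.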
graph.bind('djr', djr)
graph.bind('owl', owl)
graph.bind('gnd', gnd)

#basis_uri = 'http://dijest.technion.ac.il/book/'
#basis_person_uri = 'http://dijest.technion.ac.il/person/'
basis_uri = 'djr:book/'
basis_person_uri = 'djr:person/'

with open('mifalbibl_first_selection.csv', 'r') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        book_uri = basis_uri + row['uri']
        creator_uri = basis_person_uri + row['entityID']

        graph.add((URIRef(book_uri), RDF['type'], djo['Book']))

        graph.add((URIRef(book_uri), dcterms['language'],
                   Literal(row['dcterms:lanugage'])))
        book_label = row['dcterms:title']

        if len(row['dcterms:created']) > 0 and row['dcterms:created'] != 'TBD':
            book_label = row['dcterms:title'] + ' (' + row[
                'dcterms:created'] + ')'
        else:
            book_label = row['dcterms:title']
        graph.add((URIRef(book_uri), rdfs['label'], Literal(book_label)))

        creatorNode = BNode('author-' + row['uri'])
        graph.add((creatorNode, RDF['type'], djo['Person']))
        graph.add((creatorNode, schema['name'], Literal(row['AuthorNoPoint'])))
Example #17
def ProcessFiles(graph, scan_type, output_directory, project_location, args):
    '''
    This function will essentially cycle through the acquisition objects in the NIDM file loaded into graph
    and depending on the scan_type will try and copy the image to the output_directory
    '''
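    # Assumes os, sys, shutil's copyfile, os.path helpers (join, isfile,
    # basename, splitext), datalad.api as dl, and the Constants, GetImageFromURL,
    # GetImageFromAWS, GetParticipantIDFromAcquisition and
    # AddMetadataToImageSidecar helpers are imported at module level.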

    if scan_type == Constants.NIDM_MRI_DIFFUSION_TENSOR.uri:
        bids_ext = 'dwi'
    elif scan_type == Constants.NIDM_MRI_ANATOMIC_SCAN.uri:
        bids_ext = 'anat'
    elif scan_type == Constants.NIDM_MRI_FUNCTION_SCAN.uri:
        bids_ext = 'func'
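    # (bids_ext is only set for the three scan types above; other values of
    # scan_type are assumed not to be passed in)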

    # query NIDM document for acquisition entity "subjects" with predicate nidm:hasImageUsageType and object scan_type
    for acq in graph.subjects(predicate=URIRef(
            Constants.NIDM_IMAGE_USAGE_TYPE.uri),
                              object=URIRef(scan_type)):
        # first see if file exists locally.  Get nidm:Project prov:Location and append the nfo:Filename of the image
        # from the acq acquisition entity.  If that file doesn't exist try the prov:Location in the func acq
        # entity and see if we can download it from the cloud

        # get acquisition uuid from entity uuid
        temp = graph.objects(subject=acq,
                             predicate=Constants.PROV['wasGeneratedBy'])
        for item in temp:
            activity = item
        # get participant ID with sio:Subject role in anat_acq qualified association
        part_id = GetParticipantIDFromAcquisition(
            nidm_file_list=[args.rdf_file], acquisition=activity)

        # make BIDS sub directory
        if 'sub' in (part_id['ID'].values)[0]:
            sub_dir = join(output_directory, (part_id['ID'].values)[0])
        else:
            sub_dir = join(output_directory,
                           "sub-" + (part_id['ID'].values)[0])
        sub_filename_base = "sub-" + (part_id['ID'].values)[0]
        if not os.path.exists(sub_dir):
            os.makedirs(sub_dir)

        # make BIDS scan type directory (bids_ext) directory
        if not os.path.exists(join(sub_dir, bids_ext)):
            os.makedirs(join(sub_dir, bids_ext))

        for filename in graph.objects(subject=acq,
                                      predicate=URIRef(
                                          Constants.NIDM_FILENAME.uri)):
            # check if file exists
            for location in project_location:
                # if MRI exists in this location then copy and rename
                if isfile((location[0] + filename).lstrip("file:")):
                    # copy and rename file to be BIDS compliant
                    copyfile((location[0] + filename).lstrip("file:"),
                             join(sub_dir, bids_ext,
                                  sub_filename_base + splitext(filename)[1]))
                    continue
            # if the file wasn't accessible locally, try with the prov:Location in the acq
            for location in graph.objects(subject=acq,
                                          predicate=URIRef(
                                              Constants.PROV['Location'])):
                # try to download the file and rename
                ret = GetImageFromURL(location)
                if ret == -1:
                    print(
                        "ERROR! Can't download file: %s from url: %s, trying to copy locally...."
                        % (filename, location))
                    if "file" in location:
                        location = str(location).lstrip("file:")
                        print("Trying to copy file from %s" % (location))
                        try:
                            copyfile(
                                location,
                                join(output_directory, sub_dir, bids_ext,
                                     basename(filename)))

                        except:
                            print(
                                "ERROR! Failed to find file %s on filesystem..."
                                % location)
                            if not args.no_downloads:
                                try:
                                    print(
                                        "Running datalad get command on dataset: %s"
                                        % location)
                                    dl.Dataset(os.path.dirname(location)).get(
                                        recursive=True, jobs=1)

                                except:
                                    print(
                                        "ERROR! Datalad returned error: %s for dataset %s."
                                        % (sys.exc_info()[0], location))
                                    GetImageFromAWS(location=location,
                                                    output_file=join(
                                                        output_directory,
                                                        sub_dir, bids_ext,
                                                        basename(filename)),
                                                    args=args)

                else:
                    # copy temporary file to BIDS directory
                    copyfile(
                        ret,
                        join(output_directory, sub_dir, bids_ext,
                             basename(filename)))

                # if we were able to copy the image file then add the json sidecar file with additional metadata
                # available in the NIDM file
                if isfile(
                        join(output_directory, sub_dir, bids_ext,
                             basename(filename))):
                    # get rest of metadata for this acquisition and store in sidecar file
                    if "gz" in basename(filename):
                        image_filename = splitext(
                            splitext(basename(filename))[0])[0]
                    else:
                        image_filename = splitext(basename(filename))[0]
                    AddMetadataToImageSidecar(graph_entity=acq,
                                              graph=graph,
                                              output_directory=join(
                                                  output_directory, sub_dir,
                                                  bids_ext),
                                              image_filename=image_filename)

            # if this is a DWI scan then we should copy over the b-value and b-vector files
            if bids_ext == 'dwi':
                # search for entity uuid with rdf:type nidm:b-value that was generated by activity
                query = """
                    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                    PREFIX prov: <http://www.w3.org/ns/prov#>
                    PREFIX nidm: <http://purl.org/nidash/nidm#>
    
                    SELECT DISTINCT ?entity
                        WHERE {
                            ?entity rdf:type <http://purl.org/nidash/nidm#b-value> ;
                                prov:wasGeneratedBy <%s> .
                        }""" % activity
                # print(query)
                qres = graph.query(query)

                for row in qres:
                    bval_entity = str(row[0])

                # if the file wasn't accessible locally, try with the prov:Location in the acq
                for location in graph.objects(subject=URIRef(bval_entity),
                                              predicate=URIRef(
                                                  Constants.PROV['Location'])):
                    # try to download the file and rename
                    ret = GetImageFromURL(location)
                    if ret == -1:
                        print(
                            "ERROR! Can't download file: %s from url: %s, trying to copy locally...."
                            % (filename, location))
                        if "file" in location:
                            location = str(location).lstrip("file:")
                            print("Trying to copy file from %s" % (location))
                            try:
                                copyfile(
                                    location,
                                    join(output_directory, sub_dir, bids_ext,
                                         basename(location)))
                            except:
                                print(
                                    "ERROR! Failed to find file %s on filesystem..."
                                    % location)
                                if not args.no_downloads:
                                    try:
                                        print(
                                            "Running datalad get command on dataset: %s"
                                            % location)
                                        dl.Dataset(
                                            os.path.dirname(location)).get(
                                                recursive=True, jobs=1)

                                    except:
                                        print(
                                            "ERROR! Datalad returned error: %s for dataset %s."
                                            % (sys.exc_info()[0], location))
                                        GetImageFromAWS(
                                            location=location,
                                            output_file=join(
                                                output_directory, sub_dir,
                                                bids_ext, basename(location)),
                                            args=args)
                # search for entity uuid with rdf:type nidm:b-vector that was generated by activity
                query = """
                    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                    PREFIX prov: <http://www.w3.org/ns/prov#>
                    PREFIX nidm: <http://purl.org/nidash/nidm#>

                    SELECT DISTINCT ?entity
                        WHERE {
                            ?entity rdf:type <http://purl.org/nidash/nidm#b-vector> ;
                                prov:wasGeneratedBy <%s> .
                        }""" % activity
                # print(query)
                qres = graph.query(query)

                for row in qres:
                    bvec_entity = str(row[0])

                # if the file wasn't accessible locally, try with the prov:Location in the acq
                for location in graph.objects(subject=URIRef(bvec_entity),
                                              predicate=URIRef(
                                                  Constants.PROV['Location'])):
                    # try to download the file and rename
                    ret = GetImageFromURL(location)
                    if ret == -1:
                        print(
                            "ERROR! Can't download file: %s from url: %s, trying to copy locally...."
                            % (filename, location))
                        if "file" in location:
                            location = str(location).lstrip("file:")
                            print("Trying to copy file from %s" % (location))
                            try:
                                copyfile(
                                    location,
                                    join(output_directory, sub_dir, bids_ext,
                                         basename(location)))
                            except:
                                print(
                                    "ERROR! Failed to find file %s on filesystem..."
                                    % location)
                                if not args.no_downloads:
                                    try:
                                        print(
                                            "Running datalad get command on dataset: %s"
                                            % location)
                                        dl.Dataset(
                                            os.path.dirname(location)).get(
                                                recursive=True, jobs=1)

                                    except:
                                        print(
                                            "ERROR! Datalad returned error: %s for dataset %s."
                                            % (sys.exc_info()[0], location))
                                        GetImageFromAWS(
                                            location=location,
                                            output_file=join(
                                                output_directory, sub_dir,
                                                bids_ext, basename(location)),
                                            args=args)
Example #18
    def graph_from_dataset(self, dataset_dict, dataset_ref):
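        # Maps a CKAN dataset dict onto DCAT. Assumes the DCAT, DCT, ADMS, VCARD,
        # FOAF, SCHEMA, SKOS, LOCN, GSP, SPDX and OWL namespaces, GEOJSON_IMT,
        # and helpers such as resource_uri() and publisher_uri_from_dataset_dict()
        # are defined in the enclosing module.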

        g = self.g

        for prefix, namespace in namespaces.iteritems():
            g.bind(prefix, namespace)

        g.add((dataset_ref, RDF.type, DCAT.Dataset))

        # Basic fields
        items = [
            ('title', DCT.title, None, Literal),
            ('notes', DCT.description, None, Literal),
            ('url', DCAT.landingPage, None, URIRef),
            ('identifier', DCT.identifier, ['guid', 'id'], Literal),
            ('version', OWL.versionInfo, ['dcat_version'], Literal),
            ('version_notes', ADMS.versionNotes, None, Literal),
            ('frequency', DCT.accrualPeriodicity, None, Literal),
            ('access_rights', DCT.accessRights, None, Literal),
            ('dcat_type', DCT.type, None, Literal),
            ('provenance', DCT.provenance, None, Literal),
        ]
        self._add_triples_from_dict(dataset_dict, dataset_ref, items)

        # Tags
        for tag in dataset_dict.get('tags', []):
            g.add((dataset_ref, DCAT.keyword, Literal(tag['name'])))

        # Dates
        items = [
            ('issued', DCT.issued, ['metadata_created'], Literal),
            ('modified', DCT.modified, ['metadata_modified'], Literal),
        ]
        self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

        #  Lists
        items = [
            ('language', DCT.language, None, Literal),
            ('theme', DCAT.theme, None, URIRef),
            ('conforms_to', DCT.conformsTo, None, Literal),
            ('alternate_identifier', ADMS.identifier, None, Literal),
            ('documentation', FOAF.page, None, Literal),
            ('related_resource', DCT.relation, None, Literal),
            ('has_version', DCT.hasVersion, None, Literal),
            ('is_version_of', DCT.isVersionOf, None, Literal),
            ('source', DCT.source, None, Literal),
            ('sample', ADMS.sample, None, Literal),
        ]
        self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

        # Contact details
        if any([
                self._get_dataset_value(dataset_dict, 'contact_uri'),
                self._get_dataset_value(dataset_dict, 'contact_name'),
                self._get_dataset_value(dataset_dict, 'contact_email'),
                self._get_dataset_value(dataset_dict, 'maintainer'),
                self._get_dataset_value(dataset_dict, 'maintainer_email'),
                self._get_dataset_value(dataset_dict, 'author'),
                self._get_dataset_value(dataset_dict, 'author_email'),
        ]):

            contact_uri = self._get_dataset_value(dataset_dict, 'contact_uri')
            if contact_uri:
                contact_details = URIRef(contact_uri)
            else:
                contact_details = BNode()

            g.add((contact_details, RDF.type, VCARD.Organization))
            g.add((dataset_ref, DCAT.contactPoint, contact_details))

            items = [
                ('contact_name', VCARD.fn, ['maintainer', 'author'], Literal),
                ('contact_email', VCARD.hasEmail,
                 ['maintainer_email', 'author_email'], Literal),
            ]

            self._add_triples_from_dict(dataset_dict, contact_details, items)

        # Publisher
        if any([
                self._get_dataset_value(dataset_dict, 'publisher_uri'),
                self._get_dataset_value(dataset_dict, 'publisher_name'),
                dataset_dict.get('organization'),
        ]):

            publisher_uri = publisher_uri_from_dataset_dict(dataset_dict)
            if publisher_uri:
                publisher_details = URIRef(publisher_uri)
            else:
                # No organization nor publisher_uri
                publisher_details = BNode()

            g.add((publisher_details, RDF.type, FOAF.Organization))
            g.add((dataset_ref, DCT.publisher, publisher_details))

            publisher_name = self._get_dataset_value(dataset_dict,
                                                     'publisher_name')
            if not publisher_name and dataset_dict.get('organization'):
                publisher_name = dataset_dict['organization']['title']

            g.add((publisher_details, FOAF.name, Literal(publisher_name)))
            # TODO: It would make sense to fallback these to organization
            # fields but they are not in the default schema and the
            # `organization` object in the dataset_dict does not include
            # custom fields
            items = [
                ('publisher_email', FOAF.mbox, None, Literal),
                ('publisher_url', FOAF.homepage, None, URIRef),
                ('publisher_type', DCT.type, None, Literal),
            ]

            self._add_triples_from_dict(dataset_dict, publisher_details, items)

        # Temporal
        start = self._get_dataset_value(dataset_dict, 'temporal_start')
        end = self._get_dataset_value(dataset_dict, 'temporal_end')
        if start or end:
            temporal_extent = BNode()

            g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
            if start:
                self._add_date_triple(temporal_extent, SCHEMA.startDate, start)
            if end:
                self._add_date_triple(temporal_extent, SCHEMA.endDate, end)
            g.add((dataset_ref, DCT.temporal, temporal_extent))

        # Spatial
        spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri')
        spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text')
        spatial_geom = self._get_dataset_value(dataset_dict, 'spatial')

        if spatial_uri or spatial_text or spatial_geom:
            if spatial_uri:
                spatial_ref = URIRef(spatial_uri)
            else:
                spatial_ref = BNode()

            g.add((spatial_ref, RDF.type, DCT.Location))
            g.add((dataset_ref, DCT.spatial, spatial_ref))

            if spatial_text:
                g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text)))

            if spatial_geom:
                # GeoJSON
                g.add((spatial_ref, LOCN.geometry,
                       Literal(spatial_geom, datatype=GEOJSON_IMT)))
                # WKT, because GeoDCAT-AP says so
                try:
                    g.add((spatial_ref, LOCN.geometry,
                           Literal(wkt.dumps(json.loads(spatial_geom),
                                             decimals=4),
                                   datatype=GSP.wktLiteral)))
                except (TypeError, ValueError, InvalidGeoJSONException):
                    pass

        # Resources
        for resource_dict in dataset_dict.get('resources', []):

            distribution = URIRef(resource_uri(resource_dict))

            g.add((dataset_ref, DCAT.distribution, distribution))

            g.add((distribution, RDF.type, DCAT.Distribution))

            #  Simple values
            items = [
                ('name', DCT.title, None, Literal),
                ('description', DCT.description, None, Literal),
                ('status', ADMS.status, None, Literal),
                ('rights', DCT.rights, None, Literal),
                ('license', DCT.license, None, Literal),
            ]

            self._add_triples_from_dict(resource_dict, distribution, items)

            #  Lists
            items = [
                ('documentation', FOAF.page, None, Literal),
                ('language', DCT.language, None, Literal),
                ('conforms_to', DCT.conformsTo, None, Literal),
            ]
            self._add_list_triples_from_dict(resource_dict, distribution,
                                             items)

            # Format
            if '/' in resource_dict.get('format', ''):
                g.add((distribution, DCAT.mediaType,
                       Literal(resource_dict['format'])))
            else:
                if resource_dict.get('format'):
                    g.add((distribution, DCT['format'],
                           Literal(resource_dict['format'])))

                if resource_dict.get('mimetype'):
                    g.add((distribution, DCAT.mediaType,
                           Literal(resource_dict['mimetype'])))

            # URL
            url = resource_dict.get('url')
            download_url = resource_dict.get('download_url')
            if download_url:
                g.add((distribution, DCAT.downloadURL, URIRef(download_url)))
            if (url and not download_url) or (url and url != download_url):
                g.add((distribution, DCAT.accessURL, URIRef(url)))

            # Dates
            items = [
                ('issued', DCT.issued, None, Literal),
                ('modified', DCT.modified, None, Literal),
            ]

            self._add_date_triples_from_dict(resource_dict, distribution,
                                             items)

            # Numbers
            if resource_dict.get('size'):
                try:
                    g.add((distribution, DCAT.byteSize,
                           Literal(float(resource_dict['size']),
                                   datatype=XSD.decimal)))
                except (ValueError, TypeError):
                    g.add((distribution, DCAT.byteSize,
                           Literal(resource_dict['size'])))
            # Checksum
            if resource_dict.get('hash'):
                checksum = BNode()
                g.add((checksum, SPDX.checksumValue,
                       Literal(resource_dict['hash'], datatype=XSD.hexBinary)))

                if resource_dict.get('hash_algorithm'):
                    if resource_dict['hash_algorithm'].startswith('http'):
                        g.add((checksum, SPDX.algorithm,
                               URIRef(resource_dict['hash_algorithm'])))
                    else:
                        g.add((checksum, SPDX.algorithm,
                               Literal(resource_dict['hash_algorithm'])))
                g.add((distribution, SPDX.checksum, checksum))
def index_dump():
    for i in tqdm(index.keys()):
        sub_graph = ConjunctiveGraph()
        for s, p, o in KG.triples((URIRef(i), None, None)):
            sub_graph.add((s, p, o))
        index[i] = sub_graph
    def to_skos(self, data, options=None):
        """
        Given some data, converts that data to an rdf skos format in xml.
        """
        # element = {}
        # get scheme: resource being requested. actionTypeCV, methodTypeCV, etc.

        scheme = Scheme.objects.get(name=options['scheme'])
        excluded_fields = [
            u'term', u'resource_uri', u'vocabulary_id', u'vocabulary_status'
        ]

        baseURI = 'http://vocabulary.westernstateswater.org/WaDE/WaDETerms/'
        graph = Graph()
        WaDE = Namespace(baseURI)
        dc = Namespace('http://purl.org/dc/elements/1.1/')

        graph.bind('WaDE', WaDE)
        graph.bind('skos', SKOS)
        graph.bind('dc', dc)
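        # Scheme and FieldRelation are assumed to be ORM models supplied by the
        # surrounding application, and Bundle to come from its serialization
        # framework.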

        # If requesting an entire CV.
        if isinstance(data, dict):
            # print data
            # Add a SKOS ConceptScheme class to the graph.
            (graph.add(
                (URIRef(scheme.uri), RDF['type'], SKOS['ConceptScheme'])))
            (graph.add(
                (URIRef(scheme.uri), dc['title'], Literal(scheme.title))))
            (graph.add(
                (URIRef(scheme.uri), dc['creator'], Literal(scheme.creator))))
            (graph.add((URIRef(scheme.uri), dc['description'],
                        Literal(scheme.description))))

            # For each concept in the requested CV, create a SKOS Concept class.
            for concept in data[u'objects']:
                (graph.add((URIRef(scheme.uri + '/' + concept.obj.term),
                            RDF['type'], SKOS['Concept'])))
                (graph.add((URIRef(scheme.uri + '/' + concept.obj.term),
                            SKOS['inScheme'], URIRef(scheme.uri))))

                # Add labels to each concept class.
                for x in concept.data:
                    label = concept.data[x]
                    if isinstance(label, type(None)):
                        label = ''
                    if isinstance(label, int):
                        label = str(label)
                    # Skip excluded field elements.
                    if x in excluded_fields:
                        continue
                    # Skip empty elements.
                    elif label.rstrip('\r\n') == '':
                        continue
                    else:
                        alias = str(
                            FieldRelation.objects.get(
                                field_name=x).node.namespace)
                        if alias == 'WaDE':
                            (graph.add(
                                (URIRef(scheme.uri + '/' + concept.obj.term),
                                 WaDE[FieldRelation.objects.get(
                                     field_name=x).node.name],
                                 Literal(label.rstrip('\r\n')))))
                        else:
                            (graph.add(
                                (URIRef(scheme.uri + '/' + concept.obj.term),
                                 SKOS[FieldRelation.objects.get(
                                     field_name=x).node.name],
                                 Literal(label.rstrip('\r\n')))))

        # If requesting a single Concept
        elif isinstance(data, Bundle):
            # Add a SKOS ConceptScheme class to the graph.
            (graph.add(
                (URIRef(scheme.uri), RDF['type'], SKOS['ConceptScheme'])))
            (graph.add(
                (URIRef(scheme.uri), dc['title'], Literal(scheme.title))))
            (graph.add(
                (URIRef(scheme.uri), dc['creator'], Literal(scheme.creator))))
            (graph.add((URIRef(scheme.uri), dc['description'],
                        Literal(scheme.description))))

            # Add a SKOS Concept class to the graph.
            (graph.add((URIRef(scheme.uri + '/' + data.obj.term), RDF['type'],
                        SKOS['Concept'])))
            (graph.add(
                (URIRef(scheme.uri + '/' + data.obj.term), SKOS['inScheme'],
                 URIRef(scheme.uri))))

            # Add labels within concept class.
            for field in data.data.keys():
                label = data.data[field]
                if label is None:
                    label = ''
                if isinstance(label, int):
                    label = str(label)

                if field in excluded_fields:
                    continue
                elif label.rstrip('\r\n') == '':
                    continue
                else:
                    relation = FieldRelation.objects.get(field_name=field)
                    alias = relation.node.namespace.alias
                    concept_uri = URIRef(scheme.uri + '/' + data.obj.term)
                    value = Literal(label.rstrip('\r\n'))
                    if alias == u'WaDE':
                        graph.add((concept_uri, WaDE[relation.node.name], value))
                    else:
                        graph.add((concept_uri, SKOS[relation.node.name], value))
        else:
            pass
        # Returning the graph serialized into 'xml' format rather than
        # 'pretty-xml' so that the Concept Scheme remains on its own level,
        # rather than inside one of the concepts.
        return graph.serialize(format='xml')
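
A minimal sketch (with made-up example URIs, not part of the code above) of why the choice of serializer matters here: rdflib's 'pretty-xml' serializer may nest a skos:Concept inside the skos:ConceptScheme description, while plain 'xml' keeps every subject as its own top-level rdf:Description.

from rdflib import Graph, Literal, URIRef
from rdflib.namespace import DC, RDF, SKOS

g = Graph()
scheme_uri = URIRef("http://example.org/scheme")            # assumed URI
concept_uri = URIRef("http://example.org/scheme/aquifer")   # assumed URI

g.add((scheme_uri, RDF.type, SKOS.ConceptScheme))
g.add((scheme_uri, DC.title, Literal("Example scheme")))
g.add((concept_uri, RDF.type, SKOS.Concept))
g.add((concept_uri, SKOS.inScheme, scheme_uri))

flat = g.serialize(format="xml")          # scheme and concept stay at the same level
nested = g.serialize(format="pretty-xml") # the concept may end up nested inside the scheme
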
예제 #21
0
import os

import rdflib
from anytree import Node  # assumed: the code below needs Node.name, .parent and .is_root, which anytree's Node provides
from rdflib import URIRef


def create_ontology_graph():
    # Construct ISA trees from the ontology's equivalentProperty and
    # subPropertyOf triples (ontology_dir is expected to be defined at module level)
    graph = rdflib.Graph()
    graph.parse(os.path.join(ontology_dir, 'inferred_vrd'))

    ontology_labels_nodes = {}
    ontology_labels_equivalent_tmp = set()
    ontology_labels_equivalent = set()

    for s, p, o in graph.triples(
        (None, URIRef("http://www.w3.org/2002/07/owl#equivalentProperty"),
         None)):
        # print s, " -> ", p, " -> ", o
        if "http://" in s and "http://" in o:

            subj_label = str(s.split("#")[1])
            obj_label = str(o.split("#")[1])
            ontology_labels_equivalent.add(subj_label)
            ontology_labels_equivalent.add(obj_label)

            if ontology_labels_nodes:
                new_node = True
                # Iterate over a snapshot of the keys: entries are added to and
                # removed from the dict inside this loop.
                for node_label in list(ontology_labels_nodes.keys()):

                    if subj_label in node_label.split(","):
                        ontology_labels_equivalent_tmp.remove(node_label)
                        ontology_labels_nodes[
                            node_label].name = ontology_labels_nodes[
                                node_label].name + "," + obj_label
                        ontology_labels_equivalent_tmp.add(
                            ontology_labels_nodes[node_label].name)
                        ontology_labels_nodes[
                            ontology_labels_nodes[node_label].
                            name] = ontology_labels_nodes[node_label]
                        del ontology_labels_nodes[node_label]
                        new_node = False

                    elif obj_label in node_label.split(","):
                        ontology_labels_equivalent_tmp.remove(node_label)
                        ontology_labels_nodes[
                            node_label].name = ontology_labels_nodes[
                                node_label].name + "," + subj_label
                        ontology_labels_equivalent_tmp.add(
                            ontology_labels_nodes[node_label].name)
                        ontology_labels_nodes[
                            ontology_labels_nodes[node_label].
                            name] = ontology_labels_nodes[node_label]
                        del ontology_labels_nodes[node_label]
                        new_node = False
                if new_node:
                    ontology_labels_nodes[subj_label + "," +
                                          obj_label] = Node(subj_label + "," +
                                                            obj_label)
                    ontology_labels_equivalent_tmp.add(subj_label + "," +
                                                       obj_label)
            else:
                ontology_labels_nodes[subj_label + "," +
                                      obj_label] = Node(subj_label + "," +
                                                        obj_label)
                ontology_labels_equivalent_tmp.add(subj_label + "," +
                                                   obj_label)

    for s, p, o in graph.triples(
        (None, URIRef("http://www.w3.org/2000/01/rdf-schema#subPropertyOf"),
         None)):
        #print s, " -> ", p, " -> ", o

        if "http://" in s and "http://" in o:

            subj_label = str(s.split("#")[1])
            obj_label = str(o.split("#")[1])

            subj_node_name = ""
            obj_node_name = ""
            for node_label in ontology_labels_equivalent_tmp:
                if subj_label in node_label.split(","):
                    subj_node_name = node_label
                    continue
                if obj_label in node_label.split(","):
                    obj_node_name = node_label
                    continue
            if subj_node_name and obj_node_name:
                ontology_labels_nodes[
                    subj_node_name].parent = ontology_labels_nodes[
                        obj_node_name]

            if subj_label not in ontology_labels_equivalent and obj_label not in ontology_labels_equivalent:

                if subj_label not in ontology_labels_nodes:
                    ontology_labels_nodes[subj_label] = Node(subj_label)
                if obj_label not in ontology_labels_nodes:
                    ontology_labels_nodes[obj_label] = Node(obj_label)

                ontology_labels_nodes[
                    subj_label].parent = ontology_labels_nodes[obj_label]

            if subj_label in ontology_labels_equivalent and obj_label not in ontology_labels_equivalent:
                if obj_label not in ontology_labels_nodes:
                    ontology_labels_nodes[obj_label] = Node(obj_label)

                # retrieve subj node
                for node_label in ontology_labels_nodes.keys():
                    if subj_label in node_label.split(","):
                        ontology_labels_nodes[
                            node_label].parent = ontology_labels_nodes[
                                obj_label]

            if subj_label not in ontology_labels_equivalent and obj_label in ontology_labels_equivalent:
                if subj_label not in ontology_labels_nodes:
                    ontology_labels_nodes[subj_label] = Node(subj_label)

                # retrieve obj node
                for node_label in ontology_labels_nodes.keys():
                    if obj_label in node_label.split(","):
                        ontology_labels_nodes[
                            subj_label].parent = ontology_labels_nodes[
                                node_label]

    tree_list = []
    for node_label in ontology_labels_nodes:
        if ontology_labels_nodes[node_label].is_root:
            tree_list.append(ontology_labels_nodes[node_label])
    return tree_list, ontology_labels_equivalent_tmp
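
A short usage sketch for the function above, assuming Node is anytree.Node (which supplies the .name, .parent and .is_root attributes the code relies on) and that ontology_dir points at the directory holding the 'inferred_vrd' file:

from anytree import RenderTree

trees, equivalence_groups = create_ontology_graph()
for root in trees:
    # Print each ISA tree as an indented outline, one line per node.
    for prefix, _, node in RenderTree(root):
        print(prefix + node.name)
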
예제 #22
0
def create_vocab_statusfile(userid, vocabprefix, vocabfile, baseuri, update=False, using_uuid=False, refvocab=False):
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s"%vocabprefix)
    vocabdir = os.path.join(ag.vocabulariesdir, str(vocabprefix))
    vocabstatusfile = os.path.join(vocabdir, "status.rdf")
    vocab_file_name = os.path.basename(vocabfile)
    vocabfile_uri = URIRef("http://vocab.ox.ac.uk/%s/%s"%(vocabprefix, vocab_file_name))

    #Add vocab in mediator file
    graph = Graph()
    mediatorfile = os.path.join(ag.mediatorsdir, '%s.rdf'%userid)
    graph.parse(mediatorfile)
    user_uri = []
    for uri in graph.subjects(namespaces['foaf']['account'], Literal(userid)):
        if not uri in user_uri:
            user_uri.append(uri)
    user_uri = URIRef(user_uri[0])
    graph.add((vocab_uri, namespaces['dcterms']['mediator'], URIRef(user_uri)))
    rdf_str = None
    rdf_str = graph.serialize()
    f = codecs.open(mediatorfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()

    #Add vocab in vocab status file
    graph = Graph()
    if update and os.path.isfile(vocabstatusfile):
        graph.parse(vocabstatusfile)
    for prefix, url in namespaces.items():
        graph.bind(prefix, URIRef(url))
    graph.add((vocab_uri, namespaces['dcterms']['mediator'], URIRef(user_uri)))
    graph.add((user_uri, namespaces['foaf']['account'], Literal(userid)))
    graph.add((vocab_uri, namespaces['dcterms']['hasFormat'], URIRef(vocabfile_uri)))
    graph.add((vocab_uri, namespaces['vann']['preferredNamespaceUri'], URIRef(baseuri)))
    graph.add((vocab_uri, namespaces['vann']['preferredNamespacePrefix'], Literal(vocabprefix)))
    graph.add((vocab_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[0])))
    if refvocab:
        add_ref_vocab(vocabprefix, refvocab)
        graph.add((vocab_uri, namespaces['dcterms']['isVersionOf'], URIRef(refvocab)))
    # get mimetype of file
    if os.path.isfile(vocabfile):
        graph.add((vocabfile_uri, namespaces['nfo']['fileUrl'], Literal('file://%s'%vocabfile)))
        graph.add((vocabfile_uri, namespaces['nfo']['fileName'], Literal(vocab_file_name)))
        mt = None
        if check_rdf(vocabfile):
            mt = 'application/rdf+xml'
            graph.add((vocabfile_uri, namespaces['dcterms']['conformsTo'], Literal(mt)))
            graph.add((vocabfile_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[3])))
        elif check_n3(vocabfile):
            mt = 'text/rdf+nt'
            root, ext = os.path.splitext(vocabfile)
            if ext == '.rdf':
                rdffile = "%s_2.rdf"%root
            else:
                rdffile = "%s.rdf"%root
            converttordf = convert_n3_rdf(vocabfile, rdffile)
            if converttordf and os.path.isfile(rdffile):
                rdf_file_name = os.path.basename(rdffile)
                rdffile_uri = URIRef("http://vocab.ox.ac.uk/%s/%s"%(vocabprefix, rdf_file_name))
                graph.add((vocab_uri, namespaces['dcterms']['hasFormat'], URIRef(rdffile_uri)))
                graph.add((rdffile_uri, namespaces['nfo']['fileUrl'], Literal('file://%s'%rdffile)))
                graph.add((rdffile_uri, namespaces['nfo']['fileName'], Literal(rdf_file_name)))
                graph.add((rdffile_uri, namespaces['dcterms']['conformsTo'], Literal('application/rdf+xml')))
                graph.add((rdffile_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[3])))
                graph.add((rdffile_uri, namespaces['dcterms']['format'], Literal('application/rdf+xml')))
        else:
            mt1 = mimetypes.guess_type(vocabfile)
            mt2 = get_file_mimetype(vocabfile)
            if mt1[0]:
                mt = mt1[0]
            else:
                mt = mt2
            if str(mt) == 'application/rdf+xml':
                graph.add((vocabfile_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[2])))
            else:
                graph.add((vocab_uri, namespaces['skos']['editorialNote'], Literal(vocab_editorial_descriptions[1])))
        if mt:
            graph.add((vocabfile_uri, namespaces['dcterms']['format'], Literal(mt)))
    rdf_str = None
    rdf_str = graph.serialize()
    f = codecs.open(vocabstatusfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True
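
A small read-back sketch (assumptions: a vocabulary registered under the prefix 'example', and a hypothetical path to the status.rdf written by the function above) showing how the recorded dcterms:hasFormat entries can be listed afterwards:

from rdflib import Graph, URIRef

DCTERMS_HAS_FORMAT = URIRef("http://purl.org/dc/terms/hasFormat")
DCTERMS_FORMAT = URIRef("http://purl.org/dc/terms/format")

status = Graph()
status.parse("/path/to/vocabularies/example/status.rdf")   # assumed location
vocab_uri = URIRef("http://vocab.ox.ac.uk/example")         # assumed prefix

for file_uri in status.objects(vocab_uri, DCTERMS_HAS_FORMAT):
    fmt = status.value(file_uri, DCTERMS_FORMAT)
    print(file_uri, fmt)
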
예제 #23
0
import csv
import sys

from rdflib import Graph, URIRef

rdffile = sys.argv[1]
datafile = sys.argv[2]
# Name the output file after the input, e.g. "terms.csv" -> "termsout.csv"
tblfile = (datafile[:-4] if datafile.endswith(".csv") else datafile) + "out.csv"

g = Graph()
g.parse(rdffile)

for ns in g.namespaces():
    if ns[0] == "ex":
        pfx = ns[1]

syn = URIRef(pfx + "synonym")
isa = URIRef(pfx + "is_a")

with open(datafile, 'r') as f:
    reader = csv.reader(f)
    data = list(reader)

for item in data:
    pi = pfx + item[0]
    ur = URIRef(pi)

    # For every node that points at this URI, collect the subjects that list
    # it via ex:synonym and append their local names to the row.
    alt = g.subjects(None, ur)
    for sn in alt:
        sub = g.subjects(syn, sn)
        for s in sub:
            # Slice the namespace off (str.lstrip strips characters, not a prefix)
            item.append(str(s)[len(pfx):])
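
The tblfile name computed at the top of this snippet is never used; a plausible completion (an assumption about the intent) would write the augmented rows back out:

# Write the rows, now extended with any synonyms found, to the output CSV.
with open(tblfile, "w", newline="") as out:
    writer = csv.writer(out)
    writer.writerows(data)
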
예제 #24
0
from rdflib import Graph, Literal, Namespace, RDF, RDFS, BNode, URIRef
from rdflib.namespace import FOAF, RDFS, OWL, XSD
import csv
import spotlight
import time
import re
graph = Graph()
graph.parse("rdfSchema.ttl", format="n3")
UNIV = Namespace("http://example.org/schema#")
g = Graph()
u = Namespace("http://example.org/university/")
g.add((u.Concordia, RDF.type, UNIV.University))
g.add((u.Concordia, UNIV.hasName, Literal("Concordia University")))
g.add((u.Concordia, UNIV.hasDBPediaLink,
       URIRef("http://dbpedia.org/page/Concordia_University")))

with open('data.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if row and len(row[0]) > 0:
            try:
                subject = " ".join(row[0].split()[0].split())
                no = " ".join(row[0].split()[1].split())
                # no= re.sub("\s\s+", " ", row[0].split()[1])
                name = " ".join(row[1].split())
                description = " ".join(row[2].split())
            except IndexError:
                # Course code without a space, e.g. "COMP248": split it into
                # the 4-letter subject and the number.
                no = subject[4:]
                subject = subject[0:4]
            x = URIRef(f"http://example.org/course/{subject}+{no}")
                "disambiguated-organization", {})
                         or {}).get("disambiguated-organization-identifier"))
            log.debug(u'Affiliation: {}, ID: {}'.format(
                organization, ringgold))

            title = affiliation["role-title"] or {}
            log.debug(u'Position title: {}'.format(title))

            start_year = (affiliation["start-date"] or {}).get("year",
                                                               {}).get("value")
            end_year = (affiliation["end-date"] or {}).get("year",
                                                           {}).get("value")
            log.debug(u'Date range: {} - {}'.format(start_year, end_year))

            if affiliation["type"] == "EDUCATION":
                relatedBy = g_profile.objects(URIRef(uri), OBO.RO_0000056)
            else:
                relatedBy = g_profile.objects(URIRef(uri), VIVO.relatedBy)

            position_exists = False
            for obj in relatedBy:
                log.debug(u'{} vs. {}'.format(put_code, obj))
                if put_code in obj:
                    log.debug(u'The position is already in VIVO, skipping.')
                    position_exists = True
                    break

            if not position_exists:
                # Look up URI based on Ringgold ID
                org_uri = (g_orgs.value(predicate=VLOCAL.ringgoldID,
                                        object=Literal(ringgold)))
예제 #26
0
def addGameTriples(graph, ontURI):
    with open('../Data/games.csv', 'r') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter=',')
        flag = True

        for row in csv_reader:
            if flag:
                flag = False
                continue

            # Game
            title = formatURI(str(row[0]))
            graph.add((URIRef(ontURI + title), RDF.type, URIRef(ontURI + "#Game")))

            # Title
            graph.add((URIRef(ontURI + title), URIRef(ontURI + "#game_name"), Literal(str(row[0]))))

            # Platform
            if row[1]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#platform"), Literal(str(row[1]))))

            # Year
            if row[3]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#game_year"), Literal(row[3])))

            # Genre
            if row[4]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#game_genre"), Literal(str(row[4]))))

            # ESRB
            if row[6]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#esrb"), Literal(str(row[6]))))

            # Sales
            if row[7]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#na_sales"), Literal(float(row[7]))))

            if row[8]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#eu_sales"), Literal(float(row[8]))))

            if row[9]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#jp_sales"), Literal(float(row[9]))))

            if row[10]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#glob_sales"), Literal(float(row[10]))))

            # Rating (assumption: critic_score in the CSV is on a 0-100 scale,
            # so divide by 10 to put it on the same 0-10 scale as user_score)
            if row[11]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#critic_score"), Literal(float(row[11]) / 10)))

            if row[12]:
                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#user_score"), Literal(float(row[12]))))

            # Publisher
            if row[2]:
                pub = formatURI(str(row[2]))
                graph.add((URIRef(ontURI + pub), RDF.type, URIRef(ontURI + "#GamePublisher")))

                graph.add((URIRef(ontURI + pub), URIRef(ontURI + "#org_name"), Literal(str(row[2]))))

                graph.add((URIRef(ontURI + title), URIRef(ontURI + "#publishedBy"), URIRef(ontURI + pub)))

            # Developer
            if row[5]:
                for elem in row[5].split(", "):
                    dev = formatURI(elem)
                    graph.add((URIRef(ontURI + dev), RDF.type, URIRef(ontURI + "#GameDeveloper")))

                    graph.add((URIRef(ontURI + dev), URIRef(ontURI + "#org_name"), Literal(elem)))

                    graph.add((URIRef(ontURI + title), URIRef(ontURI + "#developedBy"), URIRef(ontURI + dev)))
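
formatURI() and the csv import are not part of this snippet; a minimal stand-in and usage sketch (both assumptions, shown only to make the example runnable) might look like this:

import csv
from urllib.parse import quote

from rdflib import Graph, Literal, RDF, URIRef


def formatURI(text):
    # e.g. "Grand Theft Auto V" -> "Grand_Theft_Auto_V"; percent-encode anything
    # that would be awkward inside a URI.
    return quote(text.strip().replace(" ", "_"), safe="_")


graph = Graph()
addGameTriples(graph, "http://example.org/games")          # assumed base URI
graph.serialize(destination="games.ttl", format="turtle")
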
예제 #27
0
    def test_distribution_dct_format_other_uri(self):
        resources = self._build_and_parse_format_mediatype_graph(
            format_item=URIRef("https://example.com/my/format"))
        eq_(u'https://example.com/my/format', resources[0].get('format'))
        eq_(None, resources[0].get('mimetype'))
예제 #28
0
#! /usr/bin/python
# makeTriples.py: demonstrate the creation of an RDFLib TripleStore
#******************************************************************
#        Exercise to try out methods of the Graph class      ******
#             -objects, predicates, subjects                 ******
#******************************************************************

from rdflib import Namespace, BNode, Literal, URIRef
from rdflib import Graph, ConjunctiveGraph
from rdflib.store.IOMemory import IOMemory

ns = Namespace("http://love.com#")

mary = URIRef("http://love.com/lovers/mary#")
john = URIRef("http://love.com/lovers/john#")

cmary = URIRef("http://love.com/lovers/mary#")
cjohn = URIRef("http://love.com/lovers/john#")

store = IOMemory()

g = ConjunctiveGraph(store=store)
g.bind("love", ns)

gmary = Graph(store=store, identifier=cmary)

gmary.add((mary, ns['hasName'], Literal("Mary")))
gmary.add((mary, ns['loves'], john))

gjohn = Graph(store=store, identifier=cjohn)
gjohn.add((john, ns['hasName'], Literal("John")))
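
A short follow-up sketch: each lover's statements live in their own named graph, so the store can be inspected per context or serialized as quads. (Note: the IOMemory import path above comes from older rdflib releases; in current versions the in-memory store has moved, so that import may need adjusting.)

# List the named graphs and how many triples each one holds.
for ctx in g.contexts():
    print(ctx.identifier, len(ctx))

# Serialize the whole store as TriG, which keeps the named-graph boundaries.
print(g.serialize(format="trig"))
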
예제 #29
0
# -*- coding: utf-8 -*-

# Copyright (C) 2019  David Arroyo Menéndez

# Author: David Arroyo Menéndez <*****@*****.**>
# Maintainer: David Arroyo Menéndez <*****@*****.**>

# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with urirefs; see the file LICENSE.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA,

# URIRef is the class used to manage URIs in rdflib

from rdflib import URIRef
aref = URIRef('')
print(aref)
aref = URIRef('http://example.com')
print(aref)
print(aref.n3())
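
A short extension of the same idea (the example.com URIs are just placeholders): URIRef is a str subclass, and Namespace gives a convenient way to mint URIRefs.

from rdflib import Namespace, URIRef

ex = Namespace("http://example.com/")
alice = ex["alice"]                  # URIRef('http://example.com/alice')
print(alice)
print(alice.n3())                    # <http://example.com/alice>
print(isinstance(alice, str))        # True: URIRef behaves like a string
print(alice == URIRef("http://example.com/alice"))  # True
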
예제 #30
0
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, FOAF, OWL, XSD, DC, DCTERMS
import json
from linking import link
import os
path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
data = json.load(open(path + "/DatiPerElaborazione/Bar.geojson", "r"))
g = Graph()

cmo = Namespace("http://www.comune.milano.it/ontology/")
schema = Namespace("https://schema.org/")
g.bind("cmo", cmo)
g.bind("schema", schema)

for element in data:
    uri = element["URI"]
    g.add([URIRef(uri), RDF.type, cmo.Bar])
    g.add([URIRef(uri), RDFS.label, Literal(element["nome"])])
    g.add([URIRef(uri), cmo.localBusinessWebsite, Literal(element["website"])])
    g.add([URIRef(uri), cmo.localBusinessPostalCode, Literal(element["cap"])])
    g.add([URIRef(uri), schema.address, Literal(element["indirizzo"])])
    g.add([
        URIRef(uri), cmo.latitude,
        Literal(element["lat"], datatype=XSD.float)
    ])
    g.add([
        URIRef(uri), cmo.longitude,
        Literal(element["long"], datatype=XSD.float)
    ])

g.serialize(destination=path + '/Turtles/bar.ttl', format='turtle')
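
A quick sanity-check sketch on the graph built above: a SPARQL query listing every resource typed as cmo:Bar together with its label.

results = g.query(
    """
    SELECT ?bar ?label
    WHERE {
        ?bar a cmo:Bar ;
             rdfs:label ?label .
    }
    """,
    initNs={"cmo": cmo, "rdfs": RDFS},
)
for bar, label in results:
    print(bar, label)
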