data = csv.reader(open('C:\\Users\\Maral\\Desktop\\Assignments responsible persons_Maral.csv'))

for row in data:
    uri = row[0]
    print(uri)
    jlu = generator_persons(uri)
    print(jlu)
    if jlu != 'error':
        if row[3].strip() != 'None':
            graph.add((URIRef(jlu), owl.sameAs, URIRef(row[3])))
        if row[4].strip() != 'None':
            gnd = row[4].rsplit('/', 1)[1]
            graph.add((URIRef(jlu), owl.sameAs, URIRef(row[4])))
            graph.add((URIRef(jlu), gndo.gndIdentifier, Literal(gnd)))
        urisame = parse.unquote(uri)
        graph.add((URIRef(jlu), owl.sameAs, URIRef(urisame)))

graph.serialize(destination='Haskala.ttl', format="turtle")
import csv

from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import DCTERMS, RDF, RDFS, SKOS, XSD

input_file = csv.DictReader(open("test_sheet.csv"))

for row in input_file:
    # convert it from an OrderedDict to a regular dict
    row = dict(row)
    # print(row)
    # {'Subject Label': 'Pearl Wilmer Booker', 'Subject URI': 'None',
    #  'Predicate Label': 'Daughter Of', 'Predicate URI': '',
    #  'Predicate Symmetry': 'Asymmetric', 'Object Label': 'Mary Booker',
    #  'Object URI': 'None'}

    # make a literal and add it
    # output_graph.add((URIRef(row['Subject URI']), RDFS.label,
    #                   Literal(row['Subject Label'], lang='en')))

    # make a fresh graph for this row
    output_graph = Graph()

    # make a triple with the object as a URI
    output_graph.add((URIRef(row['Subject URI']),
                      URIRef(row['Predicate URI']),
                      URIRef(row['Object URI'])))

    triple = output_graph.serialize(format='nt')
    triple = str(triple, 'utf-8').strip()
    # strip only the trailing ' .' so the context can be appended
    # (replacing every '.' as the original did would also mangle dots
    # inside the URIs themselves)
    triple = triple.rstrip('.').strip()
    triple = f"{triple} <{row['Context']}> ."
    print(triple)

# output_graph.serialize(destination='my_graph.nt', format='nt')
def urijoin(item):
    base, ext = item
    return URIRef(Namespace(base)[ext])
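# A minimal usage sketch for urijoin, assuming rdflib's Namespace and URIRef
# are in scope (the namespace and local name below are made up):
#
#   urijoin(('http://example.org/ns#', 'Person'))
#   # -> rdflib.term.URIRef('http://example.org/ns#Person')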
def reports_rdf():
    # relies on module-level imports from the surrounding file (flask, rdflib's
    # Graph/Namespace/BNode/Literal/URIRef, and the DC/DCTERMS/RDF/RDFS/SKOS/
    # FOAF namespaces)
    g = Graph()
    count = [0]
    properties = {}
    get_schema_items(schema.ReportSchema(), properties, count)
    get_schema_items(schema.SerisReviewSchema(), properties, count)
    export = get_reports()

    bibo = Namespace('http://uri.gbv.de/ontology/bibo/')
    nao = Namespace(
        'http://www.semanticdesktop.org/ontologies/2007/08/15/nao#')
    theme = Namespace('http://www.eea.europa.eu/themes/')
    bibtex = Namespace('http://purl.org/net/nknouf/ns/bibtex#')
    seris = Namespace(
        'http://forum.eionet.europa.eu/nrc-state-environment/seris/ontology/'
        'schema#')

    for entry in export:
        current_id = entry['report_id']
        current_uri = flask.url_for('views.report_view',
                                    report_id=current_id,
                                    _external=True)
        node = URIRef(current_uri)
        g.add((node, RDF.type, seris.SERISReport))
        g.add((node, DCTERMS.identifier, Literal(current_id)))

        for region in entry['header_region']:
            item = BNode()
            g.add((node, DCTERMS.spatial, item))
            g.add((item, RDF.type, DCTERMS.Location))
            g.add((item, RDFS.label, Literal('Region of report')))
            g.add((item, DCTERMS.subject, Literal(region)))

        for country in entry['header_country']:
            g.add((node, DC.coverage, Literal(country)))

        if 'header_subregion' in entry.keys():
            for subregion in entry['header_subregion']:
                item = BNode()
                g.add((node, DCTERMS.spatial, item))
                g.add((item, RDF.type, DCTERMS.Location))
                g.add((item, RDFS.label, Literal('Subregion of country')))
                g.add((item, DCTERMS.subject, Literal(subregion)))

        if entry['header_soer_cover']:
            g.add((node, DCTERMS.source, Literal(entry['header_soer_cover'])))
        if entry['details_original_name']:
            g.add((node, DC.title, Literal(entry['details_original_name'])))

        for language in entry['details_original_language']:
            g.add((node, DC.language, Literal(language)))

        lang_id = 0
        lang_field = 'details_translated_in_%s' % lang_id
        while lang_field in entry.keys():
            item = BNode()
            g.add((node, DCTERMS.language, item))
            g.add((item, RDF.type, DCTERMS.LinguisticSystem))
            g.add((item, RDFS.label,
                   Literal('Language in which the report was translated')))
            g.add((item, DCTERMS.subject, Literal(entry[lang_field])))
            lang_id += 1
            lang_field = 'details_translated_in_%s' % lang_id

        if entry['details_english_name']:
            g.add((node, DC.title,
                   Literal(entry['details_english_name'], lang="en")))
        if entry['details_publisher']:
            g.add((node, DC.publisher, Literal(entry['details_publisher'])))
        if entry['format_report_type']:
            g.add((node, DC.type, Literal(entry['format_report_type'])))
        if entry['format_date_of_publication']:
            g.add((node, DCTERMS.issued,
                   Literal(entry['format_date_of_publication'])))
        if entry['format_freq_of_pub']:
            item = BNode()
            g.add((node, DCTERMS.accrualPeriodicity, item))
            g.add((item, RDF.type, DCTERMS.Frequency))
            g.add((item, RDFS.label, Literal('Frequency of publication')))
            g.add((item, RDF.value, Literal(entry['format_freq_of_pub'])))
        if entry['format_date_of_last_update']:
            g.add((node, DCTERMS.modified,
                   Literal(entry['format_date_of_last_update'])))
        if entry['format_freq_of_upd']:
            item = BNode()
            g.add((node, DCTERMS.accrualPeriodicity, item))
            g.add((item, RDF.type, DCTERMS.Frequency))
            g.add((item, RDFS.label, Literal('Frequency of update')))
            g.add((item, RDF.value, Literal(entry['format_freq_of_upd'])))
        if entry['format_no_of_pages']:
            g.add((node, bibo.numpages, Literal(entry['format_no_of_pages'])))
        if entry['format_size']:
            item = BNode()
            g.add((node, DCTERMS.extent, item))
            g.add((item, RDF.type, DCTERMS.SizeOrDuration))
            g.add((item, RDFS.label, Literal('Size in MBytes')))
            g.add((item, RDF.value, Literal(entry['format_size'])))
        if entry['format_availability_paper_or_web']:
            g.add((node, DC['format'],
                   Literal(entry['format_availability_paper_or_web'])))
        if entry['format_availability_url']:
            g.add((node, bibtex.hasURL,
                   Literal(entry['format_availability_url'])))
        if entry['format_availability_registration_required']:
            g.add((node, DC.rights,
                   Literal(entry['format_availability_registration_required'])))
        if entry['format_availability_costs']:
            g.add((node, RDFS.comment,
                   Literal('(cost)' + entry['format_availability_costs'])))

        if 'links_target_audience' in entry.keys():
            for audience in entry['links_target_audience']:
                item = BNode()
                g.add((node, DCTERMS.audience, item))
                g.add((item, RDF.type, DCTERMS.AgentClass))
                g.add((item, RDFS.label, Literal('Target audience')))
                g.add((item, RDF.value, Literal(audience)))
        if entry['links_legal_reference']:
            item = BNode()
            g.add((node, DCTERMS.conformsTo, item))
            g.add((item, RDF.type, DCTERMS.Standard))
            g.add((item, RDFS.label, Literal('Legal reference')))
            g.add((item, RDF.value, Literal(entry['links_legal_reference'])))
        if entry['links_explanatory_text']:
            g.add((node, bibo.shortDescription,
                   Literal(entry['links_explanatory_text'])))

        topics = {
            'env_issues': [
                'air', 'biodiversity', 'chemicals', 'climate', 'human',
                'landuse', 'natural', 'noise', 'soil', 'waste', 'water',
                'other_issues'
            ],
            'sectors_and_activities': [
                'agriculture', 'energy', 'fishery', 'households', 'industry',
                'economy', 'tourism', 'transport'
            ],
            'across_env': ['technology', 'policy', 'scenarios'],
            'env_regions': ['coast_sea', 'regions', 'urban']
        }
        for key in topics.keys():
            for topic in topics[key]:
                focus = 'topics_' + key + '_' + topic + '_focus'
                indicators = 'topics_' + key + '_' + topic + '_indicators'
                current_item = ("http://www.eea.europa.eu/themes/%(topic)s"
                                % {"topic": topic})
                item = BNode()
                if entry[focus] or entry[indicators]:
                    g.add((node, nao.hasTopic, item))
                    g.add((item, RDFS.label, Literal(topic)))
                    g.add((item, RDF.type, bibtex.Entry))
                    g.add((item, bibtex.hasURL, Literal(current_item)))
                if entry[focus]:
                    g.add((item, seris.hasFocusValue, Literal(entry[focus])))
                if entry[indicators]:
                    g.add((item, seris.hasIndicatorCount,
                           Literal(entry[indicators])))

            topic = 'topics_' + key + '_extra_topic_extra_topic_input'
            focus = 'topics_' + key + '_extra_topic_other_radio_focus'
            indicators = 'topics_' + key + '_extra_topic_other_radio_indicators'
            if entry[topic]:
                item = BNode()
                g.add((node, nao.hasTopic, item))
                g.add((item, RDFS.label, Literal(entry[topic])))
                g.add((item, RDF.type, bibtex.Entry))
                if entry[focus]:
                    g.add((item, seris.hasFocusValue, Literal(entry[focus])))
                if entry[indicators]:
                    g.add((item, seris.hasIndicatorCount,
                           Literal(entry[indicators])))

        if entry['structure_indicator_based']:
            item = BNode()
            g.add((node, seris.structure, item))
            g.add((item, RDFS.label, Literal('indicator based')))
            if entry['structure_indicators_estimation']:
                g.add((item, RDF.value,
                       Literal(entry['structure_indicators_estimation'])))
            usage = ''
            if entry['structure_indicators_usage_to_assess_progress']:
                usage += entry['structure_indicators_usage_to_assess_progress']
                usage += ' to assess progress to target/threshold.'
            if entry['structure_indicators_usage_to_compare_countries']:
                usage += entry['structure_indicators_usage_to_compare_countries']
                usage += ' to compare with other countries/EU.'
            if entry['structure_indicators_usage_to_compare_subnational']:
                usage += entry['structure_indicators_usage_to_compare_subnational']
                usage += ' to compare at subnational level.'
            if entry['structure_indicators_usage_to_compare_eea']:
                usage += entry['structure_indicators_usage_to_compare_eea']
                usage += ' to relate with EEA/EU developments.'
            if entry['structure_indicators_usage_to_compare_global']:
                usage += entry['structure_indicators_usage_to_compare_global']
                usage += ' to relate to global developments.'
            if entry['structure_indicators_usage_to_evaluate']:
                usage += entry['structure_indicators_usage_to_evaluate']
                usage += ' to rank/evaluate.'
            if entry['structure_indicators_usage_evaluation_method']:
                usage += 'evaluation method: '
                usage += entry['structure_indicators_usage_evaluation_method']
            if usage:
                g.add((item, SKOS.scopeNote, Literal(usage)))

        if entry['structure_policy_recommendations']:
            g.add((node, seris.policyRecommendationsQuantifier,
                   Literal(entry['structure_policy_recommendations'])))
        if entry['structure_reference']:
            quantifier = entry['structure_reference']
            if quantifier[0] == 'N':
                quantifier = 'No'
            text = '[%s] DPSIR framework used' % quantifier
            g.add((node, DCTERMS.references, Literal(text)))
        if entry['short_description']:
            g.add((node, DCTERMS.description,
                   Literal(entry['short_description'])))
        if entry['table_of_contents']:
            g.add((node, DCTERMS.tableOfContents,
                   Literal(entry['table_of_contents'])))

    g.bind("dcterms", DCTERMS)
    g.bind("dc", DC)
    g.bind("bibo", bibo)
    g.bind("foaf", FOAF)
    g.bind("nao", nao)
    g.bind("theme", theme)
    g.bind("bibtex", bibtex)
    g.bind("skos", SKOS)
    g.bind("rdfs", RDFS)
    g.bind("seris", seris)

    return flask.Response(g.serialize(format='xml'), mimetype='text/xml')
annotationItemCntr = 1
annotationBodyCntr = 1
annotationEvidenceCntr = 1
annotationSetCntr = 1  # assumed initial value; the fragment uses this counter without initializing it
annotatedCache = {}  # indexes annotation ids so that multiple bodies can be attached

currentAnnotation = annotationItemCntr

currentAnnotSet = 'ohdsi-eu-spc-annotation-set-%s' % annotationSetCntr
annotationSetCntr += 1

# TODO: find out what is being used for collections in OA
graph.add((poc[currentAnnotSet], RDF.type, oa["DataAnnotation"]))
graph.add((poc[currentAnnotSet], oa["annotatedAt"],
           Literal(datetime.date.today())))
graph.add((poc[currentAnnotSet], oa["annotatedBy"],
           URIRef(u"http://www.pitt.edu/~rdb20/triads-lab.xml#TRIADS")))

outf = codecs.open(OUTPUT_FILE, "w", "utf8")
s = graph.serialize(format="n3", encoding="utf8", errors="replace")
outf.write(s)

# DEBUG
cntr = 0
inf = open(DATA_FILE, 'r')
buf = inf.read()
inf.close()
lines = buf.split("\n")

it = [unicode(x.strip(), 'utf-8', 'replace').split("\t")
      for x in lines[1:]]  # skip header
for elt in it:
# import glob
# input_files = glob.glob("./split_*.ttl")

KG = ConjunctiveGraph()
KG.parse(filename, format="turtle")

# index of biotools {IDs: rdf KG}
index = {}
for s, p, o in KG.triples((None, RDF.type, schema.SoftwareApplication)):
    index[str(s)] = None

for bio_tools_Id in tqdm(index.keys()):
    sub_graph = ConjunctiveGraph()
    for s, p, o in KG.triples((URIRef(bio_tools_Id), None, None)):
        sub_graph.add((s, p, o))
    index[bio_tools_Id] = sub_graph

# for each index, FAIR evaluation of all entries
df = pd.DataFrame()
df_time = pd.DataFrame()
evals, exec_time = mass_eval(index)
df = pd.concat([df, pd.DataFrame.from_records(evals)])
df_time = pd.concat([df_time, pd.DataFrame.from_records(exec_time)])

head, tail = path.split(filename)
df.to_csv("../results/biotools_all/FC_results_" + tail + ".csv")
df_time.to_csv("../results/biotools_all/exec_time_" + tail + ".csv")
def get_RDF(bio_tools_Id):
    sub_graph = ConjunctiveGraph()
    for s, p, o in KG.triples((URIRef(bio_tools_Id), None, None)):
        sub_graph.add((s, p, o))
    print(sub_graph.serialize(format="turtle"))
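# Usage sketch: get_RDF assumes a module-level graph named KG has already been
# parsed (as in the indexing script above). The ID below is illustrative only:
#
#   get_RDF("https://bio.tools/signalp")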
def createLine(line):
    # this assignment is assumed to be the intended body (it was commented
    # out in the source, which left singleLine undefined)
    singleLine = URIRef(
        'http://data.linkedevents.org/transit/london/subwayRoute/'
        + Literal(line).replace(" ", ""))
    return singleLine
def createAddress(addressGUID):
    singleAddress = URIRef(
        'http://data.linkedevents.org/location/%s/address' % addressGUID)
    return singleAddress
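# Example (the GUID is made up):
#
#   createAddress('ab12-cd34')
#   # -> URIRef('http://data.linkedevents.org/location/ab12-cd34/address')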
def context_id(self, uri, context_id=None):
    """URI#context"""
    uri = uri.split("#", 1)[0]
    if context_id is None:
        context_id = "#context"
    return URIRef(context_id, base=uri)
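# URIRef resolves its first argument against `base` with standard URI joining,
# so any fragment on the input is replaced (illustrative values):
#
#   obj.context_id("http://example.org/doc#whatever")
#   # "#context" joined onto base "http://example.org/doc"
#   # -> URIRef('http://example.org/doc#context')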
def createStationGeom(stopGUID):
    # createTubeSGraph
    stationGeom = URIRef(createStation(stopGUID) + '/geometry')
    return stationGeom
from rdflib import Graph, Literal, Namespace, URIRef  # core classes used below
from rdflib.namespace import RDF, RDFS, FOAF, OWL, XSD, DC, DCTERMS
import json
from linking import link
import os

path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
data = json.load(open(path + "/DatiPerElaborazione/Monument.geojson", "r"))

g = Graph()
cmo = Namespace("http://www.comune.milano.it/ontology/")
schema = Namespace("https://schema.org/")
g.bind("cmo", cmo)
g.bind("schema", schema)

for element in data:
    uri = element["URI"]
    g.add([URIRef(uri), RDF.type, schema.TouristAttraction])
    g.add([URIRef(uri), RDFS.label, Literal(element["nome"], lang='it')])
    g.add([URIRef(uri), cmo.touristAttractionAuthor,
           Literal(element["autore"])])
    g.add([
        URIRef(uri), cmo.touristAttractionDefinition,
        Literal(element["definizione"], lang='it')
    ])
    g.add([URIRef(uri), schema.description, Literal(element["abstract"])])
    g.add([
        URIRef(uri), cmo.latitude,
        Literal(element["lat"], datatype=XSD.float)
    ])
    g.add([
        URIRef(uri), cmo.longitude,
        # the source is truncated here; this literal mirrors the latitude
        # pattern above and the parallel Bar.geojson script
        Literal(element["long"], datatype=XSD.float)
    ])
def createrdf(filepath, text_dir, year, conference, platform, entity_map,
              consolidatedGraph):
    config = yaml.safe_load(open('../../conf/conf.yaml'))
    model_dir = config['MODEL_PATH']
    triple_list = []
    dcc_namespace = "https://github.com/deepcurator/DCC/"
    # print(row['paper_title'], row['paper_link'], row['conference'],
    #       row['year'], row['Platform'])

    filename = filepath.split('/')[-1]
    if filename.endswith('.pdf'):
        filename = filename.split('.pdf')[0]
    elif filename.endswith('.html'):
        filename = filename.split('.html')[0]

    # filename will act as a unique URI to connect all the three graphs
    filesubject = dcc_namespace + filename
    consolidatedGraph.add((URIRef(filesubject), RDF.type,
                           URIRef(dcc_namespace + "Publication")))
    triple_list.append(filename + " isa " + "Publication")

    year = Literal(year)
    conference = Literal(conference)
    platform = Literal(platform)
    consolidatedGraph.add((URIRef(filesubject),
                           URIRef(dcc_namespace + "yearOfPublication"), year))
    consolidatedGraph.add((URIRef(filesubject),
                           URIRef(dcc_namespace + "conferenceSeries"),
                           conference))
    consolidatedGraph.add((URIRef(filesubject),
                           URIRef(dcc_namespace + "platform"), platform))

    # Just the triple list
    triple_list.append(filename + " year_of_publication " + str(year))
    triple_list.append(filename + " conference_series " + str(conference))
    triple_list.append(filename + " platform " + str(platform))

    textfilename = text_dir + filename + ".txt"

    # load the spacy nlp model
    nlp = spacy.load(model_dir)
    sents = nltk.sent_tokenize(getabstract(textfilename))
    entity_dict = {}
    for sentence in sents:
        ner_tagged = nlp(sentence)
        tagged_entities = ner_tagged.ents
        for entity in tagged_entities:
            # print(entity.text, entity.label_)
            if entity.text not in entity_dict:
                entity_dict[entity.text] = entity.label_

    for entitytext, entitylabel in entity_dict.items():
        entitytext = entitytext.replace(" ", '_')
        if entitytext in entity_map:
            csovalue = entity_map[entitytext]
            str_value = str(csovalue)
            if "cso" in str_value:
                consolidatedGraph.add(
                    (URIRef(filesubject + "_" + entitytext),
                     URIRef(dcc_namespace + "hasCSOEquivalent"), csovalue))
                # print("CSO label found for entity text : " + entitytext +
                #       " : and value is " + entity_map[entitytext])
        # print(entitytext)
        # print(filesubject + "_" + entitytext)
        consolidatedGraph.add((URIRef(filesubject + "_" + entitytext),
                               RDF.type, URIRef(dcc_namespace + entitylabel)))
        consolidatedGraph.add((URIRef(filesubject),
                               URIRef(dcc_namespace + "hasEntity"),
                               URIRef(filesubject + "_" + entitytext)))
        textLiteral = Literal(entitytext)
        consolidatedGraph.add((URIRef(filesubject + "_" + entitytext),
                               URIRef(dcc_namespace + 'hasText'), textLiteral))
        triple_list.append(entitytext + " isa " + entitylabel)
        # triple_list.append(filename + " has entity " + )

    print("Done with file " + filename)
    return (filename, triple_list)
def generator_persons(newuri):
    preflabel = ''
    altlabel = ''
    viaf = ''
    uriname = ''
    same = ''
    pb = ''
    pd = ''
    dd = ''
    db = ''
    # assumes Python 3's urllib.request; the original called urllib2.urlopen,
    # which clashes with the Python 3 urllib.parse used elsewhere in this code
    personpage = urllib.request.urlopen(newuri)
    soup = BeautifulSoup(personpage)
    personnode = soup.findAll('div', attrs={"class": "node node-person"})
    print(personnode)
    if personnode == []:
        return 'error'
    name = str(personnode[0].find('h1').string)
    if '-' in name:
        gernamefield = name.rsplit('-', 1)[0].strip()
        preflabel = gernamefield
        altlabel = name.rsplit('-', 1)[1].strip()
    else:
        preflabel = name.strip()
    uriname = preflabel.replace(' ', '_')
    uriname = uriname.replace(',', '')
    uriname = uriname.replace('"', '')
    uriname = uriname.replace('\'', '')
    print(uriname)
    jluri = 'http://data.judaicalink.org/data/haskala/' + uriname
    graph.add((URIRef(jluri), RDF.type, foaf.Person))
    graph.add((URIRef(jluri), skos.prefLabel, Literal(preflabel)))
    if altlabel != '':
        graph.add((URIRef(jluri), skos.altLabel, Literal(altlabel)))
    book = soup.findAll('div', attrs={"class": "book-title"})
    print(book)
    if book != []:
        for i in range(0, len(book)):
            bookuri = ('https://www.haskala-library.net/'
                       + book[i].find('a').get('href'))
            bookuri = parse.unquote(bookuri)
            print(bookuri)
            graph.add((URIRef(jluri), jl.hasPublication, URIRef(bookuri)))
    else:
        bookuri = ''
    return jluri
def CreateBIDSParticipantFile(nidm_graph, output_file, participant_fields):
    '''
    Creates participant file based on requested fields

    :param nidm_graph:
    :param output_directory:
    :param fields:
    :return:
    '''

    print("Creating participants.json file...")
    fields = ["participant_id"]
    #fields.extend(participant_fields)
    participants = pd.DataFrame(columns=fields, index=[1])
    participants_json = {}

    # for each Constants.NIDM_SUBJECTID in NIDM file
    row_index = 1
    for subj_uri, subj_id in nidm_graph.subject_objects(
            predicate=URIRef(Constants.NIDM_SUBJECTID.uri)):

        # adding subject ID to data list to append to participants data frame
        participants.loc[row_index, 'participant_id'] = subj_id

        # for each of the fields in the participants list
        for fields in participant_fields:
            # if field identifier isn't a proper URI then do a fuzzy search on
            # the graph, else an explicit search for the URL
            if validators.url(fields):
                # then this is a valid URI so simply query nidm_project document for it
                for subj, obj in nidm_graph.subject_objects(
                        predicate=URIRef(
                            BIDS_Constants.participants[fields].uri)):
                    # add row to the pandas data frame
                    #data.append(obj)
                    participants.loc[
                        row_index,
                        BIDS_Constants.participants[fields].uri] = obj
                # find Data Element and add metadata to participants_json dictionary
            else:
                # text matching task: remove base part of URIs and try to fuzzy
                # match the field in the part_fields parameter string to the
                # "term" part of a qname URI...this lets a user simply ask for
                # "age", for example, without knowing the complete URI....hopefully
                #
                # This needs to be a more complex query:
                #   Step(1): For subj_uri query for prov:Activity that were
                #            prov:wasAttributedTo subj_uri
                #   Step(2): Query for prov:Entity that were prov:wasGeneratedBy
                #            uris from Step(1)
                #   Step(3): For each metadata triple in objects whose subject
                #            is uris from Step(2), fuzzy match predicate after
                #            removing base of uri to "fields" in participants
                #            list, then add these to data list for appending
                #            to pandas
                match_ratio = {}

                # Steps (1):(3)
                query = """
                    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                    PREFIX prov: <http://www.w3.org/ns/prov#>
                    PREFIX onli: <http://neurolog.unice.fr/ontoneurolog/v3.0/instrument.owl#>
                    PREFIX sio: <http://semanticscience.org/ontology/sio.owl#>
                    PREFIX niiri: <http://iri.nidash.org/>

                    SELECT DISTINCT ?pred ?value
                    WHERE {
                        ?asses_activity prov:qualifiedAssociation ?_blank .
                        ?_blank rdf:type prov:Association ;
                            prov:agent <%s> ;
                            prov:hadRole sio:Subject .
                        ?entities prov:wasGeneratedBy ?asses_activity ;
                            rdf:type onli:assessment-instrument ;
                            ?pred ?value .
                        FILTER (regex(str(?pred), "%s", "i"))
                    }""" % (subj_uri, fields)
                #print(query)

                qres = nidm_graph.query(query)
                for row in qres:
                    # use last field in URIs for short column name and add
                    # full URI to sidecar participants.json file
                    url_parts = urllib.parse.urlsplit(row[0], scheme='#')
                    if url_parts.fragment == '':
                        # do some parsing of the path URL because this
                        # particular one has no fragments
                        url_parts = urllib.parse.urlparse(row[0])
                        path_parts = url_parts[2].rpartition('/')
                        short_name = path_parts[2]
                    else:
                        short_name = url_parts.fragment
                    # find Data Element and add metadata to participants_json dictionary
                    if 'de' not in locals():
                        de = GetDataElementMetadata(nidm_graph, short_name)
                    else:
                        de.update(GetDataElementMetadata(nidm_graph, short_name))
                    participants.loc[row_index, str(short_name)] = str(row[1])
                    #data.append(str(row[1]))

        # add row to participants DataFrame
        #participants = participants.append(pd.DataFrame(data))
        row_index = row_index + 1

    # save participants.tsv file
    participants.to_csv(output_file + ".tsv", sep='\t', index=False)

    # save participants.json file
    with open(output_file + ".json", 'w') as f:
        json.dump(participants_json, f, sort_keys=True, indent=2)

    # save participant sidecar file
    write_json_mapping_file(de, join(splitext(output_file)[0] + ".json"), True)

    return participants, participants_json
graph.bind('djr', djr)
graph.bind('owl', owl)
graph.bind('gnd', gnd)

#basis_uri = 'http://dijest.technion.ac.il/book/'
#basis_person_uri = 'http://dijest.technion.ac.il/person/'
basis_uri = 'djr:book/'
basis_person_uri = 'djr:person/'

with open('mifalbibl_first_selection.csv', 'r') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        book_uri = basis_uri + row['uri']
        creator_uri = basis_person_uri + row['entityID']
        graph.add((URIRef(book_uri), RDF['type'], djo['Book']))
        # (sic: "lanugage" is the column header as spelled in the source CSV)
        graph.add((URIRef(book_uri), dcterms['language'],
                   Literal(row['dcterms:lanugage'])))
        if len(row['dcterms:created']) > 0 and row['dcterms:created'] != 'TBD':
            book_label = (row['dcterms:title'] + ' ('
                          + row['dcterms:created'] + ')')
        else:
            book_label = row['dcterms:title']
        graph.add((URIRef(book_uri), rdfs['label'], Literal(book_label)))

        creatorNode = BNode('author-' + row['uri'])
        graph.add((creatorNode, RDF['type'], djo['Person']))
        graph.add((creatorNode, schema['name'], Literal(row['AuthorNoPoint'])))
def ProcessFiles(graph, scan_type, output_directory, project_location, args):
    '''
    This function will essentially cycle through the acquisition objects in
    the NIDM file loaded into graph and depending on the scan_type will try
    and copy the image to the output_directory
    '''

    if scan_type == Constants.NIDM_MRI_DIFFUSION_TENSOR.uri:
        bids_ext = 'dwi'
    elif scan_type == Constants.NIDM_MRI_ANATOMIC_SCAN.uri:
        bids_ext = 'anat'
    elif scan_type == Constants.NIDM_MRI_FUNCTION_SCAN.uri:
        bids_ext = 'func'

    # query NIDM document for acquisition entity "subjects" with predicate
    # nidm:hasImageUsageType and object scan_type
    for acq in graph.subjects(
            predicate=URIRef(Constants.NIDM_IMAGE_USAGE_TYPE.uri),
            object=URIRef(scan_type)):
        # first see if file exists locally.  Get nidm:Project prov:Location
        # and append the nfo:Filename of the image from the acq acquisition
        # entity.  If that file doesn't exist try the prov:Location in the
        # func acq entity and see if we can download it from the cloud

        # get acquisition uuid from entity uuid
        temp = graph.objects(subject=acq,
                             predicate=Constants.PROV['wasGeneratedBy'])
        for item in temp:
            activity = item

        # get participant ID with sio:Subject role in anat_acq qualified
        # association
        part_id = GetParticipantIDFromAcquisition(
            nidm_file_list=[args.rdf_file], acquisition=activity)

        # make BIDS sub directory
        if 'sub' in (part_id['ID'].values)[0]:
            sub_dir = join(output_directory, (part_id['ID'].values)[0])
        else:
            sub_dir = join(output_directory,
                           "sub-" + (part_id['ID'].values)[0])
        sub_filename_base = "sub-" + (part_id['ID'].values)[0]
        if not os.path.exists(sub_dir):
            os.makedirs(sub_dir)

        # make BIDS scan type directory (bids_ext) directory
        if not os.path.exists(join(sub_dir, bids_ext)):
            os.makedirs(join(sub_dir, bids_ext))

        for filename in graph.objects(
                subject=acq, predicate=URIRef(Constants.NIDM_FILENAME.uri)):
            # check if file exists
            for location in project_location:
                # if MRI exists in this location then copy and rename
                if isfile((location[0] + filename).lstrip("file:")):
                    # copy and rename file to be BIDS compliant
                    copyfile((location[0] + filename).lstrip("file:"),
                             join(sub_dir, bids_ext,
                                  sub_filename_base + splitext(filename)[1]))
                    continue

            # if the file wasn't accessible locally, try with the
            # prov:Location in the acq
            for location in graph.objects(
                    subject=acq,
                    predicate=URIRef(Constants.PROV['Location'])):
                # try to download the file and rename
                ret = GetImageFromURL(location)
                if ret == -1:
                    print("ERROR! Can't download file: %s from url: %s, "
                          "trying to copy locally...." % (filename, location))
                    if "file" in location:
                        location = str(location).lstrip("file:")
                        print("Trying to copy file from %s" % (location))
                        try:
                            copyfile(location,
                                     join(output_directory, sub_dir, bids_ext,
                                          basename(filename)))
                        except:
                            print("ERROR! Failed to find file %s on "
                                  "filesystem..." % location)
                            if not args.no_downloads:
                                try:
                                    print("Running datalad get command on "
                                          "dataset: %s" % location)
                                    dl.Dataset(os.path.dirname(location)).get(
                                        recursive=True, jobs=1)
                                except:
                                    print("ERROR! Datalad returned error: %s "
                                          "for dataset %s."
                                          % (sys.exc_info()[0], location))
                                    GetImageFromAWS(
                                        location=location,
                                        output_file=join(
                                            output_directory, sub_dir,
                                            bids_ext, basename(filename)),
                                        args=args)
                else:
                    # copy temporary file to BIDS directory
                    copyfile(ret, join(output_directory, sub_dir, bids_ext,
                                       basename(filename)))

            # if we were able to copy the image file then add the json sidecar
            # file with additional metadata available in the NIDM file
            if isfile(join(output_directory, sub_dir, bids_ext,
                           basename(filename))):
                # get rest of metadata for this acquisition and store in
                # sidecar file
                if "gz" in basename(filename):
                    image_filename = splitext(
                        splitext(basename(filename))[0])[0]
                else:
                    image_filename = splitext(basename(filename))[0]
                AddMetadataToImageSidecar(
                    graph_entity=acq, graph=graph,
                    output_directory=join(output_directory, sub_dir,
                                          bids_ext),
                    image_filename=image_filename)

            # if this is a DWI scan then we should copy over the b-value and
            # b-vector files
            if bids_ext == 'dwi':
                # search for entity uuid with rdf:type nidm:b-value that was
                # generated by activity
                query = """
                    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                    PREFIX prov: <http://www.w3.org/ns/prov#>
                    PREFIX nidm: <http://purl.org/nidash/nidm#>

                    SELECT DISTINCT ?entity
                    WHERE {
                        ?entity rdf:type <http://purl.org/nidash/nidm#b-value> ;
                            prov:wasGeneratedBy <%s> .
                    }""" % activity
                # print(query)
                qres = graph.query(query)
                for row in qres:
                    bval_entity = str(row[0])

                    # if the file wasn't accessible locally, try with the
                    # prov:Location in the acq
                    for location in graph.objects(
                            subject=URIRef(bval_entity),
                            predicate=URIRef(Constants.PROV['Location'])):
                        # try to download the file and rename
                        ret = GetImageFromURL(location)
                        if ret == -1:
                            print("ERROR! Can't download file: %s from url: "
                                  "%s, trying to copy locally...."
                                  % (filename, location))
                            if "file" in location:
                                location = str(location).lstrip("file:")
                                print("Trying to copy file from %s"
                                      % (location))
                                try:
                                    copyfile(location,
                                             join(output_directory, sub_dir,
                                                  bids_ext,
                                                  basename(location)))
                                except:
                                    print("ERROR! Failed to find file %s on "
                                          "filesystem..." % location)
                                    if not args.no_downloads:
                                        try:
                                            print("Running datalad get "
                                                  "command on dataset: %s"
                                                  % location)
                                            dl.Dataset(os.path.dirname(
                                                location)).get(
                                                    recursive=True, jobs=1)
                                        except:
                                            print("ERROR! Datalad returned "
                                                  "error: %s for dataset %s."
                                                  % (sys.exc_info()[0],
                                                     location))
                                            GetImageFromAWS(
                                                location=location,
                                                output_file=join(
                                                    output_directory, sub_dir,
                                                    bids_ext,
                                                    basename(location)),
                                                args=args)

                # search for entity uuid with rdf:type nidm:b-vector that was
                # generated by activity
                query = """
                    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                    PREFIX prov: <http://www.w3.org/ns/prov#>
                    PREFIX nidm: <http://purl.org/nidash/nidm#>

                    SELECT DISTINCT ?entity
                    WHERE {
                        ?entity rdf:type <http://purl.org/nidash/nidm#b-vector> ;
                            prov:wasGeneratedBy <%s> .
                    }""" % activity
                # print(query)
                qres = graph.query(query)
                for row in qres:
                    bvec_entity = str(row[0])

                    # if the file wasn't accessible locally, try with the
                    # prov:Location in the acq
                    for location in graph.objects(
                            subject=URIRef(bvec_entity),
                            predicate=URIRef(Constants.PROV['Location'])):
                        # try to download the file and rename
                        ret = GetImageFromURL(location)
                        if ret == -1:
                            print("ERROR! Can't download file: %s from url: "
                                  "%s, trying to copy locally...."
                                  % (filename, location))
                            if "file" in location:
                                location = str(location).lstrip("file:")
                                print("Trying to copy file from %s"
                                      % (location))
                                try:
                                    copyfile(location,
                                             join(output_directory, sub_dir,
                                                  bids_ext,
                                                  basename(location)))
                                except:
                                    print("ERROR! Failed to find file %s on "
                                          "filesystem..." % location)
                                    if not args.no_downloads:
                                        try:
                                            print("Running datalad get "
                                                  "command on dataset: %s"
                                                  % location)
                                            dl.Dataset(os.path.dirname(
                                                location)).get(
                                                    recursive=True, jobs=1)
                                        except:
                                            print("ERROR! Datalad returned "
                                                  "error: %s for dataset %s."
                                                  % (sys.exc_info()[0],
                                                     location))
                                            GetImageFromAWS(
                                                location=location,
                                                output_file=join(
                                                    output_directory, sub_dir,
                                                    bids_ext,
                                                    basename(location)),
                                                args=args)
def graph_from_dataset(self, dataset_dict, dataset_ref):

    g = self.g

    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)

    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    # Basic fields
    items = [
        ('title', DCT.title, None, Literal),
        ('notes', DCT.description, None, Literal),
        ('url', DCAT.landingPage, None, URIRef),
        ('identifier', DCT.identifier, ['guid', 'id'], Literal),
        ('version', OWL.versionInfo, ['dcat_version'], Literal),
        ('version_notes', ADMS.versionNotes, None, Literal),
        ('frequency', DCT.accrualPeriodicity, None, Literal),
        ('access_rights', DCT.accessRights, None, Literal),
        ('dcat_type', DCT.type, None, Literal),
        ('provenance', DCT.provenance, None, Literal),
    ]
    self._add_triples_from_dict(dataset_dict, dataset_ref, items)

    # Tags
    for tag in dataset_dict.get('tags', []):
        g.add((dataset_ref, DCAT.keyword, Literal(tag['name'])))

    # Dates
    items = [
        ('issued', DCT.issued, ['metadata_created'], Literal),
        ('modified', DCT.modified, ['metadata_modified'], Literal),
    ]
    self._add_date_triples_from_dict(dataset_dict, dataset_ref, items)

    # Lists
    items = [
        ('language', DCT.language, None, Literal),
        ('theme', DCAT.theme, None, URIRef),
        ('conforms_to', DCT.conformsTo, None, Literal),
        ('alternate_identifier', ADMS.identifier, None, Literal),
        ('documentation', FOAF.page, None, Literal),
        ('related_resource', DCT.relation, None, Literal),
        ('has_version', DCT.hasVersion, None, Literal),
        ('is_version_of', DCT.isVersionOf, None, Literal),
        ('source', DCT.source, None, Literal),
        ('sample', ADMS.sample, None, Literal),
    ]
    self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

    # Contact details
    if any([
        self._get_dataset_value(dataset_dict, 'contact_uri'),
        self._get_dataset_value(dataset_dict, 'contact_name'),
        self._get_dataset_value(dataset_dict, 'contact_email'),
        self._get_dataset_value(dataset_dict, 'maintainer'),
        self._get_dataset_value(dataset_dict, 'maintainer_email'),
        self._get_dataset_value(dataset_dict, 'author'),
        self._get_dataset_value(dataset_dict, 'author_email'),
    ]):
        contact_uri = self._get_dataset_value(dataset_dict, 'contact_uri')
        if contact_uri:
            contact_details = URIRef(contact_uri)
        else:
            contact_details = BNode()

        g.add((contact_details, RDF.type, VCARD.Organization))
        g.add((dataset_ref, DCAT.contactPoint, contact_details))

        items = [
            ('contact_name', VCARD.fn, ['maintainer', 'author'], Literal),
            ('contact_email', VCARD.hasEmail,
             ['maintainer_email', 'author_email'], Literal),
        ]
        self._add_triples_from_dict(dataset_dict, contact_details, items)

    # Publisher
    if any([
        self._get_dataset_value(dataset_dict, 'publisher_uri'),
        self._get_dataset_value(dataset_dict, 'publisher_name'),
        dataset_dict.get('organization'),
    ]):
        publisher_uri = publisher_uri_from_dataset_dict(dataset_dict)
        if publisher_uri:
            publisher_details = URIRef(publisher_uri)
        else:
            # No organization nor publisher_uri
            publisher_details = BNode()

        g.add((publisher_details, RDF.type, FOAF.Organization))
        g.add((dataset_ref, DCT.publisher, publisher_details))

        publisher_name = self._get_dataset_value(dataset_dict,
                                                 'publisher_name')
        if not publisher_name and dataset_dict.get('organization'):
            publisher_name = dataset_dict['organization']['title']

        g.add((publisher_details, FOAF.name, Literal(publisher_name)))
        # TODO: It would make sense to fallback these to organization
        # fields but they are not in the default schema and the
        # `organization` object in the dataset_dict does not include
        # custom fields
        items = [
            ('publisher_email', FOAF.mbox, None, Literal),
            ('publisher_url', FOAF.homepage, None, URIRef),
            ('publisher_type', DCT.type, None, Literal),
        ]
        self._add_triples_from_dict(dataset_dict, publisher_details, items)

    # Temporal
    start = self._get_dataset_value(dataset_dict, 'temporal_start')
    end = self._get_dataset_value(dataset_dict, 'temporal_end')
    if start or end:
        temporal_extent = BNode()
        g.add((temporal_extent, RDF.type, DCT.PeriodOfTime))
        if start:
            self._add_date_triple(temporal_extent, SCHEMA.startDate, start)
        if end:
            self._add_date_triple(temporal_extent, SCHEMA.endDate, end)
        g.add((dataset_ref, DCT.temporal, temporal_extent))

    # Spatial
    spatial_uri = self._get_dataset_value(dataset_dict, 'spatial_uri')
    spatial_text = self._get_dataset_value(dataset_dict, 'spatial_text')
    spatial_geom = self._get_dataset_value(dataset_dict, 'spatial')

    if spatial_uri or spatial_text or spatial_geom:
        if spatial_uri:
            spatial_ref = URIRef(spatial_uri)
        else:
            spatial_ref = BNode()

        g.add((spatial_ref, RDF.type, DCT.Location))
        g.add((dataset_ref, DCT.spatial, spatial_ref))

        if spatial_text:
            g.add((spatial_ref, SKOS.prefLabel, Literal(spatial_text)))

        if spatial_geom:
            # GeoJSON
            g.add((spatial_ref, LOCN.geometry,
                   Literal(spatial_geom, datatype=GEOJSON_IMT)))
            # WKT, because GeoDCAT-AP says so
            try:
                g.add((spatial_ref, LOCN.geometry,
                       Literal(wkt.dumps(json.loads(spatial_geom),
                                         decimals=4),
                               datatype=GSP.wktLiteral)))
            except (TypeError, ValueError, InvalidGeoJSONException):
                pass

    # Resources
    for resource_dict in dataset_dict.get('resources', []):

        distribution = URIRef(resource_uri(resource_dict))

        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))

        # Simple values
        items = [
            ('name', DCT.title, None, Literal),
            ('description', DCT.description, None, Literal),
            ('status', ADMS.status, None, Literal),
            ('rights', DCT.rights, None, Literal),
            ('license', DCT.license, None, Literal),
        ]
        self._add_triples_from_dict(resource_dict, distribution, items)

        # Lists
        items = [
            ('documentation', FOAF.page, None, Literal),
            ('language', DCT.language, None, Literal),
            ('conforms_to', DCT.conformsTo, None, Literal),
        ]
        self._add_list_triples_from_dict(resource_dict, distribution, items)

        # Format
        if '/' in resource_dict.get('format', ''):
            g.add((distribution, DCAT.mediaType,
                   Literal(resource_dict['format'])))
        else:
            if resource_dict.get('format'):
                g.add((distribution, DCT['format'],
                       Literal(resource_dict['format'])))
            if resource_dict.get('mimetype'):
                g.add((distribution, DCAT.mediaType,
                       Literal(resource_dict['mimetype'])))

        # URL
        url = resource_dict.get('url')
        download_url = resource_dict.get('download_url')
        if download_url:
            g.add((distribution, DCAT.downloadURL, URIRef(download_url)))
        if (url and not download_url) or (url and url != download_url):
            g.add((distribution, DCAT.accessURL, URIRef(url)))

        # Dates
        items = [
            ('issued', DCT.issued, None, Literal),
            ('modified', DCT.modified, None, Literal),
        ]
        self._add_date_triples_from_dict(resource_dict, distribution, items)

        # Numbers
        if resource_dict.get('size'):
            try:
                g.add((distribution, DCAT.byteSize,
                       Literal(float(resource_dict['size']),
                               datatype=XSD.decimal)))
            except (ValueError, TypeError):
                g.add((distribution, DCAT.byteSize,
                       Literal(resource_dict['size'])))

        # Checksum
        if resource_dict.get('hash'):
            checksum = BNode()
            g.add((checksum, SPDX.checksumValue,
                   Literal(resource_dict['hash'], datatype=XSD.hexBinary)))

            if resource_dict.get('hash_algorithm'):
                if resource_dict['hash_algorithm'].startswith('http'):
                    g.add((checksum, SPDX.algorithm,
                           URIRef(resource_dict['hash_algorithm'])))
                else:
                    g.add((checksum, SPDX.algorithm,
                           Literal(resource_dict['hash_algorithm'])))

            g.add((distribution, SPDX.checksum, checksum))
def index_dump():
    for i in tqdm(index.keys()):
        sub_graph = ConjunctiveGraph()
        for s, p, o in KG.triples((URIRef(i), None, None)):
            sub_graph.add((s, p, o))
        index[i] = sub_graph
def to_skos(self, data, options=None):
    """
    Given some data, converts that data to an rdf skos format in xml.
    """
    # element = {}

    # get scheme: resource being requested. actionTypeCV, methodTypeCV, etc.
    scheme = Scheme.objects.get(name=options['scheme'])
    excluded_fields = [
        u'term', u'resource_uri', u'vocabulary_id', u'vocabulary_status'
    ]
    baseURI = 'http://vocabulary.westernstateswater.org/WaDE/WaDETerms/'

    graph = Graph()
    WaDE = Namespace(baseURI)
    dc = Namespace('http://purl.org/dc/elements/1.1/')
    graph.bind('WaDE', WaDE)
    graph.bind('skos', SKOS)
    graph.bind('dc', dc)

    # If requesting an entire CV.
    if isinstance(data, dict):
        # print data
        # Add a SKOS ConceptScheme class to the graph.
        graph.add((URIRef(scheme.uri), RDF['type'], SKOS['ConceptScheme']))
        graph.add((URIRef(scheme.uri), dc['title'], Literal(scheme.title)))
        graph.add((URIRef(scheme.uri), dc['creator'],
                   Literal(scheme.creator)))
        graph.add((URIRef(scheme.uri), dc['description'],
                   Literal(scheme.description)))

        # For each concept in the requested CV, create a SKOS Concept class.
        for concept in data[u'objects']:
            graph.add((URIRef(scheme.uri + '/' + concept.obj.term),
                       RDF['type'], SKOS['Concept']))
            graph.add((URIRef(scheme.uri + '/' + concept.obj.term),
                       SKOS['inScheme'], URIRef(scheme.uri)))

            # Add labels to each concept class.
            for x in concept.data:
                label = concept.data[x]
                if isinstance(label, type(None)):
                    label = ''
                if isinstance(label, int):
                    label = str(label)
                # Skip excluded field elements.
                if x in excluded_fields:
                    continue
                # Skip empty elements.
                elif label.rstrip('\r\n') == '':
                    continue
                else:
                    alias = str(
                        FieldRelation.objects.get(field_name=x).node.namespace)
                    if alias == 'WaDE':
                        graph.add((URIRef(scheme.uri + '/' + concept.obj.term),
                                   WaDE[FieldRelation.objects.get(
                                       field_name=x).node.name],
                                   Literal(label.rstrip('\r\n'))))
                    else:
                        graph.add((URIRef(scheme.uri + '/' + concept.obj.term),
                                   SKOS[FieldRelation.objects.get(
                                       field_name=x).node.name],
                                   Literal(label.rstrip('\r\n'))))

    # If requesting a single Concept
    elif isinstance(data, Bundle):
        # Add a SKOS ConceptScheme class to the graph.
        graph.add((URIRef(scheme.uri), RDF['type'], SKOS['ConceptScheme']))
        graph.add((URIRef(scheme.uri), dc['title'], Literal(scheme.title)))
        graph.add((URIRef(scheme.uri), dc['creator'],
                   Literal(scheme.creator)))
        graph.add((URIRef(scheme.uri), dc['description'],
                   Literal(scheme.description)))

        # Add a SKOS Concept class to the graph.
        graph.add((URIRef(scheme.uri + '/' + data.obj.term), RDF['type'],
                   SKOS['Concept']))
        graph.add((URIRef(scheme.uri + '/' + data.obj.term),
                   SKOS['inScheme'], URIRef(scheme.uri)))

        # Add labels within concept class.
        for field in data.data.keys():
            label = data.data[field]
            if isinstance(label, type(None)):
                label = ''
            if isinstance(label, int):
                label = str(label)
            if field in excluded_fields:
                continue
            elif label.rstrip('\r\n') == '':
                continue
            else:
                relation = FieldRelation.objects.get(field_name=field)
                alias = relation.node.namespace.alias
                if alias == u'WaDE':
                    graph.add((URIRef(scheme.uri + '/' + data.obj.term),
                               WaDE[FieldRelation.objects.get(
                                   field_name=field).node.name],
                               Literal(label.rstrip('\r\n'))))
                else:
                    graph.add((URIRef(scheme.uri + '/' + data.obj.term),
                               SKOS[FieldRelation.objects.get(
                                   field_name=field).node.name],
                               Literal(label.rstrip('\r\n'))))
    else:
        pass

    # Returning the graph serialized into 'xml' format rather than
    # 'pretty-xml' so that the Concept Scheme remains on its own level,
    # rather than inside one of the concepts.
    return graph.serialize(format='xml')
def create_ontology_graph():
    # Construct ISA trees from triples
    graph = rdflib.Graph()
    graph.parse(os.path.join(ontology_dir, 'inferred_vrd'))
    ontology_labels_nodes = {}
    ontology_labels_equivalent_tmp = set()
    ontology_labels_equivalent = set()

    for s, p, o in graph.triples(
            (None,
             URIRef("http://www.w3.org/2002/07/owl#equivalentProperty"),
             None)):
        # print s, " -> ", p, " -> ", o
        if "http://" in s and "http://" in o:
            subj_label = str(s.split("#")[1])
            obj_label = str(o.split("#")[1])
            ontology_labels_equivalent.add(subj_label)
            ontology_labels_equivalent.add(obj_label)
            if ontology_labels_nodes:
                new_node = True
                # iterate over a list copy since entries are deleted below
                # (mutating a dict during keys() iteration fails on Python 3)
                for node_label in list(ontology_labels_nodes.keys()):
                    if subj_label in node_label.split(","):
                        ontology_labels_equivalent_tmp.remove(node_label)
                        ontology_labels_nodes[node_label].name = (
                            ontology_labels_nodes[node_label].name
                            + "," + obj_label)
                        ontology_labels_equivalent_tmp.add(
                            ontology_labels_nodes[node_label].name)
                        ontology_labels_nodes[
                            ontology_labels_nodes[node_label].name] = \
                            ontology_labels_nodes[node_label]
                        del ontology_labels_nodes[node_label]
                        new_node = False
                    elif obj_label in node_label.split(","):
                        ontology_labels_equivalent_tmp.remove(node_label)
                        ontology_labels_nodes[node_label].name = (
                            ontology_labels_nodes[node_label].name
                            + "," + subj_label)
                        ontology_labels_equivalent_tmp.add(
                            ontology_labels_nodes[node_label].name)
                        ontology_labels_nodes[
                            ontology_labels_nodes[node_label].name] = \
                            ontology_labels_nodes[node_label]
                        del ontology_labels_nodes[node_label]
                        new_node = False
                if new_node:
                    ontology_labels_nodes[subj_label + "," + obj_label] = \
                        Node(subj_label + "," + obj_label)
                    ontology_labels_equivalent_tmp.add(
                        subj_label + "," + obj_label)
            else:
                ontology_labels_nodes[subj_label + "," + obj_label] = \
                    Node(subj_label + "," + obj_label)
                ontology_labels_equivalent_tmp.add(
                    subj_label + "," + obj_label)

    for s, p, o in graph.triples(
            (None,
             URIRef("http://www.w3.org/2000/01/rdf-schema#subPropertyOf"),
             None)):
        # print s, " -> ", p, " -> ", o
        if "http://" in s and "http://" in o:
            subj_label = str(s.split("#")[1])
            obj_label = str(o.split("#")[1])
            subj_node_name = ""
            obj_node_name = ""
            for node_label in ontology_labels_equivalent_tmp:
                if subj_label in node_label.split(","):
                    subj_node_name = node_label
                    continue
                if obj_label in node_label.split(","):
                    obj_node_name = node_label
                    continue
            if subj_node_name and obj_node_name:
                ontology_labels_nodes[subj_node_name].parent = \
                    ontology_labels_nodes[obj_node_name]
            if (subj_label not in ontology_labels_equivalent
                    and obj_label not in ontology_labels_equivalent):
                if subj_label not in ontology_labels_nodes:
                    ontology_labels_nodes[subj_label] = Node(subj_label)
                if obj_label not in ontology_labels_nodes:
                    ontology_labels_nodes[obj_label] = Node(obj_label)
                ontology_labels_nodes[subj_label].parent = \
                    ontology_labels_nodes[obj_label]
            if (subj_label in ontology_labels_equivalent
                    and obj_label not in ontology_labels_equivalent):
                if obj_label not in ontology_labels_nodes:
                    ontology_labels_nodes[obj_label] = Node(obj_label)
                # retrieve subj node
                for node_label in ontology_labels_nodes.keys():
                    if subj_label in node_label.split(","):
                        ontology_labels_nodes[node_label].parent = \
                            ontology_labels_nodes[obj_label]
            if (subj_label not in ontology_labels_equivalent
                    and obj_label in ontology_labels_equivalent):
                if subj_label not in ontology_labels_nodes:
                    ontology_labels_nodes[subj_label] = Node(subj_label)
                # retrieve obj node
                for node_label in ontology_labels_nodes.keys():
                    if obj_label in node_label.split(","):
                        ontology_labels_nodes[subj_label].parent = \
                            ontology_labels_nodes[node_label]

    tree_list = []
    for node_label in ontology_labels_nodes:
        if ontology_labels_nodes[node_label].is_root:
            tree_list.append(ontology_labels_nodes[node_label])
    return tree_list, ontology_labels_equivalent_tmp
def create_vocab_statusfile(userid, vocabprefix, vocabfile, baseuri,
                            update=False, using_uuid=False, refvocab=False):
    vocab_uri = URIRef("http://vocab.ox.ac.uk/%s" % vocabprefix)
    vocabdir = os.path.join(ag.vocabulariesdir, str(vocabprefix))
    vocabstatusfile = os.path.join(vocabdir, "status.rdf")
    vocab_file_name = os.path.basename(vocabfile)
    vocabfile_uri = URIRef("http://vocab.ox.ac.uk/%s/%s"
                           % (vocabprefix, vocab_file_name))

    # Add vocab in mediator file
    graph = Graph()
    mediatorfile = os.path.join(ag.mediatorsdir, '%s.rdf' % userid)
    graph.parse(mediatorfile)
    user_uri = []
    for uri in graph.subjects(namespaces['foaf']['account'], Literal(userid)):
        if not uri in user_uri:
            user_uri.append(uri)
    user_uri = URIRef(user_uri[0])
    graph.add((vocab_uri, namespaces['dcterms']['mediator'],
               URIRef(user_uri)))
    rdf_str = graph.serialize()
    f = codecs.open(mediatorfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()

    # Add vocab in vocab status file
    graph = Graph()
    if update and os.path.isfile(vocabstatusfile):
        graph.parse(vocabstatusfile)
    for prefix, url in namespaces.iteritems():
        graph.bind(prefix, URIRef(url))
    graph.add((vocab_uri, namespaces['dcterms']['mediator'],
               URIRef(user_uri)))
    graph.add((user_uri, namespaces['foaf']['account'], Literal(userid)))
    graph.add((vocab_uri, namespaces['dcterms']['hasFormat'],
               URIRef(vocabfile_uri)))
    graph.add((vocab_uri, namespaces['vann']['preferredNamespaceUri'],
               URIRef(baseuri)))
    graph.add((vocab_uri, namespaces['vann']['preferredNamespacePrefix'],
               Literal(vocabprefix)))
    graph.add((vocab_uri, namespaces['skos']['editorialNote'],
               Literal(vocab_editorial_descriptions[0])))
    if refvocab:
        add_ref_vocab(vocabprefix, refvocab)
        graph.add((vocab_uri, namespaces['dcterms']['isVersionOf'],
                   URIRef(refvocab)))

    # get mimetype of file
    if os.path.isfile(vocabfile):
        graph.add((vocabfile_uri, namespaces['nfo']['fileUrl'],
                   Literal('file://%s' % vocabfile)))
        graph.add((vocabfile_uri, namespaces['nfo']['fileName'],
                   Literal(vocab_file_name)))
        mt = None
        if check_rdf(vocabfile):
            mt = 'application/rdf+xml'
            graph.add((vocabfile_uri, namespaces['dcterms']['conformsTo'],
                       Literal(mt)))
            graph.add((vocabfile_uri, namespaces['skos']['editorialNote'],
                       Literal(vocab_editorial_descriptions[3])))
        elif check_n3(vocabfile):
            mt = 'text/rdf+nt'
            root, ext = os.path.splitext(vocabfile)
            if ext == '.rdf':
                rdffile = "%s_2.rdf" % root
            else:
                rdffile = "%s.rdf" % root
            converttordf = convert_n3_rdf(vocabfile, rdffile)
            if converttordf and os.path.isfile(rdffile):
                rdf_file_name = os.path.basename(rdffile)
                rdffile_uri = URIRef("http://vocab.ox.ac.uk/%s/%s"
                                     % (vocabprefix, rdf_file_name))
                graph.add((vocab_uri, namespaces['dcterms']['hasFormat'],
                           URIRef(rdffile_uri)))
                graph.add((rdffile_uri, namespaces['nfo']['fileUrl'],
                           Literal('file://%s' % rdffile)))
                graph.add((rdffile_uri, namespaces['nfo']['fileName'],
                           Literal(rdf_file_name)))
                graph.add((rdffile_uri, namespaces['dcterms']['conformsTo'],
                           Literal('application/rdf+xml')))
                graph.add((rdffile_uri, namespaces['skos']['editorialNote'],
                           Literal(vocab_editorial_descriptions[3])))
                graph.add((rdffile_uri, namespaces['dcterms']['format'],
                           Literal('application/rdf+xml')))
        else:
            mt1 = mimetypes.guess_type(vocabfile)
            mt2 = get_file_mimetype(vocabfile)
            if mt1[0]:
                mt = mt1[0]
            else:
                mt = mt2
            if str(mt) == 'application/rdf+xml':
                graph.add((vocabfile_uri, namespaces['skos']['editorialNote'],
                           Literal(vocab_editorial_descriptions[2])))
            else:
                graph.add((vocab_uri, namespaces['skos']['editorialNote'],
                           Literal(vocab_editorial_descriptions[1])))
        if mt:
            graph.add((vocabfile_uri, namespaces['dcterms']['format'],
                       Literal(mt)))

    rdf_str = graph.serialize()
    f = codecs.open(vocabstatusfile, 'w', 'utf-8')
    f.write(rdf_str)
    f.close()
    return True
import csv
import sys

from rdflib import Graph, URIRef

rdffile = sys.argv[1]
datafile = sys.argv[2]
# output name (computed here but not written in this fragment)
tblfile = datafile.rstrip("csv").rstrip(".") + "out" + ".csv"

g = Graph()
g.load(rdffile)

for ns in g.namespaces():
    if ns[0] == "ex":
        pfx = ns[1]

syn = URIRef(pfx + "synonym")
isa = URIRef(pfx + "is_a")

with open(datafile, 'r') as f:
    reader = csv.reader(f)
    data = list(reader)

for item in data:
    pi = pfx + item[0]
    ur = URIRef(pi)
    alt = g.subjects(None, ur)
    for sn in alt:
        sub = g.subjects(syn, sn)
        for s in sub:
            # slice off the namespace prefix to recover the local name
            # (str.lstrip, used in the original, strips a *character set*,
            # not a prefix, and can eat leading characters of the name)
            item.append(s[len(pfx):])
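# Usage sketch (file names are illustrative; the graph must declare an "ex"
# namespace prefix for pfx to be bound):
#
#   python synonyms.py ontology.rdf data.csv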
import csv
import re
import time

import spotlight
from rdflib import Graph, Literal, Namespace, RDF, RDFS, BNode, URIRef
from rdflib.namespace import FOAF, OWL, XSD

graph = Graph()
graph.parse("rdfSchema.ttl", format="n3")

UNIV = Namespace("http://example.org/schema#")

g = Graph()
u = Namespace("http://example.org/university/")
g.add((u.Concordia, RDF.type, UNIV.University))
g.add((u.Concordia, UNIV.hasName, Literal("Concordia University")))
g.add((u.Concordia, UNIV.hasDBPediaLink,
       URIRef("http://dbpedia.org/page/Concordia_University")))

with open('data.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if row and len(row[0]) > 0:
            try:
                subject = " ".join(row[0].split()[0].split())
                no = " ".join(row[0].split()[1].split())
                # no = re.sub("\s\s+", " ", row[0].split()[1])
                name = " ".join(row[1].split())
                description = " ".join(row[2].split())
            except:
                no = subject[4:]
                subject = subject[0:4]
            x = URIRef(f"http://example.org/course/{subject}+{no}")
"disambiguated-organization", {}) or {}).get("disambiguated-organization-identifier")) log.debug(u'Affiliation: {}, ID: {}'.format( organization, ringgold)) title = affiliation["role-title"] or {} log.debug(u'Position title: {}'.format(title)) start_year = (affiliation["start-date"] or {}).get("year", {}).get("value") end_year = (affiliation["end-date"] or {}).get("year", {}).get("value") log.debug(u'Date range: {} - {}'.format(start_year, end_year)) if affiliation["type"] == "EDUCATION": relatedBy = g_profile.objects(URIRef(uri), OBO.RO_0000056) else: relatedBy = g_profile.objects(URIRef(uri), VIVO.relatedBy) position_exists = False for obj in relatedBy: log.debug(u'{} vs. {}'.format(put_code, obj)) if put_code in obj: log.debug(u'The position is already in VIVO, skipping.') position_exists = True break if not position_exists: # Look up URI based on Ringgold ID org_uri = (g_orgs.value(predicate=VLOCAL.ringgoldID, object=Literal(ringgold)))
def addGameTriples(graph, ontURI):
    with open('../Data/games.csv', 'r') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter=',')
        flag = True
        for row in csv_reader:
            # skip the header row
            if flag:
                flag = False
                continue

            # Game
            title = formatURI(str(row[0]))
            graph.add((URIRef(ontURI + title), RDF.type,
                       URIRef(ontURI + "#Game")))
            # Title
            graph.add((URIRef(ontURI + title),
                       URIRef(ontURI + "#game_name"), Literal(str(row[0]))))
            # Platform
            if row[1]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#platform"),
                           Literal(str(row[1]))))
            # Year
            if row[3]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#game_year"), Literal(row[3])))
            # Genre
            if row[4]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#game_genre"),
                           Literal(str(row[4]))))
            # ESRB
            if row[6]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#esrb"), Literal(str(row[6]))))
            # Sales
            if row[7]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#na_sales"),
                           Literal(float(row[7]))))
            if row[8]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#eu_sales"),
                           Literal(float(row[8]))))
            if row[9]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#jp_sales"),
                           Literal(float(row[9]))))
            if row[10]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#glob_sales"),
                           Literal(float(row[10]))))
            # Rating
            if row[11]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#critic_score"),
                           Literal(float(row[11]) % 10)))
            if row[12]:
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#user_score"),
                           Literal(float(row[12]))))
            # Publisher
            if row[2]:
                pub = formatURI(str(row[2]))
                graph.add((URIRef(ontURI + pub), RDF.type,
                           URIRef(ontURI + "#GamePublisher")))
                graph.add((URIRef(ontURI + pub),
                           URIRef(ontURI + "#org_name"),
                           Literal(str(row[2]))))
                graph.add((URIRef(ontURI + title),
                           URIRef(ontURI + "#publishedBy"),
                           URIRef(ontURI + pub)))
            # Developer
            if row[5]:
                for elem in row[5].split(", "):
                    dev = formatURI(elem)
                    graph.add((URIRef(ontURI + dev), RDF.type,
                               URIRef(ontURI + "#GameDeveloper")))
                    graph.add((URIRef(ontURI + dev),
                               URIRef(ontURI + "#org_name"), Literal(elem)))
                    graph.add((URIRef(ontURI + title),
                               URIRef(ontURI + "#developedBy"),
                               URIRef(ontURI + dev)))
def test_distribution_dct_format_other_uri(self):
    resources = self._build_and_parse_format_mediatype_graph(
        format_item=URIRef("https://example.com/my/format"))
    eq_(u'https://example.com/my/format', resources[0].get('format'))
    eq_(None, resources[0].get('mimetype'))
#! /usr/bin/python
# makeTriples.py: demonstrate the creation of an RDFLib TripleStore
#******************************************************************
# Exercise to try out methods of the Graph class                 **
#  - objects, predicates, subjects                               **
#******************************************************************
from rdflib import Namespace, BNode, Literal, URIRef
from rdflib import Graph, ConjunctiveGraph
from rdflib.store.IOMemory import IOMemory

ns = Namespace("http://love.com#")

mary = URIRef("http://love.com/lovers/mary#")
john = URIRef("http://love.com/lovers/john#")

cmary = URIRef("http://love.com/lovers/mary#")
cjohn = URIRef("http://love.com/lovers/john#")

store = IOMemory()

g = ConjunctiveGraph(store=store)
g.bind("love", ns)

# named graph for Mary, identified by cmary
gmary = Graph(store=store, identifier=cmary)
gmary.add((mary, ns['hasName'], Literal("Mary")))
gmary.add((mary, ns['loves'], john))

# named graph for John, identified by cjohn
gjohn = Graph(store=store, identifier=cjohn)
gjohn.add((john, ns['hasName'], Literal("John")))
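# One way to inspect the two contexts separately (a sketch): TriG
# serialization keeps named-graph boundaries visible, unlike plain Turtle.
#
#   print(g.serialize(format='trig'))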
# -*- coding: utf-8 -*-

# Copyright (C) 2019 David Arroyo Menéndez

# Author: David Arroyo Menéndez <*****@*****.**>
# Maintainer: David Arroyo Menéndez <*****@*****.**>

# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with urirefs; see the file LICENSE. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA.

# URIRef is the class to manage uris in rdflib
from rdflib import URIRef

aref = URIRef('')
print(aref)

aref = URIRef('http://example.com')
print(aref)
print(aref.n3())
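# Expected output: the first print emits an empty line, the second prints the
# bare URI, and n3() wraps it in angle brackets for N3/Turtle syntax:
#
#   http://example.com
#   <http://example.com>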
from rdflib import Graph, Literal, Namespace, URIRef  # core classes used below
from rdflib.namespace import RDF, RDFS, FOAF, OWL, XSD, DC, DCTERMS
import json
from linking import link
import os

path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
data = json.load(open(path + "/DatiPerElaborazione/Bar.geojson", "r"))

g = Graph()
cmo = Namespace("http://www.comune.milano.it/ontology/")
schema = Namespace("https://schema.org/")
g.bind("cmo", cmo)
g.bind("schema", schema)

for element in data:
    uri = element["URI"]
    g.add([URIRef(uri), RDF.type, cmo.Bar])
    g.add([URIRef(uri), RDFS.label, Literal(element["nome"])])
    g.add([URIRef(uri), cmo.localBusinessWebsite,
           Literal(element["website"])])
    g.add([URIRef(uri), cmo.localBusinessPostalCode,
           Literal(element["cap"])])
    g.add([URIRef(uri), schema.address, Literal(element["indirizzo"])])
    g.add([
        URIRef(uri), cmo.latitude,
        Literal(element["lat"], datatype=XSD.float)
    ])
    g.add([
        URIRef(uri), cmo.longitude,
        Literal(element["long"], datatype=XSD.float)
    ])

g.serialize(destination=path + '/Turtles/bar.ttl', format='turtle')