def convert(self, namespace):
    '''Converts the JSON and XML attributes into a RDFLib graph.

    Parameters
    @namespace: A RDFLib Namespace.

    Returns
    @graph: The RDFLib Graph of the provenance, or None when
            self.defaults['id'] is None or an empty string.'''
    # Imported locally so the block does not depend on a file-level import.
    import datetime

    document_id = self.defaults['id']
    if document_id is None or document_id == "":
        return None

    converter = IatiElements.ProvenanceElements(self.defaults, namespace)

    for entry in self.json:
        # Derive the converter method name from the JSON key. Computed outside
        # the try block so the error message below can always refer to it.
        funcname = entry.replace("-", "_")
        funcname = funcname.replace("id", "func_id")
        funcname = funcname.replace("version", "func_version")

        try:
            update = getattr(converter, funcname)
            update(self.json[entry])
        except AttributeError as e:
            # Keys without a matching converter method are reported, not fatal.
            print("Error in " + funcname + ", " + document_id + ": " + str(e))

    # Add prov model
    # NOTE(review): the original assignment had no right-hand side. A wall-clock
    # timestamp is assumed here; confirm the type AddProvenance.addProv expects.
    start_time = datetime.datetime.now()

    # Script reference per document type; unknown types get no script.
    scripts = {
        'activity': "conversion%20scripts/",
        'organisation': "conversion%20scripts/",
    }
    script = scripts.get(self.defaults['type'])

    provenance = converter.get_result()
    provenance = AddProvenance.addProv(namespace,
                                       provenance,
                                       self.defaults['type'],
                                       self.defaults['document_name'],
                                       start_time,
                                       self.json['download_url'],
                                       self.ids,
                                       script)

    return provenance
def main():
    '''Retrieves all locations from a local file and matches the labels,
    country labels and coordinates (if available) of a location to Geonames.

    Writes the owl:sameAs mappings and a provenance graph as Turtle files
    into the configured folder. Exits early (after saving the mappings found
    so far) when find_location returns None.'''
    # Imported locally so the block does not depend on a file-level import.
    import datetime

    # Settings
    turtle_folder = "/media/Acer/School/IATI-data/mappings/Geonames/"
    locations_file = "/media/Acer/School/IATI-data/mappings/"
    Iati = Namespace("")

    # NOTE(review): the original read `start_time = found = 0`, tying the
    # provenance start time to a counter. A wall-clock timestamp is assumed;
    # confirm the type AddProvenance.addProv expects.
    start_time = datetime.datetime.now()
    found = 0
    not_found = 0

    def write_turtle(graph, filename):
        '''Serialises graph as Turtle and stores it in the turtle folder.'''
        # Serialise before opening so a failing serialisation does not
        # truncate an existing file.
        turtle = graph.serialize(format='turtle')
        with open(turtle_folder + filename, 'w') as turtle_file:
            turtle_file.write(turtle)

    # Read location file
    print("Retrieving locations from file...")
    all_locations = retrieve_locations(locations_file)

    # Classify locations
    print("Classifying locations...")
    all_locations = classify_locations(all_locations)

    # Initialize graph
    locations_graph = Graph()
    locations_graph.bind('iati', "")
    locations_graph.bind('gn', "")
    locations_graph.bind('owl', "")

    # Retrieve all general country information
    country_response = connect("")

    # The check has to happen before parsing: ET.fromstring never returns
    # None, so testing its result could not detect a failed download.
    # NOTE(review): assumes connect() signals failure by returning None.
    if country_response is None:
        print("Could not retrieve country information, exiting...")
        sys.exit(0)

    country_info = ET.fromstring(country_response)

    # Retrieve location match from Geonames
    for location in all_locations:
        if location['classification'] == 0:
            continue

        print("Looking for " + location['link'] + "...")
        match = find_location(location, country_info)

        if match is None:
            # No more credits: save what was found so far and stop.
            print("Credits are gone..")
            write_turtle(locations_graph, 'geonames-locations.ttl')
            sys.exit(0)

        if match == 0:
            # Did not find any results
            not_found += 1
        else:
            locations_graph.add((URIRef(location['link']),
                                 OWL.sameAs,
                                 URIRef(match)))
            found += 1

    # Write to file
    print("Done, writing " + str(found) + " mappings to file...")
    write_turtle(locations_graph, 'geonames-locations.ttl')

    print("Did not find " + str(not_found) + " mappings...")

    # Add provenance
    provenance = Graph()
    provenance = AddProvenance.addProv(Iati,
                                       provenance,
                                       'Geonames',
                                       start_time,
                                       "",
                                       ['Geonames'],
                                       "mapping%20scripts/")
    write_turtle(provenance, 'provenance-geonames-locations.ttl')

    print("")
    print("Done, added provenance.")
countries_turtle_ecb = countries_ecb.serialize(format='turtle') with open(turtle_folder_ecb + 'ecb-countries.ttl', 'w') as turtle_file_ecb: turtle_file_ecb.write(countries_turtle_ecb) countries_turtle_fao = countries_fao.serialize(format='turtle') with open(turtle_folder_fao + 'fao-countries.ttl', 'w') as turtle_file_fao: turtle_file_fao.write(countries_turtle_fao) # Add provenance OECD provenance_oecd = Graph() provenance_oecd = AddProvenance.addProv(Iati, provenance_oecd, 'OECD', start_time, "", ['OECD'], "mapping%20scripts/") provenance_turtle_oecd = provenance_oecd.serialize(format='turtle') with open(turtle_folder_oecd + 'provenance-oecd.ttl', 'w') as turtle_file_oecd: turtle_file_oecd.write(provenance_turtle_oecd) # Add provenance BFS provenance_bfs = Graph() provenance_bfs = AddProvenance.addProv(Iati, provenance_bfs, 'BFS', start_time, "", ['BFS'], "mapping%20scripts/") provenance_turtle_bfs = provenance_bfs.serialize(format='turtle')
.replace(">", "") ) else: factbook_item = line_list[2].replace("cia:", "") factbook_url = "" + factbook_item sources.append(factbook_url) turtle_data = urllib2.urlopen(factbook_url).read() print "Retrieved data from " + factbook_url + ", writing to file..." with open(factbook_folder + factbook_item + ".ttl", "w") as turtle_f: turtle_f.write(turtle_data) # Add provenance print "Adding provenance..." provenance = Graph() provenance = AddProvenance.addProv( Iati, provenance, "Factbook", start_time, sources, ["Factbook"], "gather%20data%20scripts/" ) provenance_turtle = provenance.serialize(format="turtle") with open(factbook_folder + "provenance-factbook.ttl", "w") as turtle_file_prov: turtle_file_prov.write(provenance_turtle) print "Done!"
def main():
    '''Converts Codelist XMLs to Turtle files and stores these to local folder.

    For every XML file in the XML folder a sub folder is created holding the
    codelist graph, its provenance graph and the ".graph" files used by the
    Virtuoso bulk loader. Documents that fail to convert are reported and
    produce no output files.'''
    # Imported locally so the block does not depend on a file-level import.
    import datetime

    # Settings
    xml_folder = "/home/iati/xml/codelists/"
    turtle_folder = "/home/iati/codelist/"
    Iati = Namespace("")

    if not os.path.isdir(turtle_folder):
        os.makedirs(turtle_folder)

    document_count = 1

    # Retrieve XML files from the XML folder
    for document in glob.glob(xml_folder + '*.xml'):

        # File name without directory and extension.
        doc_id = os.path.splitext(os.path.basename(document))[0]
        doc_folder = turtle_folder + doc_id + '/'

        if not os.path.isdir(doc_folder):
            os.makedirs(doc_folder)

        provenance = Graph()
        provenance.bind('iati', Iati)

        root = ET.parse(document).getroot()
        version = AttributeHelper.attribute_key(root, 'version')

        try:
            # Convert each codelist in XML file to RDFLib Graph
            converter = IatiConverter.ConvertCodelist(root)
            graph, codelist_id, last_updated = converter.convert(Iati)
        except TypeError as e:
            print("Error in " + document + ":" + str(e))
            graph = None

        # Everything below needs the converted codelist. Guarding it avoids
        # using an unbound (or stale, from a previous document) id after a
        # failed conversion.
        if graph is not None:
            # '/' is escaped so the id cannot introduce a sub directory.
            file_stem = codelist_id.replace('/', '%2F')
            graph_uri = URIRef(Iati + 'graph/codelist/' + str(codelist_id))

            # Write codelist to Turtle and store in local folder
            graph_turtle = graph.serialize(format='turtle')
            with open(doc_folder + file_stem + '.ttl', 'w') as turtle_file:
                turtle_file.write(graph_turtle)

            # The following outputs enable the Virtuoso Bulk loader process
            # to put files into the right graphs.
            with open(doc_folder + file_stem + '.ttl.graph', 'w') as graph_file:
                graph_file.write(str(Iati) + 'graph/codelist/' + str(codelist_id))

            # Add provenance of last-updated, version and source document
            provenance.add((graph_uri,
                            URIRef(Iati + 'last-updated'),
                            Literal(last_updated)))
            provenance.add((graph_uri,
                            URIRef(Iati + 'version'),
                            Literal(version)))
            provenance.add((graph_uri,
                            URIRef(Iati + 'source-document-id'),
                            Literal(str(codelist_id))))
            provenance.add((graph_uri,
                            URIRef(Iati + 'source-document-download-url'),
                            URIRef('' + str(codelist_id) + '.xml')))

            # Add prov model
            # NOTE(review): the original read `start_time = source_xml = ...`,
            # making the start time the source URL string. A wall-clock
            # timestamp is assumed; confirm what AddProvenance.addProv expects.
            start_time = datetime.datetime.now()
            source_xml = str('' + str(codelist_id) + '.xml')
            entities = [str(codelist_id)]
            script = "conversion%20scripts/"

            provenance = AddProvenance.addProv(Iati,
                                               provenance,
                                               'codelist',
                                               doc_id,
                                               start_time,
                                               source_xml,
                                               entities,
                                               script)

            # Write provenance graph to Turtle and store in local folder.
            # Uses the escaped id, consistent with the codelist file above.
            provenance_turtle = provenance.serialize(format='turtle')
            with open(doc_folder + 'provenance-' + file_stem + '.ttl', 'w') as turtle_file:
                turtle_file.write(provenance_turtle)

            # The following outputs enable the Virtuoso Bulk loader process
            # to put files into the right graphs.
            with open(doc_folder + 'provenance-' + file_stem + '.ttl.graph', 'w') as graph_file:
                graph_file.write(str(Iati) + 'graph/provenance/')

        print("Progress: Document #" + str(document_count))
        document_count += 1

    print("Done!")
def main():
    '''Retrieves all locations from a local file and matches the labels,
    country labels and coordinates (if available) of a location to Geonames.

    Writes the owl:sameAs mappings and a provenance graph as Turtle files
    into the configured folder. Exits early (after saving the mappings found
    so far) when find_location returns None.'''
    # Imported locally so the block does not depend on a file-level import.
    import datetime

    # Settings
    turtle_folder = "/home/iati/mappings/Geonames/"
    locations_file = "/home/iati/mappings/"
    Iati = Namespace("")

    # NOTE(review): the original read `start_time = found = 0`, tying the
    # provenance start time to a counter. A wall-clock timestamp is assumed;
    # confirm the type AddProvenance.addProv expects.
    start_time = datetime.datetime.now()
    found = 0
    not_found = 0

    def write_turtle(graph, filename):
        '''Serialises graph as Turtle and stores it in the turtle folder.'''
        # Serialise before opening so a failing serialisation does not
        # truncate an existing file.
        turtle = graph.serialize(format='turtle')
        with open(turtle_folder + filename, 'w') as turtle_file:
            turtle_file.write(turtle)

    # Read location file
    print("Retrieving locations from file...")
    all_locations = retrieve_locations(locations_file)

    # Classify locations
    print("Classifying locations...")
    all_locations = classify_locations(all_locations)

    # Initialize graph
    locations_graph = Graph()
    locations_graph.bind('iati', "")
    locations_graph.bind('gn', "")
    locations_graph.bind('owl', "")

    # Retrieve all general country information
    country_response = connect("")

    # The check has to happen before parsing: ET.fromstring never returns
    # None, so testing its result could not detect a failed download.
    # NOTE(review): assumes connect() signals failure by returning None.
    if country_response is None:
        print("Could not retrieve country information, exiting...")
        sys.exit(0)

    country_info = ET.fromstring(country_response)

    # Retrieve location match from Geonames
    for location in all_locations:
        if location['classification'] == 0:
            continue

        print("Looking for " + location['link'] + "...")
        match = find_location(location, country_info)

        if match is None:
            # No more credits: save what was found so far and stop.
            print("Credits are gone..")
            write_turtle(locations_graph, 'geonames-locations.ttl')
            sys.exit(0)

        if match == 0:
            # Did not find any results
            not_found += 1
        else:
            locations_graph.add((URIRef(location['link']),
                                 OWL.sameAs,
                                 URIRef(match)))
            found += 1

    # Write to file
    print("Done, writing " + str(found) + " mappings to file...")
    write_turtle(locations_graph, 'geonames-locations.ttl')

    print("Did not find " + str(not_found) + " mappings...")

    # Add provenance
    provenance = Graph()
    provenance = AddProvenance.addProv(Iati,
                                       provenance,
                                       'Geonames',
                                       start_time,
                                       "",
                                       ['Geonames'],
                                       "mapping%20scripts/")
    write_turtle(provenance, 'provenance-geonames-locations.ttl')

    print("")
    print("Done, added provenance.")
print "Adding to file..." print turtle = data.serialize(format='turtle') with open(worldbank_folder + 'worldbank-' + indicator + '.ttl', 'w') as turtle_file: turtle_file.write(turtle) print "Adding indicator data to file..." print turtle = indicator_data.serialize(format='turtle') with open(worldbank_folder + 'worldbank-indicators.ttl', 'w') as turtle_file: turtle_file.write(turtle) # Add provenance provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'WorldBank', start_time, sources, ['WorldBank'], "gather%20data%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(worldbank_folder + 'provenance-worldbank.ttl', 'w') as turtle_file_prov: turtle_file_prov.write(provenance_turtle) print "Done! " + str(errors) + " errors."
total_count) + " of " + str(total_from_file) + ")..." with open( geonames_folder + str(geonames_item_id) + "-about.rdf", 'w') as turtle_f: turtle_f.write(rdf_about_data) print "Retrieved data from " + geonames_contains_url + ", writing to file (" + str( total_count) + " of " + str(total_from_file) + ")..." with open( geonames_folder + str(geonames_item_id) + "-contains.rdf", 'w') as turtle_f: turtle_f.write(rdf_contains_data) # Add provenance print "Adding provenance..." provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'Geonames', start_time, source_rdfs, ['Geonames'], "gather%20data%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(geonames_folder + 'provenance-geonames.ttl', 'w') as turtle_file_prov: turtle_file_prov.write(provenance_turtle) print "Done!"
not_found.append((name, code)) not_found_countries += 1 # Adding mappings to file print print "Adding to file..." countries_turtle = countries.serialize(format='turtle') with open(turtle_folder + 'transparency-countries.ttl', 'w') as turtle_file: turtle_file.write(countries_turtle) # Add provenance provenance = Graph() provenance = AddProvenance.addProv( Iati, provenance, 'Transparency', start_time, "", ['Transparency'], "mapping%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(turtle_folder + 'provenance-transparency.ttl', 'w') as turtle_file: turtle_file.write(provenance_turtle) print print "Added provenance..." print print "Total: " + str(total_countries) print "Done, found: " + str(found_countries) + ", not found: " + str( not_found_countries) + "."
factbook_url = "" + factbook_item sources.append(factbook_url) turtle_data = urllib2.urlopen(factbook_url).read() print "Retrieved data from " + factbook_url + ", writing to file..." with open(factbook_folder + factbook_item + ".ttl", 'w') as turtle_f: turtle_f.write(turtle_data) # Add provenance print "Adding provenance..." provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'Factbook', start_time, sources, ['Factbook'], "gather%20data%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(factbook_folder + 'provenance-factbook.ttl', 'w') as turtle_file_prov: turtle_file_prov.write(provenance_turtle) print "Done!"
else: not_found_again.append((not_found_country[0], not_found_country[1])) print "Adding to file..." countries_turtle = countries.serialize(format='turtle') with open(turtle_folder + 'geonames-countries.ttl', 'w') as turtle_file: turtle_file.write(countries_turtle) # Add provenance provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'Geonames', start_time, "", ['Geonames'], "mapping%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(turtle_folder + 'provenance-geonames-countries.ttl', 'w') as turtle_file: turtle_file.write(provenance_turtle) print print "Total countries: " + str(total) print "Done, found: " + str(found_count) + ", not found: " + str(not_found_count) + "." print print "Could not find:"
with open(turtle_folder + 'schema-foaf.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle_foaf) with open(turtle_folder + 'schema-org.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle_org) with open(turtle_folder + 'schema-geo.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle_geo) with open(turtle_folder + 'schema-cc.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle_cc) print "Adding provenance..." # Add provenance provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'Schema', start_time, "", ['Schema'], "mapping%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(turtle_folder + 'provenance-schema.ttl', 'w') as turtle_file: turtle_file.write(provenance_turtle) print "Done!"
countries_turtle = countries.serialize(format='turtle') with open(turtle_folder + 'factbook-countries.ttl', 'w') as turtle_file: turtle_file.write(countries_turtle) countries_turtle_db = countries_db.serialize(format='turtle') with open(turtle_folder_db + 'dbpedia-countries-via-factbook.ttl', 'w') as turtle_file_db: turtle_file_db.write(countries_turtle_db) # Add provenance provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'Factbook', start_time, "", ['Factbook'], "mapping%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(turtle_folder + 'provenance-factbook-countries.ttl', 'w') as turtle_file: turtle_file.write(provenance_turtle) # Add provenance provenance_db = Graph() provenance_db = AddProvenance.addProv( Iati, provenance_db, 'DBPedia', start_time, "", ['DBPedia'], "mapping%20scripts/")
with open(worldbank_folder + 'worldbank-'+ indicator +'.ttl', 'w') as turtle_file: turtle_file.write(turtle) print "Adding indicator data to file..." print turtle = indicator_data.serialize(format='turtle') with open(worldbank_folder + 'worldbank-indicators.ttl', 'w') as turtle_file: turtle_file.write(turtle) # Add provenance provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'WorldBank', start_time, sources, ['WorldBank'], "gather%20data%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(worldbank_folder + 'provenance-worldbank.ttl', 'w') as turtle_file_prov: turtle_file_prov.write(provenance_turtle) print "Done! " + str(errors) + " errors."
# Store the WorldBank and Eurostat country mappings as Turtle files and
# build a provenance graph for each of the two sources.
print("Adding to file...")

countries_turtle = countries.serialize(format='turtle')
with open(turtle_folder + 'worldbank-countries.ttl', 'w') as turtle_file:
    turtle_file.write(countries_turtle)

countries_turtle_euro = countries_euro.serialize(format='turtle')
with open(turtle_folder_euro + 'eurostat-countries.ttl', 'w') as turtle_file:
    # Fix: the Eurostat file previously received the WorldBank serialization
    # (countries_turtle), leaving countries_turtle_euro unused.
    turtle_file.write(countries_turtle_euro)

# Add provenance (WorldBank)
provenance = Graph()
provenance = AddProvenance.addProv(Iati,
                                   provenance,
                                   'WorldBank',
                                   start_time,
                                   "",
                                   ['WorldBank'],
                                   "mapping%20scripts/")
provenance_turtle = provenance.serialize(format='turtle')
with open(turtle_folder + 'provenance-worldbank.ttl', 'w') as turtle_file:
    turtle_file.write(provenance_turtle)

# Add provenance (EuroStat)
provenance_euro = Graph()
provenance_euro = AddProvenance.addProv(Iati,
                                        provenance_euro,
                                        'EuroStat',
                                        start_time,
                                        "",
                                        ['EuroStat'],
                                        "mapping%20scripts/")
provenance_turtle_euro = provenance_euro.serialize(format='turtle')
source_ttls.append(dbpedia_url) turtle_response = urllib2.urlopen(dbpedia_url) turtle_data = print "Retrieved data from " + dbpedia_url + ", writing to file..." with open(dbpedia_folder + dbpedia_item + ".ttl", 'w') as turtle_f: turtle_f.write(turtle_data) # Add provenance print "Adding provenance..." provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'DBPedia', start_time, source_ttls, ['DBPedia'], "gather%20data%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(dbpedia_folder + 'provenance-dbpedia.ttl', 'w') as turtle_file_prov: turtle_file_prov.write(provenance_turtle) print "Done!"
with open(turtle_folder + 'factbook-countries.ttl', 'w') as turtle_file: turtle_file.write(countries_turtle) countries_turtle_db = countries_db.serialize(format='turtle') with open(turtle_folder_db + 'dbpedia-countries-via-factbook.ttl', 'w') as turtle_file_db: turtle_file_db.write(countries_turtle_db) # Add provenance provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'Factbook', start_time, "", ['Factbook'], "mapping%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(turtle_folder + 'provenance-factbook-countries.ttl', 'w') as turtle_file: turtle_file.write(provenance_turtle) # Add provenance provenance_db = Graph() provenance_db = AddProvenance.addProv(Iati, provenance_db, 'DBPedia',
# Adding mappings to file print print "Adding to file..." countries_turtle = countries.serialize(format='turtle') with open(turtle_folder + 'transparency-countries.ttl', 'w') as turtle_file: turtle_file.write(countries_turtle) # Add provenance provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'Transparency', start_time, "", ['Transparency'], "mapping%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(turtle_folder + 'provenance-transparency.ttl', 'w') as turtle_file: turtle_file.write(provenance_turtle) print print "Added provenance..." print print "Total: " + str(total_countries) print "Done, found: " + str(found_countries) + ", not found: " + str(not_found_countries) + "."
with open(turtle_folder + 'worldbank-countries.ttl', 'w') as turtle_file: turtle_file.write(countries_turtle) countries_turtle_euro = countries_euro.serialize(format='turtle') with open(turtle_folder_euro + 'eurostat-countries.ttl', 'w') as turtle_file: turtle_file.write(countries_turtle) # Add provenance provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'WorldBank', start_time, "", ['WorldBank'], "mapping%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(turtle_folder + 'provenance-worldbank.ttl', 'w') as turtle_file: turtle_file.write(provenance_turtle) # Add provenance provenance_euro = Graph() provenance_euro = AddProvenance.addProv(Iati, provenance_euro, 'EuroStat',
else: dbpedia_item = line_list[2].replace("dbpedia:", "") dbpedia_url = "" + dbpedia_item + ".ttl" source_ttls.append(dbpedia_url) turtle_response = urllib2.urlopen(dbpedia_url) turtle_data = print "Retrieved data from " + dbpedia_url + ", writing to file..." with open(dbpedia_folder + dbpedia_item + ".ttl", 'w') as turtle_f: turtle_f.write(turtle_data) # Add provenance print "Adding provenance..." provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'DBPedia', start_time, source_ttls, ['DBPedia'], "gather%20data%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(dbpedia_folder + 'provenance-dbpedia.ttl', 'w') as turtle_file_prov: turtle_file_prov.write(provenance_turtle) print "Done!"
with open(turtle_folder + 'schema-dct.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle_dct) with open(turtle_folder + 'schema-foaf.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle_foaf) with open(turtle_folder + 'schema-org.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle_org) with open(turtle_folder + 'schema-geo.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle_geo) with open(turtle_folder + 'schema-cc.ttl', 'w') as turtle_file: turtle_file.write(graph_turtle_cc) print "Adding provenance..." # Add provenance provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'Schema', start_time, "", ['Schema'], "mapping%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(turtle_folder + 'provenance-schema.ttl', 'w') as turtle_file: turtle_file.write(provenance_turtle) print "Done!"
rdf_contains_data = urllib2.urlopen(geonames_contains_url).read() print "Retrieved data from " + geonames_about_url + ", writing to file (" + str(total_count) + " of " + str(total_from_file) + ")..." with open(geonames_folder + str(geonames_item_id) + "-about.rdf", 'w') as turtle_f: turtle_f.write(rdf_about_data) print "Retrieved data from " + geonames_contains_url + ", writing to file (" + str(total_count) + " of " + str(total_from_file) + ")..." with open(geonames_folder + str(geonames_item_id) + "-contains.rdf", 'w') as turtle_f: turtle_f.write(rdf_contains_data) # Add provenance print "Adding provenance..." provenance = Graph() provenance = AddProvenance.addProv(Iati, provenance, 'Geonames', start_time, source_rdfs, ['Geonames'], "gather%20data%20scripts/") provenance_turtle = provenance.serialize(format='turtle') with open(geonames_folder + 'provenance-geonames.ttl', 'w') as turtle_file_prov: turtle_file_prov.write(provenance_turtle) print "Done!"
with open(turtle_folder_ecb + 'ecb-countries.ttl', 'w') as turtle_file_ecb: turtle_file_ecb.write(countries_turtle_ecb) countries_turtle_fao = countries_fao.serialize(format='turtle') with open(turtle_folder_fao + 'fao-countries.ttl', 'w') as turtle_file_fao: turtle_file_fao.write(countries_turtle_fao) # Add provenance OECD provenance_oecd = Graph() provenance_oecd = AddProvenance.addProv(Iati, provenance_oecd, 'OECD', start_time, "", ['OECD'], "mapping%20scripts/") provenance_turtle_oecd = provenance_oecd.serialize(format='turtle') with open(turtle_folder_oecd + 'provenance-oecd.ttl', 'w') as turtle_file_oecd: turtle_file_oecd.write(provenance_turtle_oecd) # Add provenance BFS provenance_bfs = Graph() provenance_bfs = AddProvenance.addProv(Iati, provenance_bfs, 'BFS',