logging.info("\nAppending RDF to: %s ...", rdffilename) self.append_rdf_to_file(rdffilename, rdffilename_tmp) logging.info("done") # clear temporary graph self.g_temp.remove((None, None, None)) logging.debug("\ng_temp cleared: \n%s", str(self.g_temp.serialize(format="turtle"))) else: # track already in graph logging.info("skipping") # clean up at the end # correct olo:indexx to olo:index logging.info("\nCorrecting olo:indexx to olo:index ...") self.indexx2index() logging.info("Writing RDF to: %s ...", rdffilename) self.write_rdf_to_file(rdffilename, rdffilename_tmp) logging.info("done") #logging.debug(str(g.serialize(format='turtle'))) # create graphs g = dtlutil.create_graph('JEGraphID') g_temp = dtlutil.create_graph('JE_temp_GraphID') dtlutil.read_in_rdf(g, rdffilename) # create RDF RDFcreator = JEtoRDF(g, g_temp) RDFcreator.create_rdf()
import logging MIN_LEVEL = logging.DEBUG dtlutil.setup_log(MIN_LEVEL) ############################################################## #%% read in rdf graph import rdflib from rdflib.graph import Graph, Store, URIRef, Literal, BNode from rdflib.namespace import RDF, FOAF, DC MO, TL, EVENT, OLO, DTL, initNs = dtlutil.init_namespaces() DBP, REL, LJ = dtlutil.LOD_namespaces() g = dtlutil.create_graph() dtlutil.read_in_rdf(g, RDFfile_merged) gDTL1000 = dtlutil.create_graph() #%% gDTL1000 = copy.deepcopy(g) #%% read in fingerprints dtl1000_fprints = [] logging.info("\nReading fingerprints from %s", FPRINTfile) with open(FPRINTfile, 'r') as csvfile: csvreader = csv.reader(csvfile, delimiter=',') count = 0 for row in csvreader: if len(row) > 0 and row[0].startswith(".."):
##############################################################
# Read in the two RDF graphs and link every DTL musician to its
# LinkedJazz (LOD) counterpart via an exact foaf:name match.

import rdflib
from rdflib.graph import Graph, Store, URIRef, Literal, BNode
from rdflib.namespace import Namespace, RDFS
from rdflib import plugin
from rdflib.plugins import sparql
from rdflib import Namespace
from rdflib.namespace import RDF, FOAF, RDFS, DC, XSD, OWL

MO, TL, EVENT, OLO, DTL, initNs = dtlutil.init_namespaces()
DBP, REL, LJ = dtlutil.LOD_namespaces()

# main DTL graph
g = dtlutil.create_graph()
dtlutil.read_in_rdf(g, RDFfile)

# LinkedJazz graph, read from N-Triples
ljg = dtlutil.create_graph("LJgraph")
dtlutil.read_in_rdf(ljg, NTfile, myformat="nt")

##############################################################
# For each mo:MusicArtist in the DTL graph, look up resources in the
# LinkedJazz graph that carry the same English-tagged foaf:name and
# record each match as owl:sameAs.
mcount = 0    # musicians examined
lodcount = 0  # LOD matches found
for musician in g.subjects(RDF.type, MO.MusicArtist):
    name = g.value(musician, FOAF.name)
    mcount += 1
    if name is None:
        # musician has no foaf:name triple — nothing to match against
        continue
    for LODmusician in ljg.subjects(predicate=FOAF.name,
                                    object=Literal(name, lang="en")):
        logging.info("found match for musician %s in DB: %s", name, LODmusician)
        g.add( (musician, OWL.sameAs, LODmusician) )
        lodcount += 1
except DateParser.YearOutOfRangeWarning as e1: logging.warning(e1.message) logging.info("\ndates and areas added to sessions") ## area is not further processed, sometimes has venue info, sometimes country (not always), not very consistent ################################################################## CLASSES = [ "sessions", "releases", "bands", "tunes", "musicians", "instruments", "tracks", "media", "signals_performances" ] CONNECTIONS = ["bands_sessions", "tracks_sessions_releases", "performers_musicians_instruments", \ "performances_tunes"] ATTRIBUTES = ["time_area"] dtlutil.read_in_rdf(g, RDFfile) #for entity in CLASSES + CONNECTIONS + ATTRIBUTES: for entity in [ "performers_musicians_instruments", "bands_sessions", "time_area" ]: # convert table(s) to RDF globals()['process_' + entity]() dtlutil.write_rdf(g, RDFfile) logging.info("\nSUCCESS --- all tables processed, RDF created successfully")
##############################################################
# Read in the two RDF graphs (JE and ILL) that are to be merged.

import rdflib
from rdflib.graph import Graph, Store, URIRef, Literal, BNode
from rdflib.namespace import Namespace, RDFS
from rdflib import plugin
from rdflib.plugins import sparql
from rdflib import Namespace
from rdflib.namespace import RDF, FOAF, RDFS, DC, XSD

MO, TL, EVENT, OLO, DTL, initNs = dtlutil.init_namespaces()
DBP, REL, LJ = dtlutil.LOD_namespaces()

gJE = dtlutil.create_graph()
dtlutil.read_in_rdf(gJE, RDFfile_JE)

gILL = dtlutil.create_graph()
dtlutil.read_in_rdf(gILL, RDFfile_ILL)

##############################################################

def merge(uri_JE, uri_ILL):
    """Fold the ILL resource *uri_ILL* into the JE resource *uri_JE*.

    For every triple in gILL with subject uri_ILL, the corresponding
    triple is removed from g_merge and re-added with uri_JE as subject,
    unless the re-subjected triple is already present.

    NOTE(review): mutates the module-level graph ``g_merge``, which is
    defined elsewhere in this file — presumably a merge target seeded
    with the ILL triples; confirm against the caller.
    """
    for s, p, o in gILL.triples((uri_ILL, None, None)):
        g_merge.remove((s, p, o))
        # "not in" membership test instead of "not (...) in ..."
        if (uri_JE, p, o) not in g_merge:
            g_merge.add((uri_JE, p, o))