        gDTL1000 -= g.triples( (None, None, performanceURI) )
        count += 1
newnumber = len(set(list(gDTL1000.subjects(RDF.type, MO.Performance)))) \
            - len(set(list(gDTL1000.subjects(RDF.type, DTL.Session))))
logging.debug("removed %i performances, remaining %i", count, newnumber)

oldnumber = len(set(list(g.subjects(RDF.type, MO.Signal))))
logging.debug("removing from %i signals", oldnumber)
count = 0
for signalURI in g.subjects(RDF.type, MO.Signal):
    signal_fprint = g.value(signalURI, DTL.fingerprint_short)
    if dtl1000_fprints.count(str(signal_fprint)) == 0:
        # signal is not in the DTL1000 fingerprint list: drop every triple that
        # mentions it, whether as subject or as object
        gDTL1000 -= g.triples( (signalURI, None, None) )
        gDTL1000 -= g.triples( (None, None, signalURI) )
        count += 1
newnumber = len(set(list(gDTL1000.subjects(RDF.type, MO.Signal))))
logging.debug("removed %i signals, remaining %i", count, newnumber)

##############################################################
#%%
logging.debug("#########################")
logging.info("graph originally had %i triples, dtl1000 graph has %i triples", len(g), len(gDTL1000))
logging.info("dtl1000 graph has %i Signals", len(list(gDTL1000.subjects(RDF.type, MO.Signal))))

dtlutil.write_rdf(gDTL1000, RDFnewfile)
MO, TL, EVENT, OLO, DTL, initNs = dtlutil.init_namespaces()

g = dtlutil.create_graph()
dtlutil.read_in_rdf(g, RDFfile)
logging.debug("\ngraph has %i triples", len(g))

##############################################################

logging.info("\nReading styles from %s", STYLESfile)

with open(STYLESfile, 'r') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=',')
    count = 0
    for row in csvreader:
        if len(row) > 0:
            fprint = row[0]
            style = row[1]
            # find the signal whose short fingerprint matches this row
            signalURI = g.value(subject=None, predicate=DTL.fingerprint_short, \
                                object=Literal(fprint), default=None, any=True)
            if signalURI is not None:
                g.add( (signalURI, DTL.style, Literal(style)) )
                track = g.value(signalURI, MO.published_as)
                title = g.value(track, DC.title)
                logging.debug("style %s added to the signal of track %s", style, title)

logging.debug("\ngraph has %i triples", len(g))
dtlutil.write_rdf(g, RDFnewfile)
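# Assumed STYLESfile layout (not confirmed by the source): comma-separated rows with the
# audio fingerprint in column 0 (matched against DTL.fingerprint_short) and the style
# label in column 1, as implied by the row[0]/row[1] accesses above.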
        except DateParser.YearOutOfRangeWarning as e1:
            logging.warning(e1.message)

    logging.info("\ndates and areas added to sessions")
    ## area is not further processed; it sometimes has venue info, sometimes a country (not always), and is not very consistent

##################################################################

CLASSES = [ "sessions", "releases", "bands", "tunes", "musicians", "instruments", "tracks", \
            "media", "signals_performances" ]
CONNECTIONS = [ "bands_sessions", "tracks_sessions_releases", "performers_musicians_instruments", \
                "performances_tunes" ]
ATTRIBUTES = [ "time_area" ]

dtlutil.read_in_rdf(g, RDFfile)

#for entity in CLASSES + CONNECTIONS + ATTRIBUTES:
for entity in [ "performers_musicians_instruments", "bands_sessions", "time_area" ]: # convert table(s) to RDF
    globals()['process_' + entity]()
    dtlutil.write_rdf(g, RDFfile)

logging.info("\nSUCCESS --- all tables processed, RDF created successfully")
logging.info("musicians merged, found %i matches", count_merged) logging.debug("merged graph has %i triples", len(g_merge)) count_merged_all += count_merged count_merged = 0 # merge tunes logging.info("\nmerging tunes") for uri_JE in gJE.subjects(RDF.type, MO.MusicalWork): for uri_ILL in gILL.subjects(RDF.type, MO.MusicalWork): if same_tunes(uri_JE, uri_ILL): logging.debug("merging tunes: %s and %s", gJE.value(uri_JE, DC.title), gILL.value(uri_ILL, DC.title)) merge(uri_JE, uri_ILL) count_merged += 1 logging.info("tunes merged, found %i matches", count_merged) logging.debug("merged graph has %i triples", len(g_merge)) count_merged_all += count_merged count_merged = 0 ############################################################## logging.debug("#########################") logging.info("\nmerged: found %i matches", count_merged_all) logging.debug("merged graph originally had %i triples, now has %i triples", len_orig, len(g_merge)) dtlutil.write_rdf(g_merge, RDFnewfile)