コード例 #1
0
def create_time_interval(start, end, timeline=TL.UniversalTimeline):
    """Describe a time interval as a new blank node in the graph ``g``.

    Args:
        start: Value wrapped in ``Literal`` and stored under ``TL.beginsAt``.
        end: Value wrapped in ``Literal`` and stored under ``TL.endsAt``.
        timeline: Node linked through ``TL.onTimeLine``; defaults to
            ``TL.UniversalTimeline``.

    Returns:
        The blank node that carries the interval description.
    """
    interval = rdflib.BNode()
    description = (
        (RDF.type, TL.Interval),
        (TL.beginsAt, Literal(start)),
        (TL.endsAt, Literal(end)),
        (TL.onTimeLine, timeline),
    )
    for predicate, value in description:
        g.add((interval, predicate, value))
    return interval
コード例 #2
0
    def format_adjective_satelites(self):
        """Rename adjective satellite synsets from the ``-a`` to the ``-s`` suffix.

        Every subject typed ``SCHEMA.AdjectiveSatelliteSynset`` whose URI ends
        with ``-a`` is handed to ``self._replace_node`` together with the same
        URI ending in ``-s``; its ``SCHEMA.synsetId`` literal is then swapped
        for the new id (the URI text after the last ``synset-``). Subjects
        with any other ending are logged as ill formed and left untouched.
        """
        action = "format_adjective_satelites"
        old_suffix, new_suffix = "-a", "-s"
        count = 0

        self.logger.info("start formatting AdjectiveSatelliteSynset")
        # Snapshot the subjects before looping: the loop body rewrites nodes
        # and triples (presumably in self.graph), and changing a graph while
        # its lazy subjects() generator is still being consumed is unsafe.
        satellite_synsets = list(self.graph.subjects(
            RDF.type, SCHEMA.AdjectiveSatelliteSynset))
        for synset in satellite_synsets:
            if not synset.endswith(old_suffix):
                self.logger.warning(
                    f"ill formed AdjectiveSatelliteSynset '{synset.n3()}'")
                continue

            count += 1
            # Swap only the trailing suffix; str.replace would also rewrite
            # any earlier "-a" occurring inside the URI.
            new_synset = URIRef(synset[:-len(old_suffix)] + new_suffix)
            self.logger.debug(
                f"replacing '{synset.n3()}' by '{new_synset.n3()}'")
            self._replace_node(synset, new_synset, action)

            # replace synset id
            synset_id = synset.split("synset-")[-1]
            new_synset_id = new_synset.split("synset-")[-1]
            self._drop_triple(
                (new_synset, SCHEMA.synsetId, Literal(synset_id)), action)
            self._add_triple(
                (new_synset, SCHEMA.synsetId, Literal(new_synset_id)), action)

        # resulting added and removed triples
        self.logger.info(f"action applied to {count} valid synsets"
                         f"\n\ttotal: {self.added_triples} triples added"
                         f"\n\ttotal: {self.removed_triples} triples removed")
コード例 #3
0
    def format_synset_id(self):
        """Regenerate ``SCHEMA.synsetId`` and add ``SCHEMA.offset`` for every synset.

        For each synset yielded by ``self._get_all_synsets()`` (each item is
        unpacked as a one-element sequence) the current ``SCHEMA.synsetId``
        value, if any, is dropped. A new id is then taken from the URI text
        after the last ``/synset-``, and the offset is the part of that id
        before its first ``-``.

        NOTE(review): only the single value returned by ``graph.value`` is
        dropped; a synset carrying several ``synsetId`` values keeps the
        others -- confirm whether that can happen.
        """
        count = 0

        self.logger.info("start formatting property synsetId")
        synsets = self._get_all_synsets()
        for synset, in synsets:
            count += 1

            # removes old property; compare against None explicitly because
            # an rdflib Literal with a falsy value (e.g. "") is itself falsy
            # and would otherwise be left in the graph
            old_synset_id = self.graph.value(synset, SCHEMA.synsetId)
            if old_synset_id is not None:
                self._drop_triple((synset, SCHEMA.synsetId, old_synset_id))

            # replaces property
            synset_id = Literal(synset.split("/synset-")[-1])
            synset_offset = Literal(synset_id.split("-")[0])
            self._add_triple((synset, SCHEMA.offset, synset_offset))
            self._add_triple((synset, SCHEMA.synsetId, synset_id))

        # resulting added and removed triples
        self.logger.info(f"action applied to {count} cases"
                         f"\n\ttotal: {self.added_triples} triples added"
                         f"\n\ttotal: {self.removed_triples} triples removed")
コード例 #4
0
ファイル: dtlutil.py プロジェクト: ppquadrat/DigThatLick
def create_time_interval(g, start, end):
    """Add a ``TL.Interval`` blank node spanning *start* to *end* to graph *g*.

    Args:
        g: Graph that receives the triples (anything with ``add``).
        start: Interval start, stored as ``Literal(start)`` under ``TL.at``.
        end: Interval end; only used to compute the duration, so
            ``end - start`` must be defined for the given types.

    Returns:
        The blank node describing the interval.
    """
    eventnode = rdflib.BNode()
    g.add((eventnode, RDF.type, TL.Interval))
    g.add((eventnode, TL.at, Literal(start)))
    # the duration is stored as the string form of the difference
    g.add((eventnode, TL.duration, Literal(str(end - start))))
    # was ``TL.timelime``: a typo that silently produced a predicate which
    # does not exist in the Timeline Ontology
    g.add((eventnode, TL.timeline, TL.universaltimeline))
    return eventnode
コード例 #5
0
def create_instrument(inst_label):
    """Create a new ``MO.Instrument`` in the graph ``g`` and return its URI.

    The URI is built by ``create_uri("instruments", ...)`` from a random
    UUID. *inst_label* is stored twice, under ``DTL.orig_inst_label`` and
    ``DTL.dtl_inst_label``.
    """
    new_instrument = create_uri("instruments", uuid.uuid4())
    g.add((new_instrument, RDF.type, MO.Instrument))
    # the same label is recorded as both the original and the DTL label
    for label_predicate in (DTL.orig_inst_label, DTL.dtl_inst_label):
        g.add((new_instrument, label_predicate, Literal(inst_label)))
    logging.debug("Instrument %s created", inst_label)
    return new_instrument
コード例 #6
0
 def create_track(self, trackTitle, tnum):
     """Create a new ``MO.Track`` resource and return its URI.

     The URI comes from ``self.create_uri("tracks", ...)`` with a random
     UUID. *trackTitle* is stored under ``DC.title`` and *tnum*, converted
     to a string, under ``MO.track_number``.
     """
     track = self.create_uri("tracks", uuid.uuid4())
     metadata = (
         (RDF.type, MO.Track),
         (DC.title, Literal(trackTitle)),
         (MO.track_number, Literal(str(tnum))),
     )
     for predicate, value in metadata:
         self.add((track, predicate, value))
     logging.debug("Track created")
     return track
コード例 #7
0
 def create_release(self, title, dcount):
     """Create a new ``MO.Release`` resource and return its URI.

     *title* is stored under ``DC.title`` and *dcount*, converted to a
     string, under ``MO.record_count``. ``DTL.is_remastered`` is always
     written as the literal ``"0"``.
     """
     release = self.create_uri("releases", uuid.uuid4())
     metadata = (
         (RDF.type, MO.Release),
         (DC.title, Literal(title)),
         (MO.record_count, Literal(str(dcount))),
         (DTL.is_remastered, Literal("0")),
     )
     for predicate, value in metadata:
         self.add((release, predicate, value))
     logging.debug("Release created")
     return release
コード例 #8
0
def process_tunes():
    """Add one ``MO.MusicalWork`` to the graph ``g`` per tune.

    Reads ``tunes_table.id`` and ``tunes_table.name`` in lockstep. Each tune
    gets a URI from ``create_uri("tunes", id)``, its name as ``DC.title`` and
    its id as ``DTL.lord_id``.
    """
    ids, names = tunes_table.id, tunes_table.name
    logging.info("\ncreating %i tunes", len(ids))
    for lord_id, tune_title in zip(ids, names):
        logging.debug("creating tune %s", tune_title)
        tune = create_uri("tunes", lord_id)
        for predicate, value in (
                (RDF.type, MO.MusicalWork),
                (DC.title, Literal(tune_title)),
                (DTL.lord_id, Literal(lord_id)),
        ):
            g.add((tune, predicate, value))
    logging.info("tunes created")
コード例 #9
0
def process_bands():
    """Add one ``MO.MusicGroup`` to the graph ``g`` per leader.

    Reads ``leaders_table.id`` and ``leaders_table.name`` in lockstep. Each
    band gets a URI from ``create_uri("bands", id)``, the leader name as
    ``FOAF.name`` and the leader id as ``DTL.lord_id``.
    """
    ids, names = leaders_table.id, leaders_table.name
    logging.info("\ncreating %i bands", len(ids))
    for lord_id, band_name in zip(ids, names):
        logging.debug("creating band %s", band_name)
        band = create_uri("bands", lord_id)
        for predicate, value in (
                (RDF.type, MO.MusicGroup),
                (FOAF.name, Literal(band_name)),
                (DTL.lord_id, Literal(lord_id)),
        ):
            g.add((band, predicate, value))
    logging.info("bands created")
コード例 #10
0
def process_musicians():
    """Add one ``MO.MusicArtist`` to the graph ``g`` per musician.

    Reads ``musicians_table.id`` and ``musicians_table.name`` in lockstep.
    Each musician gets a URI from ``create_uri("musicians", id)``, the name
    as ``FOAF.name`` and the id as ``DTL.lord_id``.
    """
    ids, names = musicians_table.id, musicians_table.name
    logging.info("\ncreating %i musicians", len(ids))
    for lord_id, full_name in zip(ids, names):
        logging.debug("creating musician %s", full_name)
        musician = create_uri("musicians", lord_id)
        for predicate, value in (
                (RDF.type, MO.MusicArtist),
                (FOAF.name, Literal(full_name)),
                (DTL.lord_id, Literal(lord_id)),
        ):
            g.add((musician, predicate, value))
    logging.info("musicians created")
コード例 #11
0
 def create_album(self, title):
     """Create a new ``MO.SignalGroup`` resource for an album and return its URI.

     *title* is stored under ``DC.title``. The flags are fixed:
     ``DTL.is_compilation`` is ``"1"``, while ``DTL.is_remix`` and
     ``DTL.is_live`` are ``"0"``.
     """
     album = self.create_uri("albums", uuid.uuid4())
     metadata = (
         (RDF.type, MO.SignalGroup),
         (DC.title, Literal(title)),
         (DTL.is_compilation, Literal("1")),
         (DTL.is_remix, Literal("0")),
         (DTL.is_live, Literal("0")),
     )
     for predicate, value in metadata:
         self.add((album, predicate, value))
     logging.debug("Album created")
     return album
コード例 #12
0
 def create_release_event(self, title, date, place="Germany"):
     """Create a new ``MO.ReleaseEvent`` resource and return its URI.

     Args:
         title: Stored under ``DC.title``.
         date: Passed to ``create_date`` to build the ``EVENT.time`` node.
             Previously this argument was ignored and a fixed
             "2008-10-24" was written instead. When ``None``, no
             ``EVENT.time`` triple is added.
         place: Stored as a literal under ``EVENT.place``; defaults to
             "Germany", the value that used to be hard-coded.

     Returns:
         The URI of the new release event.
     """
     # create uri
     releaseEventURI = self.create_uri("release_events", uuid.uuid4())
     # add metadata
     self.add((releaseEventURI, RDF.type, MO.ReleaseEvent))
     self.add((releaseEventURI, DC.title, Literal(title)))
     # add date; the graph is taken from ``self.g`` (as find_instrument
     # does) instead of relying on a free name ``g``
     if date is not None:
         self.add((releaseEventURI, EVENT.time, create_date(self.g, date)))
     # add place
     self.add((releaseEventURI, EVENT.place, Literal(place)))
     logging.debug("Release event created")
     return releaseEventURI
コード例 #13
0
 def create_medium(self, title, dnum, tcount):
     """Create a new ``MO.Record`` resource for a medium and return its URI.

     *dnum*, converted to a string, is stored under ``MO.record_number`` and
     *title* under ``DC.title``. ``MO.track_count`` is written only when
     *tcount* is greater than zero.
     """
     # open question from the original author: should the medium uid be
     # defined through the release?
     medium = self.create_uri("mediums", uuid.uuid4())
     metadata = (
         (RDF.type, MO.Record),
         (MO.record_number, Literal(str(dnum))),
         (DC.title, Literal(title)),
     )
     for predicate, value in metadata:
         self.add((medium, predicate, value))
     has_tracks = tcount > 0
     if has_tracks:
         self.add((medium, MO.track_count, Literal(str(tcount))))
     logging.debug("Medium created")
     return medium
コード例 #14
0
 def find_instrument(self, inst_label):
     """Return the subject whose ``DTL.je_inst_label`` equals *inst_label*.

     Instruments are matched by instrument name (label). That will not
     match everything, since there might be multiple names and spellings
     for the same instrument, as well as typos.

     Returns:
         The matching subject from ``self.g``, or ``None`` when no subject
         carries that label.

     NOTE(review): ``any=False`` asks rdflib to raise instead of picking one
     subject when several match -- confirm callers expect that.
     """
     # value() already yields the default (None) when nothing matches, so a
     # separate membership test would only repeat the lookup
     instrument = self.g.value(subject=None,
                               predicate=DTL.je_inst_label,
                               object=Literal(inst_label),
                               default=None,
                               any=False)
     if instrument is None:
         return None
     logging.debug("Instrument %s found: %s", inst_label, instrument)
     return instrument
コード例 #15
0
 def create_performer(self, artistURI, instrument, confidence):
     """Create a ``DTL.Performer`` with an attached confidence node.

     Args:
         artistURI: Node linked through ``DTL.musician``.
         instrument: Stored as a literal under ``DTL.instrument``.
         confidence: Indexable value; item 0 is stored as
             ``DTL.musician_confidence`` and item 1 as
             ``DTL.instrument_confidence``.

     Returns:
         The URI of the new performer.
     """
     record = self.add
     performer = self.create_uri("Performers", uuid.uuid4())
     record((performer, RDF.type, DTL.Performer))
     record((performer, DTL.musician, artistURI))
     record((performer, DTL.instrument, Literal(instrument)))

     # both confidence values live on a blank node that is linked to the
     # performer only once it is fully described
     confidence_node = rdflib.BNode()
     record((confidence_node, RDF.type, DTL.PerformerConfidence))
     record((confidence_node, DTL.musician_confidence,
             Literal(confidence[0])))
     record((confidence_node, DTL.instrument_confidence,
             Literal(confidence[1])))
     record((performer, DTL.performer_confidence, confidence_node))

     logging.debug("Performer created")
     return performer
コード例 #16
0
def process_instruments():
    """Add one ``MO.Instrument`` to the graph ``g`` per instrument.

    Reads ``instruments_table.id`` and ``instruments_table.name`` in
    lockstep. The name is stored under both ``DTL.lord_inst_label`` and
    ``DTL.orig_inst_label``, and the id under ``DTL.lord_id``.
    """
    ids, names = instruments_table.id, instruments_table.name
    logging.info("\ncreating %i instruments", len(ids))
    for lord_id, inst_name in zip(ids, names):
        logging.debug("creating instrument %s", inst_name)
        instrument = create_uri("instruments", lord_id)
        for predicate, value in (
                (RDF.type, MO.Instrument),
                (DTL.lord_inst_label, Literal(inst_name)),
                (DTL.orig_inst_label, Literal(inst_name)),
                # open question from the original author: are these lord uids?
                (DTL.lord_id, Literal(lord_id)),
        ):
            g.add((instrument, predicate, value))
    logging.info("instruments created")
コード例 #17
0
def process_tracks():
    """Add one ``MO.Track`` to the graph ``g`` per track.

    Reads ``tracks_table.id``, ``tracks_table.name`` and
    ``tracks_table.track_nr`` in lockstep. The name is stored as
    ``DC.title``, the number (as a string) as ``MO.track_number`` and the id
    as ``DTL.lord_id``.
    """
    ids = tracks_table.id
    names = tracks_table.name
    numbers = tracks_table.track_nr
    logging.info("\ncreating %i tracks", len(ids))
    for lord_id, track_title, number in zip(ids, names, numbers):
        logging.debug("creating track %s with number %s", track_title, number)
        track = create_uri("tracks", lord_id)
        for predicate, value in (
                (RDF.type, MO.Track),
                (DC.title, Literal(track_title)),
                (MO.track_number, Literal(str(number))),
                # open question from the original author: are these lord uids?
                (DTL.lord_id, Literal(lord_id)),
        ):
            g.add((track, predicate, value))
    logging.info("tracks created")
コード例 #18
0
ファイル: Lord2RDF.py プロジェクト: ppquadrat/DigThatLick
def process_time_area():
    """Parse each session's area/date string and attach both to the session.

    First pass: every entry of ``sessions_table.location_time_str`` is split
    into an area string and a date string by
    ``Lord_time_area_parser.parse_location_time_str``.

    Second pass: the session ids from ``sessions_table.sessionId`` are paired
    with the parsed strings in order. For each session the area is added as
    ``EVENT.place`` and the date string is handed to ``dtlutil.add_datestr``.

    ``add`` and ``find_by_id`` are not defined in this function; they are
    expected to be provided by the surrounding module.
    """
    # parse areadate strings
    session_areadate_strings = sessions_table.location_time_str
    logging.info("\nparsing %i area date strings",
                 len(session_areadate_strings))
    session_area_strings = []
    session_date_strings = []
    for session_areadate_str in session_areadate_strings:
        areastr, datestr = Lord_time_area_parser.parse_location_time_str(
            session_areadate_str)
        logging.debug("area: %s, date: %s", areastr, datestr)
        session_area_strings.append(areastr)
        session_date_strings.append(datestr)
    logging.debug("area - date strings parsed")

    # add area and date to sessions
    # The ids were previously bound to ``session_idxs`` while the loop read
    # the never-assigned name ``session_ids``.
    session_ids = sessions_table.sessionId
    logging.info("\nadding areas and dates to %i sessions", len(session_ids))
    for session_id, session_area_str, session_date_str in zip(
            session_ids, session_area_strings, session_date_strings):
        sessionURI = find_by_id("sessions", session_id)
        # Use this session's own strings; the old code reused the leftover
        # ``areastr``/``datestr`` of the last parsed row and an undefined
        # ``areaString``.
        logging.debug('add place: %s', session_area_str)
        add((sessionURI, EVENT.place, Literal(session_area_str)))
        logging.debug('datestr: %s', session_date_str)
        dtlutil.add_datestr(sessionURI, session_date_str)
    logging.debug("\ndates and areas added to sessions")
コード例 #19
0
 def create_label(self, title):
     """Create a new ``MO.Label`` resource titled *title* and return its URI."""
     label = self.create_uri("labels", uuid.uuid4())
     for predicate, value in (
             (RDF.type, MO.Label),
             (DC.title, Literal(title)),
     ):
         self.add((label, predicate, value))
     logging.debug("Label created")
     return label
コード例 #20
0
 def create_artist(self, artistName):
     """Create a new ``MO.MusicArtist`` named *artistName* and return its URI.

     The URI comes from ``self.create_uri("artists", ...)`` with a random
     UUID; the name is stored under ``FOAF.name``.
     """
     artist = self.create_uri("artists", uuid.uuid4())
     for predicate, value in (
             (RDF.type, MO.MusicArtist),
             (FOAF.name, Literal(artistName)),
     ):
         self.add((artist, predicate, value))
     logging.debug("Artist created")
     return artist
コード例 #21
0
 def create_band(self, bandName):
     """Create a new ``MO.MusicGroup`` named *bandName* and return its URI.

     The URI comes from ``self.create_uri("bands", ...)`` with a random
     UUID; the name is stored under ``FOAF.name``.
     """
     band = self.create_uri("bands", uuid.uuid4())
     for predicate, value in (
             (RDF.type, MO.MusicGroup),
             (FOAF.name, Literal(bandName)),
     ):
         self.add((band, predicate, value))
     logging.debug("Band created")
     return band
コード例 #22
0
 def create_arranger(self, name):
     """Create a new ``MO.MusicArtist`` for an arranger and return its URI.

     The URI comes from ``self.create_uri("arrangers", ...)`` with a random
     UUID; *name* is stored under ``FOAF.name``.
     """
     arranger = self.create_uri("arrangers", uuid.uuid4())
     for predicate, value in (
             (RDF.type, MO.MusicArtist),
             (FOAF.name, Literal(name)),
     ):
         self.add((arranger, predicate, value))
     logging.debug("Arranger created")
     return arranger
コード例 #23
0
 def create_composer(self, name):
     """Create a new ``MO.MusicArtist`` for a composer and return its URI.

     The URI comes from ``self.create_uri("composers", ...)`` with a random
     UUID; *name* is stored under ``FOAF.name``.
     """
     composer = self.create_uri("composers", uuid.uuid4())
     for predicate, value in (
             (RDF.type, MO.MusicArtist),
             (FOAF.name, Literal(name)),
     ):
         self.add((composer, predicate, value))
     logging.debug("Composer created")
     return composer
コード例 #24
0
 def create_instrument(self, inst_label):
     """Create a new ``MO.Instrument`` labelled *inst_label* and return its URI.

     The URI comes from ``self.create_uri("instruments", ...)`` with a
     random UUID; the label is stored under ``DTL.je_inst_label``.
     """
     instrument = self.create_uri("instruments", uuid.uuid4())
     for predicate, value in (
             (RDF.type, MO.Instrument),
             (DTL.je_inst_label, Literal(inst_label)),
     ):
         self.add((instrument, predicate, value))
     logging.debug("Instrument %s created", inst_label)
     return instrument
コード例 #25
0
def get_instrument(instrument):
    """Return the URI of the instrument labelled *instrument*, creating it if needed.

    The graph ``g`` is searched for a subject whose ``DTL.orig_inst_label``
    equals ``Literal(instrument)``. When none exists, ``create_instrument``
    is called and its result returned.

    NOTE(review): ``any=False`` asks rdflib to raise instead of picking one
    subject when several carry the same label -- confirm that is intended.
    """
    instrumentURI = g.value(
        subject=None,
        predicate=DTL.orig_inst_label,
        object=Literal(instrument),
        default=None,
        any=False,
    )
    # identity test: the "not found" result is the ``default=None`` sentinel
    if instrumentURI is None:
        instrumentURI = create_instrument(instrument)
    else:
        logging.debug("found instrument %s", instrument)
    return instrumentURI
コード例 #26
0
def process_releases():
    """Add one ``MO.Release`` to the graph ``g`` per release.

    Reads ``full_id``, ``title``, ``label_id_str`` and ``notes_str`` from
    ``releases_table`` in lockstep. They are stored as ``DTL.lord_id``,
    ``DC.title``, ``DTL.lord_label_id_str`` and ``DTL.lord_release_notes``
    respectively.
    """
    ids = releases_table.full_id
    titles = releases_table.title
    label_id_strs = releases_table.label_id_str
    notes = releases_table.notes_str
    logging.info("\ncreating %i releases", len(ids))
    rows = zip(ids, titles, label_id_strs, notes)
    for lord_id, release_title, label_id_str, note in rows:
        logging.debug("creating release %s", release_title)
        release = create_uri("releases", lord_id)
        # open question from the original author: label_id_str? notes_str?
        for predicate, value in (
                (RDF.type, MO.Release),
                (DC.title, Literal(release_title)),
                (DTL.lord_label_id_str, Literal(label_id_str)),
                (DTL.lord_release_notes, Literal(note)),
                (DTL.lord_id, Literal(lord_id)),
        ):
            g.add((release, predicate, value))
    logging.info("releases created")
コード例 #27
0
    def add_adjective_markers(self, senses: Graph, adjective_lines):
        """Copy adjective position markers from data lines onto senses.

        Each line is split on whitespace: field 0 is the synset id, field 3
        the hexadecimal word count, and the words sit at every second
        position of the remaining fields. A word ending in ``(a)``, ``(p)``
        or ``(ip)`` carries a marker; the marker is stripped, the word is
        normalised with ``self._format_lexical`` and, for every synset in
        ``self.graph`` with that ``SCHEMA.synsetId``, the sense returned by
        ``self._get_sense`` receives ``SCHEMA.adjPosition`` in *senses*.

        Args:
            senses: Graph that receives the ``adjPosition`` triples.
            adjective_lines: Sized collection of data lines.
        """
        self.logger.info(f"start processing {len(adjective_lines)} lines")

        # collect one (synset id, raw word) pair per word listed on a line
        adjective_data = []
        for line in adjective_lines:
            synset_id, _, _, words_count, *tail = line.split()
            total_words = int(words_count, base=16)
            adjective_data.extend(
                (synset_id, tail[2 * position])
                for position in range(total_words))

        # find and add adjective markers
        # Marker meanings per the WordNet wninput(5WN) manual: (p) predicate,
        # (a) prenominal/attributive, (ip) immediately postnominal.
        # NOTE(review): the earlier inline comments had (a) and (p) swapped.
        known_markers = ("a", "p", "ip")
        count = 0
        for synset_id, word in adjective_data:
            for candidate in known_markers:
                if word.endswith(f"({candidate})"):
                    marker = candidate
                    break
            else:
                # unmarked word: nothing to record
                continue

            # adds marker
            word = self._format_lexical(word[:word.find(f"({marker})")], True)
            matched = False
            for synset in self.graph.subjects(SCHEMA.synsetId,
                                              Literal(synset_id)):
                sense = self._get_sense(synset, word)
                if sense is None:
                    continue
                matched = True
                count += 1
                self.logger.debug(
                    f"adding marker '{marker}' from word '{word}' to sense '{sense.n3()}'"
                )
                senses.add((sense, SCHEMA.adjPosition, Literal(marker)))

            # validates the result
            if not matched:
                self.logger.warning(
                    f"could not add marker '{marker}' from word '{word}' to synset '{synset_id}'"
                )

        # print statistics
        self.logger.info(f"after action {count} triples were added")
コード例 #28
0
ファイル: Lord2RDF_V2.py プロジェクト: ppquadrat/DigThatLick
def process_sessions():
    """Add one ``MO.Performance`` to the graph ``g`` per session.

    Each entry of ``sessions_table.full_id`` gets a URI from
    ``create_uri("sessions", id)`` and is stored with its id as
    ``DTL.lord_id``. The running index is logged at debug level.
    """
    ids = sessions_table.full_id
    logging.info("\ncreating %i sessions", len(ids))
    for position, lord_id in enumerate(ids):
        logging.debug(str(position))
        session = create_uri("sessions", lord_id)
        for predicate, value in (
                (RDF.type, MO.Performance),
                (DTL.lord_id, Literal(lord_id)),
        ):
            g.add((session, predicate, value))
    logging.info("sessions created")
コード例 #29
0
ファイル: dtlutil.py プロジェクト: ppquadrat/DigThatLick
def create_qualified_date(g,
                          freetext_date,
                          startdate,
                          enddate,
                          is_apprx=True,
                          apprxq=None):
    """Add a qualified date (instant or interval) to graph *g*.

    When *startdate* and *enddate* differ, the node comes from
    ``create_time_interval`` and is additionally typed
    ``DTL.QualifiedDateInterval``; otherwise it comes from ``create_date``
    and is typed ``DTL.QualifiedDateInstant``.

    Args:
        g: Graph that receives the triples.
        freetext_date: Original text, stored under ``DTL.freetext_timespan``.
        startdate: Start of the span (or the single date).
        enddate: End of the span.
        is_apprx: Stored under ``DTL.is_approximate`` as ``"1"`` or ``"0"``.
        apprxq: Optional qualifier stored under
            ``DTL.approximation_qualifier``; skipped when ``None``.

    Returns:
        The node describing the date.
    """
    if startdate != enddate:
        timespan_node = create_time_interval(g, startdate, enddate)
        g.add((timespan_node, RDF.type, DTL.QualifiedDateInterval))
    else:
        timespan_node = create_date(g, startdate)
        g.add((timespan_node, RDF.type, DTL.QualifiedDateInstant))
    g.add((timespan_node, DTL.freetext_timespan, Literal(freetext_date)))
    # the flag is serialised as the string "1" or "0"
    g.add((timespan_node, DTL.is_approximate, Literal(str(int(is_apprx)))))
    # identity test, so a falsy-but-present qualifier is still recorded and
    # no overloaded ``!=`` is invoked
    if apprxq is not None:
        g.add((timespan_node, DTL.approximation_qualifier, Literal(apprxq)))
    return timespan_node
コード例 #30
0
def process_time_area():
    """Parse each session's area/date string and attach both to the session.

    First pass: every entry of ``sessions_table.location_time_str`` is split
    into an area string and a date string by
    ``LordAreaDateParser.parse_area_date_str``.

    Second pass: each index in ``sessions_table.id`` is resolved to a session
    URI through ``get_session_id_by_idx`` and ``find_by_id``. The area is
    added as ``EVENT.place``, the raw date string as ``DTL.orig_date``, and
    the date string is handed to ``dtlutil.add_datestr``. The two
    ``DateParser`` warnings that call may raise are logged and do not stop
    the run.
    """
    area_date_parser = LordAreaDateParser()

    # parse areadate strings
    raw_strings = sessions_table.location_time_str
    logging.info("\nparsing %i area date strings",
                 len(raw_strings))
    areas, dates = [], []
    for raw in raw_strings:
        area, date = area_date_parser.parse_area_date_str(raw)
        logging.debug("area: %s, date: %s", area, date)
        areas.append(area)
        dates.append(date)
    logging.info("area - date strings parsed")

    # add area and date to sessions
    from dateParser import DateParser

    idxs = sessions_table.id
    logging.info("\nadding areas and dates to %i sessions", len(idxs))
    for idx, area, date in zip(idxs, areas, dates):
        logging.debug("session %i", idx)
        session = find_by_id("sessions", get_session_id_by_idx(idx))
        logging.debug('add place: %s', area)
        g.add((session, EVENT.place, Literal(area)))
        g.add((session, DTL.orig_date, Literal(date)))
        logging.debug('datestr: %s', date)
        try:
            dtlutil.add_datestr(g, session, date)
        except (DateParser.UnparsableDateWarning,
                DateParser.YearOutOfRangeWarning) as date_problem:
            # a bad date is reported, and the session keeps its place and
            # orig_date triples
            logging.warning(date_problem.message)

    logging.info("\ndates and areas added to sessions")