def assign_keys(wn, wn_file):
    """Give every key-less sense in ``wn_file`` a sense key and rewrite the file.

    The file is parsed with ``parse_wordnet``; each sense whose ``sense_key``
    is falsy gets the value computed by ``sense_keys.get_sense_key``.  The
    parsed wordnet is then serialised back over ``wn_file``.

    :param wn: wordnet object forwarded to ``sense_keys.get_sense_key``
    :param wn_file: path of the XML file to read and then overwrite
    """
    file_wn = parse_wordnet(wn_file)
    for entry in file_wn.entries:
        for sense in entry.senses:
            if sense.sense_key:
                # Already keyed; leave it untouched.
                continue
            # NOTE(review): this call passes five arguments, while the other
            # get_sense_key call sites visible in this source pass four --
            # confirm against the current sense_keys signature.
            sense.sense_key = sense_keys.get_sense_key(
                wn, file_wn, entry, sense, wn_file)

    # Opening with "w" truncates the input file before it is re-serialised.
    with open(wn_file, "w") as xml_out:
        file_wn.to_xml(xml_out, True)
def check_lex_files(wn, fix):
    """Check every file matching ``src/xml/wn-*.xml`` for consistency.

    For each file the following is verified:

    * every synset's ``lex_name`` equals the name derived from the file path;
    * every synset's part of speech is compatible (per ``equal_pos``) with
      the one implied by the first three letters of that name;
    * no entry has an empty sense list;
    * every sense has an id, and the key obtained from that id via
      ``unmap_sense_key`` equals the one computed by
      ``sense_keys.get_sense_key``.

    :param wn: wordnet object forwarded to ``sense_keys.get_sense_key``
    :param fix: when true, a sense-key mismatch prints a ``sed`` command that
        would rewrite the key instead of being counted as an error
    :return: the number of errors found
    :raises KeyError: if a file name does not start with one of the prefixes
        in ``pos_map``
    """
    pos_map = {
        "nou": PartOfSpeech.NOUN,
        "ver": PartOfSpeech.VERB,
        "adj": PartOfSpeech.ADJECTIVE,
        "adv": PartOfSpeech.ADVERB
    }
    errors = 0
    for f in glob.glob("src/xml/wn-*.xml"):
        # Drop the 11-character "src/xml/wn-" prefix and the ".xml" suffix.
        lexfile = f[11:-4]
        lex_pos = pos_map[lexfile[:3]]
        swn = parse_wordnet(f)

        for synset in swn.synsets:
            if synset.lex_name != lexfile:
                print("%s declared in %s but listed as %s" %
                      (synset.id, lexfile, synset.lex_name))
                errors += 1
            if not equal_pos(lex_pos, synset.part_of_speech):
                print("%s declared in %s but has wrong POS %s" %
                      (synset.id, lexfile, synset.part_of_speech))
                errors += 1

        for entry in swn.entries:
            if len(entry.senses) == 0:
                print("%s is empty in %s" % (entry.id, lexfile))
                errors += 1
            for sense in entry.senses:
                if not sense.id:
                    # sense.id is empty here, so name the owning entry
                    # instead; printing the missing id identifies nothing.
                    print("%s does not have a sense key" % (entry.id))
                    errors += 1
                    # Without an id there is no declared key to compare, and
                    # in fix mode a sed command built from an empty key would
                    # be meaningless, so move on to the next sense.
                    continue
                # NOTE(review): the last argument here is the file path,
                # whereas other call sites visible in this source pass a
                # lex_name -- confirm which one get_sense_key expects.
                calc_sense_key = sense_keys.get_sense_key(wn, entry, sense, f)
                sense_key = unmap_sense_key(sense.id)
                if sense_key != calc_sense_key:
                    if fix:
                        print("sed -i 's/%s/%s/' src/xml/*" %
                              (sense_key, calc_sense_key))
                    else:
                        print("%s has declared key %s but should be %s" %
                              (sense.id, sense_key, calc_sense_key))
                        errors += 1
    return errors
def add_entry(wn, synset, lemma, idx=0, n=-1, change_list=None):
    """Add a new lemma to a synset.

    If the wordnet already has exactly one entry for ``lemma`` with the
    synset's part of speech, a new sense is appended to that entry; if it has
    none, a new ``LexicalEntry`` holding the single sense is created and added
    to ``wn``.

    :param wn: the wordnet to modify
    :param synset: the synset that receives the lemma
    :param lemma: the written form to add
    :param idx: position of the lemma among the synset's members; a value
        ``<= 0`` places it after the existing members.  When it falls among
        existing members, senses of the synset whose id ends in an index
        ``>= idx`` are renumbered one higher via ``change_sense_idx``.
    :param n: sense number within the lemma's existing entry; a negative value
        places it after the existing senses.  Ignored (reset to 0) when a new
        entry has to be created.
    :param change_list: optional object whose ``change_entry(wn, entry)`` is
        called with the affected entry
    :return: the entry that now carries the new sense
    :raises Exception: if ``idx`` or ``n`` is out of range, or if more than
        one entry matches the lemma and part of speech
    """
    print("Adding %s to synset %s" % (lemma, synset.id))
    n_entries = len(empty_if_none(wn.members_by_id(synset.id)))
    synset_pos = synset.part_of_speech
    # Adjective and adjective-satellite entries count as the same lemma when
    # working out how many senses the lemma already has.
    adjectival = (PartOfSpeech.ADJECTIVE, PartOfSpeech.ADJECTIVE_SATELLITE)

    compatible_ids = []
    for entry_id in empty_if_none(wn.entry_by_lemma(lemma)):
        entry_pos = wn.entry_by_id(entry_id).lemma.part_of_speech
        if entry_pos == synset_pos or (entry_pos in adjectival
                                       and synset_pos in adjectival):
            compatible_ids.append(entry_id)

    if len(compatible_ids) == 1:
        entry_global = wn.entry_by_id(compatible_ids[0])
        n_senses = len(entry_global.senses)
    else:
        entry_global = None
        n_senses = 0

    # Resolve the position within the synset, shifting later members up by
    # one when the new lemma is inserted before them.
    if idx <= 0:
        idx = n_entries + 1
    elif idx > n_entries + 1:
        raise Exception("IDX value specified is higher than number of entries")
    elif idx < n_entries + 1:
        for sense_id in sense_ids_for_synset(wn, synset):
            # The last two characters of a sense id hold its index.
            this_idx = int(sense_id[-2:])
            if this_idx >= idx:
                change_sense_idx(wn, sense_id, this_idx + 1)

    # Resolve the sense number within the entry, shifting later senses up.
    # entry_global cannot be None in the final branch: n_senses is 0 then,
    # so every non-negative n is caught by the two branches before it.
    if n < 0:
        n = n_senses
    elif n > n_senses:
        raise Exception("n value exceeds number of senses for lemma")
    elif n < n_senses:
        for sense_n, sense in enumerate(entry_global.senses):
            if sense_n >= n:
                change_sense_n(wn, entry_global, sense.id, sense_n + 1)

    # Looked up only after the renumbering above, and with an exact
    # part-of-speech match this time.
    entries = [
        entry_id for entry_id in empty_if_none(wn.entry_by_lemma(lemma))
        if wn.entry_by_id(entry_id).lemma.part_of_speech == synset_pos
    ]
    new_sense_id = "ewn-%s-%s-%s-%02d" % (
        escape_lemma(lemma), synset.part_of_speech.value,
        synset_key(synset.id), idx)

    if entries:
        if len(entries) != 1:
            raise Exception("More than one entry for part of speech")
        print("Found an entry!")
        wn_entry = wn.entry_by_id(entries[0])
        entry = wn.entry_by_id(entries[0])
        sense = Sense(id=new_sense_id, synset=synset.id, n=n, sense_key=None)

        wn_entry.senses.append(sense)
        # Both lookups go to the same wordnet, so they normally yield the
        # same sense list; appending unconditionally a second time would
        # register the new sense twice.  Only mirror the append when the
        # second lookup really exposes a different list.
        if entry.senses is not wn_entry.senses:
            entry.senses.append(sense)
        sense.sense_key = get_sense_key(wn, entry, sense, synset.lex_name)
        if sense.synset not in wn.members:
            wn.members[sense.synset] = []
        wn.members[sense.synset].append(wn_entry.lemma.written_form)
    else:
        # A brand-new entry has exactly one sense, so any requested sense
        # number is discarded.
        n = 0
        print("Creating new entry")
        entry = LexicalEntry(
            "ewn-%s-%s" % (escape_lemma(lemma), synset.part_of_speech.value))
        entry.set_lemma(Lemma(lemma, synset.part_of_speech))
        sense = Sense(id=new_sense_id, synset=synset.id, n=n, sense_key=None)
        entry.add_sense(sense)
        sense.sense_key = get_sense_key(wn, entry, sense, synset.lex_name)
        wn.add_entry(entry)

    if change_list:
        change_list.change_entry(wn, entry)
    return entry
from wordnet import *
import change_manager
from glob import glob
import re
from sys import exit
import sense_keys

if __name__ == "__main__":
    # Load the wordnet, compute a sense key for every sense that lacks one,
    # then save the result through change_manager.
    wn = change_manager.load_wordnet()
    for entry in wn.entries:
        for sense in entry.senses:
            if sense.sense_key:
                # Existing keys are left as they are.
                continue
            # The key is computed from the lex_name of the sense's synset.
            lex_name = wn.synset_by_id(sense.synset).lex_name
            sense.sense_key = sense_keys.get_sense_key(
                wn, entry, sense, lex_name)
    change_manager.save(wn)