예제 #1
0
def assign_keys(wn, wn_file):
    """Fill in missing sense keys in a WordNet XML file and rewrite it.

    The file at ``wn_file`` is parsed, every sense whose ``sense_key`` is
    falsy is given the key returned by ``sense_keys.get_sense_key``, and the
    parsed wordnet is then serialised back over the same path.

    Args:
        wn: Wordnet object passed through to ``sense_keys.get_sense_key``.
        wn_file: Path of the XML file to read and then overwrite.
    """
    file_wn = parse_wordnet(wn_file)

    # Lazily walk (entry, sense) pairs in document order, keeping only the
    # senses that still lack a key.
    unkeyed = (
        (entry, sense)
        for entry in file_wn.entries
        for sense in entry.senses
        if not sense.sense_key
    )
    for entry, sense in unkeyed:
        sense.sense_key = sense_keys.get_sense_key(
            wn, file_wn, entry, sense, wn_file)

    with open(wn_file, "w") as out_file:
        file_wn.to_xml(out_file, True)
예제 #2
0
def check_lex_files(wn, fix):
    """Check every ``src/xml/wn-*.xml`` lexicographer file for consistency.

    For each file the following problems are printed and counted:

    * a synset whose ``lex_name`` differs from the file's lex name;
    * a synset whose part of speech is rejected by ``equal_pos`` against the
      part of speech implied by the first three letters of the lex name;
    * a lexical entry with no senses;
    * a sense with a falsy ``id``;
    * a sense whose declared key (``unmap_sense_key(sense.id)``) differs
      from the key computed by ``sense_keys.get_sense_key``.

    Args:
        wn: Wordnet object passed through to ``sense_keys.get_sense_key``.
        fix: If truthy, a key mismatch is printed as a ``sed`` command line
            instead of a descriptive message (the command is only printed,
            not executed).

    Returns:
        The total number of problems reported.
    """
    prefix_to_pos = {
        "nou": PartOfSpeech.NOUN,
        "ver": PartOfSpeech.VERB,
        "adj": PartOfSpeech.ADJECTIVE,
        "adv": PartOfSpeech.ADVERB
    }
    problem_count = 0

    for path in glob.glob("src/xml/wn-*.xml"):
        # Strip the fixed "src/xml/wn-" prefix and the ".xml" suffix to get
        # the lex name, e.g. "noun.animal".
        lexfile = path[len("src/xml/wn-"):-len(".xml")]
        expected_pos = prefix_to_pos[lexfile[:3]]
        file_wn = parse_wordnet(path)

        for synset in file_wn.synsets:
            if synset.lex_name != lexfile:
                print("%s declared in %s but listed as %s" %
                      (synset.id, lexfile, synset.lex_name))
                problem_count += 1
            if not equal_pos(expected_pos, synset.part_of_speech):
                print("%s declared in %s but has wrong POS %s" %
                      (synset.id, lexfile, synset.part_of_speech))
                problem_count += 1

        for entry in file_wn.entries:
            if len(entry.senses) == 0:
                print("%s is empty in %s" % (entry.id, lexfile))
                problem_count += 1

            for sense in entry.senses:
                if not sense.id:
                    print("%s does not have a sense key" % (sense.id))
                    problem_count += 1

                expected_key = sense_keys.get_sense_key(wn, entry, sense, path)
                declared_key = unmap_sense_key(sense.id)
                if declared_key != expected_key:
                    message = (
                        "sed -i 's/%s/%s/' src/xml/*" %
                        (declared_key, expected_key)
                        if fix else
                        "%s has declared key %s but should be %s" %
                        (sense.id, declared_key, expected_key)
                    )
                    print(message)
                    problem_count += 1

    return problem_count
예제 #3
0
def add_entry(wn, synset, lemma, idx=0, n=-1, change_list=None):
    """Add a new lemma to a synset.

    A new sense linking ``lemma`` to ``synset`` is created. If the wordnet
    already has exactly one lexical entry for ``lemma`` with the synset's
    part of speech, the sense is appended to that entry; otherwise a new
    lexical entry is created and added to the wordnet.

    Args:
        wn: The wordnet to modify.
        synset: The synset receiving the new member.
        lemma: Written form of the lemma to add.
        idx: 1-based index of the new sense within the synset, used as the
            two-digit suffix of the sense id. A value <= 0 places it after
            the current members. When inserting before existing members,
            their sense ids are renumbered through ``change_sense_idx``.
        n: Position of the new sense among the lemma's existing senses. A
            negative value places it after them. When inserting before
            existing senses, they are shifted through ``change_sense_n``.
            Ignored (reset to 0) when a new lexical entry is created.
        change_list: Optional change tracker; if truthy, its
            ``change_entry(wn, entry)`` is called with the affected entry.

    Returns:
        The lexical entry that now carries the new sense.

    Raises:
        Exception: If ``idx`` is greater than the number of members plus
            one, if ``n`` exceeds the number of senses of the lemma, or if
            more than one entry matches the lemma and part of speech.
    """
    print("Adding %s to synset %s" % (lemma, synset.id))
    n_entries = len(empty_if_none(wn.members_by_id(synset.id)))
    synset_pos = synset.part_of_speech

    def _pos_compatible(entry_id):
        """Return whether the entry's POS matches the synset's, treating
        adjective and adjective satellite as interchangeable."""
        entry_pos = wn.entry_by_id(entry_id).lemma.part_of_speech
        return (
            entry_pos == synset_pos
            or (entry_pos == PartOfSpeech.ADJECTIVE
                and synset_pos == PartOfSpeech.ADJECTIVE_SATELLITE)
            or (entry_pos == PartOfSpeech.ADJECTIVE_SATELLITE
                and synset_pos == PartOfSpeech.ADJECTIVE)
        )

    compatible_ids = [
        entry_id for entry_id in empty_if_none(wn.entry_by_lemma(lemma))
        if _pos_compatible(entry_id)
    ]

    # Only an unambiguous match is used for sense-position bookkeeping.
    if len(compatible_ids) == 1:
        entry_global = wn.entry_by_id(compatible_ids[0])
        n_senses = len(entry_global.senses)
    else:
        entry_global = None
        n_senses = 0

    if idx <= 0:
        idx = n_entries + 1
    elif idx > n_entries + 1:
        raise Exception("IDX value specified is higher than number of entries")
    elif idx != n_entries + 1:
        # Inserting in the middle: bump the index of every sense at or
        # after the requested slot.
        for sense_id in sense_ids_for_synset(wn, synset):
            this_idx = int(sense_id[-2:])
            if this_idx >= idx:
                change_sense_idx(wn, sense_id, this_idx + 1)

    if n < 0:
        n = n_senses
    elif n > n_senses:
        raise Exception("n value exceeds number of senses for lemma")
    elif n != n_senses:
        # Inserting in the middle: shift every existing sense at or after
        # position n. entry_global is set here because n_senses > 0.
        for sense_n, existing in enumerate(entry_global.senses):
            if sense_n >= n:
                change_sense_n(wn, entry_global, existing.id, sense_n + 1)

    def _new_sense(sense_n):
        """Build the sense linking the lemma to the synset (no key yet)."""
        return Sense(id="ewn-%s-%s-%s-%02d" %
                     (escape_lemma(lemma), synset_pos.value,
                      synset_key(synset.id), idx),
                     synset=synset.id,
                     n=sense_n,
                     sense_key=None)

    exact_ids = [
        entry_id for entry_id in empty_if_none(wn.entry_by_lemma(lemma))
        if wn.entry_by_id(entry_id).lemma.part_of_speech == synset_pos
    ]

    if exact_ids:
        if len(exact_ids) != 1:
            raise Exception("More than one entry for part of speech")
        print("Found an entry!")
        entry = wn.entry_by_id(exact_ids[0])
        sense = _new_sense(n)

        # Append exactly once: the entry used to be fetched twice from the
        # same wordnet and appended to through both references, which
        # stored the new sense twice on one entry.
        entry.senses.append(sense)
        sense.sense_key = get_sense_key(wn, entry, sense, synset.lex_name)
        if sense.synset not in wn.members:
            wn.members[sense.synset] = []
        wn.members[sense.synset].append(entry.lemma.written_form)
    else:
        # A brand-new entry has no other senses, so its sense goes first.
        n = 0
        print("Creating new entry")
        entry = LexicalEntry(
            "ewn-%s-%s" % (escape_lemma(lemma), synset_pos.value))
        entry.set_lemma(Lemma(lemma, synset_pos))
        sense = _new_sense(n)
        entry.add_sense(sense)
        sense.sense_key = get_sense_key(wn, entry, sense, synset.lex_name)
        wn.add_entry(entry)

    if change_list:
        change_list.change_entry(wn, entry)
    return entry
예제 #4
0
from wordnet import *
import change_manager
from glob import glob
import re
from sys import exit
import sense_keys


if __name__ == "__main__":
    # Load the wordnet, give every sense that lacks a sense key a computed
    # one, then save the result through the change manager.
    wn = change_manager.load_wordnet()
    for entry in wn.entries:
        for sense in entry.senses:
            if s_key_missing := (not sense.sense_key):
                # The key is derived using the lex name of the sense's synset.
                lex_name = wn.synset_by_id(sense.synset).lex_name
                sense.sense_key = sense_keys.get_sense_key(
                    wn, entry, sense, lex_name)
    change_manager.save(wn)