def wordnet_get_gloss_byref(_ref):
    """Return the gloss (definition) for a WordNet sense key.

    _ref is a WordNet sense key, e.g. 'friday%1:28:00::'.
    Returns '' when _ref is empty and the string 'WN Error' when the
    key cannot be resolved.
    """
    if not _ref:
        return ''
    try:
        synset = wn.synset_from_sense_key(_ref)
        return synset.definition()
    except (AttributeError, WordNetError, ValueError):
        return 'WN Error'
Example #2
0
 def get_gloss_relation(self, name):
     """Return the synsets gloss-related to the synset called *name*.

     Builds a 'wn:<zero-padded offset><pos>' identifier (satellite
     adjectives 's' are mapped to 'a'), asks self.sense for related
     sense keys, and resolves each key back to a Synset.  Keys that
     WordNet cannot resolve are skipped.
     """
     synset = wn.synset(name)
     synset_pos = synset.pos()
     # WordNet synset ids use 'a' for satellite adjectives as well.
     if synset_pos == "s":
         synset_pos = "a"
     synset_id = 'wn:{}{}'.format(str(synset.offset()).zfill(8), synset_pos)
     related_synset_keys = self.sense.getGlossRelatedWordNetSynsetIds(
         synset_id)
     related_gloss_synsets = []
     for synset_key in related_synset_keys:
         try:
             related_gloss_synset = wn.synset_from_sense_key(synset_key)
             related_gloss_synsets.append(related_gloss_synset)
         except (AttributeError, WordNetError, KeyError, ValueError):
             # Unresolvable keys are skipped.  A bare `except:` here would
             # also swallow KeyboardInterrupt/SystemExit.
             continue
     return related_gloss_synsets
def get_other_senses_byref(_ref, select_name=False):
    """Return the homonyms of a WordNet sense key as a list of synsets.

    _ref is a WordNet sense key, e.g. 'friday%1:28:00::'.
    When select_name is True, only synsets whose name contains the
    reference word are kept.  Returns 'WN Error' if the lookup fails.
    """
    try:
        reference = wn.synset_from_sense_key(_ref)
        word = reference.name().split('.')[0]
        others = []
        for candidate in wn.synsets(word):
            if candidate == reference:
                continue
            if select_name and word not in candidate.name():
                continue
            others.append(candidate)
        return others
    except (AttributeError, WordNetError, KeyError, ValueError):
        return 'WN Error'
Example #4
0
def create_hypernynm_gloss_data(config):
    """Build gloss, hypernym-gloss and triplet training data from TSVs.

    config keys used:
      train_raw_files:  iterable of TSV file names (header row skipped)
      train_raw_dir:    directory containing those files
      oversample_ratio: each positive sample is emitted this many times

    Each TSV row provides: target_id, label, sentence, gloss, sense_key.
    Returns (gloss_data, hyp_data, triplet_data) where triplet_data maps
    sentence -> TripletSample with positive/negative gloss sets.
    """
    train_files = config['train_raw_files']
    gloss_data, hyp_data = [], []
    oversample_ratio = config['oversample_ratio']
    triplet_data = {}
    for file in train_files:
        path = os.path.join(config['train_raw_dir'], file)
        # 'with' guarantees the file is closed even if a row raises.
        with open(path, 'r', encoding='utf8') as train_file:
            for line in train_file.readlines()[1:]:  # skip header row
                info = line.strip().split('\t')
                target_id, label, sentence, gloss, sense_key = (
                    info[0], info[1], info[2], info[3], info[4])
                # Positives are oversampled to balance the training set.
                repeat = oversample_ratio if label == '1' else 1
                for _ in range(repeat):
                    gloss_data.append(
                        [target_id, label, sentence, gloss, sense_key])
                triplet_obj = triplet_data.get(sentence)
                if triplet_obj is None:
                    triplet_obj = TripletSample(sentence)
                    triplet_data[sentence] = triplet_obj
                if label == '1':
                    triplet_obj.positives.add(gloss)
                else:
                    triplet_obj.negatives.add(gloss)
                try:
                    wn_synset = wn.synset_from_sense_key(sense_key)
                except Exception:
                    # Best effort: skip sense keys WordNet cannot resolve
                    # (narrowed from a bare `except:`).
                    continue
                for hyp in wn_synset.hypernyms():
                    word = hyp.lemmas()[0].name().replace('_', ' ')
                    new_gloss = word + ' : ' + hyp.definition()
                    for _ in range(repeat):
                        hyp_data.append(
                            [target_id, label, sentence, new_gloss,
                             sense_key])
    return gloss_data, hyp_data, triplet_data
Example #5
0
def accuracy(y, y_pred, by_synset=False):
    """Fraction of instances whose predicted synset matches a gold sense.

    y:      dict mapping instance key -> list of gold sense keys
    y_pred: dict mapping instance key -> predicted Synset
    by_synset: compare at the synset level (gold keys are resolved to
        Synset objects) instead of at the sense-key level.
    """
    n = len(y)
    k = 0
    for key in y.keys():
        try:
            if by_synset:
                # Fix: the original compared a sense-key string against a
                # list of Synset objects, which could never match (distinct
                # keys can map to the same synset, e.g. 'brazil_nut.n.02').
                gold = [wn.synset_from_sense_key(sense) for sense in y[key]]
                hit = y_pred[key] in gold
            else:
                hit = y_pred[key].lemmas()[0].key() in y[key]
            if hit:
                k += 1
        except WordNetError:
            # Some gold keys (e.g. "budget") are missing from WordNet.
            pass
    return k / n
Example #6
0
def get_lemma_classifiers(instances,
                          keys,
                          classifier=MultinomialNB,
                          binary=False,
                          idf=True):
    '''Train one per-lemma sense classifier over TF-IDF context features.

	:type instances: dict of WSDInstance
	:type keys: dict of list
	'''
    data = {}
    for inst_id, inst in instances.items():
        # One (contexts, labels) pair per lemma.
        lemma_data = data.setdefault(inst.lemma, [[], []])
        try:
            for sense in keys[inst_id]:
                lemma_data[0].append(inst.context)
                lemma_data[1].append(wn.synset_from_sense_key(sense).name())
        except WordNetError:  # "budget" again.
            pass

    classifiers = {}
    for lemma, (contexts, labels) in data.items():
        if not labels:
            continue  # 'budget' strikes again
        vectorizer = TfidfVectorizer(lowercase=False,
                                     preprocessor=lambda x: x,
                                     tokenizer=lambda x: x,
                                     binary=binary,
                                     use_idf=idf)
        features = vectorizer.fit_transform(contexts)
        model = classifier()
        model.fit(features.toarray(), labels)
        # A single-class lemma cannot discriminate senses, so skip it.
        if len(model.classes_) > 1:
            classifiers[lemma] = {'vct': vectorizer, 'clf': model}
    return classifiers
def get_synset_id_from_sense_key(sense_key):
    """Map a WordNet sense key to its synset name (e.g. 'dog.n.01')."""
    synset = wn.synset_from_sense_key(sense_key)
    return synset.name()
from nltk.corpus import wordnet as wn

dataset = 'ALL'

if __name__ == "__main__":
    # Filter the gold key file down to noun senses only, rewriting each
    # sense key as its synset name.
    in_path = f'./data/mono/evaluation/{dataset}/{dataset}.gold.key.txt'
    out_path = f'./data/mono/evaluation/{dataset}/{dataset}_n.gold.key.txt'

    # 'with' on both handles: the original never closed the input file.
    with open(in_path) as src, open(out_path, 'w') as out:
        for line in src:
            line = line.replace('\n', '')
            # 'inst_id' instead of 'id', which shadows the builtin.
            parts = line.split(' ')
            inst_id, sense_key = parts[0], parts[1]

            syn = wn.synset_from_sense_key(sense_key)

            if syn.pos() != 'n':
                continue

            out.write(f"{inst_id} {syn.name()}\n")