import nltk
from nltk.corpus import wordnet as wn


def get_synset(word):
    # The last character carries the POS tag; everything before the separator is the lemma.
    lemma, pos = word[:-2], word[-1]
    if pos == 'j':
        pos = "s"
    try:
        offsets = wn._lemma_pos_offset_map[lemma][pos]
    except KeyError:
        offsets = [syn._offset for syn in wn.synsets(lemma)]
    padding = [pad(ss) for ss in offsets]  # pad(): helper from the surrounding file that zero-pads offsets for of2ss()
    omw_list = [str(ss) + "-" + str(pos) for ss in padding]
    syn_list = []
    for offset in omw_list:
        syn = wn.synset("oven.n.01")  # fallback synset used when the offset cannot be resolved
        try:
            syn = wn.of2ss(offset)
        except (StopIteration, AssertionError):
            pass
        except (nltk.corpus.reader.wordnet.WordNetError, ValueError):
            continue
        finally:
            # the finally clause runs even on continue, so every offset appends a synset
            syn_list.append(syn)
    return syn_list
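# A minimal usage sketch for get_synset above (not from the original source). It assumes the
# input packs lemma and POS into one string such as "dog_n", and it supplies a hypothetical
# pad() standing in for the original helper so the sketch runs on its own (of2ss() expects
# 8-digit, zero-padded offsets).
def pad(offset):
    return str(offset).zfill(8)


if __name__ == "__main__":
    for syn in get_synset("dog_n"):
        print(syn, "-", syn.definition())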
def decompile(inf, out_dir):
    session = get_session()
    for lemma, grouping in gen_groupings(inf):
        with open(pjoin(out_dir, lemma), "w") as outf:
            first = True
            for group_num, synsets in grouping.items():
                if not first:
                    outf.write("\n")
                else:
                    first = False
                for synset in synsets:
                    outf.write(synset)
                    outf.write(" # ")
                    if is_wn_ref(synset):
                        sense = wordnet.of2ss(synset).definition()
                    else:
                        sense = session.execute(select([
                            word_sense.c.sense,
                        ]).select_from(joined).where(
                            (headword.c.name == lemma) &
                            (word_sense.c.sense_id == synset)
                        )).fetchone()["sense"]
                    tokens = word_tokenize(sense)
                    outf.write(" ".join(tokens))
                    outf.write("\n")
def main():
    inf = fileinput.input()
    next(inf)  # skip the header line
    for line in inf:
        frame, ssof = line.strip().split(",", 1)
        ss = wordnet.of2ss(ssof)
        print(frame, " ".join(l.name() for l in ss.lemmas(lang="fin")))
from nltk.corpus import wordnet as wn


def get_wn_ss(imagenet_id):
    """
    Transforms an ImageNet id into a WordNet synset.

    :param imagenet_id: ImageNet id, i.e. a POS letter followed by the 8-digit WordNet offset
    :return: the corresponding WordNet synset
    """
    return wn.of2ss(imagenet_id[1:] + '-' + imagenet_id[0])
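# Hedged usage sketch (not from the original source): ImageNet wnids are assumed to be WordNet
# 3.0 offsets prefixed with a POS letter, so 'n02084071' becomes '02084071-n' for of2ss().
if __name__ == "__main__":
    ss = get_wn_ss('n02084071')
    print(ss, "-", ss.definition())  # should print the dog synset and its gloss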
def load_csi(self):
    with open('data/csi_data/wn_synset2csi.txt') as csi_map_f:
        for line in csi_map_f:
            elems = line.strip().split('\t')
            wn_offset, csi_labels = elems[0], elems[1:]
            wn_offset = wn_offset.lstrip('wn:')  # drop the leading 'wn:' prefix
            syn = wn.of2ss(wn_offset)
            self.map_syn2csi[syn.name()] = csi_labels[0]
def ann2ss(ann):
    from stiff.munge.utils import synset_id_of_ann
    from nltk.corpus import wordnet
    from finntk.wordnet.utils import pre_id_to_post

    synset_id = pre_id_to_post(synset_id_of_ann(ann))
    # TODO: proper handling of new FinnWordNet synsets
    if synset_id[0] == "9":
        return
    return wordnet.of2ss(synset_id)
def get_lemma_names(ssof, wns):
    wns = list(wns)
    lemmas = []
    if "qf2" in wns:
        fi_ssof = en2fi_post(ssof)
        ss = fiwn.of2ss(fi_ssof)
        lemmas.extend(ss.lemmas())
        wns.remove("qf2")
    for wnref in wns:
        ss = wordnet.of2ss(ssof)
        lemmas.extend(ss.lemmas(lang=wnref))
    return {l.name() for l in lemmas}
def bbl2wn(self, babelSynsetID):
    service_url = 'https://babelnet.io/v4/getSynset'
    params = {'id': babelSynsetID, 'key': BABEL_KEY}
    data = self.get(service_url, params)
    wnOffsets = data['wnOffsets']
    if len(wnOffsets) != 0:
        # take the first WordNet 3.0 offset in the mapping
        wnOffsets = data['wnOffsets'][0]['mapping']['WN_30'][0]
        sense = wn.of2ss(wnOffsets)
    else:
        sense = None
    return sense
def is_bird(model, img_path):
    # Load image and transform for model input
    x = load_img(img_path, target_size=(224, 224))
    x = img_to_array(x)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    # Run image through network and decode result
    preds = model.predict(x)
    top_pred = decode_predictions(preds, top=1)[0][0]  # (offset_id, name, prob)
    return synset_is_bird(wn.of2ss(top_pred[0][1:] + top_pred[0][0]))
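# Hedged usage sketch for is_bird (not from the original source): it assumes the standard Keras
# ImageNet helpers provide load_img, preprocess_input and decode_predictions, and it supplies a
# hypothetical synset_is_bird() standing in for the one defined elsewhere in the original file.
import numpy as np
from nltk.corpus import wordnet as wn
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions


def synset_is_bird(ss):
    # hypothetical stand-in: is bird.n.01 in the synset's hypernym closure?
    return wn.synset('bird.n.01') in ss.closure(lambda s: s.hypernyms())


if __name__ == "__main__":
    print(is_bird(ResNet50(weights='imagenet'), 'some_photo.jpg'))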
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn


def pessimistic_score(synsets):
    selected_synset = None
    max_score = 0
    for synset in synsets:
        if not synset[0][0] == '8':
            # ignore synset offsets 8.......-. :
            # they are odd synsets that WordNet can't find
            synset_name = wn.of2ss(synset[0]).name()
            # Get score from SentiWordNet
            neg_score = swn.senti_synset(synset_name).neg_score()
            if neg_score > max_score:
                max_score = neg_score
                selected_synset = synset_name
    if selected_synset is not None:
        # NOTE: the third element repeats pos_score(); obj_score() may have been intended
        return (swn.senti_synset(selected_synset).pos_score(),
                swn.senti_synset(selected_synset).neg_score(),
                swn.senti_synset(selected_synset).pos_score())
    else:
        return 0, 0, 0
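# Hedged usage sketch for pessimistic_score (not from the original source): the argument is
# assumed to be a Freeling-style sense ranking, i.e. a list of (offset, weight) pairs; the
# weights below are made up, and only the offsets are actually used.
if __name__ == "__main__":
    ranked = [(wn.ss2of(ss), 1.0) for ss in wn.synsets('disaster', pos='n')[:2]]
    print(pessimistic_score(ranked))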
def offset_to_label(wnet_offset):
    return wn.of2ss(wnet_offset.split('n')[1] + '-n')
# for c2 in range(1000):
#     if sim[c1][c2] != sim[c2][c1]:
#         print "diff " + str(labels[c1]) + " " + str(labels[c2])

# Read labels and transform into NLTK-compatible offsets
inf = open('labels.txt', 'r')
labels = list()
synsets = list()
labelsNLTK = list()
for line in inf:
    labels.append(line)
    labelsNLTK.append((line.split()[0])[1:] + '-n')

# Obtain synsets
for s1 in labelsNLTK:
    synsets.append(wn.of2ss(s1))

d_s = wn.synset('dog.n.01').closure(lambda s: s.hyponyms())
dogs = []
for d in d_s:
    dogs.append(d)

w_s = wn.synset('wheeled_vehicle.n.01').closure(lambda s: s.hyponyms())
wheeled = []
for w in w_s:
    wheeled.append(w)
wheeled.append(wn.synset('school_bus.n.01'))
wheeled.append(wn.synset('minibus.n.01'))
wheeled.append(wn.synset('trolleybus.n.01'))

a_s = wn.synset('animal.n.01').closure(lambda s: s.hyponyms())
labels = {hyper: hyper.name() for hyper in gg}
pos = graphviz_layout(graph)
nx.draw_networkx(graph, pos, labels=labels)
# nx.draw_networkx_labels(graph, pos, labels)
plt.show()

with open(map_clsloc) as ifs:
    classes_temp = ifs.read().strip().split('\n')
imagenet_classes = [kls.split() for kls in classes_temp]
imagenet_classes = {k: v for k, _, v in imagenet_classes}

orig_hypos = []
for wnid, label in imagenet_classes.items():
    offset = wnid.split('n')[-1]
    hypo = wn.of2ss(offset + 'n')
    orig_hypos.append(hypo)

all_hypos = set()
set_hypers = set()
hyper_to_hypo = {}
for orig_hypo in orig_hypos:
    for hyper in orig_hypo.closure(get_hypernyms, depth=1):
        set_hypers.add(hyper)
        for new_hypo in hyper.closure(get_hyponyms, depth=1):
            all_hypos.add(new_hypo)
            hyper_to_hypo.setdefault(hyper, set()).add(new_hypo)

for hyper, hypos in sorted(hyper_to_hypo.items(),
                           key=lambda x: (x[0].max_depth(), -1 * len(x[1]))):
    print(hyper.max_depth(), len(hypos), hyper.name())
    else:
        sim = 0
    return sim


for similarity in similarity_list:
    np_matrix = np.zeros((len(wnid_list), len(wnid_list)), float)
    print(similarity)
    matrix_path = "/home/li/datasets/csv/" + str(similarity) + "_similarity_" + str(len(wnid_list)) + ".csv"
    for i in range(len(wnid_list)):
        if i % 100 == 0:
            print(i)
        wnid_1 = wnid_list[i]
        offset_1 = str(wnid_1[1:]) + "n"
        synset_1 = wn.of2ss(offset_1)
        for j in range(len(wnid_list) - i):
            wnid_2 = wnid_list[j + i]
            offset_2 = str(wnid_2[1:]) + "n"
            synset_2 = wn.of2ss(offset_2)
            # fill both symmetric cells, aligned with wnid_list indices i and j + i
            np_matrix[i][j + i] = wn_similarity(synset_1, synset_2, similarity=similarity)
            np_matrix[j + i][i] = np_matrix[i][j + i]
    df1 = DataFrame(np_matrix, index=wnid_list, columns=wnid_list)
    df1.to_csv(matrix_path)


# ################## Choose Valid Synsets from Japanese wordnet
'''
japanese_wn_list = []
def synid2syn(self, synid):
    return wn.of2ss(synid)
def add_synsets_to_sentences(sentences, print_synsets=False,
                             _state_queue=None, _id_process=None, freeling_modules=None):
    """
    Performs a Freeling process to disambiguate words of the sentences according to their
    context (UKB algorithm), linking them to a unique synset (if possible).\n
    Our sentences are converted to Freeling Sentences before processing.\n
    Notice that even if we may have already computed the Lemmas, for example, Freeling Sentences
    generated from our sentences are "raw sentences", without any analysis linked to their Words.
    So we redo the whole Freeling process from scratch every time, except *tokenization* and
    *sentence splitting*, to avoid any confusion.

    .. note:: This function should be used only inside the file_process.add_files() function.

    :param sentences: Sentences to process
    :type sentences: :obj:`list` of |Sentence|
    :param print_synsets: If True, print disambiguation results
    :type print_synsets: boolean
    """
    from loacore.conf import DB_TIMEOUT
    from loacore.utils.db import safe_commit, safe_execute

    freeling_sentences = [sentence.compute_freeling_sentence() for sentence in sentences]

    if freeling_modules is None:
        if _state_queue is not None:
            _state_queue.put(ProcessState(_id_process, os.getpid(), "Loading Freeling...", " - "))
        morfo, tagger, sen, wsd = init_freeling()
    else:
        morfo, tagger, sen, wsd = freeling_modules

    _disambiguation_state(_state_queue, _id_process)

    # Perform morphosyntactic analysis and disambiguation
    processed_sentences = morfo.analyze(freeling_sentences)
    processed_sentences = tagger.analyze(processed_sentences)
    # Annotate and disambiguate senses
    processed_sentences = sen.analyze(processed_sentences)
    processed_sentences = wsd.analyze(processed_sentences)

    # Copy Freeling results into our Words
    for s in range(len(sentences)):
        sentence = sentences[s]
        if not len(sentence.words) == len(processed_sentences[s]):
            print("/!\\ Warning, sentence offset error in synset_process /!\\")
            print(sentence.sentence_str())
            print([w.get_form() for w in processed_sentences[s]])
        for w in range(len(sentence.words)):
            word = sentence.words[w]
            rank = processed_sentences[s][w].get_senses()
            if len(rank) > 0:
                if not rank[0][0][0] == '8':
                    # ignore synset offsets 8.......-. :
                    # they are odd synsets that WordNet can't find
                    word.synset = Synset(None, word.id_word, rank[0][0],
                                         wn.of2ss(rank[0][0]).name(), None, None, None)
                    if print_synsets:
                        print("Word : " + word.word)
                        print("Synset code : " + rank[0][0])
                        print("Synset name : " + wn.of2ss(rank[0][0]).name())

    # Add synsets to database
    conn = sql.connect(DB_PATH, timeout=DB_TIMEOUT)
    c = conn.cursor()
    sentence_count = 0
    total_sentence = len(sentences)
    for sentence in sentences:
        # Print state
        sentence_count += 1
        _commit_state(_state_queue, _id_process, sentence_count, total_sentence)
        for word in sentence.words:
            synset = word.synset
            if synset is not None:
                # Add synset
                safe_execute(
                    c,
                    "INSERT INTO Synset (ID_Word, Synset_Code, Synset_Name) VALUES (?, ?, ?)",
                    0, _state_queue, _id_process,
                    mark_args=(word.id_word, synset.synset_code, synset.synset_name))

                # Get back id of the last inserted row
                safe_execute(c, "SELECT last_insert_rowid()", 0, _state_queue, _id_process)
                id_synset = c.fetchone()[0]

                # Update Word table
                safe_execute(
                    c,
                    "UPDATE Word SET ID_Synset = " + str(id_synset) +
                    " WHERE ID_Word = " + str(word.id_word),
                    0, _state_queue, _id_process)

    safe_commit(conn, 0, _state_queue, _id_process)
    conn.close()
def test_dog_is_cat():
    dog = Label('02099601-n', 'golden retriever', '')
    cat = wordnet.of2ss('02123045-n')
    assert not dog.is_a(cat)
from operator import itemgetter

import nltk
from nltk.corpus import wordnet as wn

import IO

nltk.data.path.append(IO.data_source_dir + "/nltk_data")

imagenet_classes = IO.read_imagenet_wnid_words_file()
imagenet_labels = list(imagenet_classes.values())
imagenet_synsets = []
for wnid, label in imagenet_classes.items():
    offset = wnid.split('n')[-1]
    synset = wn.of2ss(offset + 'n')
    imagenet_synsets.append(synset)

p_tallies = IO.read_pixabay_tally_file(hit_limit=0, top3=True)
p_metadata = IO.read_pixabay_metadata_file()

# How many images have 3, 2, 1, and 0 labels from ImageNet?
# How many images have 3, 2, 1, and 0 words from WordNet?
id_tags_dict = {ii: meta['top3'] for ii, meta in p_metadata.items()}
num_images_with_tags_in_imagenet = {0: 0, 1: 0, 2: 0, 3: 0}
for ii, tags in id_tags_dict.items():
    jj = 0
    for tag in tags:
def __init__(self, id: str, name: str, uri: str):
    self.id = id
    self.syn = wordnet.of2ss(id)
    self.name = name
    self.uri = uri
# -*- coding: utf-8 -*-
"""
Get definition and examples from WordNet ID
"""
from nltk.corpus import wordnet as wn

word = input("WordNet ID : ")
word_wn = wn.of2ss(word.replace('-', ''))
print(word_wn.definition())
print(word_wn.examples())
print("WordNet ID : " + wn.ss2of(word_wn))
    result += x
    return result


exclude_ss = []
exclude_hypos_of = [
    '01326291-n',  # microorganism
    '07992450-n',  # taxonomic group
]
for synset in exclude_hypos_of:
    ss_set = extracthypos(wn.of2ss(synset))
    for ss in ss_set:
        exclude_ss.append(ss)

pwn = open('pwn_data.py', 'w+')
pwn.write("from collections import defaultdict as dd\n")
pwn.write("pwn = dd(lambda: dd())\n")
for ss in wn.all_synsets():
    if ss not in exclude_ss:
        pos = ss.pos()
        if pos == 's':
            pos = 'a'
from nltk.corpus import wordnet as wn


def wnid2synset(wnids):
    # an ImageNet wnid 'nXXXXXXXX' is rearranged into the 'XXXXXXXXn' form that of2ss() accepts
    _wnid2synset = lambda id: wn.of2ss(id[1:] + id[0])
    if isinstance(wnids, (tuple, list)):
        return [_wnid2synset(id) for id in wnids]
    return _wnid2synset(wnids)
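# Hedged usage sketch (not from the original source); the wnids below are example ImageNet ids
# and, assuming WordNet 3.0 offsets (which ImageNet uses), the first should resolve to the dog synset.
if __name__ == "__main__":
    print(wnid2synset('n02084071'))                  # a single wnid -> one synset
    print(wnid2synset(['n02084071', 'n02121620']))   # a list of wnids -> a list of synsets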
from nltk.corpus import wordnet


def get_synset(imagenet_synset_id):
    return wordnet.of2ss(imagenet_synset_id[1:] + 'n')
    tag = tag.next_sibling.next_sibling

# Build the pa_child and parent lists by converting the English words
# (search the index in the English-version WordNet and translate it into the corresponding Chinese words)
for id in pa_child:
    # for every id that has Chinese words, look for its parent and child nodes
    b = id.split('-')
    english_id = b[2] + b[3]  # format: 15028818n
    # build the parent dict
    parent[id] = []
    for name in pa_child[id]:
        # push the names in
        parent[id].append(name)
    try:
        english_name = wn.of2ss(english_id)  # english_name format: Synset('isoagglutinin.n.01')
    except:
        # leave both lists of this node empty: this Chinese id has no matching English id,
        # even though the Chinese id does have corresponding words
        continue
    else:
        # first build the pa_child dict
        children_names = english_name.hyponyms()
        if children_names:
            # the node has child nodes
            for child_name in children_names:
                child_id = str(child_name.offset()).zfill(8) + '-' + child_name.pos()
                chinese_child_id = 'cmn-10-' + child_id
                if chinese_child_id in pa_child.keys():
                    for name in pa_child[id]:
                        pa_child[id][name].append(chinese_child_id)
# keep paragraphs
parag_act = wf.attrib['para']
if parag_act != parag_ant:
    fo.write(bcolors.JUMP)

# search words that have a synset
expr = "//term/span/target[@id='" + wf.attrib['id'] + "']"
term = tree.find(expr).getparent().getparent()
wordsense = term.find("./externalReferences/externalRef")
# if it has a sense
if wordsense is not None:
    ref = wordsense.attrib['reference']
    ref = ref.replace('ili-30-', '')
    syn = wordnet.of2ss(ref)
    try:
        # find its antonym
        lema1 = syn.lemmas(lang='eng')[0]
        lemma2 = lema1.antonyms()[0]
        # get the lemma in the desired language
        synAnt = lemma2.synset()
        lemma = synAnt.lemma_names(lang1)[0]
        fo.write(colors[lang1] + lemma.encode('utf8') + bcolors.ENDC + " ")
    except:
        # if something goes wrong, write the original word
        fo.write(wf.text.encode('utf8') + " ")
else:
    distances_wup.append(sim3)
    distances_res.append(sim4)
    distances_jcn.append(sim5)
    distances_lin.append(sim6)
    distances_res_bnc.append(sim7)
    distances_jcn_bnc.append(sim8)
    distances_lin_bnc.append(sim9)

# Import IC calculation
from nltk.corpus import wordnet_ic
brown_ic = wordnet_ic.ic('ic-brown-resnik-add1.dat')
bnc_ic = wordnet_ic.ic('ic-bnc-resnik-add1.dat')

# For each pair of synsets, compute distance
for s1 in synsets:
    syn1 = wn.of2ss(s1)
    for s2 in synsets:
        syn2 = wn.of2ss(s2)
        distances_path[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.path_similarity(syn1, syn2)
        distances_lch[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.lch_similarity(syn1, syn2)
        distances_wup[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.wup_similarity(syn1, syn2)
        distances_res[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.res_similarity(syn1, syn2, brown_ic)
        distances_jcn[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.jcn_similarity(syn1, syn2, brown_ic)
        distances_lin[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.lin_similarity(syn1, syn2, brown_ic)
        distances_res_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.res_similarity(syn1, syn2, bnc_ic)
        distances_jcn_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.jcn_similarity(syn1, syn2, bnc_ic)
        distances_lin_bnc[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1 - wn.lin_similarity(syn1, syn2, bnc_ic)
        # distances_path[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)
        # distances_lch[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)
        # distances_wup[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)
        # distances_res[labelsNLTK.index(s1)][labelsNLTK.index(s2)] = 1/(labelsNLTK.index(s2)+1)