def main():
    # python en_dataset_creation.py ../../datasets/WNs ../../datasets/en/ 2.0 3.0
    """Build English hypernym-discovery gold datasets by diffing two WordNet versions.

    Command-line arguments:
        sys.argv[1]: root directory containing the WordNet releases (WN<version>/).
        sys.argv[2]: output directory for the generated .tsv files.
        sys.argv[3]: old WordNet version (e.g. "2.0").
        sys.argv[4]: (optional) new WordNet version; defaults to "3.0".

    Raises:
        Exception: if fewer than three positional arguments are supplied.
    """
    # BUG FIX: the guard used to be `< 3`, but sys.argv[3] is read
    # unconditionally below, so two-argument invocations crashed with
    # IndexError instead of this usage message. Three args are mandatory.
    if len(sys.argv) < 4:
        raise Exception(
            "The following arguments are required:<WordNet path> <output_path> <old_version_float> <new_version_float>"
        )
    path = sys.argv[1]
    out_path = sys.argv[2]
    old_version = sys.argv[3]
    # The new version is optional and defaults to 3.0.
    new_version = sys.argv[4] if len(sys.argv) == 5 else "3.0"

    # Releases are expected in subdirectories named WN<version>, e.g. WN2.0.
    wn2 = WordNetCorpusReader(os.path.join(path, 'WN' + old_version), None)
    wn3 = WordNetCorpusReader(os.path.join(path, 'WN' + new_version), None)
    for pos in ['nouns', 'verbs']:
        # pos[0] gives the single-letter POS tag ('n' / 'v') the reader expects.
        synsets_2n = set(wn2.all_synsets(pos[0]))
        synsets_3n = set(wn3.all_synsets(pos[0]))
        # Synsets present in both versions anchor the gold hypernyms;
        # synsets only in the new version contribute the novel lemmas.
        reference_nouns = synsets_3n.intersection(synsets_2n)
        new = extract_new_lemmas(synsets_3n.difference(synsets_2n), wn2, pos[0])
        hypernyms = generate_gold(new, wn3, reference_nouns, pos[0])
        print(f"Len {pos} {len(hypernyms)}")
        save(dict(hypernyms), out_path, f"{pos}_en.{old_version}-{new_version}.tsv")
class TestTransform(unittest.TestCase):
    """Smoke tests: the WordNet corpus reader loads every bundled language."""

    @classmethod
    def setUpClass(cls):
        # Unpack each language's tarball into its own hidden work directory.
        cls.languages = ["cat", "eng", "eus", "glg", "spa"]
        cls.wn_names = {lang: '.wordnet_' + lang for lang in cls.languages}
        for lang, dest in cls.wn_names.items():
            with tarfile.open('wordnet_' + lang + '.tar.gz') as archive:
                archive.extractall(dest)

    def test_all_synsets(self):
        # Walking every synset also checks that each synset in the data
        # files is present in the index files. Success = no exception.
        self.wncr = WordNetCorpusReader(self.wn_names['spa'], None)
        for synset in self.wncr.all_synsets():
            a = synset

    def test_invalid_literal_for_int_16(self):
        # Regression test named after an "invalid literal for int() with
        # base 16" failure previously seen while parsing this entry
        # (a Spanish data line with lemma count 0x0a). Success = no exception.
        self.wncr = WordNetCorpusReader(self.wn_names['spa'], None)
        for synset in self.wncr.synsets("agudeza"):
            a = synset

    def test_key_error(self):
        # Regression: antonym lookup on this lemma used to raise KeyError.
        # Success = no exception.
        self.wncr = WordNetCorpusReader(self.wn_names['spa'], None)
        self.wncr.lemma("menor.a.09.menor").antonyms()

    def test_load_wordnet(self):
        # Constructing the reader must succeed for every bundled language.
        for lang in self.languages:
            self.wncr = WordNetCorpusReader(self.wn_names[lang], None)

    @classmethod
    def tearDownClass(cls):
        # Remove the directories extracted in setUpClass.
        for lang in cls.languages:
            shutil.rmtree(cls.wn_names[lang])
class TestTransform(unittest.TestCase):
    """Load-tests for the multilingual WordNet corpus reader."""

    @classmethod
    def setUpClass(cls):
        # Extract the per-language WordNet archives into hidden directories
        # so each test can point the reader at a plain dict tree.
        cls.languages = ["cat", "eng", "eus", "glg", "spa"]
        cls.wn_names = {}
        for language in cls.languages:
            extract_dir = '.wordnet_' + language
            cls.wn_names[language] = extract_dir
            with tarfile.open('wordnet_' + language + '.tar.gz') as tar:
                tar.extractall(extract_dir)

    def test_all_synsets(self):
        # Iterating every synset doubles as a consistency check that all
        # synsets in the data files appear in the index files.
        # Passing = no exception raised.
        self.wncr = WordNetCorpusReader(self.wn_names['spa'], None)
        for synset in self.wncr.all_synsets():
            a = synset

    def test_invalid_literal_for_int_16(self):
        # Regression for a historical "invalid literal for int() with
        # base 16" parse failure on this Spanish entry ("agudeza", whose
        # data line carries a hex lemma count of 0a). Passing = no exception.
        self.wncr = WordNetCorpusReader(self.wn_names['spa'], None)
        for synset in self.wncr.synsets("agudeza"):
            a = synset

    def test_key_error(self):
        # Regression: this antonym lookup previously raised KeyError.
        # Passing = no exception.
        self.wncr = WordNetCorpusReader(self.wn_names['spa'], None)
        self.wncr.lemma("menor.a.09.menor").antonyms()

    def test_load_wordnet(self):
        # Every bundled language must load without error.
        for language in self.languages:
            self.wncr = WordNetCorpusReader(self.wn_names[language], None)

    @classmethod
    def tearDownClass(cls):
        # Clean up the trees unpacked during setUpClass.
        for language in cls.languages:
            shutil.rmtree(cls.wn_names[language])
#!/usr/bin/env python
"""Dump every lemma name with its synset's lexicographer-file name.

Usage: script.py <wordnet_dict_dir>
"""
import sys

from nltk.corpus import WordNetCorpusReader

dict_dir = sys.argv[1]
# NOTE(fix): pass None for the second (omw) reader argument, matching how
# WordNetCorpusReader is constructed everywhere else in this project.
wn = WordNetCorpusReader(dict_dir, None)
for synset in wn.all_synsets():
    # NOTE(fix): modernized from Python 2 — `print` statement replaced with
    # print(); in NLTK 3.x lemmas/name/lexname are methods, not attributes.
    for lem in synset.lemmas():
        print(lem.name(), synset.lexname())
# One-off step: build an IDF-weighted bag-of-words embedding for every
# WordNet 1.7 synset gloss using pretrained word2vec vectors.
# (The idf pickle below was produced by an earlier dump step:
#   with open('gloss17_idfs.pickle', 'wb') as f: pickle.dump(dic, f)  )
# NOTE(review): pickle.load on these local cache files is fine only as long
# as they are trusted, locally-generated artifacts.
with open('gloss_idfs.pickle', 'rb') as f:
    dic = pickle.load(f)
print(dic['to'])
with open('gloss17_idfs.pickle', 'rb') as f:
    idfs = pickle.load(f)

model = word2vec.Word2Vec.load_word2vec_format(
    "../word2vec/models/GoogleNews-vectors-negative300.bin", binary=True)

# Strip gloss punctuation in a single C-level pass instead of five chained
# str.replace() calls. Removes the same characters as the original: ; ( ) : " '
_PUNCT_TABLE = str.maketrans("", "", ";():\"'")

vec_dict = {}
for synset in WN17.all_synsets():
    gloss = synset.definition().translate(_PUNCT_TABLE).lower()
    # BUG FIX: the original tested `if vec is 0`, an identity comparison with
    # an int literal that only works by accident of CPython's small-int
    # caching. Use None as the explicit "nothing accumulated yet" sentinel.
    vec = None
    for gw in gloss.split(" "):
        if gw in model.wv.vocab:
            weighted = idfs[gw] * model[gw]
            vec = weighted if vec is None else vec + weighted
    # Preserve original behavior: synsets whose gloss has no in-vocabulary
    # words are stored with the integer 0, not a zero vector.
    vec_dict[synset.name()] = 0 if vec is None else vec