def test_save_load(self):
    """Check that the dictionary survives both load round trips.

    The fixture dictionary is reloaded twice: once directly from the
    object returned by ``serialize()`` and once from a temporary file
    written by ``save()``. Each reloaded object must serialize to the
    same values as the fixture.
    """

    def validate(obj):
        """Compare ``obj.serialize()`` with the fixture, key by key."""
        expected = self.dictionary.serialize()
        actual = obj.serialize()
        for key, expected_value in expected.items():
            if isinstance(expected_value, np.ndarray):
                # ``np.array_equal`` only returns a bool; the previous code
                # discarded it, so array fields were never verified. Use an
                # asserting comparison so a mismatch fails the test.
                np.testing.assert_array_equal(expected_value, actual[key])
            else:
                eq_(expected_value, actual[key])

    # Round trip through the in-memory serialized form.
    validate(Dictionary.load(self.dictionary.serialize()))

    # Round trip through a file on disk.
    with NamedTemporaryFile() as f:
        self.dictionary.save(f.name)
        validate(Dictionary.load(f.name))
def setUp(self):
    """Build the ``Dictionary`` fixture using the 'regexp' tokenizer."""
    regexp_tokenizer = get_tokenizer('regexp')

    # Keyword options forwarded unchanged to Dictionary.build.
    build_options = dict(
        tokenizer=regexp_tokenizer,
        lowercase=True,
        min_word_count=2,
        min_entity_count=1,
        min_paragraph_len=5,
        category=True,
        disambi=False,
        pool_size=1,
        chunk_size=1,
        progressbar=False,
    )
    self.dictionary = Dictionary.build(get_dump_db(), **build_options)
def setUp(self):
    """Build the ``Dictionary`` and ``LinkGraph`` fixtures from the dump DB."""
    # Settings passed identically to both build calls.
    shared_options = dict(pool_size=1, chunk_size=1, progressbar=False)

    # NOTE(review): the second positional argument is passed as None; which
    # parameter it fills depends on Dictionary.build's signature - confirm.
    self.dictionary = Dictionary.build(
        get_dump_db(),
        None,
        lowercase=True,
        min_word_count=2,
        min_entity_count=1,
        min_paragraph_len=5,
        category=True,
        **shared_options
    )

    # The link graph is built against the dictionary created above.
    self.link_graph = LinkGraph.build(
        get_dump_db(),
        self.dictionary,
        **shared_options
    )
def setUp(self):
    """Build a ``PhraseDictionary`` and a ``Dictionary`` that uses it."""
    # Settings passed identically to both build calls.
    shared_options = dict(
        lowercase=True,
        pool_size=1,
        chunk_size=1,
        progressbar=False,
    )

    self.phrase_dic = PhraseDictionary.build(
        get_dump_db(),
        min_link_count=0,
        min_link_prob=0.1,
        max_phrase_len=3,
        **shared_options
    )

    # The phrase dictionary built above is handed to the Dictionary builder.
    self.dictionary = Dictionary.build(
        get_dump_db(),
        phrase_dict=self.phrase_dic,
        min_word_count=2,
        min_entity_count=1,
        min_paragraph_len=5,
        category=True,
        **shared_options
    )
def __init__(self):
    """Load the dictionary and mention DB from the module-level paths.

    The dictionary is loaded from ``WIKI_DICTIONARY_PATH`` first because
    ``MentionDB.load`` takes it as its second argument.
    """
    dictionary = Dictionary.load(WIKI_DICTIONARY_PATH)
    self.dic = dictionary
    self.db = MentionDB.load(WIKI_MENTION_DB_PATH, dictionary)
# -*- coding: utf-8 -*-
"""Print the mentions detected in a text file.

Command-line arguments, as read from ``sys.argv``:
    1: path of the text file to analyse
    2: path passed to ``Dictionary.load``
    3: path passed to ``MentionDB.load``
"""
import sys

from wikipedia2vec.dictionary import Dictionary
from wikipedia2vec.mention_db import MentionDB
from wikipedia2vec.utils.tokenizer.mecab_tokenizer import MeCabTokenizer

# The mention DB is loaded against the dictionary, so the dictionary comes first.
dictionary = Dictionary.load(sys.argv[2])
mention_db = MentionDB.load(sys.argv[3], dictionary)

with open(sys.argv[1]) as text_file:
    text = text_file.read()

# Tokenize the whole text with MeCab, then print each detected mention.
tokens = MeCabTokenizer().tokenize(text)
for mention in mention_db.detect_mentions(text, tokens):
    print(mention)
def __init__(self, lang, dic, mention_db):
    """Store the language and load the dictionary and mention DB.

    Args:
        lang: Value stored unchanged on ``self.lang``.
        dic: Argument passed to ``Dictionary.load``.
        mention_db: Argument passed to ``MentionDB.load`` together with
            the loaded dictionary.
    """
    self.lang = lang

    # Load the dictionary once and reuse the same object for the mention DB.
    loaded_dictionary = Dictionary.load(dic)
    self.dic = loaded_dictionary
    self.mention_db = MentionDB.load(mention_db, loaded_dictionary)