def test_mini_1_1():
    """Exercise WN-LMF 1.1 features: logos, tags, pronunciations, forms, lexfiles."""
    # four lexicons total; two English, one Japanese
    assert len(wn.lexicons()) == 4
    assert len(wn.lexicons(lang='en')) == 2
    assert len(wn.lexicons(lang='ja')) == 1
    assert wn.lexicons(lang='ja')[0].logo == 'logo.svg'

    en = wn.Wordnet(lang='en')
    assert len(en.lexicons()) == 2
    assert len(en.expanded_lexicons()) == 0
    assert len(en.word('test-en-exemplify-v').lemma().tags()) == 1

    ja = wn.Wordnet(lang='ja')
    assert len(ja.lexicons()) == 1
    assert len(ja.expanded_lexicons()) == 1
    assert len(ja.synsets('例え')[0].hypernyms()) == 1
    assert ja.synsets('例え')[0].lexfile() == 'noun.cognition'
    word = ja.word('test-ja-例え-n')
    assert len(word.lemma().pronunciations()) == 1
    assert word.forms()[1].id == 'test-ja-例え-n-たとえ'
    pron = word.lemma().pronunciations()[0]
    assert pron.value == 'tatoe'
    assert pron.variety == 'standard'
    assert pron.notation == 'ipa'
    assert pron.phonemic
    assert pron.audio == 'tatoe.wav'

    # with expansion disabled, the cross-lexicon hypernym is not reachable
    unexpanded = wn.Wordnet(lang='ja', expand='')
    assert len(unexpanded.lexicons()) == 1
    assert len(unexpanded.expanded_lexicons()) == 0
    assert len(unexpanded.synsets('例え')[0].hypernyms()) == 0

    # an extension lexicon contributes relations to the base lexicon
    extended = wn.Wordnet(lexicon='test-en test-en-ext')
    assert len(extended.lexicons()) == 2
    assert len(extended.expanded_lexicons()) == 0
    assert len(extended.synsets('fire')[0].hyponyms()) == 1
def test_remove_extension(mini_lmf_1_0, mini_lmf_1_1):
    """Removing lexicons (including extensions) leaves the expected set installed.

    Fixes over the previous version: the local ``dir`` shadowed the builtin,
    and the global ``wn.config.data_directory`` restore plus DB-connection
    cleanup ran only on success — a failing assert left the config pointing
    at a deleted temp directory and connections open (which also breaks
    TemporaryDirectory teardown on Windows).  Both now run in ``finally``.
    """
    with tempfile.TemporaryDirectory('wn_data_1_1_trigger') as tmpdir:
        old_data_dir = wn.config.data_directory
        wn.config.data_directory = tmpdir
        try:
            wn.add(mini_lmf_1_0)
            wn.add(mini_lmf_1_1)
            assert len(wn.lexicons()) == 4
            wn.remove('test-en-ext')
            assert len(wn.lexicons()) == 3
            wn.remove('test-ja')
            assert len(wn.lexicons()) == 2
            wn.add(mini_lmf_1_1)
            assert len(wn.lexicons()) == 4
            wn.remove('test-en')
            assert {lex.id for lex in wn.lexicons()} == {'test-es', 'test-ja'}
        finally:
            wn.config.data_directory = old_data_dir
            # close any open DB connections before teardown
            for conn in wn._db.pool.values():
                conn.close()
def test_export(mini_lmf_1_0, tmp_path):
    """Exporting the installed lexicons reproduces the original LMF document."""
    out_dir = tmp_path / 'test_export'
    out_dir.mkdir()
    out_path = out_dir / 'mini_lmf_export.xml'
    wn.export(wn.lexicons(lexicon='test-en test-es'), out_path)
    if hasattr(ET, 'canonicalize'):  # available from Python 3.8
        # remove comments, indentation, etc.
        expected = ET.canonicalize(from_file=mini_lmf_1_0, strip_text=True)
        actual = ET.canonicalize(from_file=out_path, strip_text=True)
        # additional transformation to help with debugging
        expected = expected.replace('<', '\n<')
        actual = actual.replace('<', '\n<')
        assert expected == actual
def test_lexicons_empty():
    """With nothing installed, wn.lexicons() matches no lexicons."""
    installed = wn.lexicons()
    assert len(installed) == 0
def test_lexicons_unknown():
    """Queries for an unknown language or lexicon id match nothing."""
    assert len(wn.lexicons(lang='unk')) == 0
    assert len(wn.lexicons(lexicon='test-unk')) == 0
def test_lexicons_mini():
    """Lexicon queries by language and by specifier on the mini database."""
    everything = wn.lexicons()
    assert len(everything) == 2
    assert all(isinstance(lex, wn.Lexicon) for lex in everything)

    # each language query returns exactly the one matching lexicon
    for language in ('en', 'es'):
        matched = wn.lexicons(lang=language)
        assert len(matched) == 1 and matched[0].language == language

    # wildcard specifiers match both installed lexicons
    for spec in ('*', '*:1'):
        assert len(wn.lexicons(lexicon=spec)) == 2

    # id-based specifiers, with and without version, match only test-en
    for spec in ('test-en', 'test-en:1', 'test-en:*'):
        matched = wn.lexicons(lexicon=spec)
        assert len(matched) == 1 and matched[0].language == 'en'

    # specifier strings and (empty) requirements
    assert wn.lexicons(lexicon='test-en')[0].specifier() == 'test-en:1'
    assert wn.lexicons(lexicon='test-es')[0].specifier() == 'test-es:1'
    assert wn.lexicons(lexicon='test-en')[0].requires() == {}
    assert wn.lexicons(lexicon='test-es')[0].requires() == {}
'https://opensource.org/licenses/Apache-2.0': 'Apache-2.0', 'https://www.unicode.org/license.html': 'unicode' } core = [] for l in open('wn-core-ili.tab'): core.append(l.strip()) #print(core) def link(text, url): return (f"<a href='{url}'>{text}</a>") stats = list() for l in wn.lexicons(): ### Fixme link for wordnet license incore = len( [s for s in wn.synsets(lexicon=l.id) if s.ili and (s.ili.id in core)]) synsets = len(wn.synsets(lexicon=l.id)) data = f""" <tr> <th>{l.specifier()}</th> <td>{l.language}</td> <td>{link(l.label, l.url)}</td> <td align='right'>{synsets:,d}</td> <td align='right'>{len(wn.senses(lexicon=l.id)):,d}</td> <td align='right'>{len(wn.words(lexicon=l.id)):,d}</td> <td align='right'>{incore/len(core):.1%}</td> <td>{link(licenses[l.license], l.license)}</td> </tr>""" stats.append(data)
def _lexicons(args):
    """Print one tab-separated line (id, version, [language], label) per match."""
    for lexicon in wn.lexicons(lang=args.lang, lexicon=args.lexicon):
        fields = (lexicon.id, lexicon.version, f'[{lexicon.language}]', lexicon.label)
        print('\t'.join(fields))