def get_test_gwn(db_path=TEST_DB):
    """Return a GWNSQL handle for ``db_path``, seeding it when it has no data.

    When ``db_path`` is not an existing file, or is a zero-byte file, the
    mock-up synsets read from MOCKUP_SYNSETS_DATA are inserted before the
    handle is returned. Otherwise the handle is returned untouched.
    """
    gwn_db = GWNSQL(db_path)
    already_populated = os.path.isfile(db_path) and os.path.getsize(db_path) != 0
    if already_populated:
        return gwn_db
    # Missing or empty file: insert dummy synsets
    mockup = GWordnetXML()
    mockup.read(MOCKUP_SYNSETS_DATA)
    gwn_db.insert_synsets(mockup.synsets)
    return gwn_db
def test_setup_insert_stuff(self):
    """Insert three mock-up synsets into an in-memory DB and read them back.

    Exercises both the single-synset and the bulk insert entry points inside
    one shared context, then checks the synset count, the set of tagged
    sensekeys and the number of sensekey rows.
    """
    expected_tags = {
        'not%4:02:00::',
        'be_born%2:30:00::',
        'christian_era%1:28:00::',
        'christ%1:18:00::',
        'date%1:28:04::',
        'musical_accompaniment%1:10:00::',
        'a_cappella%4:02:00::',
        'ad%4:02:00::',
        'ce%4:02:00::',
    }
    db = GWNSQL(":memory:")
    mockup = GWordnetXML()
    mockup.read(MOCKUP_SYNSETS_DATA)
    with db.ctx() as ctx:
        loaded = list(mockup.synsets)
        head, following = loaded[0], loaded[1:3]
        # One synset through insert_synset(), two more through insert_synsets()
        db.insert_synset(head, ctx=ctx)
        db.insert_synsets(following, ctx=ctx)
        # Everything inserted above must be selectable
        stored_synsets = ctx.synset.select()
        self.assertEqual(len(stored_synsets), 3)
        # All tagged sensekeys
        tagged = db.tagged_sensekeys(ctx=ctx)
        self.assertEqual(tagged, expected_tags)
        # All sensekey rows
        stored_keys = ctx.sensekey.select()
        self.assertEqual(len(stored_keys), 7)
def test_all_api(self):
    """Walk through the GWNSQL query API against an in-memory database.

    Covers get_synset(s), get_by_key(s), sk2sid, search (with and without a
    POS filter) and confirms that the hypernym/hyponym family raises
    WordnetException.
    """
    target_sid = 'r00008007'
    target_key = 'wholly%4:02:00::'

    def check_full_synset(synset):
        # The synset itself, its definition, examples and aux must all be truthy
        self.assertTrue(synset)
        self.assertTrue(synset.definition)
        self.assertTrue(synset.examples)
        self.assertTrue(synset.get_aux())

    gwn = GWNSQL(':memory:')
    with gwn.ctx() as ctx:
        setup_ram_gwn(gwn, ctx)
        # This synset ID is expected to be rejected
        with self.assertRaises(WordnetException):
            gwn.get_synset('00001740-n', ctx=ctx)
        id_rows = ctx.synset.select(columns=('ID',))
        self.assertEqual(len(id_rows), 219)

        # get_synset() and get_synsets()
        check_full_synset(gwn.get_synset('00008007-r', ctx=ctx))
        several_ids = ('a01179767', 'n03095965', 'r00001837')
        for found in gwn.get_synsets(several_ids, ctx=ctx):
            self.assertTrue(found.ID)
            self.assertTrue(found.definition)
            self.assertTrue(found.keys)

        # get_by_key()
        check_full_synset(gwn.get_by_key(target_key, ctx=ctx))

        # get_by_keys()
        wanted_keys = ('divine%3:00:02:heavenly:00', target_key)
        for found in gwn.get_by_keys(wanted_keys, ctx=ctx):
            self.assertTrue(found.definition)
            self.assertTrue(found.keys)
            self.assertTrue(found.examples)

        # sk2sid()
        self.assertEqual(gwn.sk2sid(target_key, ctx=ctx), target_sid)

        # search()
        lemma = 'automatically'
        hits = gwn.search(lemma=lemma, ctx=ctx)
        self.assertTrue(hits)
        for hit in hits:
            self.assertTrue(hit.keys)
            self.assertTrue(hit.definition)
            self.assertIn(lemma, hit.lemmas)
        # Restricting the search to POS 'v' must give a falsy result
        self.assertFalse(gwn.search(lemma=lemma, pos='v', ctx=ctx))

        # hypernyms, hyponyms, hypehypo are not supported
        for unsupported in ('hypernyms', 'hyponyms', 'hypehypo'):
            with self.assertRaises(WordnetException):
                getattr(gwn, unsupported)(target_sid, ctx=ctx)
def test_match_surface(self):
    """Check that synset v02681795 matches its raw surface strings.

    The raw strings come from data/fixed_surface.tab when that file has a
    row keyed by the synset ID; otherwise they come from splitting the
    synset's original text. Each raw string is paired with a gloss and the
    gloss tokens are imported into a ttl.Sentence built from the raw string
    before the overall match_surface() assertion runs.
    """
    fixed = CSV.read("data/fixed_surface.tab")
    # First column is the lookup key, the remaining columns are the raws;
    # falsy (empty) rows are skipped
    raws_map = {x[0]: x[1:] for x in fixed if x}
    gwn = GWNSQL(YLConfig.GWN30_DB)
    sid = 'v02681795'
    ss = gwn.get_synset(sid)
    raws = raws_map[sid] if sid in raws_map else ss.get_orig().split()
    print("raws: {}".format(raws))
    print("glosses: {}".format([(x.text(), x.cat) for x in ss.glosses]))
    for r, g in zip(raws, ss.glosses):
        tokens = [t.text for t in g]
        # Drop trailing ';' tokens. The emptiness check keeps a gloss with no
        # tokens (or only ';' tokens) from raising IndexError on tokens[-1].
        while tokens and tokens[-1] == ';':
            tokens.pop()
        sent = ttl.Sentence(r)
        sent.import_tokens(tokens)
        print("{} --- {}".format(r, tokens))
    self.assertTrue(ss.match_surface(raws=raws))
    for g in ss.glosses:
        print(g.items, g.surface)
def test_single_match(self):
    """Locate the raw chunk matching the definition of r00008007 and split it.

    Each whitespace-separated chunk of the synset's original text is tried
    against the definition tokens. On the first chunk whose token import
    succeeds, a chunk containing "(" is split in place so that every
    "("-prefixed part becomes its own entry. The before/after lists are
    printed; nothing is asserted.
    """
    gwn = GWNSQL(YLConfig.GWN30_DB)
    ss = gwn.get_synset('r00008007')
    raws = ss.get_orig().split()
    d = ss.get_def()
    for idx, r in enumerate(raws):
        sent = ttl.Sentence(r)
        try:
            tokens = [i.text for i in d.items]
            sent.import_tokens(tokens)
            # found the def raw
            if "(" in r:
                new_part = r.replace("(", ";(").split(";")
                raws[idx] = new_part[0]
                for loc, part in enumerate(new_part[1:]):
                    raws.insert(idx + loc + 1, part)
            # raws was mutated while being enumerated, so stop iterating now
            break
        except Exception:
            # This chunk did not match: try the next one. Exception (not a
            # bare except) so KeyboardInterrupt/SystemExit still propagate.
            continue
    print("Before:", ss.get_orig().split())
    print("After:", raws)
def test_setup_insert_stuff(self):
    """Build a fresh file-backed DB from three mock-up synsets and verify it.

    Any existing file at TEST_DB_SETUP is removed first so the counts below
    are not affected by a previous run.
    """
    # NOTE(review): another test_setup_insert_stuff is visible in this file;
    # if both live in the same TestCase the later definition shadows the
    # earlier one - confirm they belong to different classes.
    stale_db_exists = os.path.isfile(TEST_DB_SETUP)
    if stale_db_exists:
        os.unlink(TEST_DB_SETUP)
    db = GWNSQL(TEST_DB_SETUP)
    mockup = GWordnetXML()
    mockup.read(MOCKUP_SYNSETS_DATA)
    # One synset through insert_synset(), two more through insert_synsets()
    db.insert_synset(mockup.synsets[0])
    db.insert_synsets(mockup.synsets[1:3])
    self.assertIsNotNone(db)
    # Everything inserted above must be selectable
    stored_synsets = db.all_synsets()
    self.assertEqual(len(stored_synsets), 3)
    # All tagged sensekeys
    expected_tags = {
        'not%4:02:00::',
        'be_born%2:30:00::',
        'christian_era%1:28:00::',
        'christ%1:18:00::',
        'date%1:28:04::',
        'musical_accompaniment%1:10:00::',
        'a_cappella%4:02:00::',
        'ad%4:02:00::',
        'ce%4:02:00::',
    }
    tagged = db.get_all_sensekeys_tagged()
    self.assertEqual(tagged, expected_tags)
    # All sensekeys
    stored_keys = db.get_all_sensekeys()
    self.assertEqual(len(stored_keys), 7)
def test_shallow_search(self):
    """Searching for 'dog' with deep_select=False must give a truthy result."""
    results = GWNSQL(YLConfig.GWN30_DB).search('dog', deep_select=False)
    self.assertTrue(results)