Esempio n. 1
0
 def hyponyms(self, synsetid, lang='eng', deep_select=True, ctx=None):
     """Return the synsets linked to ``synsetid`` through a 'hypo' link.

     Args:
         synsetid: Synset identifier; normalised with ``self.ensure_sid``.
         lang: Language passed on to ``self.get_synsets`` for deep selection.
         deep_select: When true, delegate to ``self.get_synsets`` to build the
             result; otherwise build bare ``Synset`` objects from the linked IDs.
         ctx: Data-access context exposing a ``synlink`` table. It is
             dereferenced unconditionally, so it must not be None here
             (presumably injected by a wrapper not visible in this snippet --
             TODO confirm).

     Returns:
         Whatever ``self.get_synsets`` returns when ``deep_select`` is true,
         otherwise a list of ``Synset`` objects.
     """
     synsetid = self.ensure_sid(synsetid)
     links = ctx.synlink.select("synset1=? and link='hypo'", (synsetid,), columns=('synset2',))
     if deep_select:
         return self.get_synsets(synsetids=(link.synset2 for link in links), lang=lang, ctx=ctx)
     # Each result is a row whose only selected column is synset2; unwrap it so
     # Synset receives the linked ID rather than the whole row object.
     return [Synset(link.synset2) for link in links]
Esempio n. 2
0
 def search(self,
            lemma,
            pos=None,
            deep_select=True,
            ignore_case=True,
            synsets=None,
            ctx=None,
            **kwargs):
     """Find synsets that have a term matching ``lemma`` via SQL LIKE.

     Args:
         lemma: Term pattern to match. If it contains ``%`` or ``_`` it is
             passed through ``escape_like`` and matched with ``ESCAPE '@'``.
         pos: Optional part of speech; adds a ``pos = ?`` condition when truthy.
         deep_select: When true, results go through
             ``self.results_to_synsets``; otherwise a ``SynsetCollection`` of
             bare ``Synset`` objects is returned.
         ignore_case: When true, compare ``lower(term)`` with the lower-cased
             lemma.
         synsets: Forwarded to ``self.results_to_synsets``.
         ctx: Data-access context exposing a ``synset`` table.
         **kwargs: Accepted but not used by this method.

     Returns:
         The value of ``self.results_to_synsets`` or a ``SynsetCollection``.
     """
     needs_escape = '%' in lemma or '_' in lemma
     if needs_escape:
         like_phrase = " LIKE ? ESCAPE '@'"
         lemma = escape_like(lemma)
     else:
         like_phrase = ' LIKE ? '

     # Pick the term comparison and the matching parameter together.
     if ignore_case:
         template = 'ID IN (SELECT sid FROM term WHERE lower(term) {})'
         needle = lemma.lower()
     else:
         template = 'ID IN (SELECT sid FROM term WHERE term {})'
         needle = lemma
     conditions = [template.format(like_phrase)]
     params = [needle]

     if pos:
         conditions.append('pos = ?')
         params.append(pos)

     # Only the synset IDs are fetched at this stage.
     where_clause = ' AND '.join(conditions)
     results = ctx.synset.select(where_clause, params, columns=('ID', ))

     if not deep_select:
         return SynsetCollection(synsets=(Synset(x.ID) for x in results))
     return self.results_to_synsets(results, ctx=ctx, synsets=synsets)
Esempio n. 3
0
 def get_synset(self, synsetid, lang='eng', ctx=None):
     """Load one synset with its lemmas, definitions and examples for ``lang``.

     Args:
         synsetid: Synset identifier; normalised with ``self.ensure_sid``.
         lang: Language used to filter words, definitions and examples.
         ctx: Data-access context exposing ``synset``, ``word``, ``sdef`` and
             ``sex`` tables.

     Returns:
         A populated ``Synset``, or None when ``ctx.synset.by_id`` finds no row.
     """
     synsetid = self.ensure_sid(synsetid)
     res = ctx.synset.by_id(synsetid)
     if res is None:
         # Unknown ID: report "not found" instead of failing on res.synset.
         return None
     synset = Synset(res.synset, lang=lang)
     # Lemmas: words attached to this synset through the sense table.
     words = ctx.word.select('wordid in (SELECT wordid FROM sense WHERE synset=?) and lang=?', (synsetid, lang))
     synset.lemmas.extend(w.lemma for w in words)
     # Definitions: the text is read from the row's ``_2`` field
     # (presumably a column literally named "def" -- TODO confirm against schema).
     def_rows = ctx.sdef.select("synset=? AND lang=?", (synsetid, lang))
     for row in def_rows:
         synset.definitions.append(row._2)
     # Examples use the same ``_2`` field convention.
     exes = ctx.sex.select('synset=? and lang=?', (synsetid, lang))
     synset.examples.extend(e._2 for e in exes)
     return synset
Esempio n. 4
0
 def get_synsets_by_lemma(self, lemma):
     """Collect every synset that has a sense row for ``lemma``.

     One ``Synset`` is built per matching row of the ``wss`` table and filled
     with that row's definition, lemma, sense key and tag count, plus the
     samples from the ``ex`` table ordered by ``sampleid``.

     Args:
         lemma: Exact lemma to look up.

     Returns:
         A ``SynsetCollection``; it is empty when nothing matches.
     """
     with Execution(self.schema) as exe:
         matches = exe.schema.wss.select(where='lemma=?', values=(lemma,))
         collected = SynsetCollection()
         if matches is None or len(matches) == 0:
             return collected
         for match in matches:
             entry = Synset(match.synsetid)
             entry.definition = match.definition
             entry.add_lemma(match.lemma)
             entry.add_key(match.sensekey)
             entry.tagcount = match.tagcount
             # Example sentences are fetched per synset.
             samples = exe.schema.ex.select(where='synsetid=?', values=[match.synsetid], orderby='sampleid')
             for sample_row in samples:
                 entry.exes.append(sample_row.sample)
             collected.add(entry)
         return collected
Esempio n. 5
0
 def get_synset_by_sk(self, sk):
     """Look up the synset that owns sense key ``sk``.

     The synset ID and definition come from the first matching ``wss`` row;
     lemmas and sense keys are added from every matching row and their tag
     counts are summed. Samples come from the ``ex`` table ordered by
     ``sampleid``.

     Args:
         sk: Sense key to search for.

     Returns:
         A ``Synset``, or None when no row matches.
     """
     with Execution(self.schema) as exe:
         matches = exe.schema.wss.select(where='sensekey=?', values=(sk,))
         if matches is None or len(matches) == 0:
             return None
         entry = Synset(matches[0].synsetid)
         entry.definition = matches[0].definition
         for match in matches:
             entry.add_lemma(match.lemma)
             entry.add_key(match.sensekey)
             entry.tagcount += match.tagcount
         # Example sentences for the synset of the first matching row.
         samples = exe.schema.ex.select(where='synsetid=?', values=[matches[0].synsetid], orderby='sampleid')
         for sample_row in samples:
             entry.exes.append(sample_row.sample)
         return entry
Esempio n. 6
0
 def all_senses(self):
     """Return a map from lemma to the list of its senses.

     The map is cached on ``WordnetSQL.sense_map_cache``. A falsy cache
     (for example an empty dict) is treated as not yet populated, so the
     query runs again in that case.

     Returns:
         dict mapping each lemma to a list of ``Synset`` objects, in the
         order produced by the query (lemma, pos, tagcount descending).
     """
     if WordnetSQL.sense_map_cache:
         return WordnetSQL.sense_map_cache
     _query = """SELECT lemma, pos, synsetid, sensekey, definition, tagcount
                             FROM wordsXsensesXsynsets ORDER BY lemma, pos, tagcount DESC;"""
     conn = self.get_conn()
     try:
         # fetchall() materialises every row, so the connection can be
         # released before the map is built -- and is released on failure too.
         result = conn.cursor().execute(_query).fetchall()
     finally:
         conn.close()
     # Build lemma map; pos, sensekey and definition are selected but unused.
     lemma_map = {}
     for (lemma, _pos, synsetid, _sensekey, _definition, tagcount) in result:
         sinfo = Synset(synsetid, tagcount=tagcount, lemma=lemma)
         lemma_map.setdefault(lemma, []).append(sinfo)
     WordnetSQL.sense_map_cache = lemma_map
     return lemma_map
Esempio n. 7
0
    def search_senses(self, lemma_list, pos=None, a_conn=None):
        """Return the senses of every lemma in ``lemma_list``.

        Results are cached on ``WordnetSQL.lemma_list_cache``. The returned
        list is the cached object itself, so callers should not mutate it.

        Args:
            lemma_list: Sequence of lemma strings; an empty one yields ``[]``
                without touching the cache or the database.
            pos: Optional part of speech; adds a ``pos = ?`` filter when truthy.
            a_conn: Optional open connection to reuse. When given it is left
                open; otherwise a connection from ``self.get_conn()`` is used
                and closed before returning.

        Returns:
            list of ``Synset`` objects, one per matching row.
        """
        if not lemma_list:
            return list()

        CACHE_JOIN_TOKEN = '|\t'*12
        cache_key = CACHE_JOIN_TOKEN.join(lemma_list)
        if pos:
            # The result depends on pos, so pos must be part of the key;
            # otherwise a later call with a different pos gets stale rows.
            # A tuple cannot collide with the plain-string keys used when pos
            # is not given (assumes the cache is a dict -- TODO confirm).
            cache_key = (cache_key, pos)
        if cache_key in WordnetSQL.lemma_list_cache:
            return WordnetSQL.lemma_list_cache[cache_key]

        # One "lemma=?" placeholder per requested lemma.
        lemma_filter = ' or '.join(["lemma=?"] * len(lemma_list))
        _query = """SELECT lemma, pos, synsetid, sensekey, definition, tagcount
                                FROM wordsXsensesXsynsets
                                WHERE (%s) """ % lemma_filter
        _args = list(lemma_list)
        if pos:
            _query += " and pos = ?"
            _args.append(pos)

        conn = a_conn if a_conn else self.get_conn()
        try:
            result = conn.cursor().execute(_query, _args).fetchall()
        finally:
            # Only close connections opened here, even when the query fails.
            if not a_conn:
                conn.close()

        # Unused columns get underscore names so the pos parameter is not shadowed.
        senses = [
            Synset(synsetid, tagcount=tagcount, lemma=lemma)
            for (lemma, _pos, synsetid, _sensekey, _definition, tagcount) in result
        ]

        WordnetSQL.lemma_list_cache[cache_key] = senses
        return senses
Esempio n. 8
0
 def get_synset(self, synsetid, ctx=None, **kwargs):
     """Load one synset with its lemmas, sense keys, tag count and examples.

     Args:
         synsetid: Synset identifier; normalised with ``self.ensure_sid``.
         ctx: Data-access context exposing ``ss``, ``wordsense`` and ``ex``
             tables.
         **kwargs: Accepted but not used by this method.

     Returns:
         A populated ``Synset``, or None when ``ctx.ss.by_id`` finds no row.
     """
     sid = self.ensure_sid(synsetid)
     info = ctx.ss.by_id(sid)
     if info is None:
         return None

     found = Synset(info.synsetid)
     found.definition = info.definition

     # Every word sense contributes a lemma, a sense key and its tag count.
     sense_rows = ctx.wordsense.select('synsetid=?', (sid, ),
                                       columns=('lemma', 'sensekey',
                                                'tagcount'))
     for sense in sense_rows:
         found.add_lemma(sense.lemma)
         found.add_key(sense.sensekey)
         found.tagcount += sense.tagcount

     # Examples are read in sampleid order.
     sample_rows = ctx.ex.select(where='synsetid=?',
                                 values=[sid],
                                 orderby='sampleid')
     for sample_row in sample_rows:
         found.examples.append(sample_row.sample)
     return found