Example #1
0
    def existsItem(self, text):
        """Return True if *text* is a known name or dictionary lemma.

        The text is lower-cased and passed through ``self.normalizeInput``
        before any lookup. It counts as existing when it is found in
        ``self.enamdict``, or when the database has a matching row in
        ``edict_lemmas.lemma``, ``kotobank_lemmas.lemmatitle``, or (using the
        ``romkan.hiragana_to_katakana`` form of the text)
        ``kotobank_lemmas.lemmasubtitle``.

        Args:
            text: The word to look up.

        Returns:
            True when at least one source contains the word, False otherwise.
        """
        text = self.normalizeInput(text.lower())

        # Cheap in-memory check first, so the database is only hit on a miss.
        if text in self.enamdict:
            return True

        katakanaText = romkan.hiragana_to_katakana(text)

        # Values are bound as parameters rather than formatted into the SQL:
        # the previous replace("'", "\'") was a no-op ("\'" == "'"), so any
        # apostrophe in the input produced broken or injectable SQL.
        # NOTE(review): assumes self.connection accepts sqlite3-style named
        # parameters (":name" with a dict) - confirm against the driver used.
        query = """
                select 1
                  from edict_lemmas l
                 where lemma = :text
                 union all
                select 1
                  from kotobank_lemmas kl
                 where lemmatitle = :text
                 union all
                select 1
                  from kotobank_lemmas kl
                 where lemmasubtitle = :katakanaText
                """
        parameters = {"text": text, "katakanaText": katakanaText}

        # One row is enough to answer the question; no need to fetch them all.
        return self.connection.execute(query, parameters).fetchone() is not None
Example #2
0
    def getTranslation(self, text):
        """Return the dictionary entries found for *text*, or None.

        The text is lower-cased and passed through ``self.normalizeInput``;
        a katakana form is derived with ``romkan.hiragana_to_katakana``.
        Results are collected in this order:

        1. ``edict_articles`` content for matching ``edict_lemmas`` rows,
           prefixed with the '/'-joined ``pitch_accents.base_form`` values
           for the lemma's uninflected form.
        2. ``kotobank_articles`` content (only rows whose ``dictionary`` is
           'デジタル大辞泉の解説') for lemmas matching by title, by katakana
           subtitle, or through the uninflected form of an edict lemma,
           rendered with a bold "title - subtitle (accents)" header.
        3. Any entries stored under the normalized text in ``self.enamdict``.

        ``<a>``, ``<img>``, ``<section>`` and ``<spellout>`` tags are removed
        from the database entries.

        Args:
            text: The word to translate.

        Returns:
            A non-empty list of entry strings, or None when *text* is blank
            or nothing was found.
        """
        if not text.strip():
            return None

        text = self.normalizeInput(text.lower())
        katakanaText = romkan.hiragana_to_katakana(text)

        # Values are bound as parameters rather than formatted into the SQL:
        # the previous replace("'", "\'") was a no-op ("\'" == "'"), so any
        # apostrophe in the input produced broken or injectable SQL.
        # NOTE(review): assumes self.connection accepts sqlite3-style named
        # parameters (":name" with a dict) - confirm against the driver used.
        query = """
                select ifnull((select group_concat(acc.base_form, '/')
                                 from pitch_accents acc
                                 where acc.kanji = l.uninflectedLemma), '') || ' ' || a.content,
                       1 as o
                  from edict_lemmas l
                  join edict_articles a on a.id = l.articleId
                 where l.lemma = :text
                union
                select replace(
                        '<b>' || kl.lemmatitle || ' - ' || kl.lemmasubtitle 
                              || ' (' || ifnull((select group_concat(acc.base_form, '/')
                                                        from pitch_accents acc
                                                        where acc.kanji = kl.lemmatitle), '') 
                              || ')</b><br/>' || ka.content,
                        '()',
                        ''),
                       2 as o
                  from kotobank_lemmas kl
                  join kotobank_rel_lemma_article rel on kl.id = rel.lemmaid
                  join kotobank_articles ka on ka.id = rel.articleId
                 where (kl.lemmatitle = :text
                            or kl.lemmasubtitle = :katakanaText
                            or kl.lemmatitle in (select el.uninflectedlemma from edict_lemmas el where el.lemma = :text)
                            or kl.lemmasubtitle in (select el.uninflectedlemma from edict_lemmas el where el.lemma = :katakanaText)
                        ) and ka.dictionary = 'デジタル大辞泉の解説'
                 order by o
                """
        parameters = {"text": text, "katakanaText": katakanaText}

        output = []
        for row in self.connection.execute(query, parameters).fetchall():
            entry = row[0]
            # Strip closing tags first (same order as before), then the
            # opening tags together with their attributes.
            for closingTag in ("</a>", "</img>", "</spellout>", "</section>"):
                entry = entry.replace(closingTag, "")
            entry = re.sub(r"<(a|img|section|spellout) .*?>", "", entry)
            output.append(entry)

        # Name-dictionary entries come after the database results.
        if text in self.enamdict:
            output += self.enamdict[text]

        # Callers expect None, not an empty list, when nothing was found.
        return output or None