Beispiel #1
0
 def __init__(self):
     """Create the StarDict readers and open the dict.qq.com connection."""
     # Local StarDict dictionary files.
     self.ifo_file = "dict/powerword2007_pwqec.ifo"
     self.idx_file = "dict/powerword2007_pwqec.idx"
     self.dict_file = "dict/powerword2007_pwqec.dict.dz"
     self.ifo_reader = IfoFileReader(self.ifo_file)
     self.idx_reader = IdxFileReader(self.idx_file)

     # TODO: this dictionary is too big to be loaded entirely (about
     # 110M), which forces the use of an F2 frontend instance. Find a way
     # to reduce the memory usage AND to speed up the launch time.
     #
     # TODO: this reader is not thread-safe; one instance per thread is
     # needed.
     reader = DictFileReader(
         self.dict_file, self.ifo_reader, self.idx_reader, True)
     self.dict_reader = reader

     # Remote fallback: connection to the QQ dictionary service, opened
     # right away.
     connection = httplib.HTTPConnection("dict.qq.com", 80, timeout=10)
     self.httpServ = connection
     connection.connect()
Beispiel #2
0
class Translator(object):
    """Resolve word meanings from three sources, tried in order.

    The sources are the datastore (``Word`` records), a local StarDict
    dictionary and the dict.qq.com web service. Words that none of them can
    translate are recorded in the datastore with the meaning "unknown".

    One HTTP connection and one StarDict reader are held per instance; per
    the TODO in ``__init__`` the reader is not thread-safe.
    """

    def __init__(self):
        """Create the StarDict readers and open the dict.qq.com connection."""
        # StarDict part
        self.ifo_file = "dict/powerword2007_pwqec.ifo"
        self.idx_file = "dict/powerword2007_pwqec.idx"
        self.dict_file = "dict/powerword2007_pwqec.dict.dz"
        self.ifo_reader = IfoFileReader(self.ifo_file)
        self.idx_reader = IdxFileReader(self.idx_file)
        # TODO: this dictionary is too big to be loaded entirely (about
        # 110M), which forces the use of an F2 frontend instance. Find a way
        # to reduce the memory usage AND to speed up the launch time.

        # TODO: this reader is not thread-safe; one instance per thread is
        # needed.
        # NOTE(review): the meaning of the trailing True flag is defined by
        # DictFileReader -- presumably "load into memory"; confirm.
        self.dict_reader = DictFileReader(self.dict_file,
                                          self.ifo_reader,
                                          self.idx_reader,
                                          True)
        # QQ translation: a single connection reused for every request.
        self.httpServ = httplib.HTTPConnection("dict.qq.com", 80, timeout=10)
        self.httpServ.connect()

    def run(self, wordlist):
        """Set ``element["meaning"]`` for every element of *wordlist*.

        Each element is looked up by its ``"name"`` entry. The list is
        modified in place and also returned.
        """
        for wordElement in wordlist:
            wordElement["meaning"] = self.getMeaning(wordElement["name"])
        return wordlist

    def getMeaning(self, wordName):
        """Return the meaning of *wordName*, or "unknown" if no source has it.

        Sources are queried lazily in order (datastore, StarDict, QQ); the
        first non-empty answer wins. A miss on all three is stored in the
        datastore as "unknown", so later calls stop at the datastore.
        """
        lookups = (self.getTranslationFromDB,
                   self.getTranslationFromStarDict,
                   self.getTranslationFromQQ)
        for lookup in lookups:
            meaning = lookup(wordName)
            if meaning:
                return meaning
        meaning = "unknown"
        self.storeTranslationToDB(wordName, meaning, None)
        return meaning

    def storeTranslationToDB(self, wordName, meaning, response):
        """Save a ``Word`` record keyed by *wordName*.

        ``repr(response)`` is stored in the ``origine`` field.
        NOTE(review): for an HTTP response object this is the object's repr,
        not the payload -- confirm that this is what was intended.
        """
        wordRecord = Word(key_name=wordName,
                          translation=meaning,
                          origine=repr(response))
        wordRecord.put()

    def getTranslationFromDB(self, wordName):
        """Return the stored translation of *wordName*, or None if absent."""
        wordRecord = db.get(db.Key.from_path("Word", wordName))
        return wordRecord.translation if wordRecord else None

    @staticmethod
    def _elementText(element):
        """Return the leading text of a DOM element, or u"" if it has none."""
        first = element.firstChild
        if first is None:
            # Empty element such as <tag/>: nothing to read.
            return u""
        # Only text nodes carry wholeText; anything else counts as no text.
        return getattr(first, "wholeText", u"")

    def getTranslationFromStarDict(self, wordName):
        """Return the StarDict translation of *wordName*, or None.

        The first entry's ``"k"`` field is parsed as XML. Part-of-speech
        elements and explanation elements are paired by position, each pair
        is rendered as "<part of speech> <explanation>", and pairs are joined
        with "; ". Elements without text are skipped instead of raising.
        """
        raw = self.dict_reader.get_dict_by_word(wordName)
        if not raw:
            return None
        xmldoc = minidom.parseString(raw[0]["k"])
        categories = xmldoc.getElementsByTagName(u"单词词性")  # part of speech
        meanings = xmldoc.getElementsByTagName(u"解释项")  # explanation item
        entries = []
        # zip() stops at the shorter list, so unpaired elements are ignored.
        for category, meaning in zip(categories, meanings):
            texts = (self._elementText(category), self._elementText(meaning))
            parts = [text for text in texts if text]
            if parts:
                entries.append(" ".join(parts))
        # An entry with no usable pairs counts as "no translation".
        return "; ".join(entries) or None

    def getTranslationFromQQ(self, wordName):
        """Return the dict.qq.com translation of *wordName*, or None.

        The reply is expected to be JSON in which ``data["local"][0]["des"]``
        is a list of dicts; the values of each dict are joined with spaces
        and the results with "; ". A non-empty translation is stored in the
        datastore before it is returned. A non-200 status, a body that is not
        JSON, or a payload of a different shape yields None.

        Connection-level errors raised by the HTTP client are not caught
        here and propagate to the caller.
        """
        # Local import: keeps this change independent of the file's import
        # block.
        import urllib

        # TODO: coroutine optimisation
        # Percent-encode the word so spaces, '&', '#' or non-ASCII text cannot
        # corrupt the request line. quote() needs bytes for non-ASCII input;
        # the original value is kept untouched for the datastore key.
        if isinstance(wordName, unicode):
            query = wordName.encode("utf-8")
        else:
            query = wordName
        self.httpServ.request('GET', "/dict?q=" + urllib.quote(query, safe=""))
        response = self.httpServ.getresponse()
        if response.status != httplib.OK:
            # The body must be consumed, otherwise the next request on this
            # persistent connection fails.
            response.read()
            return None
        try:
            data = json.load(response)
            des = data["local"][0]["des"]
            meaning = "; ".join(" ".join(ele.values()) for ele in des)
        except (KeyError, IndexError, TypeError, AttributeError, ValueError):
            # Unexpected payload: treat it as "no translation available".
            return None
        if not meaning:
            # Nothing useful came back; do not store an empty translation.
            return None
        self.storeTranslationToDB(wordName, meaning, response)
        return meaning