def __init__(self):
    """Open the StarDict readers and connect to the dict.qq.com HTTP service."""
    # StarDict part: paths of the dictionary's .ifo / .idx / .dict.dz files.
    self.ifo_file, self.idx_file, self.dict_file = (
        "dict/powerword2007_pwqec.ifo",
        "dict/powerword2007_pwqec.idx",
        "dict/powerword2007_pwqec.dict.dz",
    )
    self.ifo_reader = IfoFileReader(self.ifo_file)
    self.idx_reader = IdxFileReader(self.idx_file)

    # TODO: this dictionary is too big to be loaded entirely (about 110M),
    #       which forces the use of an F2 frontend instance. Need to find a
    #       way to reduce its memory usage AND speed up the launch time.
    # TODO: this reader is not thread-safe; need to create multiple
    #       instances, one for each thread.
    reader_args = (self.dict_file, self.ifo_reader, self.idx_reader, True)
    self.dict_reader = DictFileReader(*reader_args)

    # QQ translation: the connection is opened right away, with a 10 s timeout.
    connection = httplib.HTTPConnection("dict.qq.com", 80, timeout=10)
    self.httpServ = connection
    connection.connect()
class Translator(object):
    """Resolve the meaning of words from three sources, tried in order.

    1. the datastore (``Word`` records keyed by the word),
    2. the local StarDict dictionary files,
    3. the dict.qq.com HTTP service.

    Meanings obtained from dict.qq.com, and the "unknown" fallback, are
    written back to the datastore.
    """

    def __init__(self):
        """Open the StarDict readers and connect to dict.qq.com."""
        # StarDict part.
        self.ifo_file = "dict/powerword2007_pwqec.ifo"
        self.idx_file = "dict/powerword2007_pwqec.idx"
        self.dict_file = "dict/powerword2007_pwqec.dict.dz"
        self.ifo_reader = IfoFileReader(self.ifo_file)
        self.idx_reader = IdxFileReader(self.idx_file)
        # TODO: this dictionary is too big to be loaded entirely (about 110M),
        #       which forces the use of an F2 frontend instance. Need to find
        #       a way to reduce its memory usage AND speed up the launch time.
        # TODO: this reader is not thread-safe; need to create multiple
        #       instances, one for each thread.
        # NOTE(review): the trailing True presumably requests loading the
        #       whole dictionary - confirm against DictFileReader.
        self.dict_reader = DictFileReader(self.dict_file, self.ifo_reader,
                                          self.idx_reader, True)

        # QQ translation: a single connection reused by every remote lookup.
        self.httpServ = httplib.HTTPConnection("dict.qq.com", 80, timeout=10)
        self.httpServ.connect()

    def run(self, wordlist):
        """Fill in ``element["meaning"]`` for every element of *wordlist*.

        Each element must be a mapping with a ``"name"`` key. The elements
        are modified in place and the same list object is returned.
        """
        for wordElement in wordlist:
            wordElement["meaning"] = self.getMeaning(wordElement["name"])
        return wordlist

    def getMeaning(self, wordName):
        """Return the meaning of *wordName*, or ``"unknown"``.

        The sources are queried lazily in order (datastore, StarDict, QQ) and
        the first non-empty answer wins. When every source fails, "unknown"
        is stored in the datastore and returned.
        """
        lookups = (
            self.getTranslationFromDB,
            self.getTranslationFromStarDict,
            self.getTranslationFromQQ,
        )
        for lookup in lookups:
            meaning = lookup(wordName)
            if meaning:
                return meaning

        meaning = "unknown"
        self.storeTranslationToDB(wordName, meaning, None)
        return meaning

    def storeTranslationToDB(self, wordName, meaning, response):
        """Persist *meaning* for *wordName* as a ``Word`` record.

        ``repr(response)`` is stored in the record's ``origine`` field.
        """
        # NOTE(review): callers pass the HTTP response object (or None), so
        # this stores the object's repr(), not the response payload -
        # confirm that this is what is wanted.
        wordRecord = Word(key_name=wordName,
                          translation=meaning,
                          origine=repr(response))
        wordRecord.put()

    def getTranslationFromDB(self, wordName):
        """Return the stored translation of *wordName*, or None if absent."""
        wordRecord = db.get(db.Key.from_path("Word", wordName))
        return wordRecord.translation if wordRecord else None

    def getTranslationFromStarDict(self, wordName):
        """Return the StarDict translation of *wordName*, or None if absent.

        The "k" field of the first dictionary hit is parsed as XML. Each
        part-of-speech element is paired positionally with an explanation
        element, formatted as "<part of speech> <explanation>", and the
        pairs are joined with "; ". Surplus elements on either side are
        ignored.
        """
        raw = self.dict_reader.get_dict_by_word(wordName)
        if not raw:
            return None

        xmldoc = minidom.parseString(raw[0]["k"])
        categories = xmldoc.getElementsByTagName(u"单词词性")
        meanings = xmldoc.getElementsByTagName(u"解释项")
        # zip() stops at the shorter of the two node lists.
        return "; ".join(
            " ".join([category.firstChild.wholeText,
                      meaning.firstChild.wholeText])
            for category, meaning in zip(categories, meanings)
        )

    def getTranslationFromQQ(self, wordName):
        """Return the dict.qq.com translation of *wordName*, or None.

        On success the meaning is also stored in the datastore. None is
        returned when the server does not answer 200 OK or when the payload
        is not JSON of the expected ``{"local": [{"des": [...]}]}`` shape.
        """
        # TODO: coroutine optimisation
        # Function-scope import so the method works on Python 2 and 3 without
        # relying on the module-level imports.
        try:
            from urllib import quote  # Python 2
        except ImportError:
            from urllib.parse import quote  # Python 3

        # Percent-encode the word (as UTF-8) so that spaces and non-ASCII
        # characters cannot produce a malformed request line.
        if isinstance(wordName, bytes):
            query = wordName
        else:
            query = wordName.encode("utf-8")
        self.httpServ.request('GET', "/dict?q=" + quote(query, safe=""))
        response = self.httpServ.getresponse()

        # Always drain the body: the persistent connection cannot issue the
        # next request until the previous response has been fully read.
        body = response.read()
        if response.status != httplib.OK:
            return None

        try:
            data = json.loads(body)
            des = data["local"][0]["des"]
            ds = [" ".join(ele.values()) for ele in des]
        except (ValueError, KeyError, IndexError, TypeError, AttributeError):
            # Invalid JSON or unexpected structure: no usable translation.
            return None

        meaning = "; ".join(ds)
        self.storeTranslationToDB(wordName, meaning, response)
        return meaning