def extract():
    """Yield ``[name, explanation, frequency]`` for every word in the database.

    Yields:
        list: three-element list per ``NewWord`` row.
    """
    query = NewWord.select()
    print(len(query))
    # Bug fix: the original iterated a second, fresh NewWord.select() instead of
    # the query it had just built (an extra DB round trip); it also built each
    # result via an append loop.
    for word in query:
        yield [word.name, word.explanation, word.frequency]
def _insert_book_data(self, book, words_times):
    """Load the (word, frequency) pairs of *book* into the database.

    Existing words have their frequency incremented; new words are created.
    Finally the book is flagged as analyzed and saved.
    """
    if not book:
        return
    for word, fre in words_times:
        # Is this word already stored?
        existing = NewWord.select().where(NewWord.name == word)
        if existing:
            record = existing[0]
            record.frequency += fre
            record.save()
        else:
            NewWord.create(name=word, frequency=fre)
    book.is_analyzed = True
    book.save()
def add(self):
    """Push every valid, not-yet-added word to the remote word lists.

    Words are processed by descending frequency. When the current list is
    full, the next list id from ``self.listid`` is used.
    """
    query = NewWord.select().where(
        (NewWord.is_valid == True) & (NewWord.re1 == '')
    ).order_by(-NewWord.frequency)
    iter_word = iter(query)
    self._open_bookid()
    iter_lst = iter(self.listid)
    # Bug fix: 'id' shadowed the builtin; renamed to list_id.
    list_id = next(iter_lst)
    while True:
        # All words added -> stop.
        try:
            next_word = next(iter_word)
        except StopIteration:
            # Bug fix: was a bare 'except:' that swallowed every exception
            # (including KeyboardInterrupt), not just iterator exhaustion.
            break
        res = self._add_one(next_word.name, list_id)
        # Request error: abort and retry later.
        if res == '1':
            print('请求错误,稍后再试')
            break
        # Word rejected as invalid by the service.
        elif 'NOT' in res:
            next_word.re1 = 'invalid'
            next_word.save()
            continue
        # Current list is over its limit: switch lists and retry once.
        elif '过上限' in res:
            list_id = next(iter_lst)
            self.list_count = 0
            self._add_one(next_word.name, list_id)
        # Mark the word as added.
        next_word.re1 = 'added'
        next_word.save()
def _trans_ici(self):
    """Bulk-translate every stored word via the iciba API and save explanations.

    Fires all requests concurrently with grequests, then walks the responses
    in the same order as the words that produced them.
    """
    words = list(NewWord.select())
    urls = [
        'http://www.iciba.com/index.php?a=getWordMean&c=search&word=' + w.name
        for w in words
    ]
    print(len(urls))
    reqs = (grequests.get(u) for u in urls)
    responses = grequests.map(reqs, size=1000)
    print(responses)
    # Bug fixes vs. the original:
    #  * 'item = resp.json()' called .json() on the whole response LIST instead
    #    of each response object;
    #  * results were never matched back to a word ('name' was undefined) --
    #    zip words with responses, relying on grequests.map preserving order;
    #  * phonetics/parts live under data (symbols[0]), not the top-level item;
    #  * 'assert' for data validation is stripped under -O; use checks instead.
    for word, resp in zip(words, responses):
        try:
            if resp is None:
                continue
            item = resp.json()
            # 'baesInfo' is the API's own (misspelled) key -- do not "fix" it.
            data = item['baesInfo']['symbols'][0]
            # Skip words without both phonetic spellings.
            if not (data.get('ph_am') and data.get('ph_en')):
                continue
            # Skip words without part-of-speech information.
            if not data.get('parts') or not data['parts'][0].get('part'):
                continue
            ph_en = '英 [' + data['ph_en'] + ']'
            ph_am = '美 [' + data['ph_am'] + ']'
            ex = ''
            for part in data['parts']:
                ex += part['part'] + ';'.join(part['means']) + ';'
            # NOTE(review): the original stored only 'ex'
            # (via '(ph_en + ph_am, ex)[1]'); the phonetic strings were built
            # but never persisted -- behavior kept, confirm intent.
            word.explanation = ex
            word.save()
            print(word.explanation)
        except Exception as e:
            print(e)
    # Printed once at the end, not once per word as the original's
    # per-iteration 'finally' did.
    print('翻译结束')
def extract():
    """Serialize every word as a ``Bean`` dict and persist the list via ``save``."""
    print("extract()")
    query = NewWord.select()
    print(len(query))
    # Build the payload in one pass instead of an append loop.
    payload = [
        Bean(word.name, word.explanation, word.frequency).__dict__
        for word in query
    ]
    save(payload)
def extract():
    """Yield ``[name, phonogram, explanation]`` for each valid, already-added word.

    Rows come back ordered by descending frequency.
    """
    rows = NewWord.select().where(
        (NewWord.is_valid == True) & (NewWord.re1 == 'added')
    ).order_by(-NewWord.frequency)
    for row in rows:
        yield [row.name, row.phonogram, row.explanation]
def _insert_book(self, book, words):
    """Insert (word, frequency) pairs into the DB, then mark *book* processed.

    A word already present gets its frequency bumped; otherwise a fresh
    ``NewWord`` row is created.
    """
    if not book:
        # Nothing to do without a book record.
        return
    for name, count in words:
        matches = NewWord.select().where(NewWord.name == name)
        if matches:
            entry = matches[0]
            entry.frequency += count
            entry.save()
        else:
            NewWord.create(name=name, frequency=count)
    # Flag the book so it is not analyzed twice.
    book.is_analyzed = True
    book.save()
def translate():
    """Smoke-test the iciba translator, then refresh every word's explanation."""
    t = Translate()
    res = t._trans_ici('hello')
    print(res[1])
    # Walk the database and update each record in place.
    for record in NewWord.select():
        print(record.name, end=' ')
        record.explanation = str(t._trans_ici(record.name)[1])
        record.save()
def trans(self):
    """Translate each selected word; mark untranslatable ones invalid.

    Sleeps one second between requests to stay polite to the service.
    """
    # NOTE(review): this keeps words whose explanation is already non-empty --
    # confirm the filter was not meant to be == '' instead.
    candidates = NewWord.select().where(NewWord.explanation != '')
    if not candidates:
        return
    for word in candidates:
        result = self._trans_ici(word.name)
        if result:
            word.phonogram = result[0]
            word.explanation = result[1]
        else:
            word.is_valid = False
        word.save()
        time.sleep(1)
res = self._trans_ici(word.name) # print(res) if res: word.phonogram = res[0] # word. word.explanation = res[1] else: word.is_valid = False word.save() time.sleep(1) if __name__ == '__main__': t = Translate() # res = t._trans_shanbay('hello') # print(res) # t.trans() res = t._trans_ici('hello') print(res[1]) #写代码遍历修改数据库 for i in NewWord.select(): print(i.name, end=' ') exp = str(t._trans_ici(i.name)[1]) i.explanation = exp #print(i.explanation) i.save()