def filter(text, result_file=None):
    """Print aligned pinyin/word rows for text annotated as ``word(pinyin)``.

    Every line of *text* is scanned for ``word(pinyin)`` groups, each optionally
    preceded by one punctuation mark. For each line two tab-separated rows are
    printed: the pinyin row first, then the word row, with every cell
    right-aligned to width 5. Punctuation is copied into both rows.

    A word longer than one character has its pinyin split by
    ``PinyinSplitter.split_pinyin``; if the number of pieces differs from the
    number of characters, the mismatch is printed and the function returns
    immediately without processing the remaining lines.

    :param text: annotated text, lines separated by ``\\n``.
    :param result_file: optional path; when truthy, both rows of every line
        are appended to it.
    """
    pattern = re.compile(r'([,。?]?)(.+?)\((.+?)\)')
    for raw_line in text.split('\n'):
        words = []
        pinyins = []
        for match in pattern.findall(raw_line.strip()):
            symbol, word, pinyin = match
            if symbol:
                # Punctuation occupies a cell in both rows.
                words.append(symbol)
                pinyins.append(symbol)
            if len(word) <= 1:
                # Single character: the pinyin is used as-is.
                words.append(word)
                pinyins.append(pinyin)
                continue
            syllables = PinyinSplitter.split_pinyin(pinyin)
            if len(syllables) != len(word):
                print('汉字与拼音长度不匹配')
                print(match)
                return
            # Several characters: one cell per character / syllable.
            words.extend(word)
            pinyins.extend(syllables)

        pinyin_row = '\t'.join('%5s' % cell for cell in pinyins)
        word_row = '\t'.join('%5s' % cell for cell in words)
        print(pinyin_row)
        if result_file:
            filex.write(result_file, pinyin_row + '\n', mode='a')
            filex.write(result_file, word_row + '\n', mode='a')
        print(word_row)
def translate(en):
    """Translate English text to Simplified Chinese via translate.google.cn.

    The request token is computed twice (``Py4Js().getTk`` and
    ``get_google_tk``). When the two disagree, *en* is appended to
    ``data/error_tk.txt`` and returned unchanged without issuing a request.

    :param en: the English text to translate.
    :return: the translation after ``GoogleTranslator.process_result``;
        *en* itself on a token mismatch; ``None`` when the response is empty
        or parses to an empty value.
    """
    token = Py4Js().getTk(en)
    if token != get_google_tk(en):
        print('计算的 tk 不相等')
        filex.write('data/error_tk.txt', en + '\n', 'a')
        return en

    query = (
        "http://translate.google.cn/translate_a/single?client=t"
        "&sl=en&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca"
        "&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1"
        "&srcrom=0&ssel=0&tsel=0&kc=2&tk=%s&q=%s"
    )
    url = query % (token, urllib.parse.quote(en))
    response = netx.get(url, need_print=False)
    # Sample response:
    # [[["测试","test",null,null,2],[null,null,"Cèshì","test"]],...]
    if not response:
        return None

    parsed = json.loads(response)
    if not parsed:
        return None

    # The first element holds the translation segments; the leading entries
    # are translations and the last one may carry the pinyin instead.
    segments = parsed[0]
    # Only 5-element entries are translations; their first item is the text.
    cn = ''.join(segment[0] for segment in segments if len(segment) == 5)
    return GoogleTranslator.process_result(en, cn)
def get_pan_url(self, url):
    """Fetch a page on www.57fx.com and record its download link.

    The page is searched for an ``<a name="downurl">`` element and the value
    of its ``href`` attribute is appended as one line to
    ``self.pan_list_file``. If the page has no such anchor, or the anchor has
    no ``href``, a message is printed and nothing is written.

    :param url: site-relative path, appended to ``http://www.57fx.com``.
    """
    page_url = 'http://www.57fx.com' + url
    response = requests.get(page_url)
    soup = BeautifulSoup(response.text, "html.parser")
    anchor = soup.find('a', {'name': 'downurl'})
    # find() yields None when nothing matches; subscripting it would raise
    # TypeError, so treat a missing anchor/href as "no link on this page".
    href = anchor.get('href') if anchor is not None else None
    if not href:
        print('未找到下载链接', page_url)
        return
    filex.write(self.pan_list_file, href + '\n', mode='a')
def export_html(source_file, result_file):
    """Export the lipsticks listed in *source_file* as an HTML page.

    Each line of *source_file* is parsed with ``Lipstick.from_string`` and
    rendered as one Bootstrap row: a text column (index, name, category,
    other) followed by the lipstick's images. Images are stored as
    ``image/<type>_<row>_<n>.jpg`` next to *result_file*, where ``<type>`` is
    the part of the result file's base name before the first ``_``; an image
    is downloaded with ``netx.get_file`` only when that file does not exist.

    :param source_file: path of the text file with one lipstick per line.
    :param result_file: path of the HTML file to write.
    """
    lipstick_type = os.path.splitext(os.path.basename(result_file))[0].split('_')[0]
    # May be '' when result_file has no directory part; os.path.join copes
    # with that, whereas gluing on '/' would point at the filesystem root.
    output_dir = os.path.dirname(result_file)
    lines = filex.read_lines(source_file, ignore_line_separator=True)
    html = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>选呀选呀选口红~</title>
    <link rel="stylesheet" href="https://cdn.bootcss.com/bootstrap/3.3.7/css/bootstrap.min.css"
          integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u"
          crossorigin="anonymous">
    <link rel="stylesheet" href="css/lipstick.css">
</head>
<body>
<div class="container-fluid">
[content]
</div>
</body>
</html>
'''
    content = []
    for row_number, line in enumerate(lines, start=1):
        content.append('<div class="row">')
        lipstick = Lipstick.from_string(line)

        # Left column: textual details.
        content.append('<div class="col-xs-3">')
        details = [
            '编号:' + lipstick.index,
            '色号:' + lipstick.name,
            '类别:' + lipstick.category,
            lipstick.other,
        ]
        content.extend('<h4>%s</h4>' % cell for cell in details)
        content.append('</div>')

        # Right side: the images.
        image_urls = lipstick.img.split(',')
        for image_number, image_url in enumerate(image_urls, start=1):
            # With exactly two images the second one gets the wider column.
            is_wide = len(image_urls) == 2 and image_number == 2
            col_style = 'col-xs-5' if is_wide else 'col-xs-3'
            # The page references the image relative to itself; build that
            # path once instead of stripping the directory back out of the
            # local path (str.replace removes every occurrence, not a prefix).
            relative_path = 'image/%s_%03d_%d.jpg' % (lipstick_type, row_number, image_number)
            local_path = os.path.join(output_dir, relative_path)
            if not os.path.exists(local_path):
                netx.get_file(image_url, local_path)
            content.append('<img class="%s" src="%s"/>' % (col_style, relative_path))
        content.append('</div>')

    filex.write(result_file, html.replace('[content]', '\n'.join(content)))
def export_color_html(source_file, result_file):
    """Export a colour overview page with four lipsticks per row.

    Each line of *source_file* is parsed with ``Lipstick.from_string`` and
    rendered as a cell showing its index, its name and the image
    ``image/<type>_<line number>_1.jpg``, where ``<type>`` is the part of the
    result file's base name before the first ``_``. No image is downloaded
    here; the page only references the files.

    :param source_file: path of the text file with one lipstick per line.
    :param result_file: path of the HTML file to write.
    """
    template = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>选呀选呀选口红~</title>
    <link rel="stylesheet" href="https://cdn.bootcss.com/bootstrap/3.3.7/css/bootstrap.min.css"
          integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u"
          crossorigin="anonymous">
    <link rel="stylesheet" href="css/lipstick_color.css">
</head>
<body>
<div class="container-fluid">
[content]
</div>
</body>
</html>
'''
    base_name = os.path.basename(result_file)
    lipstick_type = os.path.splitext(base_name)[0].split('_')[0]
    lines = filex.read_lines(source_file, ignore_line_separator=True)
    total = len(lines)

    per_row = 4
    parts = []
    for row_start in range(0, total, per_row):
        parts.append('<div class="row">')
        # The last row may hold fewer than four lipsticks.
        row_stop = min(row_start + per_row, total)
        for position in range(row_start, row_stop):
            lipstick = Lipstick.from_string(lines[position])
            label = '<br/>'.join(('编号:' + lipstick.index, '色号:' + lipstick.name))
            # Always the first image of the lipstick, numbered from 1.
            image_path = 'image/%s_%03d_%d.jpg' % (lipstick_type, position + 1, 1)
            parts.extend([
                '<div class="col-xs-3">',
                '<span class="item">%s</span>' % label,
                '<img class="center-block" src="%s"/>' % image_path,
                '</div>',
            ])
        parts.append('</div>')

    filex.write(result_file, template.replace('[content]', '\n'.join(parts)))
def export_all_cmd_help(self):
    """Export the help text of every command into ``self.all_cmd_file``.

    For each entry returned by ``self.get_all_cmd()`` that contains a space,
    the part before the first space is taken as the command name. A heading
    ``【i/total】name`` is appended to the output file, then
    ``help <name> > <temp file>`` is run through ``self.run_cmd`` and the
    captured text (read as GBK) is appended inside a fenced code block.
    Progress is printed as ``i/total``. The temporary file is removed at the
    end.
    """
    cmd_list = self.get_all_cmd()
    total = len(cmd_list)
    for number, entry in enumerate(cmd_list, start=1):
        print('%d/%d' % (number, total))
        if ' ' not in entry:
            continue
        # Only the command name is needed; the text after it is ignored.
        name = entry.split(' ', maxsplit=1)[0]
        filex.write(self.all_cmd_file, '\n\n# 【%d/%d】%s\n' % (number, total, name), mode='a')
        if name == 'SC':
            # Its help waits for input and blocks, so only the heading is written.
            continue
        self.run_cmd('help %s > %s' % (name, self.temp_file))
        result = filex.read_lines(self.temp_file, encoding='gbk')
        result.insert(0, '```\n')
        result.append('\n```')
        filex.write_lines(self.all_cmd_file, result, mode='a')

    # The temp file only exists if at least one help command actually ran.
    try:
        os.remove(self.temp_file)
    except FileNotFoundError:
        pass
def login(self, file_path):
    """Log in with credentials from *file_path* and save the session data.

    The file must contain the account on its first line and the password on
    its second; when it is empty or has fewer than two lines a message is
    printed and ``exit()`` is called. The login page is fetched first to
    obtain cookies and an authenticity token, which are then posted together
    with the credentials. Cookies and token are parsed again from the login
    response; on success the cookies (as ``k=v;k=v``) are written to
    ``self.cookies_file`` and the token to ``self.token_file``. If either
    step yields no cookies or no token, a failure message is printed and the
    method returns without writing anything.

    :param file_path: path of the credentials file.
    """
    credentials = filex.read_lines(file_path, ignore_line_separator=True)
    if not credentials:
        print('没有读取到登录信息', file_path)
        exit()
    if len(credentials) < 2:
        print('登录信息不完整,第一行写帐号,第二行写密码')
        exit()

    print('打开登录页...')
    login_page = requests.get(self.login_url)
    cookies, authenticity_token = self.parse_cookies_and_token_from_result(login_page)
    if not cookies or not authenticity_token:
        print('登录失败')
        return

    # Cookies and token obtained; submit the login form.
    username = credentials[0]
    form = {
        'username': username,
        'password': credentials[1],
        'utf8': '✓',
        'authenticity_token': authenticity_token,
    }
    print('登录帐号 %s ...' % username)
    login_response = requests.post(self.login_url, form, cookies=cookies)

    # Both the cookies and the token have to be read again from the response.
    cookies, authenticity_token = self.parse_cookies_and_token_from_result(login_response)
    if not cookies or not authenticity_token:
        print('登录失败')
        return

    print('登录成功')
    cookies_str = ';'.join(key + '=' + value for key, value in cookies.items())
    filex.write(self.cookies_file, cookies_str)
    filex.write(self.token_file, authenticity_token)