Example #1
0
    def filter(text, result_file=None):
        """Align characters with their pinyin, one row pair per input line.

        Every line of ``text`` is scanned for ``word(pinyin)`` groups that may
        be preceded by a single punctuation mark. For each line a
        tab-separated pinyin row and a tab-separated character row are
        printed; when ``result_file`` is given both rows are also appended to
        it through ``filex.write``.

        Processing stops immediately (returning ``None``) when a
        multi-character word and the pieces returned by
        ``PinyinSplitter.split_pinyin`` differ in length.

        :param text: the annotated text, lines separated by ``\\n``
        :param result_file: optional path the rows are appended to
        """
        pattern = re.compile(r'([,。?]?)(.+?)\((.+?)\)')
        for raw_line in text.split('\n'):
            chars = []
            pinyins = []
            for symbol, word, pinyin in pattern.findall(raw_line.strip()):
                if symbol:
                    # Punctuation is copied into both rows to keep them aligned.
                    chars.append(symbol)
                    pinyins.append(symbol)
                if len(word) <= 1:
                    # A single character maps directly onto its pinyin.
                    chars.append(word)
                    pinyins.append(pinyin)
                    continue
                pieces = PinyinSplitter.split_pinyin(pinyin)
                if len(pieces) != len(word):
                    print('汉字与拼音长度不匹配')
                    print((symbol, word, pinyin))
                    return
                # Several characters: one column per character / pinyin piece.
                chars.extend(word)
                pinyins.extend(pieces)

            pinyin_row = '\t'.join('%5s' % cell for cell in pinyins)
            print(pinyin_row)
            if result_file:
                filex.write(result_file, pinyin_row + '\n', mode='a')

            char_row = '\t'.join('%5s' % cell for cell in chars)
            if result_file:
                filex.write(result_file, char_row + '\n', mode='a')
            print(char_row)
Example #2
0
 def translate(en):
     """Translate English text to Simplified Chinese via Google Translate.

     The request token is computed twice (``Py4Js.getTk`` and
     ``get_google_tk``). When the two disagree, the text is appended to
     ``data/error_tk.txt`` and returned untranslated.

     :param en: the English text to translate
     :return: the processed translation, ``en`` itself when the tokens
         differ, or ``None`` when the response is empty
     """
     token = Py4Js().getTk(en)
     check_token = get_google_tk(en)
     if token != check_token:
         print('计算的 tk 不相等')
         filex.write('data/error_tk.txt', en + '\n', 'a')
         return en

     url_template = (
         "http://translate.google.cn/translate_a/single?client=t"
         "&sl=en&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca"
         "&dt=rw&dt=rm&dt=ss&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1"
         "&srcrom=0&ssel=0&tsel=0&kc=2&tk=%s&q=%s"
     )
     url = url_template % (token, urllib.parse.quote(en))
     response = netx.get(url, need_print=False)
     # Sample response body:
     # [[["测试","test",null,null,2],[null,null,"Cèshì","test"]],...]
     if not response:
         return None
     parsed = json.loads(response)
     if not parsed:
         return None

     # Only the first top-level element is read. Its 5-item entries are the
     # translated segments (translation first); the trailing entry may carry
     # pinyin and is skipped by the length check.
     segments = parsed[0]
     cn = ''.join(segment[0] for segment in segments if len(segment) == 5)
     return GoogleTranslator.process_result(en, cn)
Example #3
0
 def get_pan_url(self, url):
     """Fetch a page and append its download link to ``self.pan_list_file``.

     The page is looked up under ``http://www.57fx.com`` and the ``href`` of
     the ``<a name="downurl">`` anchor is appended as one line. When the
     anchor or its ``href`` is missing, a message is printed and nothing is
     written.

     :param url: site-relative path of the page to fetch
     """
     url = 'http://www.57fx.com' + url
     r = requests.get(url)
     soup = BeautifulSoup(r.text, "html.parser")
     a = soup.find('a', {'name': 'downurl'})
     # find() yields None when no tag matches, and a tag may lack the href
     # attribute; both previously crashed on a['href'].
     href = a.get('href') if a is not None else None
     if not href:
         print('没有找到下载地址', url)
         return
     filex.write(self.pan_list_file, href + '\n', mode='a')
Example #4
0
    def export_html(source_file, result_file):
        """导出 html"""
        lipstick_type = os.path.splitext(os.path.basename(result_file))[0].split('_')[0]
        current_path = os.path.dirname(result_file) + '/'
        lines = filex.read_lines(source_file, ignore_line_separator=True)
        length = len(lines)
        html = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>选呀选呀选口红~</title>
    <link rel="stylesheet" href="https://cdn.bootcss.com/bootstrap/3.3.7/css/bootstrap.min.css"
          integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
    <link rel="stylesheet" href="css/lipstick.css">
</head>
<body>
<div class="container-fluid">
    [content]
</div>
</body>
</html>
        '''
        content = list()
        for i in range(length):
            content.append('<div class="row">')
            lipstick = Lipstick.from_string(lines[i])
            # 左边
            content.append('<div class="%s">' % 'col-xs-3')
            item_list = ['编号:' + lipstick.index, '色号:' + lipstick.name, '类别:' + lipstick.category, lipstick.other]
            for cell in item_list:
                content.append('<h4>%s</h4>' % cell)
            content.append('</div>')

            # 右边的图
            image_list = lipstick.img.split(',')
            length = len(image_list)
            for j in range(length):
                if length == 2:
                    if j == 0:
                        col_style = 'col-xs-3'
                    else:
                        col_style = 'col-xs-5'
                else:
                    col_style = 'col-xs-3'
                img = image_list[j]
                image_path = '%simage/%s_%03d_%d.jpg' % (current_path, lipstick_type, i + 1, j + 1)
                ima_tag = '<img class="%s" src="%s"/>' % (col_style, image_path.replace(current_path, ''))
                if not os.path.exists(image_path):
                    netx.get_file(img, image_path)
                content.append(ima_tag)
            content.append('</div>')

        content = html.replace('[content]', '\n'.join(content))
        filex.write(result_file, content)
Example #5
0
    def export_color_html(source_file, result_file):
        """导出 html"""
        html = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>选呀选呀选口红~</title>
    <link rel="stylesheet" href="https://cdn.bootcss.com/bootstrap/3.3.7/css/bootstrap.min.css"
          integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
    <link rel="stylesheet" href="css/lipstick_color.css">
</head>
<body>
<div class="container-fluid">
    [content]
</div>
</body>
</html>
        '''
        lipstick_type = os.path.splitext(os.path.basename(result_file))[0].split('_')[0]
        content = list()

        lines = filex.read_lines(source_file, ignore_line_separator=True)
        length = len(lines)
        for i in range(0, length, 4):
            content.append('<div class="row">')
            end_index = i + 4
            if end_index > length:
                end_index = length
            for j in range(i, end_index):
                lipstick = Lipstick.from_string(lines[j])
                name_tag = '<span class="item">%s</span>' % '<br/>'.join(
                    ('编号:' + lipstick.index, '色号:' + lipstick.name))
                image_path = 'image/%s_%03d_%d.jpg' % (lipstick_type, j + 1, 1)
                img_tag = '<img class="center-block" src="%s"/>' % image_path

                content.append('<div class="col-xs-3">')
                content.append(name_tag)
                content.append(img_tag)
                content.append('</div>')

            content.append('</div>')

        html = html.replace('[content]', '\n'.join(content))
        filex.write(result_file, html)
Example #6
0
 def export_all_cmd_help(self):
     """Export the help text of every command into ``self.all_cmd_file``.

     Entries from ``self.get_all_cmd()`` are expected to look like
     ``"NAME description"``; entries without a space are skipped. For each
     remaining command a numbered heading is appended, then
     ``help NAME > self.temp_file`` is executed through ``self.run_cmd`` and
     the captured output (read as GBK) is appended inside a Markdown code
     fence. ``SC`` only gets its heading, because its help blocks waiting
     for input. The temporary file is removed at the end if it was created.
     """
     cmd_list = self.get_all_cmd()
     total = len(cmd_list)
     for number, entry in enumerate(cmd_list, start=1):
         print('%d/%d' % (number, total))
         if ' ' not in entry:
             continue
         name, _info = entry.split(' ', maxsplit=1)
         filex.write(self.all_cmd_file,
                     '\n\n# 【%d/%d】%s\n' % (number, total, name),
                     mode='a')
         if name == 'SC':
             # This one waits for input and would hang the export.
             continue
         self.run_cmd('help %s > %s' % (name, self.temp_file))
         result = filex.read_lines(self.temp_file, encoding='gbk')
         result.insert(0, '```\n')
         result.append('\n```')
         filex.write_lines(self.all_cmd_file, result, mode='a')
     # The temp file only exists when at least one help command actually
     # ran; removing it unconditionally raised FileNotFoundError otherwise.
     if os.path.exists(self.temp_file):
         os.remove(self.temp_file)
Example #7
0
    def login(self, file_path):
        """Log in with the credentials in ``file_path`` and save the session.

        The first line of the file is the account name and the second line
        the password; the process exits when either is missing. The login
        page is requested once to obtain cookies and an authenticity token,
        then the credentials are posted. The cookies and token parsed from
        that second response are written to ``self.cookies_file`` and
        ``self.token_file``.

        :param file_path: path of the credentials file
        """
        credentials = filex.read_lines(file_path, ignore_line_separator=True)
        if not credentials:
            print('没有读取到登录信息', file_path)
            exit()
        if len(credentials) < 2:
            print('登录信息不完整,第一行写帐号,第二行写密码')
            exit()
        username, password = credentials[0], credentials[1]

        print('打开登录页...')
        response = requests.get(self.login_url)
        cookies, authenticity_token = self.parse_cookies_and_token_from_result(response)
        if not cookies or not authenticity_token:
            print('登录失败')
            return

        # Cookies and token are in hand: submit the login form.
        form = {
            'username': username,
            'password': password,
            'utf8': '✓',
            'authenticity_token': authenticity_token,
        }
        print('登录帐号 %s ...' % username)
        response = requests.post(self.login_url, form, cookies=cookies)
        # Both the cookies and the token must be read again from the new response.
        cookies, authenticity_token = self.parse_cookies_and_token_from_result(response)
        if not cookies or not authenticity_token:
            print('登录失败')
            return

        print('登录成功')
        cookies_str = ';'.join(key + '=' + value for key, value in cookies.items())
        filex.write(self.cookies_file, cookies_str)
        filex.write(self.token_file, authenticity_token)