Beispiel #1
0
def space_file(file):
    """Normalise list bullets in ``file`` and write a pangu-spaced copy.

    Works in two passes:

    1. ``file`` is rewritten in place with every ``'* '`` replaced by
       ``'+ '``.
    2. The rewritten file is read back line by line. Each line is stripped,
       has ``blob/debug`` replaced by ``blob/master``, is passed through
       ``pangu.spacing_text`` and then has a space directly after or before
       a ``**`` or ``~~`` marker removed. The result is written to
       ``../new_{file}``.

    Args:
        file: Path of the text file to process. It is interpolated verbatim
            into the output path ``../new_{file}``.
    """
    with open(file, 'r') as src:
        txt = src.read()

    with open(file, 'w') as dst:
        dst.write(txt.replace('* ', '+ '))

    # Context managers guarantee both handles are closed even when the
    # spacing step raises part-way through the file.
    with open(f'../new_{file}', 'w') as nf, open(file, 'r') as f:
        for line in f:
            tmp = line.strip().replace('blob/debug', 'blob/master')

            new = pangu.spacing_text(tmp)
            for marker in ('**', '~~'):
                new = new.replace(marker + ' ', marker)
                new = new.replace(' ' + marker, marker)

            nf.write(new + '\n')
    def clearReplace(self, text):
        """Re-flow ``text`` into paragraphs and apply pangu spacing.

        Runs of blank lines split the text into paragraphs. Inside a
        paragraph every line break becomes a single space; when
        ``self.flag_keep_period_replace`` equals 1, the break after a line
        ending in one of ``.:?!。`` is kept instead. Paragraphs are re-joined
        with one blank line between them and the result is passed through
        ``pangu.spacing_text``.
        """
        sentence_end = re.compile(r'.*[.:?!。:?!]$')
        paragraphs = []

        for segment in re.split(r'\n\s*\n', text):
            body = segment.strip()
            if self.flag_keep_period_replace != 1:
                paragraphs.append(body.replace('\n', ' '))
                continue

            pieces = []
            for raw_line in body.split('\n'):
                stripped = raw_line.strip()
                # Keep the line break only after sentence-ending punctuation.
                separator = '\n' if sentence_end.match(stripped) else ' '
                pieces.append(stripped + separator)
            paragraphs.append(''.join(pieces))

        # Any run of blank lines is reduced to exactly one blank line.
        return pangu.spacing_text('\n\n'.join(paragraphs))
Beispiel #3
0
Datei: fy.py Projekt: coderMR/fy
def highlight(text: str, keyword: str):
    """Return ``text`` pangu-spaced, with ``keyword`` wrapped in colour codes.

    Matching is case-insensitive and literal: ``keyword`` is escaped before
    it is used as a pattern, so characters such as ``+``, ``(`` or ``\\`` in
    it are matched as-is instead of being interpreted as regex syntax.

    Args:
        text: The text to space and highlight.
        keyword: The literal text to highlight. When empty, the spaced text
            is returned without any colour codes.

    Returns:
        The spaced text in which every match is replaced by ``keyword``
        surrounded by ANSI escape sequences.
    """
    text = pangu.spacing_text(text)
    if not keyword:
        # An empty pattern matches between every character and would
        # scatter escape sequences through the whole string.
        return text

    replacement = "\33[0m" + "\33[93m" + keyword + "\33[0m" + "\33[37m"
    return re.sub(
        re.escape(keyword),
        # A callable keeps backslashes in `keyword` from being parsed as
        # group references in the replacement template.
        lambda _match: replacement,
        text,
        flags=re.IGNORECASE,
    )
Beispiel #4
0
def headersstr2headers(header_str):
    """Print a dated greeting, then each non-empty line of ``header_str``.

    The greeting uses the month, day and weekday of ``arrow.now()``. Every
    non-empty line has one trailing ``.`` or ``。`` removed, is passed through
    ``pangu.spacing_text`` and is printed followed by ``'\\r\\n'``.

    Args:
        header_str: Multi-line text; it is split with ``str.splitlines``.
    """
    t = arrow.now()
    print('大家好,今天是公历 %d 月 %d 日,星期%s \r\n' %
          (t.month, t.day, week[t.weekday()]))
    for str_line in header_str.splitlines():
        if str_line == "":
            continue
        # Raw string: '\.' in a plain literal is an invalid escape sequence.
        # One pattern replaces the former "'.' -> '。' -> ''" two-step.
        str_line = re.sub(r'[.。]$', '', str_line)
        print(pangu.spacing_text(str_line) + '\r\n')
Beispiel #5
0
Datei: fy.py Projekt: coderMR/fy
def youdao_api(words: str):
    """Query the Youdao open API for ``words`` and print the result.

    Prints, in order: the query with its phonetic transcription (when the
    response carries one), the first entry of ``translation``, every entry
    of ``basic.explains`` and the numbered ``web`` phrases with the query
    highlighted. Any error raised while requesting or rendering is reported
    by printing ``ERR_MSG`` instead of propagating.

    Args:
        words: The text to translate.
    """
    print()
    print(huepy.grey(" -------- "))
    print()
    try:
        # Let requests build the query string so characters such as '&' or
        # '#' in the inputs are percent-encoded instead of corrupting it.
        resp = requests.get(
            "http://fanyi.youdao.com/openapi.do",
            params={
                "keyfrom": CONF.youdao_key_from,
                "key": CONF.youdao_key,
                "type": "data",
                "doctype": "json",
                "version": "1.1",
                "q": words,
            },
            headers=HEADERS,
        ).json()

        basic = resp.get("basic")
        phonetic = ""
        if basic and basic.get("phonetic"):
            phonetic += huepy.purple("  [ " + basic.get("phonetic") + " ]")

        print(" " + words + phonetic + huepy.grey("  ~  fanyi.youdao.com"))
        print()

        translation = resp.get("translation", [])
        if translation:
            print(" - " + pangu.spacing_text(huepy.green(translation[0])))

        if basic and basic.get("explains"):
            for item in basic.get("explains"):
                print(huepy.grey(" - ") + pangu.spacing_text(huepy.green(item)))
        print()

        web = resp.get("web")
        if web:
            for i, item in enumerate(web):
                print(
                    huepy.grey(
                        " " + str(i + 1) + ". " + highlight(item.get("key"), words)
                    )
                )
                print("    " + huepy.cyan(", ".join(item.get("value"))))

    except Exception:
        # `Exception` rather than a bare except, so Ctrl-C and SystemExit
        # are no longer swallowed.
        print(" " + huepy.red(ERR_MSG))
Beispiel #6
0
    def on_pangu_btn_clicked(self):
        """Show the processed form of ``before_txt``'s content in ``after_txt``.

        Does nothing when ``before_txt`` is empty. Otherwise the text is run
        through ``self.handle_text`` and then ``pangu.spacing_text``; if the
        spacing step fails, the un-spaced text is shown instead.
        """
        text = self.before_txt.toPlainText()
        if not text:
            return

        text = self.handle_text(text)

        try:
            text = pangu.spacing_text(text)
        except Exception:
            # Best effort: fall back to the un-spaced text. `Exception`
            # rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed.
            pass

        self.after_txt.setText(text)
Beispiel #7
0
def to_html(data, template_filename, fp):
    """Render a packaged Jinja2 template with ``data`` and write it to ``fp``.

    The template is looked up in the ``templates`` resource of the
    ``crawler_book_info`` package. The rendered text is passed through
    ``pangu.spacing_text`` before it is written.

    Args:
        data: Mapping whose items are passed to the template as keyword
            arguments.
        template_filename: Name of the template inside the templates
            directory.
        fp: Object with a ``write`` method that receives the final text.
    """
    with _resources.path("crawler_book_info", "templates") as templates_dir:
        environment = Environment(
            loader=FileSystemLoader(searchpath=str(templates_dir))
        )
        rendered = environment.get_template(template_filename).render(**data)
        fp.write(pangu.spacing_text(rendered))
Beispiel #8
0
Datei: fy.py Projekt: coderMR/fy
def google_api(words: str):
    """Translate ``words`` through translate.google.cn and print the result.

    The destination language is ``"en"`` when ``words`` contains at least
    one character in the range U+4E00..U+9FFF, otherwise ``"zh-cn"``. The
    translated text is passed through ``pangu.spacing_text`` before printing.
    """
    print()

    translator = Translator(service_urls=["translate.google.cn"])

    contains_cjk = any("\u4e00" <= ch <= "\u9fff" for ch in words)
    destination = "en" if contains_cjk else "zh-cn"

    translated = translator.translate(words, dest=destination).text
    text = pangu.spacing_text(translated)

    print(" " + words + huepy.grey("  ~  translate.google.cn"))
    print()
    print(" - " + huepy.cyan(text))
Beispiel #9
0
def modify_text(line):
    """Normalise the formatting of one line of text.

    The line is passed through ``pangu.spacing_text``, then a fixed list of
    substitutions is applied in order, and finally surrounding whitespace is
    stripped.
    """
    # Removing '\n' is only enabled when converting to PDF.
    # line = line.replace('\n', '')
    substitutions = (
        (' “', '“'),
        ('” ', '”'),
        ('“', '「'),
        ('”', '」'),
        ('・', '·'),
        (', ', ','),
        ('。 ', '。'),
        ('’', '\''),
        ('  ', ' '),
    )
    result = pangu.spacing_text(line)
    # Order matters: spaces next to curly quotes are dropped before the
    # quotes themselves are converted to corner brackets.
    for old, new in substitutions:
        result = result.replace(old, new)
    return result.strip()
Beispiel #10
0
 def parse_html(content):
     """Convert ``<e ...>`` tags in ``content`` to Markdown-style text.

     Newlines are first replaced by ``<br>``. When no ``<e ...>`` tag is
     present the text is returned after ``pangu.spacing_text``. Otherwise
     each tag is replaced according to its ``type`` attribute: ``web``
     becomes ``[title](href)``, ``hashtag`` becomes the title in backticks,
     ``mention`` becomes the bare title, and any other tag is left as-is.
     Titles and hrefs are URL-unquoted.
     """
     content = content.replace("\n", "<br>")
     tags = re.findall(r"<e [^>]*>", content)
     if not tags:
         return pangu.spacing_text(content)

     for tag in tags:
         node = PyQuery(tag)
         kind = node.attr("type")
         if kind == "web":
             rendered = "[%s](%s)" % (
                 parse.unquote(node.attr("title")),
                 parse.unquote(node.attr("href")),
             )
         elif kind == "hashtag":
             rendered = " `%s` " % parse.unquote(node.attr("title"))
         elif kind == "mention":
             rendered = parse.unquote(node.attr("title"))
         else:
             rendered = tag
         content = content.strip().replace(tag, rendered)
     return content
Beispiel #11
0
    def onClipboradChanged(self):
        """Mirror new clipboard text into the before/after text widgets.

        Does nothing unless the ``listen_clip_board`` checkbox is checked and
        the clipboard holds text. The text is run through
        ``self.handle_text`` and shown in ``before_txt``; its pangu-spaced
        form is shown in ``after_txt``. If the spacing step fails, the
        un-spaced text is shown instead.
        """
        if not self.listen_clip_board.isChecked():
            return

        clipboard = QApplication.clipboard()
        text = clipboard.text()

        if not text:
            return

        text = self.handle_text(text)

        self.before_txt.setText(text)

        try:
            text = pangu.spacing_text(text)
        except Exception:
            # Best effort: fall back to the un-spaced text. `Exception`
            # rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed.
            pass

        self.after_txt.setText(text)
Beispiel #12
0
 def parse_html(self, content):
     """Convert ``<e ...>`` tags in ``content`` to Markdown-style text.

     Newlines are first replaced by ``<br>``. When no ``<e ...>`` tag is
     present the text is returned after ``pangu.spacing_text``. Otherwise
     each tag is replaced according to its ``type`` attribute: ``web``
     becomes a ``[title](url)`` link followed by a ``## title`` heading and
     the output of ``self.parse_article(url)``; ``hashtag`` becomes the
     title in backticks; ``mention`` becomes the bare title; any other tag
     is left as-is. Titles and hrefs are URL-unquoted.
     """
     content = content.replace("\n", "<br>")
     tags = re.findall(r"<e [^>]*>", content)
     if not tags:
         return pangu.spacing_text(content)

     for tag in tags:
         node = PyQuery(tag)
         kind = node.attr("type")
         if kind == "web":
             title = parse.unquote(node.attr("title"))
             url = parse.unquote(node.attr("href"))
             link = "[%s](%s)" % (title, url)
             heading = f"\n## {title}\n"
             rendered = link + heading + self.parse_article(url) + "\n"
         elif kind == "hashtag":
             rendered = " `%s` " % parse.unquote(node.attr("title"))
         elif kind == "mention":
             rendered = parse.unquote(node.attr("title"))
         else:
             rendered = tag
         content = content.strip().replace(tag, rendered)
     return content
Beispiel #13
0
 def format_python(self, text):
     """Apply pangu spacing to ``text`` and return element 0 of ``FormatCode``'s result."""
     spaced = pangu.spacing_text(text)
     formatted = FormatCode(spaced)
     return formatted[0]
Beispiel #14
0
 def format_md(self, text):
     """Return ``text`` after ``pangu.spacing_text`` has been applied."""
     spaced = pangu.spacing_text(text)
     return spaced
Beispiel #15
0
 def format_json(self, text):
     """Apply pangu spacing to ``text`` and return it beautified by ``jsbeautifier``."""
     spaced = pangu.spacing_text(text)
     return jsbeautifier.beautify(spaced)
# Run the embedded Python program with /anaconda3/bin/python, forwarding
# this function's arguments to it.
runpy3 () {
/anaconda3/bin/python << 'EOF' - "$@"

import sys
import pangu
import clipboard
import pyautogui

# One pass per entry of sys.argv; the entry's value itself is not used.
for _ in sys.argv:

	clip_text = clipboard.paste()
	clipboard.copy(pangu.spacing_text(clip_text))

	# Send the Command+V keystroke.
	pyautogui.keyDown('command')
	pyautogui.press('v')
	pyautogui.keyUp('command')

EOF
}

runpy3 "$@"
Beispiel #17
0
    def get_html_from_blog(self, blog, rule):
        """Download ``blog.url``, convert the post to Markdown and save it.

        Steps: fetch the page, store the raw HTML in ``temp.html``, extract
        the title and body according to ``rule``, convert the body with
        ``html2text``, tidy the Markdown and write it to
        ``blogs/<title>.md``. ``blog.title`` and ``blog.content`` are set
        along the way.

        Args:
            blog: Object with a ``url`` attribute; receives ``title`` and
                ``content``.
            rule: Mapping read for the keys ``encoding``, ``title_pattern``,
                ``content_type``, ``content_pattern``, ``content_replaces``
                and ``md_replaces``.

        Raises:
            IndexError: When ``rule['content_type']`` is ``'bs'`` and the
                selector matches nothing.
        """
        s = requests.session()
        r = s.get(blog.url, headers=headers)
        # NOTE(review): any non-None rule['encoding'] forces UTF-8; the
        # configured value itself is never used. Confirm this is intended.
        if rule['encoding'] is not None:
            r.encoding = 'utf-8'
        # Fetch the text content.
        html = r.text

        soup = BeautifulSoup(html, 'lxml')
        with open('temp.html', 'w', encoding='utf-8') as f:
            f.write(html)

        # Extract the title with the rule's regex.
        titles = re.findall(rule['title_pattern'], html, re.DOTALL)
        title = pangu.spacing_text(titles[0]) if titles else 'default'
        blog.title = title
        print('标题:', title)

        if rule['content_type'] == 'bs':
            # CSS selector: take the last matching element.
            content = str(soup.select(rule['content_pattern']).pop())
        else:
            # Regex: take the first match, or an empty body when none.
            contents = re.findall(rule['content_pattern'], html, re.DOTALL)
            content = contents[0] if contents else ''

        content = '<h1><a href="{}">{}</a></h1><br><br>'.format(
            blog.url, blog.title) + content
        for src, dst in rule['content_replaces']:
            content = re.sub(src, dst, content)
        blog.content = content

        # Convert to Markdown.
        text_maker = ht.HTML2Text()
        md_content = text_maker.handle(content)
        # Drop trailing spaces and collapse runs of blank lines.
        md_content = md_content.replace('\r', '')
        while ' \n' in md_content:
            md_content = md_content.replace(' \n', '\n')
        while '\n\n\n' in md_content:
            md_content = md_content.replace('\n\n\n', '\n\n')

        # Rule-specific regex replacements.
        for src, dst in rule['md_replaces']:
            md_content = re.sub(src, dst, md_content)
        # Add pangu spacing.
        md_content = pangu.spacing_text(md_content)

        # Strip whitespace from text found between a pair of '*'.
        for star_line in re.findall(r'\*(.*?)\*', md_content):
            md_content = md_content.replace(star_line, star_line.strip())
        # Re-join lines that were broken right after a hyphen.
        md_content = re.sub('-\n', '-', md_content)

        # Replace characters that are illegal in file names. The doubled
        # backslash makes the class match '\' itself; the previous pattern
        # '[\/...]' only escaped the slash and never matched a backslash.
        title = re.sub(r'[\\/:*?"<>|]', '-', title)
        with open("blogs" + os.sep + title + '.md', 'w',
                  encoding='utf-8') as f:
            f.write(md_content)
Beispiel #18
0
def main():
    """Fetch a book page, render it into the HTML template, write index.html.

    ``get_data()`` supplies the page data (element 0) and the book URL
    (element 1). The ``parser_book_*`` helpers extract the individual fields
    from the page data, the template is rendered with them, and the result
    is passed through ``pangu.spacing_text`` and written to ``index.html``.
    Any exception is printed rather than propagated.
    """
    try:

        # Template with Jinja2
        template = Template('''\
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width" />
  <title> {{ title }} </title>
</head>
<body>
  <p>
    Buy:
    <ul>
      <li> <a href="{{ url }}" target="_blank">博客來</a> </li>
    </ul>
  </p>
  <hr>
  {{ full_title }}
  <p>
    <img src="{{ cover }}"/>
  </p>

  {{ info1 }}
  {{ price }}
  {{ info2 }}

  <h2>商品描述</h2>
  {{ desc }}

  <h2>作者簡介</h2>
  {{ author }}

  <h2>目錄大綱</h2>
  {{ outline }}

  <h2>Memo</h2>

  <h3>我想讀這本書的原因是什麼?</h3>
  <ol>
    <li> &lt;FIXED_ME&gt; </li>
  </ol>

  <h3>看完書封介紹和目錄大綱後,我覺得我可以從那邊得到什麼?</h3>
  <ol>
    <li> &lt;FIXED_ME&gt; </li>
  </ol>

  <h3>在買這本新書前,我曾讀過相關的主題的書籍嗎? 當時得到了什麼新知?</h3>
  <ol>
    <li> &lt;FIXED_ME&gt; </li>
  </ol>

  <footer style="text-align: center;">
    Parser by
      <a href="https://github.com/chusiang/crawler-book-info" target="_blank">
        chusiang/crawler-book-info
      </a>
    <hr>
  </footer>
</body>
</html>
''')

        # Get data.
        data = get_data()
        page = data[0]

        # Parse the fields and map them onto the template.
        result = template.render(
            title=parser_book_title(page),
            url=data[1],
            full_title=parser_book_full_title(page),
            cover=parser_book_cover(page),
            info1=parser_book_info1(page),
            price=parser_book_price(page),
            info2=parser_book_info2(page),
            desc=parser_book_desc(page),
            author=parser_book_author(page),
            outline=parser_book_outline(page)
        )

        # Write to HTML file. The template declares charset utf-8, so the
        # file is encoded as UTF-8 instead of the platform default, and the
        # context manager closes it even if the write fails.
        with open('index.html', 'w', encoding='utf-8') as f:
            f.write(pangu.spacing_text(result))

    except Exception as e:
        print(e)
Beispiel #19
0
 def pangu_func(self, keyword):
     """Replace the clipboard text with its pangu-spaced form.

     ``keyword`` is accepted but not used.
     """
     clipboard_text = pyperclip.paste()
     pyperclip.copy(pangu.spacing_text(clipboard_text))
Beispiel #20
0
 def format_sql(self, text):
     """Apply pangu spacing to ``text`` and return the result of ``sqlparse.format``."""
     spaced = pangu.spacing_text(text)
     return sqlparse.format(spaced)
Beispiel #21
0
# For every .docx on the Desktop, write a pangu-spaced .md file next to it.
for infile in glob.glob("/Users/Daglas/Desktop/*.docx"):
    filename, ext = os.path.splitext(infile)
    md_path = filename + ".md"

    document = Document(filename + ".docx")

    # Pass 1: dump each paragraph's text, followed by a blank line.
    with open(md_path, 'w') as md_file:
        md_file.writelines(
            para.text + "\n\n"
            for para in document.paragraphs
            if para.text != '\n'
        )

    # Pass 2: read the dump back; the name `filename + ".md"` is the key.
    with open(md_path) as md_file:
        lines = md_file.readlines()

    # Pass 3: space every non-blank line, normalise its punctuation and
    # overwrite the file.
    with open(md_path, 'w') as md_file:
        for line in lines:
            if line == '\n':
                continue
            spaced = pangu.spacing_text(line)
            for old, new in (
                (' “', '“'),
                ('” ', '”'),
                ('“', '「'),
                ('”', '」'),
                ('・', '·'),
                (', ', ','),
                ('。 ', '。'),
            ):
                spaced = spaced.replace(old, new)
            md_file.write(spaced + "\n\n")
Beispiel #22
0
def cjk_layout(text):
    """Return ``text`` after ``pangu.spacing_text`` has been applied."""
    spaced = pangu.spacing_text(text)
    return spaced
Beispiel #23
0
 def test_spacing_text(self):
     """``spacing_text`` puts spaces between CJK text and Latin words, digits and brackets."""
     source = '請使用uname -m指令來檢查你的Linux作業系統是32位元或是[敏感词已被屏蔽]位元'
     expected = '請使用 uname -m 指令來檢查你的 Linux 作業系統是 32 位元或是 [敏感词已被屏蔽] 位元'
     self.assertEqual(pangu.spacing_text(source), expected)
Beispiel #24
0
 async def set_content(self, content):
     """Store the pangu-spaced ``content`` under the ``'content'`` key.

     Returns whatever ``self.set_props_by_key`` returns.
     """
     spaced = pangu.spacing_text(content)
     return await self.set_props_by_key('content', spaced)
Beispiel #25
0
#!/usr/bin/python3
# coding=utf-8
import pangu
import pyperclip

# Print the clipboard text, replace it with its pangu-spaced form, then
# print that form.
clipboard_text = pyperclip.paste()
print(clipboard_text)

spaced_text = pangu.spacing_text(clipboard_text)
pyperclip.copy(spaced_text)
print(spaced_text)

Beispiel #26
0
 def pangu_spacing(self):
     """Replace ``self.text`` with its pangu-spaced form and return ``self``."""
     spaced = pangu.spacing_text(self.text)
     self.text = spaced
     return self
Beispiel #27
0
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import pangu
import pyperclip
import sys
from enum import Enum, unique


@unique
class QueryMode(Enum):
    """Source of the text that the script formats."""
    Paste = 0  # format the clipboard text
    Selection = 1  # format the selected text


# Format the first command-line argument when one is given; otherwise fall
# back to the clipboard. The length check matters: indexing sys.argv[1]
# without it raises IndexError when the script is run with no arguments,
# which made the clipboard branch unreachable in that case.
if len(sys.argv) > 1 and sys.argv[1]:
    mode = QueryMode.Selection
    query = sys.argv[1]
else:
    mode = QueryMode.Paste
    query = pyperclip.paste()

ret = pangu.spacing_text(query)

if mode == QueryMode.Selection:
    print(ret, end='')  # no trailing newline in the output
else:
    pyperclip.copy(ret)
Beispiel #28
0
 def wrapper(*args, **kwargs):
     """Call ``func`` with the positional argument at ``index`` pangu-spaced.

     All other positional and keyword arguments are passed through
     unchanged; the return value of ``func`` is returned.
     """
     patched = list(args)
     patched[index] = pangu.spacing_text(patched[index])
     return func(*patched, **kwargs)
Beispiel #29
0
    # Punctuation marks appended to `arr`.
    arr.append('。')
    arr.append('?')
    arr.append('!')
    arr.append(',')
    arr.append('.')
    arr.append('?')
    arr.append('!')

    # The replacement string is argv[3] when argv[2] is 'all', else argv[2].
    if sys.argv[2] == 'all':
        replace = sys.argv[3]
    else:
        replace = sys.argv[2]

# Character class matching one or more consecutive entries of `arr`.
# NOTE(review): joining with "|" inside [...] makes "|" itself a member of
# the class. Confirm that this is intended.
sub = "[" + "|".join(arr) + "]+"

# Each non-blank line is "<id><tab-or-space><text>". The text has the
# punctuation in `sub`, the brackets and the FIL/SPK tags replaced by
# `replace`, is pangu-spaced, upper-cased and printed after the id.
# The context manager closes the file even if a line fails to process.
with open(script, 'r') as f1:
    for line in f1:
        if line.strip() == '':
            continue
        data = re.split('[\t ]', line)
        trans = ' '.join(data[1:])
        new = re.sub(sub, replace, trans) \
            .replace('[', replace) \
            .replace(']', replace) \
            .replace('FIL', replace) \
            .replace('SPK', replace) \
            .replace('  ', ' ')
        print(data[0] + '\t' + pangu.spacing_text(new).upper().strip())