def FileWithLink():
    """Build a Markdown table of files and sizes and copy it to the clipboard.

    Two CSV lookups are loaded through ``MyDef``: the values of the first are
    used as file paths, and the second maps a file path to a share link.  One
    table row is produced per file -- ``[name](link) | size`` when a non-empty
    link is known, ``name | size`` otherwise -- and the name of every file
    missing from the link table is printed.

    Takes no arguments and returns nothing.  The table (header, separator and
    sorted rows joined with CRLF) is handed to ``MyDef.WriteClip`` and the
    elapsed time reported by ``MyDef.RunTime`` is printed.

    Raises:
        OSError: from ``os.path.getsize`` if a listed file cannot be read.
    """
    import time, os, MyDef

    start_time = time.time()  # starting timestamp
    # ======================== input section ========================
    refer_file_path = '/Users/alicewish/Dropbox/漫画图源MD5表.csv'
    # NOTE(review): only the values of this mapping are used (as file paths);
    # presumably it maps MD5 -> path -- confirm against MyDef.ReadDictC.
    MD5_dict = MyDef.ReadDictC(refer_file_path, True)
    yun_link_file_path = '/Users/alicewish/Dropbox/漫画图源度盘地址表.csv'
    yun_link_dict = MyDef.ReadDictB(yun_link_file_path)

    # Each entry is (sort_key, rendered_row).  Rows are ordered by the
    # bracketed "[name](link) | size" form even when the rendered row is the
    # plain "name | size" form, so linked and unlinked files interleave by
    # name.  Choosing the rendering here, instead of searching the finished
    # row for "]()", keeps file names that themselves contain "]()" intact.
    rows = []
    for key in MD5_dict:
        file_path = MD5_dict[key]
        readable_file_size = sizeof_fmt(os.path.getsize(file_path))
        file_name = os.path.split(file_path)[1]

        if file_path in yun_link_dict:
            yun_link = yun_link_dict[file_path]
        else:
            yun_link = ''
            print(file_name)  # report files that still lack a share link

        sort_key = "[" + file_name + "](" + yun_link + ") | " + readable_file_size
        if yun_link:
            row = sort_key
        else:
            # No usable link: emit the bare name rather than an empty link.
            row = file_name + " | " + readable_file_size
        rows.append((sort_key, row))

    rows.sort(key=lambda pair: pair[0])

    output_readline = ['文件名 | 大小', '--- | ---']
    output_readline.extend(row for _, row in rows)

    # ================ write to clipboard ================
    output_text = '\r\n'.join(output_readline)

    MyDef.WriteClip(output_text)
    print(MyDef.RunTime(start_time))
# Example #2
# 0
def Processing():
    """Re-split the lines of a Markdown file with jieba and copy the result to the clipboard.

    Every line of the hard-coded input file is cleaned (``<span>...</span>``
    removed, ellipses normalized), its ``*`` emphasis markers are converted to
    a per-character format level, and the marker-free text is segmented with
    jieba.  Candidate cut schemes come from ``Training()``: a scheme applies
    to a line when its first two characters equal the zero-padded line
    length, and the text from character 7 on is a "-"-separated list of piece
    lengths.  The first scheme whose every cut lands on an allowed boundary
    is applied and each piece is emitted as its own output line with its
    ``*`` markers rebuilt.  Lines with no fitting scheme are passed through
    unchanged and counted as still to be cut.

    Lines are not segmented when they are empty, are a two-digit page number,
    are among the first eight lines of the file, or are longer than two
    characters and start with ``*`` or ``[``.

    Takes no arguments and returns nothing.  Progress is printed, the joined
    output goes to ``MyDef.WriteClip`` and the elapsed time reported by
    ``MyDef.RunTime`` is printed.
    """
    import time, jieba, re, MyDef

    start_time = time.time()  # starting timestamp
    scenario_list_full = Training()

    # ====================================== processing section ======================================
    dict_file_path = '/Users/alicewish/我的坚果云/userdict.txt'  # custom dictionary path

    cut_right_count = 0
    cut_wrong_count = 0

    # Characters that constrain where a cut may fall.
    no_cut_before = ',.?!,。…?!”·-》>:】【]、'  # never cut right before these
    no_cut_after = '“《<【['  # never cut right after these
    # Single-character words glued to the previous / next word respectively.
    attach_to_previous = '上中下内出完的地得了吗吧着个就前世里嘛图们来呗'
    attach_to_next = '太每帮跟另'

    # ======================== input section ========================
    input_file_path = "/Users/alicewish/Downloads/my.md"
    # ================ read the input text ================
    with open(input_file_path, 'r') as input_file:  # closed even if read() fails
        read_text = input_file.read()

    text_readline = read_text.replace("\nclass",
                                      "class").replace("...", "…").splitlines()

    output_readline = []  # output lines

    jieba.load_userdict(dict_file_path)

    for a, raw_line in enumerate(text_readline):
        source_line = re.sub(r'<span.*</span>', '', raw_line)  # drop span markup
        source_line = source_line.replace('……', '…')

        # Escaped asterisks are parked as 'の' so that only real emphasis
        # markers are split on; the backslashes are written as "\\" so the
        # literals do not rely on unrecognized escape sequences.
        # NOTE(review): a literal 'の' already present in the text is also
        # turned into '*' when plain_line is built below.
        markdown_line = source_line.replace("\\*", "の").replace(
            "\\[", "[").replace("\\]", "]")
        print(markdown_line)
        line_cut_list = markdown_line.split("*")
        print(line_cut_list)

        plain_line = markdown_line.replace("*", "").replace("の", "*")  # restore escaped *
        print(plain_line)

        # line_formmat_list[k]: format level of plain_line[k]
        # (presumably 1 = italic, 2 = bold, 3 = both -- confirm).
        line_formmat_list = [0] * len(plain_line)
        # line_mark_count_list[k]: number of '*' directly before plain_line[k];
        # the extra last slot counts the markers at the end of the line.
        line_mark_count_list = [0] * (len(plain_line) + 1)

        point = 0
        for char in markdown_line:
            if char == '*':
                line_mark_count_list[point] += 1
            else:
                point += 1
        print(line_mark_count_list)

        pin = 0
        before = 0  # format level of the character preceding the current run

        for seg in line_cut_list:
            if seg == '':
                continue
            last_pin = pin
            pin += len(seg)
            if last_pin > 0:
                before = line_formmat_list[last_pin - 1]
            marks = line_mark_count_list[last_pin]

            # The markers in front of a run either open emphasis (level goes
            # up) or close it (level goes down), depending on the level in
            # force before the run.
            if before == 0:
                level = marks
            elif (before, marks) in ((1, 2), (2, 1)):
                level = before + marks
            elif (before, marks) in ((1, 1), (2, 2)) or before == 3:
                level = before - marks
            else:
                level = None  # unhandled combination: run keeps level 0

            if level is not None:
                line_formmat_list[last_pin:pin] = [level] * (pin - last_pin)
        print(line_formmat_list)

        print(plain_line)
        print(len(plain_line))

        need_cut = True  # does this line need cutting?
        if len(plain_line) == 0:
            pass
        elif len(plain_line) == 2 and re.match(r'[0-9][0-9]', plain_line):
            # page number
            need_cut = False
        elif a < 8:
            # header lines
            need_cut = False
        elif len(plain_line) > 2:
            if plain_line[0] == '*' or plain_line[0] == '[':
                # annotation
                need_cut = False

        if plain_line == "":
            status = 0  # empty line
        elif not need_cut:
            status = -1  # no cutting needed
        else:
            status = 1  # to be segmented

        if status == 1:
            # ================ jieba segmentation ================
            string_list = list(jieba.cut(plain_line))  # default: accurate mode
            print(string_list)

            # line_can_cut_list[k] == 1 means a cut is allowed after
            # plain_line[k].  It depends only on the line, so it is built
            # once rather than per candidate scheme.
            line_can_cut_list = [0] * len(plain_line)
            word_end = 0
            for word in string_list:
                word_end += len(word)
                line_can_cut_list[word_end - 1] = 1

            # ================ adjust for punctuation and particles ================
            for i, char in enumerate(plain_line):
                # The start of the line counts as a boundary; without the
                # i > 0 guards, index i - 1 would wrap around to the last
                # slot and forbid the cut at the end of the line.
                boundary_before = i == 0 or line_can_cut_list[i - 1] == 1
                if char in no_cut_before:
                    if i > 0:
                        line_can_cut_list[i - 1] = 0
                elif char in no_cut_after:
                    line_can_cut_list[i] = 0
                elif (char in attach_to_previous and i > 0
                      and boundary_before and line_can_cut_list[i] == 1):
                    line_can_cut_list[i - 1] = 0
                elif (char in attach_to_next and boundary_before
                      and line_can_cut_list[i] == 1):
                    line_can_cut_list[i] = 0

            # ================ try the schemes for this line length ================
            length_tag = str(len(plain_line)).zfill(2)
            candidate_indexes = [
                i for i, scenario in enumerate(scenario_list_full)
                if scenario[0:2] == length_tag
            ]

            # With no scheme for this length the line is reported as not cut,
            # instead of reusing the scheme range found for an earlier line.
            cut_right = False
            current_cut_list = []
            for current_i in candidate_indexes:
                # piece lengths of this scheme, as strings
                current_cut_list = scenario_list_full[current_i][7:].split("-")
                print(line_can_cut_list)
                print(current_cut_list)

                # ================ check whether the scheme fits ================
                cut_right = True
                cut_end = 0
                for piece_length in current_cut_list:
                    cut_start = cut_end
                    cut_end += int(piece_length)
                    print(line_can_cut_list[cut_end - 1])
                    print(plain_line[cut_start:cut_end])

                    if line_can_cut_list[cut_end - 1] == 0:
                        cut_right = False
                print(cut_right)
                if cut_right:
                    break

            if cut_right:  # a scheme fits
                cut_right_count += 1
                cut_end = 0
                for piece_length in current_cut_list:
                    cut_start = cut_end
                    cut_end += int(piece_length)
                    output_line = plain_line[cut_start:cut_end]
                    output_line_format_list = line_formmat_list[cut_start:cut_end]

                    # Pad with level 0 on both sides so that every piece opens
                    # and closes its own emphasis; the level change between
                    # neighbours is the number of '*' to emit there.
                    format_list_for_use = [0] + output_line_format_list + [0]
                    output_line_mark_count_list = [
                        abs(format_list_for_use[b + 1] - format_list_for_use[b])
                        for b in range(len(format_list_for_use) - 1)
                    ]
                    print(output_line_mark_count_list)

                    pieces = []
                    for c in range(len(output_line_mark_count_list) - 1):
                        pieces.append('*' * output_line_mark_count_list[c])
                        pieces.append(output_line[c])
                    pieces.append('*' * output_line_mark_count_list[-1])

                    output_readline.append(''.join(pieces))
                    print("格式", output_line_format_list)
            else:  # no scheme fits: keep the cleaned line as it is
                output_readline.append(source_line)
                cut_wrong_count += 1
        elif status == 0:  # empty line
            output_readline.append('\n|\n')
        else:  # no cutting needed
            output_readline.append(
                source_line.replace("\\[", "[").replace("\\]", "]"))

    print('切对', cut_right_count)
    print('待切', cut_wrong_count)

    # ================ write to clipboard ================
    text = '\r\n'.join(output_readline)

    MyDef.WriteClip(text)
    print(MyDef.RunTime(start_time))
# Script tail: save the collected lines as a CSV file, copy the Markdown
# version to the clipboard and report the elapsed time.  output_readline,
# markdown_readline, now_date, dropbox_path and start_time are expected to be
# set by the earlier part of the script.

# Entry count is one less than the number of lines
# (presumably the first line is a header -- confirm).
all_count = len(output_readline) - 1
print(all_count)
text = '\r\n'.join(output_readline)
# print(text)

output_file_name = '0 Day Week文件地址-墨问非名制作-' + now_date + '(' + str(
    all_count) + ').csv'

output_file_path = os.path.join(dropbox_path, output_file_name)
with open(output_file_path, 'w') as f:  # closed even if the write fails
    f.write(text)

# ================ write to clipboard ================
markdown_text = '\r\n'.join(markdown_readline)

MyDef.WriteClip(markdown_text)

# ================ elapsed-time report ================
run_time = time.time() - start_time
# Truncate to whole seconds before splitting into units: formatting the float
# remainder with {:.0f} would round it, so e.g. 119.7 s printed as "1分60秒".
whole_seconds = int(run_time)
if run_time < 60:  # seconds, two decimals
    print("耗时:{:.2f}秒".format(run_time))
elif run_time < 3600:  # minutes + seconds, whole numbers
    print("耗时:{:.0f}分{:.0f}秒".format(whole_seconds // 60,
                                     whole_seconds % 60))
else:  # hours, minutes and seconds, whole numbers
    print("耗时:{:.0f}时{:.0f}分{:.0f}秒".format(whole_seconds // 3600,
                                            whole_seconds % 3600 // 60,
                                            whole_seconds % 60))