Beispiel #1
0
def BulidLinkB():
    """Build the per-file share-link table and store it as a CSV file.

    Every key of the sampling table that also has a share link (as returned by
    ``ReadMobile``) is expanded into its "|"-separated file MD5s. For each MD5
    a link is built from the group's share link plus a ``#path=`` fragment
    made of ``MyDef.HexShift(key)`` and ``MyDef.HexShift(md5)``. The resulting
    ``[file_path, file_link]`` rows are written with ``MyDef.StoreCSV``; the
    row count and the elapsed time are printed at the end.
    """
    import time
    import MyDef
    import os

    began = time.time()  # start timestamp
    group_table = ReadMD5SamplingCSV()
    path_by_md5, MD5_list = ReadMD5CSV()
    link_by_group = ReadMobile()
    rows = []
    # ======================== group level ========================
    for group_key in group_table:
        if group_key not in link_by_group:
            continue  # no share link known for this group
        shifted_group = MyDef.HexShift(group_key)
        group_link = link_by_group[group_key]

        members = group_table[group_key].split("|")
        print(members)
        # ======================== file level ========================
        for file_md5 in members:
            local_path = path_by_md5[file_md5]
            base_name = os.path.split(local_path)[1]

            shifted_file = MyDef.HexShift(file_md5)
            # Fragment: /<shifted group>/<shifted group + shifted file>
            fragment = '#path=%252F' + shifted_group + '%252F' + shifted_group + shifted_file
            share_link = group_link + fragment
            print(base_name)
            print(share_link)
            rows.append([local_path, share_link])
    path = '/Users/alicewish/Dropbox/漫画图源度盘地址表.csv'
    MyDef.StoreCSV(rows, path)
    print(len(rows))
    print(MyDef.RunTime(began))
Beispiel #2
0
def UnDistribute():
    """Move distributed files back to the paths recorded in the MD5 table.

    For each key of the sampling table and each "|"-separated file MD5 under
    it, the file is looked up at
    ``/Volumes/Mack/Distribute/<key>/<md5>/<file name>`` and moved back to the
    path the MD5 table records for that MD5. A move that fails (for example
    because the file is not there) is reported and skipped, so one missing
    file does not abort the whole run. The elapsed time is printed at the end.

    NOTE(review): ``Distribute`` in this file stores files under
    ``HexShift(key)/HexShift(key) + HexShift(md5)``, whereas this function
    looks under the un-shifted ``key/md5``. Unless ``MyDef.HexShift`` is an
    identity mapping, a tree produced by ``Distribute`` is never matched here.
    Confirm which layout is actually on disk.

    Raises:
        KeyError: if a sampled MD5 is missing from the MD5 table.
    """
    import time, MyDef, os, shutil
    start_time = time.time()  # start timestamp
    refer_dict = ReadMD5SamplingCSV()
    MD5_refer_dict, MD5_list = ReadMD5CSV()
    new_file_dir = '/Volumes/Mack/Distribute'

    for key in refer_dict:
        new_folder_path = os.path.join(new_file_dir, key)

        sample_list = refer_dict[key].split("|")
        print(sample_list)
        for sample in sample_list:
            file_path = MD5_refer_dict[sample]
            file_name = os.path.split(file_path)[1]
            print("旧", file_path)
            new_file_path = os.path.join(new_folder_path, sample, file_name)
            print("新", new_file_path)

            try:
                shutil.move(new_file_path, file_path)  # works for files and directories
            except OSError as error:
                # Still best effort, but no longer silent, and Ctrl-C is no
                # longer swallowed (shutil.Error is an OSError subclass).
                print("跳过", new_file_path, error)

    print(MyDef.RunTime(start_time))
Beispiel #3
0
def Distribute():
    """Move every sampled file into its own folder under the distribute root.

    Layout created under ``/Volumes/Mack/Distribute``::

        <HexShift(key)>/<HexShift(key) + HexShift(md5)>/<file name>

    where ``key`` is a key of the sampling table and ``md5`` one of the
    "|"-separated file MD5s stored under it. Directory creation and file moves
    are best effort: a failure is reported and the run continues with the
    next item. The elapsed time is printed at the end.

    Raises:
        KeyError: if a sampled MD5 is missing from the MD5 table.
    """
    import time, MyDef, os, shutil

    def ensure_dir(dir_path):
        # Create a single directory level (parents are NOT created, so an
        # unmounted volume is not silently replaced by a local folder).
        # An existing path is fine; any other failure is reported, not raised.
        try:
            os.mkdir(dir_path)
        except FileExistsError:
            pass
        except OSError as error:
            print("无法创建目录", dir_path, error)

    start_time = time.time()  # start timestamp
    refer_dict = ReadMD5SamplingCSV()
    MD5_refer_dict, MD5_list = ReadMD5CSV()

    # ======================== outermost directory ========================
    new_file_dir = '/Volumes/Mack/Distribute'
    ensure_dir(new_file_dir)
    # ======================== group directories ========================
    for key in refer_dict:
        folder_name = MyDef.HexShift(key)
        new_folder_path = os.path.join(new_file_dir, folder_name)
        ensure_dir(new_folder_path)
        sample_list = refer_dict[key].split("|")
        print(sample_list)
        # ======================== per-file directories ========================
        for sample in sample_list:  # file MD5
            file_path = MD5_refer_dict[sample]
            file_name = os.path.split(file_path)[1]
            print("旧", file_path)
            file_folder_name = MyDef.HexShift(sample)

            new_file_folder_path = os.path.join(new_file_dir, folder_name,
                                                folder_name + file_folder_name)
            ensure_dir(new_file_folder_path)

            new_file_path = os.path.join(new_file_folder_path, file_name)
            print("新", new_file_path)

            try:
                shutil.move(file_path, new_file_path)  # works for files and directories
            except OSError as error:
                # Report instead of silently ignoring; shutil.Error is an
                # OSError subclass, and Ctrl-C is no longer swallowed.
                print("跳过", file_path, error)

    print(MyDef.RunTime(start_time))
def MD5Table(file_dir):
    """Add the files of *file_dir* to the MD5 table CSV and rewrite it sorted.

    The existing table (MD5 -> path) is loaded with ``MyDef.ReadDictB``. Every
    entry of *file_dir* whose joined path is not already a value of the table
    is hashed with ``MyDef.md5sum`` and added. The table is then written back
    with ``MyDef.StoreCSV`` under the header ``MD5, 路径``, ordered by the
    string ``file name + MD5``. The elapsed time is printed at the end.

    Args:
        file_dir: directory whose direct entries are scanned.
    """
    import time
    import os
    import MyDef

    began = time.time()  # start timestamp
    table_path = os.path.join('/Users/alicewish/Dropbox', '漫画图源MD5表.csv')
    path_by_md5 = MyDef.ReadDictB(table_path, True)
    # ================ read the folder content ================
    entries = os.listdir(file_dir)

    # Paths already present in the table do not need to be hashed again.
    known_paths = {path_by_md5[md5] for md5 in path_by_md5}

    for entry in entries:
        candidate = os.path.join(file_dir, entry)
        if candidate not in known_paths:
            digest = MyDef.md5sum(candidate)
            print(digest, entry)
            path_by_md5[digest] = candidate

    # Sort by "file name + MD5"; the MD5 is recovered afterwards as the last
    # 32 characters of that sort key.
    sort_keys = sorted(
        os.path.split(path_by_md5[md5])[1] + md5 for md5 in path_by_md5)
    rows = []
    for sort_key in sort_keys:
        digest = sort_key[-32:]
        rows.append([digest, path_by_md5[digest]])

    MyDef.StoreCSV(rows, table_path, ['MD5', '路径'])
    print(MyDef.RunTime(began))
Beispiel #5
0
def ReadMobile():
    """Scrape the Baidu Pan mobile share page into a name -> share-link dict.

    The page is fetched once. Share ids and 64-letter lowercase titles are
    extracted with ``MyDef.ReFind`` and paired by position. For each pair the
    first 32 characters of the title are passed through
    ``MyDef.HexShiftBack`` and used as the key; the value is the share URL
    for that share id. The dict is then passed through ``AddDict`` and
    returned. The match lists, their sizes, the elapsed time and the final
    dict are printed.

    NOTE(review): ids and titles are matched by two independent patterns and
    paired by index. A share whose title is not 64 lowercase letters would
    shift the pairing; confirm all shares on the page follow that naming.

    Returns:
        The value returned by ``AddDict`` for the scraped dict.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
        IndexError: if fewer titles than share ids were found.
    """
    import time, requests, MyDef

    start_time = time.time()  # start timestamp
    refer_dict = {}

    header = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 (FoxPlus) Firefox/2.0.0.14'
    }
    url = 'https://pan.baidu.com/wap/share/home?uk=2007334207&third=0'
    # requests has no default timeout; without one a stalled connection
    # would block this script forever.
    page = requests.get(url=url, headers=header, timeout=30)
    html = page.content.decode("utf", "ignore")

    shareid_list = MyDef.ReFind(html, r'"shareid":"[0-9]{1,20}')

    print(shareid_list)
    print(len(shareid_list))

    title_list = MyDef.ReFind(html, r'"title":"[a-z]{64}')
    print(title_list)
    print(len(title_list))

    for i, raw_shareid in enumerate(shareid_list):
        shareid = raw_shareid.replace('"shareid":"', '')
        title = title_list[i].replace('"title":"', '')
        # Only the first 32 characters of the title are shifted back.
        real_name = MyDef.HexShiftBack(title[:32])
        refer_dict[
            real_name] = 'https://pan.baidu.com/share/link?uk=2007334207&shareid=' + shareid

    refer_dict = AddDict(refer_dict)

    # ================ elapsed time ================
    print(MyDef.RunTime(start_time))
    print(refer_dict)
    print(len(refer_dict))
    return refer_dict
def FileWithLink():
    """Copy a Markdown table of all known files, with links and sizes, to the clipboard.

    Every path in the MD5 table becomes one row ``[name](link) | size``, where
    the size is ``sizeof_fmt(os.path.getsize(path))`` and the link comes from
    the share-link table keyed by path. Files without a link are printed and
    end up as plain ``name | size`` rows. Rows are sorted while they all still
    start with "[", then the two header rows are put in front, the lines are
    joined with CRLF and handed to ``MyDef.WriteClip``. The elapsed time is
    printed at the end.
    """
    import time
    import os
    import MyDef

    began = time.time()  # start timestamp
    # ======================== inputs ========================
    md5_table_path = '/Users/alicewish/Dropbox/漫画图源MD5表.csv'
    path_by_md5 = MyDef.ReadDictC(md5_table_path, True)
    link_table_path = '/Users/alicewish/Dropbox/漫画图源度盘地址表.csv'
    link_by_path = MyDef.ReadDictB(link_table_path)

    rows = []
    for md5 in path_by_md5:
        local_path = path_by_md5[md5]
        size_text = sizeof_fmt(os.path.getsize(local_path))
        base_name = os.path.split(local_path)[1]

        if local_path in link_by_path:
            row = "[" + base_name + "](" + link_by_path[local_path] + ") | " + size_text
        else:
            # Keep the "[...]()" shape for now so the sort below treats linked
            # and unlinked rows alike; it is stripped after sorting.
            row = "[" + base_name + "]() | " + size_text
            print(base_name)
        rows.append(row)

    rows.sort()
    table = ['文件名 | 大小', '--- | ---'] + rows
    # Rows with an empty link target lose the leading "[" and the "]()".
    table = [
        line[1:].replace("]()", "") if "]()" in line else line
        for line in table
    ]

    # ================ write to the clipboard ================
    MyDef.WriteClip('\r\n'.join(table))
    print(MyDef.RunTime(began))
def Sampling(number=30):
    """Split the MD5 list into random groups and save them as the sampling table.

    ``RandomSample`` is called repeatedly on what is left of the MD5 list;
    each call returns one group and the remainder. Every group is stored as
    its MD5s joined with "|", keyed by ``MyDef.HashMD5String`` of that joined
    string. The table is written with ``MyDef.WriteDictB`` and the elapsed
    time is printed.

    Args:
        number: passed through to ``RandomSample`` as its second argument.
    """
    import time
    import MyDef
    import os

    began = time.time()  # start timestamp

    path_by_md5, pending = ReadMD5CSV()

    groups = {}
    while len(pending) > 0:
        picked, pending = RandomSample(pending, number)
        joined = '|'.join(picked)
        print(joined)
        group_md5 = MyDef.HashMD5String(joined)
        print('方案MD5', group_md5)
        groups[group_md5] = joined

    table_path = os.path.join('/Users/alicewish/Dropbox',
                              '漫画图源MD5随机分样表.csv')
    MyDef.WriteDictB(groups, table_path)

    print(MyDef.RunTime(began))
Beispiel #8
0
# Script fragment: pairs scraped titles with magnet links, writes them to a
# text file and copies a summary to the clipboard.
# NOTE(review): tree, title_list, output_readline, this_week_readline,
# output_file_path, start_time and MyDef are defined outside this fragment.

# href of every anchor whose title is "Download this torrent using magnet".
magnet_list = tree.xpath('//a[@title="Download this torrent using magnet"]/@href')

# Titles and magnet links are paired by position; a shorter magnet_list
# raises IndexError here.
count = len(title_list)
for i in range(count):
    output_line_in_list = [title_list[i], magnet_list[i]]
    output_line = ",".join(output_line_in_list)
    output_readline.append(output_line)
# The first magnet link is added to the clipboard summary
# (IndexError if no magnet link was found).
this_week_readline.append(magnet_list[0])
# ================ write the text file ================
text = '\r\n'.join(output_readline)
print(text)

# try/finally guarantees the file is closed even if the write fails.
f = open(output_file_path, 'w')
try:
    f.write(text)
finally:
    f.close()

info = "\r\n".join(this_week_readline)
print(info)
print(len(this_week_readline))
# ================ write to the clipboard ================
import pyperclip

pyperclip.copy(info)
# Read the clipboard back; the value is not used in the visible fragment.
spam = pyperclip.paste()



# Print the elapsed time as reported by MyDef.RunTime.
print("耗时:",MyDef.RunTime(start_time))
Beispiel #9
0
def ReadChrome(input_file_path=None):
    """Parse a saved Baidu Pan "my shares" page into a name -> share-link dict.

    The HTML file is parsed with lxml. Share names, share links, share times
    and the view / save / download counters are extracted and printed as
    tab-separated lines. For every share whose name is exactly 64 characters
    long, the first 32 characters are passed through ``MyDef.HexShiftBack``
    and mapped to the share link.

    Args:
        input_file_path: path of the saved HTML page. When omitted or empty,
            the previously hard-coded Dropbox path is used. (The argument used
            to be overwritten by that path and was therefore ignored.)

    Returns:
        dict mapping the shifted-back name to its share link. It is empty when
        the number of names and share links differ (an error line is printed).

    Raises:
        OSError: if the file cannot be read.
        IndexError: if the page has fewer time / counter cells than names.
    """
    import time, re, MyDef
    from lxml import html

    start_time = time.time()  # start timestamp

    # ============== read the saved page ==============
    if not input_file_path:
        input_file_path = '/Users/alicewish/Dropbox/百度云 网盘-我的分享.htm'
    with open(input_file_path, 'r') as page_file:  # closed even on error
        read_text = page_file.read()
    tree = html.fromstring(read_text)

    # ============== share names ==============
    names = tree.xpath('//span[@node-type="name-text"]/@title')
    all_name = '\r\n'.join(names)
    print(len(names))
    print(all_name)

    # ============== share links ==============
    # Dots are escaped so only the literal pan.baidu.com host matches.
    share_link_pattern = re.compile(r'https://pan\.baidu\.com/s/')
    links = tree.xpath('//a[@target="_blank"]/@href')
    share_links = [link for link in links if share_link_pattern.match(link)]
    all_link = '\n'.join(share_links)
    print(len(share_links))
    print(all_link)

    # ============== share time and view / save / download counters ==============
    raw_share_time = tree.xpath(
        '//div[@style="width: 20%"]/text()')  # share times
    all_number = tree.xpath('//div[@style="width: 9%"]/text()')  # counters

    def clean_count(cell):
        # Strip surrounding whitespace, then the "次" unit.
        return cell.strip(" \n\t\r").strip("次")

    share_time = []
    view_number = []
    save_number = []
    download_number = []

    # The first time cell and the first three counter cells are skipped
    # (presumably the table header row -- confirm against the page).
    for i in range(len(names)):
        share_time.append(raw_share_time[i + 1].strip(" \n\t\r"))
        view_number.append(clean_count(all_number[3 * i + 3]))  # views
        save_number.append(clean_count(all_number[3 * i + 4]))  # saves
        download_number.append(clean_count(all_number[3 * i + 5]))  # downloads

    # ============== merge ==============
    info_list = []
    refer_dict = {}
    if len(names) == len(share_links):
        merged = zip(names, share_links, share_time, view_number,
                     save_number, download_number)
        for name, share_link, shared_at, views, saves, downloads in merged:
            info_list.append("\t".join(
                [name, share_link, shared_at, views, saves, downloads]))
            if len(name) == 64:
                real_name = MyDef.HexShiftBack(name[:32])  # first half only
                refer_dict[real_name] = share_link
    else:
        print("错误", len(names), len(share_links))
    all_info = '\n'.join(info_list)

    print(all_info)
    print(MyDef.RunTime(start_time))
    return refer_dict
Beispiel #10
0
def Processing():
    """Re-split the lines of a Markdown script and copy the result to the clipboard.

    Each line of ``/Users/alicewish/Downloads/my.md`` is handled as follows:

    * ``<span ...>...</span>`` is removed, "……" becomes "…", and the escaped
      ``\\[`` / ``\\]`` are unescaped.
    * Emphasis asterisks are removed from the text; for every character the
      number of asterisks in effect (0-3) is remembered. An escaped ``\\*`` is
      kept as a literal "*" (it travels through the placeholder "の").
    * Empty lines, two-digit lines (page numbers), the first 8 lines and lines
      starting with "*" or "[" are not split.
    * Other lines are segmented with jieba. The positions after each jieba
      word are candidate cut points; punctuation and particle rules then
      remove some of them.
    * The schemes returned by ``Training()`` are tried in order. A scheme
      belongs to a line when its first two characters equal the zero-padded
      line length; the text from index 7 on is a "-"-separated list of
      segment lengths. The first scheme whose segment ends are all allowed cut
      points wins. The line is emitted as one output line per segment, with
      the asterisks rebuilt from the remembered emphasis levels.
    * If no scheme fits, the line is emitted unsplit and counted as pending.

    The counters, many intermediate values and the elapsed time are printed.
    The output lines are joined with CRLF and handed to ``MyDef.WriteClip``.

    Fixes compared with the previous version: the input file is closed; a line
    length without any scheme no longer raises NameError or reuses the scheme
    range of an earlier line; a rule about "the position before" the first
    character no longer wraps around and blocks the cut at the line end; the
    builtin ``sum`` is no longer shadowed; backslash literals are raw strings.
    """
    import time, jieba, re, MyDef

    start_time = time.time()  # start timestamp
    scenario_list_full = Training()

    # ====================================== processing ======================================
    dict_file_path = '/Users/alicewish/我的坚果云/userdict.txt'  # custom jieba dictionary

    cut_right_count = 0
    cut_wrong_count = 0

    # ======================== input ========================
    input_file_path = "/Users/alicewish/Downloads/my.md"
    with open(input_file_path, 'r') as input_file:
        read_text = input_file.read()

    text_readline = read_text.replace("\nclass",
                                      "class").replace("...",
                                                       "…").splitlines()

    output_readline = []  # output lines

    jieba.load_userdict(dict_file_path)

    for a, raw_line in enumerate(text_readline):
        source_line = re.sub(r'<span.*</span>', '', raw_line)  # drop span
        source_line = source_line.replace('……', '…')

        # Escaped asterisks are parked as "の" so they are not taken for
        # emphasis markers.
        # NOTE(review): a genuine "の" in the text would turn into "*".
        markdown_line = source_line.replace(r"\*", "の").replace(
            r"\[", "[").replace(r"\]", "]")
        print(markdown_line)
        line_cut_list = markdown_line.split("*")
        print(line_cut_list)

        plain_line = markdown_line.replace("*", "").replace("の", "*")
        print(plain_line)

        # line_formmat_list[k]: emphasis level of plain_line[k].
        line_formmat_list = [0] * len(plain_line)

        # line_mark_count_list[k]: number of asterisks directly before
        # plain_line[k] (the last slot counts those after the last character).
        line_mark_count_list = [0] * (len(plain_line) + 1)
        point = 0
        for char in markdown_line:
            if char == '*':
                line_mark_count_list[point] += 1
            else:
                point += 1
        print(line_mark_count_list)

        pin = 0
        before = 0  # emphasis level of the character before the segment

        for seg in line_cut_list:
            if seg == '':
                continue
            last_pin = pin
            pin += len(seg)
            if last_pin > 0:
                before = line_formmat_list[last_pin - 1]
            marks = line_mark_count_list[last_pin]

            # Emphasis level of this segment from the previous level and the
            # number of asterisks between them.
            if before == 0:
                level = before + marks
            elif before == 1 and marks == 1:
                level = before - marks
            elif before == 1 and marks == 2:
                level = before + marks
            elif before == 2 and marks == 1:
                level = before + marks
            elif before == 2 and marks == 2:
                level = before - marks
            elif before == 3:
                level = before - marks
            else:
                level = 0  # unrecognised transition: stays at the initial 0
            for l in range(last_pin, pin):
                line_formmat_list[l] = level
        print(line_formmat_list)

        print(plain_line)
        print(len(plain_line))

        need_cut = True  # does this line need splitting?
        if len(plain_line) == 0:
            pass
        elif len(plain_line) == 2 and re.match(r'[0-9][0-9]', plain_line):
            # page number
            need_cut = False
        elif a < 8:
            # header part
            need_cut = False
        elif len(plain_line) > 2:
            if plain_line[0] == '*' or plain_line[0] == '[':
                # annotation
                need_cut = False

        if plain_line == "":
            status = 0  # empty line
        elif not need_cut:
            status = -1  # keep as is
        else:
            status = 1  # to be segmented

        if status == 1:
            # ================ jieba segmentation ================
            string_list = list(jieba.cut(plain_line))  # default: accurate mode
            print(string_list)

            # Schemes for this line length: from the first to the last entry
            # whose two-character prefix equals the zero-padded length.
            length_tag = str(len(plain_line)).zfill(2)
            matching = [
                i for i, scenario in enumerate(scenario_list_full)
                if scenario[0:2] == length_tag
            ]
            if matching:
                scheme_range = range(matching[0], matching[-1] + 1)
            else:
                # No scheme for this length: the line is left unsplit.
                scheme_range = range(0)

            # ================ allowed cut points ================
            # line_can_cut_list[k] == 1: a cut right after plain_line[k] is
            # allowed. It does not depend on the scheme, so build it once.
            line_can_cut_list = [0] * len(plain_line)
            j = 0
            for string in string_list:
                j += len(string)
                line_can_cut_list[j - 1] = 1
            # ================ punctuation and particle corrections ================
            for i in range(len(plain_line)):
                if plain_line[i] in ',.?!,。…?!”·-》>:】【]、':
                    # no cut before these; nothing precedes position 0, and
                    # index -1 would wrongly block the cut at the line end
                    if i > 0:
                        line_can_cut_list[i - 1] = 0
                elif plain_line[i] in '“《<【[':
                    # no cut after these
                    line_can_cut_list[i] = 0
                elif plain_line[
                        i] in '上中下内出完的地得了吗吧着个就前世里嘛图们来呗' and i > 0 and line_can_cut_list[
                            i - 1] == 1 and line_can_cut_list[i] == 1:
                    # no cut before these (only if it stood alone)
                    line_can_cut_list[i - 1] = 0
                elif plain_line[i] in '太每帮跟另' and (
                        i == 0 or line_can_cut_list[i - 1] == 1
                ) and line_can_cut_list[i] == 1:
                    # no cut after these (only if it stood alone); the line
                    # start counts as a cut
                    line_can_cut_list[i] = 0

            # ================ try the schemes ================
            cut_right = False
            current_cut_list = []
            for current_i in scheme_range:
                current_cut = scenario_list_full[current_i]
                current_cut_list = current_cut[7:].split("-")  # segment lengths
                print(line_can_cut_list)
                print(current_cut_list)

                total = 0
                cut_right = True
                for size in current_cut_list:
                    last_total = total
                    total += int(size)
                    print(line_can_cut_list[total - 1])
                    print(plain_line[last_total:total])

                    if line_can_cut_list[total - 1] == 0:
                        cut_right = False
                print(cut_right)
                if cut_right:
                    break

            if cut_right:  # a scheme fits
                cut_right_count += 1
                total = 0
                for size in current_cut_list:
                    last_total = total
                    total += int(size)
                    output_line = plain_line[last_total:total]
                    output_line_format_list = line_formmat_list[
                        last_total:total]

                    # Level changes between neighbours (padded with level 0
                    # at both ends) give the asterisks to emit at each spot.
                    format_list_for_use = [0] + output_line_format_list + [0]
                    output_line_mark_count_list = [
                        abs(format_list_for_use[b + 1] -
                            format_list_for_use[b])
                        for b in range(len(format_list_for_use) - 1)
                    ]
                    print(output_line_mark_count_list)

                    pieces = []
                    for c in range(len(output_line_mark_count_list) - 1):
                        pieces.append('*' * output_line_mark_count_list[c])
                        pieces.append(output_line[c])
                    pieces.append('*' * output_line_mark_count_list[-1])

                    output_readline.append(''.join(pieces))
                    print("格式", output_line_format_list)
            else:  # no scheme fits
                output_readline.append(source_line)
                cut_wrong_count += 1
        elif status == 0:  # empty line
            output_readline.append('\n|\n')
        else:  # keep as is
            output_readline.append(
                source_line.replace(r"\[", "[").replace(r"\]", "]"))

    print('切对', cut_right_count)
    print('待切', cut_wrong_count)

    # ================ write to the clipboard ================
    text = '\r\n'.join(output_readline)

    MyDef.WriteClip(text)
    print(MyDef.RunTime(start_time))