Ejemplo n.º 1
0
def AddDict(refer_dict):
    """Add share links scraped from 15 further listing pages to *refer_dict*.

    Requests the Baidu Pan WAP share listing of user 2007334207 at start
    offsets 20, 40, ..., 300. On every page the share ids and the
    64-lowercase-letter titles are extracted with ``MyDef.ReFind`` and paired
    by position. The first 32 characters of each title are passed through
    ``MyDef.HexShiftBack`` and the result is used as the dictionary key; the
    value is the share URL.

    Args:
        refer_dict: mapping that is updated in place.

    Returns:
        The same *refer_dict* object.

    Raises:
        IndexError: if a page yields fewer titles than share ids.
    """
    import requests
    import MyDef

    request_headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 (FoxPlus) Firefox/2.0.0.14'
    }
    listing_prefix = 'https://pan.baidu.com/wap/share/home?third=0&uk=2007334207&start='
    link_prefix = 'https://pan.baidu.com/share/link?uk=2007334207&shareid='

    for page_number in range(1, 16):
        page_url = listing_prefix + str(20 * page_number)
        response = requests.get(url=page_url, headers=request_headers)
        html = response.content.decode("utf", "ignore")

        shareid_list = MyDef.ReFind(html, r'"shareid":"[0-9]{1,20}')
        print(shareid_list)
        print(len(shareid_list))

        title_list = MyDef.ReFind(html, r'"title":"[a-z]{64}')
        print(title_list)
        print(len(title_list))

        # Ids and titles are matched purely by their position in the page.
        for position in range(len(shareid_list)):
            share_id = shareid_list[position].replace('"shareid":"', '')
            long_title = title_list[position].replace('"title":"', '')
            decoded_name = MyDef.HexShiftBack(long_title[:32])
            refer_dict[decoded_name] = link_prefix + share_id
    return refer_dict
Ejemplo n.º 2
0
def BulidLinkB():
    """Write a CSV that pairs every sampled local file with its cloud link.

    For each sampling key that also has a share link (from ``ReadMobile``),
    the '|'-separated MD5 list stored under that key is walked. Each MD5 is
    resolved to a local path through the MD5 table, and a deep link into the
    shared folder is built from the ``MyDef.HexShift`` forms of the key and
    the MD5. The ``[path, link]`` rows are stored with ``MyDef.StoreCSV``.
    """
    import time
    import MyDef
    import os

    start_time = time.time()  # start timestamp
    refer_dict = ReadMD5SamplingCSV()
    MD5_refer_dict, MD5_list = ReadMD5CSV()
    yun_link_dict = ReadMobile()
    rows = []

    # ---- top-level folders: one per sampling key ----
    for key in refer_dict:
        if key not in yun_link_dict:
            continue
        folder_name = MyDef.HexShift(key)
        yun_link = yun_link_dict[key]

        md5_group = refer_dict[key].split("|")
        print(md5_group)

        # ---- second-level folders: one per file MD5 ----
        for md5_value in md5_group:
            local_path = MD5_refer_dict[md5_value]
            base_name = os.path.basename(local_path)

            sub_folder = MyDef.HexShift(md5_value)
            deep_link = ''.join((yun_link, '#path=%252F', folder_name,
                                 '%252F', folder_name, sub_folder))
            print(base_name)
            print(deep_link)
            rows.append([local_path, deep_link])

    MyDef.StoreCSV(rows, '/Users/alicewish/Dropbox/漫画图源度盘地址表.csv')
    print(len(rows))
    print(MyDef.RunTime(start_time))
Ejemplo n.º 3
0
def UnDistribute():
    """Move distributed files back to the paths recorded in the MD5 table.

    For every sampling key and every MD5 listed under it, the expected
    location below ``/Volumes/Mack/Distribute`` is rebuilt as
    ``<HexShift(key)>/<HexShift(key) + HexShift(md5)>/<file name>`` and the
    file found there is moved to the path stored in the MD5 table.

    The operation is best-effort: a file that cannot be moved (for example
    because it is not present at the expected location) is reported and
    skipped. Only ``OSError`` is tolerated, so Ctrl-C and programming errors
    are no longer silently swallowed.

    Raises:
        KeyError: if a sampled MD5 is missing from the MD5 table.
    """
    import time, MyDef, os, shutil
    start_time = time.time()  # start timestamp
    refer_dict = ReadMD5SamplingCSV()
    MD5_refer_dict, _ = ReadMD5CSV()

    # ---- outermost directory ----
    new_file_dir = '/Volumes/Mack/Distribute'

    # ---- top-level folders: one per sampling key ----
    for key in refer_dict:
        folder_name = MyDef.HexShift(key)

        sample_list = refer_dict[key].split("|")
        print(sample_list)
        # ---- second-level folders: one per file MD5 ----
        for sample in sample_list:
            file_path = MD5_refer_dict[sample]
            file_name = os.path.split(file_path)[1]
            print("旧", file_path)
            file_folder_name = MyDef.HexShift(sample)

            new_file_path = os.path.join(new_file_dir, folder_name,
                                         folder_name + file_folder_name,
                                         file_name)
            print("新", new_file_path)

            try:
                # shutil.move handles both files and directories.
                shutil.move(new_file_path, file_path)
            except OSError as err:
                # shutil.Error is an OSError subclass, so it is covered too.
                print("跳过", new_file_path, err)

    print(MyDef.RunTime(start_time))
Ejemplo n.º 4
0
def ChangeBack():
    """Strip the ``@k_k@`` marker from the second column of the MD5 table CSV.

    The CSV is loaded with ``MyDef.ReadCSV``. Every row whose second field
    contains the marker is printed and cleaned, and the whole table is then
    written back to the same file with ``MyDef.StoreCSV``.
    """
    import os
    import MyDef

    marker = '@k_k@'
    refer_file_path = os.path.join('/Users/alicewish/Dropbox', '漫画图源MD5表.csv')
    rows = MyDef.ReadCSV(refer_file_path, type=False)
    for row_index in range(len(rows)):
        row = rows[row_index]
        if marker not in row[1]:
            continue
        print(row[1])
        row[1] = row[1].replace(marker, '')
    MyDef.StoreCSV(rows, refer_file_path)
def MD5Table(file_dir):
    """Refresh the MD5 table CSV with the entries found in *file_dir*.

    The existing table (MD5 -> path) is loaded, every directory entry whose
    joined path is not yet recorded is hashed with ``MyDef.md5sum`` and added,
    and the table is rewritten sorted by ``file name + MD5``.

    NOTE(review): this file defines ``MD5Table`` twice with the same body; the
    later definition is the one that stays bound after the module is loaded.

    Args:
        file_dir: directory whose entries are added to the table.
    """
    import time
    import os
    import MyDef

    start_time = time.time()  # start timestamp
    refer_file_path = os.path.join('/Users/alicewish/Dropbox',
                                   '漫画图源MD5表.csv')  # table location
    MD5_refer_dict = MyDef.ReadDictB(refer_file_path, True)

    # ---- read the directory listing ----
    entries = os.listdir(file_dir)

    # Paths that already have an MD5 on record are not hashed again.
    known_paths = {MD5_refer_dict[digest] for digest in MD5_refer_dict}

    for entry in entries:
        candidate = os.path.join(file_dir, entry)
        if candidate not in known_paths:
            digest = MyDef.md5sum(candidate)
            print(digest, entry)
            MD5_refer_dict[digest] = candidate

    # Sort on "file name + MD5"; the MD5 is recovered as the last 32 chars.
    sort_keys = sorted(
        os.path.basename(MD5_refer_dict[digest]) + digest
        for digest in MD5_refer_dict)
    table_rows = [[sort_key[-32:], MD5_refer_dict[sort_key[-32:]]]
                  for sort_key in sort_keys]

    MyDef.StoreCSV(table_rows, refer_file_path, ['MD5', '路径'])
    print(MyDef.RunTime(start_time))
def FileWithLink():
    """Copy a Markdown table of file names, links and sizes to the clipboard.

    Every path in the MD5 table becomes one row ``[name](link) | size`` when
    a link is recorded for that path. Otherwise the name is printed and the
    row is reduced to ``name | size``. Rows are sorted while still in their
    bracketed form, prefixed with the two header lines, joined with CRLF and
    handed to ``MyDef.WriteClip``.
    """
    import time
    import os
    import MyDef

    start_time = time.time()  # start timestamp

    # ---- inputs ----
    MD5_dict = MyDef.ReadDictC('/Users/alicewish/Dropbox/漫画图源MD5表.csv', True)
    yun_link_dict = MyDef.ReadDictB('/Users/alicewish/Dropbox/漫画图源度盘地址表.csv')

    bracketed_rows = []
    for key in MD5_dict:
        file_path = MD5_dict[key]
        readable_file_size = sizeof_fmt(os.path.getsize(file_path))
        file_name = os.path.basename(file_path)

        if file_path not in yun_link_dict:
            row = ''.join(("[", file_name, "]() | ", readable_file_size))
            print(file_name)  # report files that have no link yet
        else:
            row = ''.join(("[", file_name, "](", yun_link_dict[file_path],
                           ") | ", readable_file_size))
        bracketed_rows.append(row)
    bracketed_rows.sort()

    table_lines = ['文件名 | 大小', '--- | ---']
    for row in bracketed_rows:
        if "]()" in row:
            # No link: drop the leading '[' and the empty link markup.
            row = row[1:].replace("]()", "")
        table_lines.append(row)

    # ---- write to the clipboard ----
    MyDef.WriteClip('\r\n'.join(table_lines))
    print(MyDef.RunTime(start_time))
def ReadMD5CSV():
    """Load the MD5 table CSV.

    Returns:
        A tuple ``(MD5_refer_dict, MD5_list)``: the mapping produced by
        ``MyDef.ReadDictB`` and a list of its keys in iteration order.
    """
    import os
    import MyDef

    table_path = os.path.join('/Users/alicewish/Dropbox',
                              '漫画图源MD5表.csv')  # table location
    MD5_refer_dict = MyDef.ReadDictB(table_path, True)
    return (MD5_refer_dict, [digest for digest in MD5_refer_dict])
def ReadMobile():
    """Collect share links from the Baidu Pan WAP share listing.

    The first listing page of user 2007334207 is fetched here. Share ids and
    64-lowercase-letter titles are extracted with ``MyDef.ReFind`` and paired
    by position, with the full title as key and the share URL as value. The
    dictionary is then passed to ``AddDict`` for the remaining pages.

    NOTE(review): entries added here are keyed by the raw 64-character title,
    whereas ``AddDict`` keys its entries by ``HexShiftBack(title[:32])`` --
    confirm whether that difference is intended.

    Returns:
        The dictionary returned by ``AddDict``.

    Raises:
        IndexError: if the page yields fewer titles than share ids.
    """
    import time
    import requests
    import MyDef

    start_time = time.time()  # start timestamp

    response = requests.get(
        url='https://pan.baidu.com/wap/share/home?uk=2007334207&third=0',
        headers={
            'User-Agent':
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 (FoxPlus) Firefox/2.0.0.14'
        })
    html = response.content.decode("utf", "ignore")

    shareid_list = MyDef.ReFind(html, r'"shareid":"[0-9]{1,20}')
    print(shareid_list)
    print(len(shareid_list))

    title_list = MyDef.ReFind(html, r'"title":"[a-z]{64}')
    print(title_list)
    print(len(title_list))

    link_prefix = 'https://pan.baidu.com/share/link?uk=2007334207&shareid='
    collected = {}
    for position in range(len(shareid_list)):
        share_id = shareid_list[position].replace('"shareid":"', '')
        full_title = title_list[position].replace('"title":"', '')
        collected[full_title] = link_prefix + share_id

    refer_dict = AddDict(collected)

    # ---- elapsed time ----
    print(MyDef.RunTime(start_time))
    print(refer_dict)
    print(len(refer_dict))
    return refer_dict
def Sampling(number=30):
    """Split all known MD5 values into random groups and store the grouping.

    ``RandomSample`` is called repeatedly on the remaining MD5 values until
    none are left. Every group is joined with '|', hashed with
    ``MyDef.HashMD5String`` and stored as ``hash -> joined group``; the
    mapping is written with ``MyDef.WriteDictB``.

    Args:
        number: forwarded to ``RandomSample`` as its second argument.
    """
    import time
    import MyDef
    import os

    start_time = time.time()  # start timestamp

    MD5_refer_dict, MD5_list = ReadMD5CSV()

    plan_by_hash = {}
    pending = MD5_list
    while len(pending) > 0:
        group, pending = RandomSample(pending, number)
        joined_group = '|'.join(group)
        print(joined_group)
        group_hash = MyDef.HashMD5String(joined_group)
        print('方案MD5', group_hash)
        plan_by_hash[group_hash] = joined_group

    target_path = os.path.join('/Users/alicewish/Dropbox', '漫画图源MD5随机分样表.csv')
    MyDef.WriteDictB(plan_by_hash, target_path)

    print(MyDef.RunTime(start_time))
def MD5Table(file_dir):
    """Bring the MD5 table CSV up to date with the contents of *file_dir*.

    Loads the stored MD5 -> path mapping, hashes (``MyDef.md5sum``) every
    entry of *file_dir* whose joined path is not recorded yet, and writes the
    table back ordered by ``file name + MD5`` under the header
    ``MD5, 路径``.

    NOTE(review): an identical ``MD5Table`` is defined earlier in this file;
    this later definition replaces it when the module is loaded.

    Args:
        file_dir: directory to scan (direct entries only).
    """
    import time
    import os
    import MyDef

    start_time = time.time()  # start timestamp
    dropbox_path = '/Users/alicewish/Dropbox'
    refer_file_path = os.path.join(dropbox_path, '漫画图源MD5表.csv')
    MD5_refer_dict = MyDef.ReadDictB(refer_file_path, True)

    # ---- read the directory listing ----
    file_list = os.listdir(file_dir)

    recorded_paths = set()
    for md5_value in MD5_refer_dict:
        recorded_paths.add(MD5_refer_dict[md5_value])

    for name in file_list:
        full_path = os.path.join(file_dir, name)
        if full_path in recorded_paths:
            continue  # already hashed on an earlier run
        md5_value = MyDef.md5sum(full_path)
        print(md5_value, name)
        MD5_refer_dict[md5_value] = full_path

    # The sort key is "file name + MD5"; slicing off the last 32 characters
    # afterwards yields the MD5 again.
    ordered_keys = [
        os.path.split(MD5_refer_dict[md5_value])[1] + md5_value
        for md5_value in MD5_refer_dict
    ]
    ordered_keys.sort()

    output_rows = []
    for ordered_key in ordered_keys:
        md5_value = ordered_key[-32:]
        output_rows.append([md5_value, MD5_refer_dict[md5_value]])

    MyDef.StoreCSV(output_rows, refer_file_path, ['MD5', '路径'])
    print(MyDef.RunTime(start_time))
def Distribute():
    """Move every sampled file into ``/Volumes/Mack/Distribute/<key>/<md5>/``.

    For each sampling key a folder is created, and for each MD5 listed under
    that key ('|'-separated) a sub-folder named after the MD5 is created; the
    file recorded for that MD5 in the MD5 table is then moved into it.

    The operation stays best-effort, as before, but failures are no longer
    hidden: an already-existing directory is silently accepted, while any
    other ``OSError`` from creating a directory or moving a file is printed
    and the loop continues. Exceptions that are not ``OSError`` (including
    ``KeyboardInterrupt``) now propagate.

    Raises:
        KeyError: if a sampled MD5 is missing from the MD5 table.
    """
    import time, MyDef, os, shutil

    def make_dir(path):
        """Create *path*; an existing directory is fine, other errors are reported."""
        try:
            os.mkdir(path)
        except FileExistsError:
            pass
        except OSError as err:
            print("无法创建目录", path, err)

    start_time = time.time()  # start timestamp
    refer_dict = ReadMD5SamplingCSV()
    MD5_refer_dict, _ = ReadMD5CSV()
    new_file_dir = '/Volumes/Mack/Distribute'
    make_dir(new_file_dir)

    for key in refer_dict:
        new_folder_path = os.path.join(new_file_dir, key)
        make_dir(new_folder_path)

        sample_list = refer_dict[key].split("|")
        print(sample_list)
        for sample in sample_list:
            file_path = MD5_refer_dict[sample]
            file_name = os.path.split(file_path)[1]
            print("旧", file_path)
            new_file_folder_path = os.path.join(new_folder_path, sample)
            new_file_path = os.path.join(new_file_folder_path, file_name)
            print("新", new_file_path)

            make_dir(new_file_folder_path)

            try:
                # shutil.move handles both files and directories.
                shutil.move(file_path, new_file_path)
            except OSError as err:
                # shutil.Error is an OSError subclass, so it is covered too.
                print("移动失败", file_path, err)

    print(MyDef.RunTime(start_time))
Ejemplo n.º 12
0
# Collect the magnet links of the parsed page (attribute values of the
# "Download this torrent using magnet" anchors).
magnet_list = tree.xpath('//a[@title="Download this torrent using magnet"]/@href')

# One "title,magnet" line per title; titles and magnets are paired by index.
count = len(title_list)
for i, title in enumerate(title_list):
    output_readline.append(",".join((title, magnet_list[i])))
this_week_readline.append(magnet_list[0])

# ---- write the text file ----
text = '\r\n'.join(output_readline)
print(text)

with open(output_file_path, 'w') as f:
    f.write(text)

info = "\r\n".join(this_week_readline)
print(info)
print(len(this_week_readline))

# ---- write to the clipboard ----
import pyperclip

pyperclip.copy(info)
spam = pyperclip.paste()

print("耗时:", MyDef.RunTime(start_time))
Ejemplo n.º 13
0
def ReadChrome(input_file_path):
    """Parse a saved "my shares" page and map decoded names to share links.

    The page is parsed with lxml. Share names, share links
    (``https://pan.baidu.com/s/...``), share times and the view / save /
    download counters are extracted and printed as tab-separated lines. For
    every name that is exactly 64 characters long, its first 32 characters
    are passed through ``MyDef.HexShiftBack`` and the result is mapped to the
    share link.

    Args:
        input_file_path: currently ignored, see the NOTE in the body.

    Returns:
        dict mapping decoded names to share links. It is empty when the
        number of names and the number of share links differ.

    Raises:
        IndexError: if the page holds fewer time / counter cells than the
            names require.
    """
    import time, re, MyDef
    from lxml import html

    start_time = time.time()  # start timestamp

    # ---- read the saved page ----
    # NOTE(review): the argument is overwritten with a hard-coded path, so the
    # caller's value has no effect. Kept as-is so existing callers see no
    # behaviour change -- confirm whether the parameter should be honoured.
    input_file_path = '/Users/alicewish/Dropbox/百度云 网盘-我的分享.htm'
    with open(input_file_path, 'r') as page_file:  # closes the handle promptly
        read_text = page_file.read()
    tree = html.fromstring(read_text)

    # ---- file names ----
    names = tree.xpath('//span[@node-type="name-text"]/@title')
    all_name = '\r\n'.join(names)
    print(len(names))
    print(all_name)

    # ---- share links ----
    links = tree.xpath('//a[@target="_blank"]/@href')
    share_links = [
        link for link in links
        if re.match(r'https://pan.baidu.com/s/[^<]*', link)  # share link?
    ]
    all_link = '\n'.join(share_links)
    print(len(share_links))
    print(all_link)

    # ---- share time and view / save / download counters ----
    raw_share_time = tree.xpath(
        '//div[@style="width: 20%"]/text()')  # share times
    all_number = tree.xpath('//div[@style="width: 9%"]/text()')  # counters

    share_time = []
    view_number = []
    save_number = []
    download_number = []

    for i in range(len(names)):
        # The first "20%" cell and the first three "9%" cells are skipped;
        # after that each share owns one time cell and three counter cells.
        share_time.append(raw_share_time[i + 1].strip(" \n\t\r"))
        view_number.append(
            all_number[3 * i + 3].strip(" \n\t\r").strip("次"))  # views
        save_number.append(
            all_number[3 * i + 4].strip(" \n\t\r").strip("次"))  # saves
        download_number.append(
            all_number[3 * i + 5].strip(" \n\t\r").strip("次"))  # downloads

    # ---- merge ----
    info_list = []
    refer_dict = {}
    if len(names) == len(share_links):
        merged = zip(names, share_links, share_time, view_number,
                     save_number, download_number)
        for record in merged:
            info_list.append("\t".join(record))
            name, share_link = record[0], record[1]
            if len(name) == 64:
                real_name = MyDef.HexShiftBack(name[:32])
                refer_dict[real_name] = share_link
    else:
        print("错误", len(names), len(share_links))
    all_info = '\n'.join(info_list)

    print(all_info)
    print(MyDef.RunTime(start_time))
    return refer_dict
import time, re, MyDef

start_time = time.time()  # start timestamp
now = time.strftime("%Y%m%d", time.localtime())  # current date stamp

# ---- read the clipboard ----
text_readline = MyDef.ReadClipL()

# ---- turn the lines into a nested Markdown bullet list ----
# The first pattern that matches the start of a line decides its nesting
# level; lines that match none of them are placed on level 3.
level_rules = (
    (r'第\w部分', 1),
    (r'第[0-9]{1,}章', 2),
    (r'[0-9]{1,2}\.[0-9]{1,2}', 3),
)

output_readline = []  # resulting bullet lines

for line in text_readline:
    level = 3
    for pattern, rule_level in level_rules:
        if re.match(pattern, line):
            level = rule_level
            break
    output_line = "  " * (level - 1) + "- " + line
    output_readline.append(output_line)

# ---- print the result ----
text = '\r\n'.join(output_readline)
print(text)
Ejemplo n.º 15
0
def _count_marks(markdown_line, plain_length):
    """Count the '*' markers in front of every plain-text position.

    Element ``p`` of the result is the number of '*' characters that directly
    precede the ``p``-th non-'*' character of *markdown_line*; the extra last
    element counts the markers at the very end of the line.
    """
    mark_counts = [0] * (plain_length + 1)
    position = 0
    for char in markdown_line:
        if char == '*':
            mark_counts[position] += 1
        else:
            position += 1
    return mark_counts


def _emphasis_levels(segments, mark_counts, plain_length):
    """Assign an emphasis level to every plain-text character.

    *segments* is the Markdown line split on '*'. All characters of one
    non-empty segment share one level, derived from the level of the
    preceding character and the number of markers in front of the segment.
    Combinations not listed below leave the segment at level 0.
    """
    levels = [0] * plain_length
    end = 0
    before = 0
    for segment in segments:
        if segment == '':
            continue
        start = end
        end += len(segment)
        if start > 0:
            before = levels[start - 1]
        marks = mark_counts[start]

        if before == 0:
            level = marks
        elif before == 1 and marks == 1:
            level = 0
        elif before == 1 and marks == 2:
            level = 3
        elif before == 2 and marks == 1:
            level = 3
        elif before == 2 and marks == 2:
            level = 0
        elif before == 3:
            level = 3 - marks
        else:
            level = 0
        levels[start:end] = [level] * (end - start)
    return levels


def _cut_flags(plain_line, words):
    """Return one flag per character: 1 if the line may be cut after it.

    The flags start from the word boundaries in *words* (whose concatenation
    is expected to be *plain_line*) and are then corrected for punctuation
    and for particles that should stay attached to a neighbouring word.
    """
    no_cut_before = ',.?!,。…?!”·-》>:】【]、'
    no_cut_after = '“《<【['
    attach_to_previous = '上中下内出完的地得了吗吧着个就前世里嘛图们来呗'
    attach_to_next = '太每帮跟另'

    flags = [0] * len(plain_line)
    end = 0
    for word in words:
        end += len(word)
        flags[end - 1] = 1

    for i, char in enumerate(plain_line):
        # At i == 0 there is no previous character. The original indexed
        # flags[i - 1] there, which wrapped around to the LAST flag and could
        # clear the end-of-line boundary, making every scheme fail.
        if char in no_cut_before:
            if i > 0:
                flags[i - 1] = 0
        elif char in no_cut_after:
            flags[i] = 0
        elif (char in attach_to_previous and i > 0 and flags[i - 1] == 1
              and flags[i] == 1):
            # Isolated particle: glue it to the word before it.
            flags[i - 1] = 0
        elif (char in attach_to_next and (i == 0 or flags[i - 1] == 1)
              and flags[i] == 1):
            # Isolated particle: glue it to the word after it. The start of
            # the line counts as a boundary.
            flags[i] = 0
    return flags


def _render_segment(segment_text, segment_levels):
    """Re-insert '*' markers into one output segment.

    Returns ``(mark_counts, markdown)``. ``mark_counts`` holds, for every
    position (plus the end), the absolute change of emphasis level, taking
    the level outside the segment as 0, so each segment is self-contained.
    """
    padded = [0] + segment_levels + [0]
    mark_counts = [
        abs(padded[b + 1] - padded[b]) for b in range(len(padded) - 1)
    ]
    pieces = []
    for marks, char in zip(mark_counts, segment_text):
        pieces.append('*' * marks)
        pieces.append(char)
    pieces.append('*' * mark_counts[-1])
    return mark_counts, ''.join(pieces)


def Processing():
    """Re-break the lines of a Markdown script and copy the result to the clipboard.

    Steps, per line of ``/Users/alicewish/Downloads/my.md``:

    1. ``<span ...>...</span>`` is removed and '……' is shortened to '…'.
    2. Emphasis markers ('*') are separated from the text and turned into a
       per-character emphasis level.
    3. Empty lines become ``'\\n|\\n'``. Two-digit lines, the first 8 lines
       and lines longer than 2 characters that start with '*' or '[' are
       passed through (only ``\\[`` / ``\\]`` are unescaped).
    4. Every other line is tokenised with jieba. The cutting schemes returned
       by ``Training()`` are tried in order: a scheme string starts with the
       zero-padded line length, and its text from index 7 on is a
       '-'-separated list of segment lengths. The first scheme whose segment
       ends all fall on allowed cut positions is applied, and each segment
       is emitted as its own line with its emphasis markers restored. If no
       scheme fits, the cleaned line is emitted unchanged.

    Fixes over the previous version: the input file is closed after reading;
    the backslash literals are valid escape sequences; a line whose length
    has no scheme is reported as "not cut" instead of reusing the scheme
    range of an earlier line (or raising ``UnboundLocalError``); punctuation
    at the start of a line no longer clears the end-of-line boundary; the
    builtin ``sum`` is no longer shadowed.
    """
    import time, jieba, re, MyDef

    start_time = time.time()  # start timestamp
    scenario_list_full = Training()

    dict_file_path = '/Users/alicewish/我的坚果云/userdict.txt'  # jieba user dictionary

    cut_right_count = 0
    cut_wrong_count = 0

    # ---- read the input text line by line ----
    input_file_path = "/Users/alicewish/Downloads/my.md"
    with open(input_file_path, 'r') as input_file:
        read_text = input_file.read()

    text_readline = read_text.replace("\nclass",
                                      "class").replace("...",
                                                       "…").splitlines()

    output_readline = []  # output lines

    jieba.load_userdict(dict_file_path)

    for a, raw_line in enumerate(text_readline):
        cleaned_line = re.sub(r'<span.*</span>', '', raw_line)  # drop spans
        cleaned_line = cleaned_line.replace('……', '…')

        # Escaped asterisks are parked as 'の' so that only real emphasis
        # markers remain as '*'.
        markdown_line = cleaned_line.replace("\\*", "の").replace(
            "\\[", "[").replace("\\]", "]")
        print(markdown_line)
        line_cut_list = markdown_line.split("*")
        print(line_cut_list)

        plain_line = markdown_line.replace("*", "").replace("の", "*")
        print(plain_line)

        line_mark_count_list = _count_marks(markdown_line, len(plain_line))
        print(line_mark_count_list)

        line_formmat_list = _emphasis_levels(line_cut_list,
                                             line_mark_count_list,
                                             len(plain_line))
        print(line_formmat_list)

        print(plain_line)
        print(len(plain_line))

        # ---- classify the line ----
        if plain_line == "":
            status = 0  # empty line
        elif ((len(plain_line) == 2 and re.match(r'[0-9][0-9]', plain_line))
              or a < 8  # header part of the file
              or (len(plain_line) > 2 and plain_line[0] in ('*', '['))):
            status = -1  # page number, header or annotation: do not cut
        else:
            status = 1  # to be segmented

        if status == 1:
            # ---- jieba segmentation (default accurate mode) ----
            string_list = list(jieba.cut(plain_line))
            print(string_list)

            # Schemes for this line length: from the first to the last entry
            # whose two-character prefix equals the zero-padded length.
            length_tag = str(len(plain_line)).zfill(2)
            matching = [
                index for index, scenario in enumerate(scenario_list_full)
                if scenario[0:2] == length_tag
            ]

            cut_right = False
            current_cut_list = []
            if matching:
                line_can_cut_list = _cut_flags(plain_line, string_list)
                for current_i in range(matching[0], matching[-1] + 1):
                    current_cut_list = scenario_list_full[current_i][
                        7:].split("-")
                    print(line_can_cut_list)
                    print(current_cut_list)

                    # ---- does every segment end on an allowed position? ----
                    consumed = 0
                    cut_right = True
                    for size in current_cut_list:
                        last_consumed = consumed
                        consumed += int(size)
                        print(line_can_cut_list[consumed - 1])
                        print(plain_line[last_consumed:consumed])
                        if line_can_cut_list[consumed - 1] == 0:
                            cut_right = False
                    print(cut_right)
                    if cut_right:
                        break

            if cut_right:  # a scheme fits
                cut_right_count += 1
                consumed = 0
                for size in current_cut_list:
                    last_consumed = consumed
                    consumed += int(size)
                    output_line_format_list = line_formmat_list[
                        last_consumed:consumed]
                    mark_count_list, output_markdown_line = _render_segment(
                        plain_line[last_consumed:consumed],
                        output_line_format_list)
                    print(mark_count_list)
                    output_readline.append(output_markdown_line)
                    print("格式", output_line_format_list)
            else:  # no scheme fits: keep the line for manual cutting
                output_readline.append(cleaned_line)
                cut_wrong_count += 1
        elif status == 0:  # empty line
            output_readline.append('\n|\n')
        else:  # passed through without cutting
            output_readline.append(
                cleaned_line.replace("\\[", "[").replace("\\]", "]"))

    print('切对', cut_right_count)
    print('待切', cut_wrong_count)

    # ---- write to the clipboard ----
    text = '\r\n'.join(output_readline)

    MyDef.WriteClip(text)
    print(MyDef.RunTime(start_time))
# Number of data lines (everything except the first line of output_readline).
all_count = len(output_readline) - 1
print(all_count)
text = '\r\n'.join(output_readline)

# ---- write the CSV file; its name carries the date and the line count ----
output_file_name = ''.join(('0 Day Week文件地址-墨问非名制作-', now_date, '(',
                            str(all_count), ').csv'))

output_file_path = os.path.join(dropbox_path, output_file_name)
with open(output_file_path, 'w') as f:
    f.write(text)

# ---- write to the clipboard ----
markdown_text = '\r\n'.join(markdown_readline)

MyDef.WriteClip(markdown_text)

# ---- elapsed time ----
run_time = time.time() - start_time
if run_time >= 3600:  # hours, minutes and seconds, rounded
    elapsed_message = "耗时:{:.0f}时{:.0f}分{:.0f}秒".format(
        run_time // 3600, run_time % 3600 // 60, run_time % 60)
elif run_time >= 60:  # minutes and seconds, rounded
    elapsed_message = "耗时:{:.0f}分{:.0f}秒".format(run_time // 60,
                                                 run_time % 60)
else:  # seconds with two decimals
    elapsed_message = "耗时:{:.2f}秒".format(run_time)
print(elapsed_message)
# Headings that open a section inside the credits box of an issue page.  A
# heading terminates the value list of the section before it, so every heading
# parsed by ComixIssue must be listed here ("Lettered by" and "Story Arc"
# included); otherwise the preceding section swallows it.
_COMIX_CREDIT_HEADINGS = (
    "Written by", "Art by", "Pencils", "Inks", "Colored by", "Lettered by",
    "Cover by", "Genres", "Story Arc", "Digital Release Date",
    "Print Release Date", "Page Count", "Age Rating", "Sold by", "About Book",
)


def _credit_values(credit_list, heading):
    """Return the entries following `heading` up to the next known heading."""
    if heading not in credit_list:
        return []
    values = []
    # Slicing stops cleanly at the end of the list, so a section that is the
    # last one on the page no longer raises IndexError.
    for entry in credit_list[credit_list.index(heading) + 1:]:
        if entry in _COMIX_CREDIT_HEADINGS:
            break
        values.append(entry)
    return values


def _first_credit(credit_list, heading):
    """Return the first entry under `heading`, or "" when there is none."""
    values = _credit_values(credit_list, heading)
    return values[0] if values else ""


def _iso_release_date(raw_date):
    """Convert a "%B %d %Y" date string to "YYYY-MM-DD"; "" stays ""."""
    import time
    if not raw_date:
        return ""
    return time.strftime("%Y-%m-%d", time.strptime(raw_date, "%B %d %Y"))


def _first_text(nodes):
    """Return the first XPath result, or "" when the query matched nothing."""
    return nodes[0] if nodes else ""


def ComixIssue(search_comic_name="Transformers"):
    """Scrape Comixology search results for a series and save per-issue data.

    Searches the site for `search_comic_name`, then visits every distinct
    ``/digital-comic/`` link whose URL contains the name with spaces replaced
    by hyphens.  After each issue is parsed, two files are rewritten with all
    issues collected so far, ordered by "digital release date + title":
    a CSV (through ``MyDef.StoreCSV``) and a text file holding each title and
    description.  Progress is printed along the way.

    Args:
        search_comic_name: Series name to search for.  It is also used, with
            ":", "/" and "&" removed, as the suffix of both output file names.

    Returns:
        None.

    Raises:
        IndexError: An issue page has no ``h1`` element of class "title".
        ValueError: A release date is not in "%B %d %Y" form.
    """
    from lxml import html
    import re
    import requests
    import time
    import MyDef  # used for StoreCSV; imported locally like the sibling functions

    # ======================== Inputs ========================
    save_comic_name = search_comic_name.replace(":", "").replace(
        "/", "").replace("&", "").replace("  ", " ")
    key_title = search_comic_name.replace(" ", "-")
    print(key_title)

    # ======================== Search page ========================
    # Passing the term through `params` lets requests escape it, so names
    # containing "&" or other reserved characters stay one query value.
    page = requests.get('https://www.comixology.com/search',
                        params={'search': search_comic_name})
    tree = html.fromstring(page.text)
    all_url = tree.xpath('//a[@class="content-details"]/@href')
    print(len(all_url))

    issues_url = []  # link of every issue fetched
    check_set = set()  # links already handled
    info_dict = {}  # major key -> CSV row
    alter_info_dict = {}  # major key -> "### title" + description text
    major_key_list = []
    issue_link_pattern = re.compile(r'.*/digital-comic/[^?]*')

    for index, url in enumerate(all_url):
        entry_start_time = time.time()
        print(index)
        print(url)
        matches = issue_link_pattern.match(url)
        if not matches or key_title not in url:
            continue
        short_link = matches.group(0)  # link without its query string
        if short_link in check_set:
            continue
        check_set.add(short_link)
        print("获取中……")
        issues_url.append(short_link)

        # ======================== Issue page ========================
        issue_tree = html.fromstring(requests.get(short_link).text)

        # ==================== Title ====================
        title = issue_tree.xpath('//h1[@class="title"]/text()')[0]

        # ==================== Description ====================
        description = "".join(
            issue_tree.xpath('//section[@class="item-description"]/text()'))
        formatted_description = description.strip("\n\t").replace("\r\n", "|")
        formatted_description = formatted_description.replace(
            "\r", "|").replace("\n", "|")

        # ==================== Credits box ====================
        raw_credits = issue_tree.xpath('//div[@class="credits"]//*/text()')
        stripped_credits = (raw.strip("\t\n") for raw in raw_credits)
        credit_list = [
            line for line in stripped_credits
            if line != "" and line != "HIDE..."
        ]

        # ==================== Optional single nodes ====================
        review_count = _first_text(
            issue_tree.xpath('//div[@itemprop="reviewCount"]/text()'))
        rating_count = review_count.replace("Average Rating (",
                                            "").replace("):", "")
        price = _first_text(
            issue_tree.xpath('//h5[@class="item-price"]/text()'))
        cover_image_url = _first_text(
            issue_tree.xpath('//img[@class="cover"]/@src'))

        # ==================== Multi-valued credits ====================
        writer = "|".join(_credit_values(credit_list, "Written by"))
        artist = "|".join(_credit_values(credit_list, "Art by"))
        penciller = "|".join(_credit_values(credit_list, "Pencils"))
        inker = "|".join(_credit_values(credit_list, "Inks"))
        colorist = "|".join(_credit_values(credit_list, "Colored by"))
        letterer = "|".join(_credit_values(credit_list, "Lettered by"))
        cover_artist = "|".join(_credit_values(credit_list, "Cover by"))
        genres = "|".join(_credit_values(credit_list, "Genres"))
        story_arc = "|".join(_credit_values(credit_list, "Story Arc"))

        # ==================== Single-valued credits ====================
        digital_release_date = _iso_release_date(
            _first_credit(credit_list, "Digital Release Date"))
        print_release_date = _iso_release_date(
            _first_credit(credit_list, "Print Release Date"))
        page_count = _first_credit(credit_list,
                                   "Page Count").replace(" Pages", "")
        age_rating = _first_credit(credit_list,
                                   "Age Rating").replace(" Only", "")
        publisher = _first_credit(credit_list, "Sold by")

        # ==================== Main output ====================
        line_info = [
            title, digital_release_date, print_release_date, price,
            page_count, age_rating, rating_count, publisher, genres,
            story_arc, writer, artist, penciller, inker, colorist,
            letterer, cover_artist, short_link, cover_image_url,
            formatted_description
        ]
        print("\t".join(line_info))

        # "date + title" is the sort key.  A repeated key replaces the stored
        # row instead of being listed twice.
        major_key = digital_release_date + title
        if major_key not in info_dict:
            major_key_list.append(major_key)
        info_dict[major_key] = line_info
        major_key_list.sort()
        text_list = [info_dict[key] for key in major_key_list]

        # ================ Write CSV ================
        # Rewritten after every issue so partial results survive a failure.
        txt_file_path = '/Users/alicewish/Dropbox/Comixology刊物' + save_comic_name + '.csv'
        head_info = [
            "标题", "数字出版日期", "实体出版日期", "价格", "页数", "分级", "评价数", "出版商",
            "类型", "故事线", "编剧", "画师", "铅笔稿", "墨线", "上色师", "填字员", "封面画师",
            "短链", "封面图地址", "简介"
        ]
        MyDef.StoreCSV(text_list, txt_file_path, head_info)

        # ==================== Secondary output ====================
        alter_line = "\r\n".join(["### " + title, formatted_description])
        print(alter_line)

        alter_info_dict[major_key] = alter_line
        alter_text = "\r\n".join(
            alter_info_dict[key] for key in major_key_list)

        # ================ Write TXT ================
        alter_txt_file_path = '/Users/alicewish/我的坚果云/Comixology简介' + save_comic_name + '.txt'
        # Explicit encoding: descriptions may hold non-ASCII text.
        with open(alter_txt_file_path, 'w', encoding='utf-8') as f:
            f.write(alter_text)

        entry_run_time = time.time() - entry_start_time
        print("耗时:{:.2f}秒".format(entry_run_time))

    # ======================== Summary ========================
    print("总共" + str(len(issues_url)) + "期")
Ejemplo n.º 18
0
def ReadMD5SamplingCSV():
    """Read the MD5 sampling reference CSV stored in the Dropbox folder.

    Returns:
        Whatever ``MyDef.ReadDictB`` returns for that file path.
    """
    import os
    import MyDef
    sampling_csv_path = os.path.join('/Users/alicewish/Dropbox',
                                     '漫画图源MD5随机分样表.csv')
    return MyDef.ReadDictB(sampling_csv_path)
Ejemplo n.º 19
0
    output_readline.append(output_line)

this_week_readline.append(magnet_list[0])
this_week_readline.append(magnet_list[1])
# ================ Write text file ================
text = '\r\n'.join(output_readline)
print(text)

# `with` closes the file even when the write fails; the encoding is stated
# explicitly instead of depending on the platform default.
with open(output_file_path, 'w', encoding='utf-8') as f:
    f.write(text)

info = "\r\n".join(this_week_readline)
print(info)
print(len(this_week_readline))

MyDef.Clipboard(info)

# ================ Elapsed-time report ================
run_time = time.time() - start_time
if run_time < 60:  # seconds, two decimals
    print("耗时:{:.2f}秒".format(run_time))
else:
    # Truncate to whole seconds before splitting.  Formatting the float
    # remainder with "{:.0f}" rounds it, which could print "1分60秒".
    minutes, seconds = divmod(int(run_time), 60)
    if run_time < 3600:  # minutes + seconds
        print("耗时:{:d}分{:d}秒".format(minutes, seconds))
    else:  # hours + minutes + seconds
        hours, minutes = divmod(minutes, 60)
        print("耗时:{:d}时{:d}分{:d}秒".format(hours, minutes, seconds))