# Example 1
def AddDict(refer_dict):
    """Scrape paginated Baidu Pan share listings and collect share links.

    Fetches 15 result pages for a fixed Baidu Pan user (uk=2007334207),
    extracts each share's id and 64-char obfuscated title, decodes the
    real name via MyDef.HexShiftBack, and stores
    ``real_name -> share URL`` entries into *refer_dict*.

    Args:
        refer_dict: dict to be populated in place.

    Returns:
        The same *refer_dict*, mutated with the newly scraped entries.
    """
    import requests, MyDef

    # The request header never changes, so build it once outside the loop.
    header = {
        'User-Agent':
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.14) Gecko/20080404 (FoxPlus) Firefox/2.0.0.14'
    }
    for k in range(15):
        # Pagination: each page starts 20 entries further in.
        url = 'https://pan.baidu.com/wap/share/home?third=0&uk=2007334207&start=' + str(
            20 * (k + 1))
        page = requests.get(url=url, headers=header)
        html = page.content.decode("utf", "ignore")

        shareid_list = MyDef.ReFind(html, r'"shareid":"[0-9]{1,20}')
        print(shareid_list)
        print(len(shareid_list))

        title_list = MyDef.ReFind(html, r'"title":"[a-z]{64}')
        print(title_list)
        print(len(title_list))

        # zip pairs ids with titles and stops at the shorter list, so a
        # page where the two regexes match different counts can no longer
        # raise IndexError as the old index-based loop could.
        for raw_id, raw_title in zip(shareid_list, title_list):
            shareid = raw_id.replace('"shareid":"', '')
            title = raw_title.replace('"title":"', '')
            real_name = MyDef.HexShiftBack(title[:32])  # decode obfuscated name
            refer_dict[
                real_name] = 'https://pan.baidu.com/share/link?uk=2007334207&shareid=' + shareid
    return refer_dict
# Example 2
def ReadChrome(input_file_path):
    """Parse a saved Baidu Pan "my shares" HTML page exported from Chrome.

    Reads the HTML file at *input_file_path*, extracts every share's name,
    link, share time and view/save/download counts, prints a tab-separated
    summary, and builds a lookup dict mapping decoded real names to their
    share links.

    Args:
        input_file_path: path to the saved ``.htm`` page.

    Returns:
        dict mapping ``real_name -> share link`` for entries whose name is
        a 64-char obfuscated string.
    """
    import time, re, MyDef
    from lxml import html

    start_time = time.time()  # start timestamp for runtime reporting

    # ============== read the saved HTML ==============
    # BUG FIX: the original overwrote input_file_path with a hard-coded
    # absolute path, silently ignoring the caller's argument. The
    # parameter is now honoured; the file is read via a context manager
    # so the handle is always closed.
    with open(input_file_path, 'r') as fh:
        read_text = fh.read()
    tree = html.fromstring(read_text)

    # ============== share names ==============
    names = tree.xpath('//span[@node-type="name-text"]/@title')  # list of titles
    all_name = '\r\n'.join(names)
    print(len(names))
    print(all_name)

    # ============== share links ==============
    links = tree.xpath('//a[@target="_blank"]/@href')
    # Keep only Baidu Pan outbound share links.
    share_links = [
        link for link in links
        if re.match(r'https://pan.baidu.com/s/[^<]*', link)
    ]
    all_link = '\n'.join(share_links)
    print(len(share_links))
    print(all_link)

    # ============== share time and view/save/download counts ==============
    raw_share_time = tree.xpath(
        '//div[@style="width: 20%"]/text()')  # share-time column
    all_number = tree.xpath('//div[@style="width: 9%"]/text()')  # count columns

    share_time = []
    view_number = []
    save_number = []
    download_number = []

    for i in range(len(names)):
        # Offsets (+1 and 3*i+3..5) skip what appear to be header cells in
        # each column — assumed from the saved page layout, confirm if the
        # export format changes.
        share_time.append(raw_share_time[i + 1].strip(" \n\t\r"))
        view_number.append(all_number[3 * i +
                                      3].strip(" \n\t\r").strip("次"))  # views
        save_number.append(all_number[3 * i +
                                      4].strip(" \n\t\r").strip("次"))  # saves
        download_number.append(
            all_number[3 * i + 5].strip(" \n\t\r").strip("次"))  # downloads

    # ============== merge the information ==============
    info_list = []
    refer_dict = {}
    if len(names) == len(share_links):
        for i, name in enumerate(names):
            info_line = "\t".join([
                name, share_links[i], share_time[i], view_number[i],
                save_number[i], download_number[i]
            ])
            info_list.append(info_line)
            if len(name) == 64:
                # 64-char names are obfuscated; decode the real name.
                real_name = MyDef.HexShiftBack(name[:32])
                refer_dict[real_name] = share_links[i]
    else:
        # Name/link counts disagree — report the mismatch instead of
        # pairing them incorrectly.
        print("错误", len(names), len(share_links))
    all_info = '\n'.join(info_list)

    print(all_info)
    print(MyDef.RunTime(start_time))
    return refer_dict