Esempio n. 1
0
def VisitPage(photo_hash, download_folder, proxy_ip, max_retries=5):
    """Fetch a page through a proxy, parse it and pass it to ``check_html``.

    On any failure (network error, parse error, or an error raised by
    ``check_html``) a new proxy is requested from ``ValidIp`` and the fetch is
    retried, up to ``max_retries`` times.

    Args:
        photo_hash: Passed to ``requests.get`` as the URL to fetch.
        download_folder: Sub-folder name appended to the module-level
            ``out_dir``; the resulting path is handed to ``CheckDir`` and
            ``check_html``.
        proxy_ip: Value passed as ``proxies=`` for the first attempt.
        max_retries: Maximum number of retries with a replacement proxy after
            the first attempt fails. The previous implementation recursed
            without any bound.

    Returns:
        None.
    """
    folder_name = out_dir + "/" + download_folder
    CheckDir(folder_name)
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language':
        'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': '***',
    }

    retries_left = max_retries
    while True:
        try:
            rs = requests.get(photo_hash,
                              proxies=proxy_ip,
                              headers=headers,
                              cookies=MYCOOKIE,
                              verify=False)
            rs.encoding = 'utf-8'
            data = BeautifulSoup(rs.text, "lxml")
            check_html(photo_hash, data, folder_name)
            return
        except Exception as e:
            # Report the failure immediately, before retrying, so messages
            # appear in the order the attempts were made.
            print(e)
            if retries_left <= 0:
                return
            retries_left -= 1
            # NOTE(review): ValidIp is assumed to return a sequence of proxy
            # settings usable as ``proxies=`` - confirm against its definition.
            proxies = ValidIp(True, 'http://www.jiayuan.com')
            if not proxies:
                # No replacement proxy available; indexing [0] would raise.
                return
            proxy_ip = proxies[0]
Esempio n. 2
0
def VisitPhotoPage(photo_hash, download_folder):
    """Fetch a page, parse it and pass it to ``CheckPhotoHtml``.

    Any exception raised while fetching, parsing or processing the page is
    printed and swallowed, so the caller can continue with the next page.

    Args:
        photo_hash: Passed to ``requests.get`` as the URL to fetch.
        download_folder: Sub-folder name appended to the module-level
            ``out_dir``; the resulting path is handed to ``CheckDir`` and
            ``CheckPhotoHtml``.

    Returns:
        None.
    """
    folder_name = out_dir + "/" + download_folder
    CheckDir(folder_name)

    # The request below goes through requests.get directly; the Session the
    # original created here was never used or closed, so it has been removed.
    headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language':
        'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
    }

    try:
        rs = requests.get(photo_hash,
                          headers=headers,
                          cookies=cookies,
                          verify=False)
        # Force UTF-8 decoding of the body regardless of the response headers.
        rs.encoding = 'utf-8'
        data = BeautifulSoup(rs.text, "lxml")
        CheckPhotoHtml(photo_hash, data, folder_name)
    except Exception as e:
        print(e)
Esempio n. 3
0
# Output folder
out_dir = './photo_new'
# print(project_path)


def download_file(url, folder_name, id):
    """Download the ``.png`` that corresponds to a ``_thumbnail`` URL.

    Everything in ``url`` from the first ``"_thumbnail"`` onwards is dropped
    and ``".png"`` is appended; the result is handed to ``DownloadFile`` and
    stored in ``folder_name`` under the name ``<id>.png``.
    """
    extension = ".png"
    # partition()[0] yields the text before the first "_thumbnail", or the
    # whole URL when the marker is absent.
    base_url = url.partition("_thumbnail")[0]
    target_name = str(id) + extension
    DownloadFile(base_url + extension, folder_name, target_name)


# Driver: read the CSV-formatted text file and, for row number N, download the
# files named by its first two columns into out_dir/N/ as 1.png and 2.png.
csv_path = "/home/chenwei/文档/1031-徐汇滨江.txt"
print(csv_path)

# newline='' is what the csv module asks for on files given to csv.reader;
# the with-block guarantees the handle is closed once all rows are processed.
with open(csv_path, 'r', newline='') as csv_handle:
    csv_file = csv.reader(csv_handle)

    for i, line in enumerate(csv_file):
        if len(line) < 2:
            # Blank or truncated row: indexing line[0]/line[1] would raise
            # IndexError and abort the whole run, so skip it instead.
            print("skipping row %d: expected 2 columns, got %d" %
                  (i, len(line)))
            continue

        folder_name = out_dir + "/" + str(i) + "/"
        CheckDir(folder_name)

        download_file(line[0], folder_name, "1")
        download_file(line[1], folder_name, "2")

        # Progress indicator: index of the row just processed.
        print(i)