Ejemplo n.º 1
0
def start_download(site_url):
    """Fetch a gallery page and start one download thread per linked image.

    The module globals ``img_idx`` and ``img_cnt`` are reset to 0 before the
    page is fetched; ``img_cnt`` is then set to the number of matched image
    links. The destination folder is named after the page's ``.post-title``
    text and is created if it does not exist yet.

    Each image link is paired by position with a ``.gallery-caption``
    element; links beyond the last caption get an empty caption.

    Threads are started here but not joined in this function.

    :param site_url: URL of the gallery page (must support ``.encode``).
    """
    global img_idx
    global img_cnt

    print("Start download: " + site_url.encode('utf-8'))

    img_idx = 0
    img_cnt = 0

    # Alternate root used for local testing: "/Users/nangua/Desktop/test/"
    root_path = u"/var/storage/图片收集/maldiveschina/"

    doc = pqr(url=site_url)

    post_title = doc(".post-title").text()
    save_path = os.path.join(root_path, post_title)

    print("Create folder: " + save_path.encode('utf-8'))
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    links = doc(".gallery-icon a")
    captions = doc(".gallery-caption")
    workers = []
    img_cnt = len(links)
    for position, link in enumerate(links):
        img_url = pqr(link).attr("href")
        # Captions are matched by index; there may be fewer captions than links.
        has_caption = position < len(captions)
        caption = pqr(captions[position]).text() if has_caption else ""
        thread = Thread(target=download_img,
                        args=(img_url, caption, save_path))
        workers.append(thread)
        thread.start()
Ejemplo n.º 2
0
def start_download(site_url):
    """Fetch a hotel page and start one download thread per thumbnail image.

    The module globals ``img_idx`` and ``img_cnt`` are reset to 0 before the
    page is fetched; ``img_cnt`` is then set to the number of matched
    thumbnails. The destination folder is named after the hotel-name label
    text with its first and last characters removed, and is created if it
    does not exist yet.

    Every thumbnail URL has ``s=116x88`` replaced by ``s=800x600`` before it
    is handed to ``download_img`` (presumably to request a larger rendition
    -- confirm against the site).

    Threads are started here but not joined in this function.

    :param site_url: URL of the hotel page (must support ``.encode``).
    """
    global img_idx
    global img_cnt

    print("Start download: " + site_url.encode('utf-8'))

    img_idx = 0
    img_cnt = 0

    # Alternate root used for local testing: "/Users/nangua/Desktop/test"
    root_path = u"/var/storage/图片收集/agoda/"

    doc = pqr(url=site_url)

    name_label = doc("#ctl00_ctl00_MainContent_ContentMain_HotelHeaderHD_lblEHotelName")
    # Drop the first and last character of the label text.
    hotel_name = name_label.text()[1:-1]
    save_path = os.path.join(root_path, hotel_name)

    print("Create folder: " + save_path.encode('utf-8'))

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Previously used selector:
    # "#ctl00_ctl00_MainContent_ContentMain_MainHotelPhotoHDAB2659_ThumbPhotosHDAB2659_dtlPhotoAB2659 img"
    thumbnails = doc("#ctl00_ctl00_MainContent_ContentMain_MainHotelPhotoHD_ThumbPhotosHD_dtlPhoto img")
    workers = []
    img_cnt = len(thumbnails)
    for thumb in thumbnails:
        thumb_url = pqr(thumb).attr("src")
        large_url = thumb_url.replace("s=116x88", "s=800x600")
        thread = Thread(target=download_img, args=(large_url, save_path))
        workers.append(thread)
        thread.start()
Ejemplo n.º 3
0
def start_download(site_url):
    """Fetch a page and start one download thread per ``#pikame img`` image.

    The module globals ``img_idx`` and ``img_cnt`` are reset to 0 before the
    page is fetched; ``img_cnt`` is then set to the number of matched images.
    The destination folder is named after the ``#header h1`` text and is
    created if it does not exist yet.

    Threads are started here but not joined in this function.

    :param site_url: URL of the page to scan (must support ``.encode``).
    """
    global img_idx
    global img_cnt

    print("Start download: " + site_url.encode('utf-8'))

    img_idx = 0
    img_cnt = 0

    # Alternate storage root: u"/var/storage/图片收集/jetwing/"
    root_path = "/Users/nangua/Desktop/test/"

    doc = pqr(url=site_url)

    heading = doc("#header h1").text()
    save_path = os.path.join(root_path, heading)

    print("Create folder: " + save_path.encode('utf-8'))
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    images = doc("#pikame img")
    workers = []
    img_cnt = len(images)
    for image in images:
        src = pqr(image).attr("src")
        thread = Thread(target=download_img, args=(src, save_path))
        workers.append(thread)
        thread.start()
Ejemplo n.º 4
0
def scan_page(site_url, folder_name):
    """Scan one listing page and download every picture found on it.

    A sub-folder named after the module-level ``page_no`` is created under
    ``folder_name`` (if missing). For each ``div.d4`` entry on the page, the
    picture id is taken from the last path segment of its link, its info,
    title and rating texts are collected as UTF-8 byte strings, and a
    ``download_pic`` thread is started. The function returns only after all
    started threads have finished.

    :param site_url: URL of the listing page.
    :param folder_name: Parent folder; the page sub-folder goes inside it.
    """
    print("Searching pictures at Page " + str(page_no) + " ...")

    page_folder = folder_name + "/" + str(page_no)
    if not os.path.exists(page_folder):
        os.mkdir(page_folder)

    doc = pqr(url=site_url)
    entries = doc("#px div.container div.d4")

    workers = []
    # Picture numbering within the page is 1-based.
    for ordinal, entry in enumerate(entries, 1):
        node = pqr(entry)
        href = pqr(node.find("div.photo a")).attr("href")
        pic_id = href.split("/")[-1]
        pic_info = {
            "info": pqr(node.find("div.info a")).text().encode('utf-8'),
            "title": pqr(node.find("div.title a")).text().encode('utf-8'),
            "rating": pqr(node.find("div.rating")).text().encode('utf-8'),
        }

        thread = Thread(
            target=download_pic,
            args=(pic_id, pic_info, ordinal, page_no, page_folder),
        )
        workers.append(thread)
        thread.start()

    # Block until every download on this page is done.
    for thread in workers:
        thread.join()
Ejemplo n.º 5
0
def download_pic(pic_id, pic_info, pic_idx, page_idx, folder_name):
    """Download one picture and write its metadata next to it.

    The picture page ``http://500px.com/photo/<pic_id>`` is fetched, the
    ``src`` of its ``div.photo.segment img`` element is downloaded to
    ``<folder_name>/<pic_id>.<ext>`` (``<ext>`` being whatever follows the
    last ``.`` in the image URL), and a ``<pic_id>.txt`` file with the info,
    title and rating lines is written alongside it. On success the module
    global ``pic_no`` is incremented and a progress line is printed.

    :param pic_id: Picture identifier (string) used in the URL and file names.
    :param pic_info: Dict with ``"info"``, ``"title"`` and ``"rating"``
        string values.
    :param pic_idx: Index of the picture within its page (for the log line).
    :param page_idx: Page number (for the log line).
    :param folder_name: Existing folder that receives both files.
    """
    global pic_no

    pic_doc = pqr(url="http://500px.com/photo/" + pic_id)
    pic_url = pqr(pic_doc("div.photo.segment img")).attr("src")

    base_path = folder_name + "/" + pic_id
    urllib.urlretrieve(pic_url, base_path + "." + pic_url.split(".")[-1])

    # Use a context manager so the handle is closed even if a write fails
    # (e.g. a missing key in pic_info or an I/O error).
    with open(base_path + ".txt", "w") as meta_file:
        meta_file.write("info: " + pic_info["info"] + "\n")
        meta_file.write("title: " + pic_info["title"] + "\n")
        meta_file.write("rating: " + pic_info["rating"] + "\n")

    # NOTE(review): pic_no is a shared global updated from worker threads
    # without a lock -- confirm whether the total needs to be exact.
    pic_no += 1
    print("Download complete: " + str(pic_idx) + " at Page " + str(page_idx) + ", total: " + str(pic_no))
Ejemplo n.º 6
0
def get_path(site_url):
    """Return ``"booking/"`` followed by the page's ``#hp_hotel_name`` text.

    :param site_url: URL of the page to fetch.
    """
    page = pqr(url=site_url)
    hotel_name = page("#hp_hotel_name").text()
    return "booking/" + hotel_name
Ejemplo n.º 7
0
def get_path(site_url):
    """Return ``"maldiveschina/"`` followed by the ``#hp_hotel_name`` text.

    :param site_url: URL of the page to fetch.
    """
    page = pqr(url=site_url)
    # NOTE(review): the same selector is used with the "booking/" prefix
    # elsewhere -- confirm it is the right element for this site.
    hotel_name = page("#hp_hotel_name").text()
    return "maldiveschina/" + hotel_name
Ejemplo n.º 8
0
def get_path(site_url):
    """Return ``"booking/"`` followed by the page's ``#header h1`` text.

    :param site_url: URL of the page to fetch.
    """
    page = pqr(url=site_url)
    heading = page("#header h1").text()
    return "booking/" + heading