def get_country_urls(locs_url,
                     countries_xpath='//div[@id="content"]/div/div/a[@href]'):
    '''Fetch the locations index page and return (country_urls, country_names).'''
    info_container, records_container = taxonomy_init()
    info_container['url'] = locs_url
    # retry forever, backing off 10 s per failed attempt (capped at 300 s)
    sleep_cnt = 0
    while True:
        sleep_cnt = min(sleep_cnt + 1, 30)
        try:
            info_container = nvsoli.walkon(info_container,
                                           records_container=records_container)
            info_container = nvsoli.auto_redireced(info_container,
                                                   records_container)
        except Exception:
            time.sleep(10 * sleep_cnt)
        else:
            break
    root = get_etree_root(info_container)
    eles = root.xpath(countries_xpath)
    country_urls = []
    country_names = []
    for ele in eles:
        country_urls.append(nudipix_base_url + ele.attrib['href'])
        country_names.append(ele.text)
    return (country_urls, country_names)
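# Editor's sketch (not part of the original module): the fetch-with-retry loop
# used above is repeated verbatim in most of the functions below. A helper like
# this one could replace those loops; it only uses the nvsoli calls already
# present in this file and is not wired into any caller here.
def fetch_with_retry(info_container, records_container, max_backoff_steps=30):
    '''Retry nvsoli.walkon() / nvsoli.auto_redireced() until they succeed,
    sleeping 10 s times the attempt count between failures, with the backoff
    capped at max_backoff_steps * 10 seconds.'''
    sleep_cnt = 0
    while True:
        sleep_cnt = min(sleep_cnt + 1, max_backoff_steps)
        try:
            info_container = nvsoli.walkon(info_container,
                                           records_container=records_container)
            info_container = nvsoli.auto_redireced(info_container,
                                                   records_container)
        except Exception:
            time.sleep(10 * sleep_cnt)
        else:
            return info_container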
def get_EXIF(EXIF_url):
    '''Fetch a photo's EXIF page and return the EXIF table as a dict.'''
    info_container, records_container = taxonomy_init()
    info_container['url'] = EXIF_url
    # retry forever, backing off 10 s per failed attempt (capped at 300 s)
    sleep_cnt = 0
    while True:
        sleep_cnt = min(sleep_cnt + 1, 30)
        try:
            info_container = nvsoli.walkon(info_container,
                                           records_container=records_container)
            info_container = nvsoli.auto_redireced(info_container,
                                                   records_container)
        except Exception:
            time.sleep(10 * sleep_cnt)
        else:
            break
    root = get_etree_root(info_container)
    rows = root.xpath('//table[@class="exif"]/tr')
    EXIF = {}
    for row in rows:
        cells = row.xpath('td')
        key = cells[0].text.rstrip(':')
        EXIF[key] = cells[1].text
    return EXIF
def fishbase_init(base_url='http://www.fishbase.us/'):
    info_container = nvsoli.new_info_container()
    info_container['base_url'] = base_url
    info_container['url'] = base_url
    info_container['method'] = 'GET'
    req_head_str = (
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,*/*;q=0.8\r\n'
        'User-Agent: Mozilla/5.0 (Windows NT 6.3; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.94 '
        'Safari/537.36\r\n'
        'Accept-Encoding: gzip,deflate,sdch\r\n'
        'Accept-Language: en;q=1.0, zh-CN;q=0.8')
    info_container['req_head'] = nvhead.build_headers_dict_from_str(
        req_head_str, '\r\n')
    info_container['req_head']['Connection'] = 'close'
    # init records_container
    records_container = nvsoli.new_records_container()
    info_container = nvsoli.walkon(info_container,
                                   records_container=records_container)
    info_container = nvsoli.auto_redireced(info_container, records_container)
    return (info_container, records_container)
def search_via_country(c_code, info_container, records_container):
    req_body = gen_CI_post_body(Country=c_code)
    ciurl = info_container['base_url'] + 'country/CountrySearchList.php'
    info_container['req_head']['Referer'] = info_container['base_url']
    info_container['req_head']['Upgrade-Insecure-Requests'] = 1
    info_container['req_head']['Content-Type'] = 'application/x-www-form-urlencoded'
    info_container['url'] = ciurl
    info_container['method'] = 'POST'
    info_container['req_body'] = req_body
    info_container = nvsoli.walkon(info_container,
                                   records_container=records_container)
    info_container['method'] = 'GET'
    info_container = nvsoli.auto_redireced(info_container, records_container)
    html_text = info_container['resp_body_bytes'].decode('utf-8')
    root = etree.HTML(html_text)
    return root
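# Usage sketch (editor's addition, not in the original module): how the two
# FishBase helpers above fit together. The country code is a hypothetical
# placeholder; real codes come from FishBase's own country list, and the full
# scraping pipeline on top of this lives in get_country_infos() below.
def _example_country_search(c_code=100):
    '''Initialise the containers, run one country search, and return the
    parsed lxml root of the result page.'''
    info_container, records_container = fishbase_init()
    root = search_via_country(c_code, info_container, records_container)
    return root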
def get_nav_urls(loc_url, nav_xpath='//p[@class="nav"]/a[@href]'):
    '''Return the URL of every page in a location's pagination bar, including loc_url itself.'''
    info_container, records_container = taxonomy_init()
    info_container['url'] = loc_url
    # retry forever, backing off 10 s per failed attempt (capped at 300 s)
    sleep_cnt = 0
    while True:
        sleep_cnt = min(sleep_cnt + 1, 30)
        try:
            info_container = nvsoli.walkon(info_container,
                                           records_container=records_container)
            info_container = nvsoli.auto_redireced(info_container,
                                                   records_container)
        except Exception:
            time.sleep(10 * sleep_cnt)
        else:
            break
    root = get_etree_root(info_container)
    eles = root.xpath(nav_xpath)
    if len(eles) == 0:
        nav_urls = []
    else:
        # the second-to-last nav link carries the highest page number
        max_page = int(eles[-2].text)
        nav_urls = [loc_url]
        tem = os.path.dirname(eles[-2].attrib['href'].rstrip('/'))
        for i in range(2, max_page + 1):
            nav_urls.append(nudipix_base_url + tem + '/' + str(i))
    return nav_urls
def get_locsp_urls(nav_url,
                   locsp_xpath='//div[@class="thumbnail"]/div/a[@href]'):
    '''Return the location/species page URLs linked from one nav page.'''
    info_container, records_container = taxonomy_init()
    info_container['url'] = nav_url
    # retry forever, backing off 10 s per failed attempt (capped at 300 s)
    sleep_cnt = 0
    while True:
        sleep_cnt = min(sleep_cnt + 1, 30)
        try:
            info_container = nvsoli.walkon(info_container,
                                           records_container=records_container)
            info_container = nvsoli.auto_redireced(info_container,
                                                   records_container)
        except Exception:
            time.sleep(10 * sleep_cnt)
        else:
            break
    root = get_etree_root(info_container)
    eles = root.xpath(locsp_xpath)
    locsp_urls = []
    for ele in eles:
        url = nudipix_base_url + ele.attrib['href']
        if 'location' in url:
            locsp_urls.append(url)
    return locsp_urls
def get_location_urls(
        country_url,
        locations_xpath='//ul[@class="country_dive_site_list"]/li/a[@href]'):
    '''Return (location_urls, location_names) for one country page.'''
    info_container, records_container = taxonomy_init()
    info_container['url'] = country_url
    # retry forever, backing off 10 s per failed attempt (capped at 300 s)
    sleep_cnt = 0
    while True:
        sleep_cnt = min(sleep_cnt + 1, 30)
        try:
            info_container = nvsoli.walkon(info_container,
                                           records_container=records_container)
            info_container = nvsoli.auto_redireced(info_container,
                                                   records_container)
        except Exception:
            time.sleep(10 * sleep_cnt)
        else:
            break
    root = get_etree_root(info_container)
    eles = root.xpath(locations_xpath)
    location_urls = []
    location_names = []
    for ele in eles:
        url = nudipix_base_url + ele.attrib['href']
        if 'location' in url:
            location_urls.append(url)
            location_names.append(ele.text)
    return (location_urls, location_names)
def get_country_infos(c_code, country_island_dict, info_container,
                      records_container, **kwargs):
    '''Scrape every fish listed for one country code: write per-species dicts,
    aggregated fishes/pics files, image/thumbnail lookup tables, and download
    the pictures themselves.'''
    if 'display' in kwargs:
        display = int(kwargs['display'])
    else:
        display = 0
    if 'new_database' in kwargs:
        newdb = kwargs['new_database']
    else:
        newdb = 0
    # run the country search, then fetch the query-result listing page
    root = search_via_country(c_code, info_container, records_container)
    tables = get_all_tables(root, info_container['base_url'])
    qurl = get_query_url(info_container, kwargs)
    info_container['url'] = qurl
    info_container['method'] = 'GET'
    info_container['req_body'] = None
    info_container = nvsoli.walkon(info_container,
                                   records_container=records_container)
    html_text = info_container['resp_body_bytes'].decode('utf-8')
    root = etree.HTML(html_text)
    # column headers of the result table become the keys of each fish record
    eles = root.xpath('//thead/tr/th')
    fish = {}
    for ele in eles:
        fish[ele.text] = None
    url_dict = nvurl.url_to_dict(info_container['url'])
    qd = nvurl.urldecode(url_dict['query'])
    all_country_eles = root.xpath("//tr[@class='t_value1']")
    country = country_island_dict[str(c_code)]
    # output directories: INFOS for dicts, PICS for images, THUMBNAILS for thumbs
    fn = ('../INFOS/COUNTRYANDISLAND/' + country + '/' + qd['cpresence'] +
          '/' + qd['vhabitat'] + '/')
    if not os.path.exists(fn):
        os.makedirs(fn)
    picfn = ('../PICS/COUNTRYANDISLAND/' + country + '/' + qd['cpresence'] +
             '/' + qd['vhabitat'] + '/')
    if not os.path.exists(picfn):
        os.makedirs(picfn)
    thumbfn = ('../THUMBNAILS/COUNTRYANDISLAND/' + country + '/' +
               qd['cpresence'] + '/' + qd['vhabitat'] + '/')
    if not os.path.exists(thumbfn):
        os.makedirs(thumbfn)
    # load previously collected fishes unless a new database was requested
    if bool(newdb):
        fishes = {}
    else:
        fishes_dir = fn + "fishes.dict"
        print(fishes_dir)
        if os.path.exists(fishes_dir):
            with open(fishes_dir, 'r+') as fd:
                fishes = json.loads(fd.read())
        else:
            fishes = {}
    print('--------------------')
    from xdict.jprint import paint_str
    print(paint_str("===============fishes loaded======================",
                    single_color='yellow'))
    print(fishes.keys())
    print('----------------')
    # build (or reuse) one record per fish row
    for i in range(0, len(all_country_eles)):
        fish_ele = all_country_eles[i]
        nfish = get_fish_info(fishes, fish_ele, fish, info_container,
                              records_container)
        print(paint_str("===============nfish loaded======================",
                        single_color='green'))
        if nfish:
            print("====handle new nfish====")
            nfish['eles-seq'] = i
            nfish['images-dir'] = picfn
            nfish['info-dir'] = fn
            fishes[nfish['Species']['name']] = nfish
            nfdir = fn + nfish['Species']['name'] + '/'
            if not os.path.exists(nfdir):
                os.makedirs(nfdir)
            nffn = nfdir + 'fish.dict'
            infofn = nfdir + 'fish.info'
            nvft.write_to_file(fn=nffn, content=json.dumps(nfish), op='w+')
            info = get_printed_str(nfish, with_color=0, display=display)
            nvft.write_to_file(fn=infofn, content=info, op='w+')
        else:
            print("===skip existing fish===")
    print(paint_str("===============all nfish loaded======================",
                    single_color='yellow'))
    print(fishes.keys())
    print(len(fishes))
    # write the aggregated fishes.dict / fishes.lines files once
    dfn = fn + 'fishes.dict'
    if not os.path.exists(dfn):
        nvft.write_to_file(fn=dfn, content=json.dumps(fishes), op='w+')
    ldfn = fn + 'fishes.lines'
    if not os.path.exists(ldfn):
        nvft.write_to_file(fn=ldfn, content='', op='w+')
        for key in fishes:
            nfish = fishes[key]
            nvft.write_to_file(fn=ldfn,
                               content=get_printed_str(nfish,
                                                       with_color=0,
                                                       display=display),
                               op='a+')
            nvft.write_to_file(fn=ldfn, content='\n', op='a+')
    print("-----get all_photos ready----")
    # gather all photos, either from a cached pics.array or from the fish records
    apafn = fn + 'pics.array'
    if os.path.exists(apafn):
        with open(apafn, 'r+') as fd:
            all_photos = json.loads(fd.read())
    else:
        all_photos = []
        for name in fishes:
            for photo in fishes[name]['All-Photos']:
                all_photos.append(photo)
    print("all_photos collected")
    print(len(all_photos))
    # create one sub-directory per photo type for images and thumbnails
    types = []
    for each in all_photos:
        photo_type = each['type']
        if (photo_type not in types) and (photo_type is not None):
            types.append(photo_type)
    for photo_type in types:
        typefn = picfn + photo_type
        if not os.path.exists(typefn):
            os.makedirs(typefn)
        typefn = thumbfn + photo_type
        if not os.path.exists(typefn):
            os.makedirs(typefn)
    # record the local image/thumbnail paths on each photo entry
    for each in all_photos:
        if each['type'] is None:
            each['img-dir'] = None
            each['thumb-dir'] = None
        else:
            each['img-dir'] = picfn + each['type'] + '/' + each['img-name']
            each['thumb-dir'] = thumbfn + each['type'] + '/' + each['img-name']
    apafn = fn + 'pics.array'
    if not os.path.exists(apafn):
        nvft.write_to_file(fn=apafn, content=json.dumps(all_photos), op='w+')
    lapafn = fn + 'pics.lines'
    if not os.path.exists(lapafn):
        nvft.write_to_file(fn=lapafn, content='', op='w+')
        for each in all_photos:
            nvft.write_to_file(fn=lapafn,
                               content=get_printed_str(each,
                                                       with_color=0,
                                                       display=display),
                               op='a+')
            nvft.write_to_file(fn=lapafn, content='\n', op='a+')
    print("pics.lines and pics.array ready")
    # image-name <-> image-path lookup tables
    imagename_dir_dict = {}
    dir_imagename_dict = {}
    for each in all_photos:
        if each['type'] is not None:
            imagename = each['img-name']
            path = each['img-dir']
        else:
            imagename = None
            path = None
        imagename_dir_dict[imagename] = path
        dir_imagename_dict[path] = imagename
    iddfn = fn + 'image_dir.dict'
    didfn = fn + 'dir_image.dict'
    if not os.path.exists(iddfn):
        nvft.write_to_file(fn=iddfn,
                           content=json.dumps(imagename_dir_dict),
                           op='w+')
    liddfn = fn + 'image_dir.lines'
    if not os.path.exists(liddfn):
        nvft.write_to_file(fn=liddfn, content='', op='w+')
        for each in imagename_dir_dict:
            nvft.write_to_file(fn=liddfn,
                               content=get_printed_str(each,
                                                       with_color=0,
                                                       display=display),
                               op='a+')
            nvft.write_to_file(fn=liddfn, content='\n', op='a+')
    if not os.path.exists(didfn):
        nvft.write_to_file(fn=didfn,
                           content=json.dumps(dir_imagename_dict),
                           op='w+')
    ldidfn = fn + 'dir_image.lines'
    if not os.path.exists(ldidfn):
        nvft.write_to_file(fn=ldidfn,
                           content=get_printed_str(dir_imagename_dict,
                                                   with_color=0,
                                                   display=display),
                           op='w+')
    print("==dir_image.dict and dir_image.lines written==")
    # thumbnail-name <-> thumbnail-path lookup tables
    thumb_dir_dict = {}
    dir_thumb_dict = {}
    for each in all_photos:
        if each['type'] is not None:
            imagename = each['img-name']
            path = each['thumb-dir']
        else:
            imagename = None
            path = None
        thumb_dir_dict[imagename] = path
        dir_thumb_dict[path] = imagename
    iddfn = fn + 'thumb_dir.dict'
    didfn = fn + 'dir_thumb.dict'
    if not os.path.exists(iddfn):
        nvft.write_to_file(fn=iddfn,
                           content=json.dumps(thumb_dir_dict),
                           op='w+')
    liddfn = fn + 'thumb_dir.lines'
    if not os.path.exists(liddfn):
        nvft.write_to_file(fn=liddfn, content='', op='w+')
        for each in thumb_dir_dict:
            nvft.write_to_file(fn=liddfn,
                               content=get_printed_str(each,
                                                       with_color=0,
                                                       display=display),
                               op='a+')
            nvft.write_to_file(fn=liddfn, content='\n', op='a+')
    if not os.path.exists(didfn):
        nvft.write_to_file(fn=didfn,
                           content=json.dumps(dir_thumb_dict),
                           op='w+')
    ldidfn = fn + 'dir_thumb.lines'
    if not os.path.exists(ldidfn):
        nvft.write_to_file(fn=ldidfn,
                           content=get_printed_str(dir_thumb_dict,
                                                   with_color=0,
                                                   display=display),
                           op='w+')
    print("===dir_thumb.lines and thumb_dir.dict written===")
    print("begin downloading images")
    # download each image and thumbnail that is not already on disk
    for each in all_photos:
        if each['type'] is not None:
            imagename = each['img-name']
            img_dir = each['img-dir']
            img_url = each['img-url']
            thumb_dir = each['thumb-dir']
            thumb_url = each['thumbnail-url']
            if os.path.exists(img_dir):
                print(paint_str("pass_by_pic", single_color="red"))
            else:
                info_container['url'] = img_url
                info_container = nvsoli.walkon(
                    info_container, records_container=records_container)
                info_container = nvsoli.auto_redireced(info_container,
                                                       records_container)
                nvft.write_to_file(fn=img_dir,
                                   content=info_container['resp_body_bytes'],
                                   op='wb+')
                print("downloaded one pic")
            if os.path.exists(thumb_dir):
                print(paint_str("pass_by_thumb", single_color="red"))
            else:
                info_container['url'] = thumb_url
                info_container = nvsoli.walkon(
                    info_container, records_container=records_container)
                info_container = nvsoli.auto_redireced(info_container,
                                                       records_container)
                nvft.write_to_file(fn=thumb_dir,
                                   content=info_container['resp_body_bytes'],
                                   op='wb+')
                print("downloaded one thumb")
        else:
            print("---external pics not downloaded in this version, skip---")
    return (info_container, records_container)
def get_img_info(img_url,
                 thumbnail_url,
                 country_abbrev,
                 location,
                 base_url=nudipix_base_url):
    '''Fetch one photo page and return a dict of its metadata: camera/EXIF,
    dates, taxonomy (KPCOFGS), real image URL, and local file names.'''
    info_container, records_container = taxonomy_init()
    info_container['url'] = img_url
    # retry forever, backing off 10 s per failed attempt (capped at 300 s)
    sleep_cnt = 0
    while True:
        sleep_cnt = min(sleep_cnt + 1, 30)
        try:
            info_container = nvsoli.walkon(info_container,
                                           records_container=records_container)
            info_container = nvsoli.auto_redireced(info_container,
                                                   records_container)
        except Exception:
            time.sleep(10 * sleep_cnt)
        else:
            break
    img_root = get_etree_root(info_container)
    tbodys = img_root.xpath('//table')
    sp = img_root.xpath('//div/div/h2/a')[0].attrib['href'].rstrip('/')
    sp_name = os.path.basename(sp)
    # the first table holds the photo attributes as key/value rows
    info_raw = tbodys[0].getchildren()
    info = {}
    for row in info_raw:
        key = row.xpath('td')[0].text.rstrip(':')
        if key == 'Camera':
            info[key] = row.xpath('td')[1].text
            EXIF_url = nudipix_base_url + row.xpath('td/span/a')[0].attrib['href']
            info['EXIF'] = get_EXIF(EXIF_url)
        elif key in ('Taken on', 'Viewed', 'Posted', 'Updated'):
            info[key] = row.xpath('td')[1].text
        else:
            info[key] = row.xpath('td/a')[0].text
    info['kpcofgs'] = get_KPCOFGS(tbodys, rsltin='dict')
    img_real_url = nudipix_base_url + img_root.xpath('//div/img')[0].attrib['src']
    try:
        img_verifier = img_root.xpath('//div/img')[1].attrib['title']
    except Exception:
        img_verifier = ''
    # local files are named after the species plus a sha1 of the real image URL
    sha1 = hashlib.sha1(img_real_url.encode('utf-8')).hexdigest()
    img_suffix = os.path.basename(img_real_url).split('.')[-1]
    img_name = sp_name + '_' + sha1 + '.' + img_suffix
    thumbnail_suffix = os.path.basename(thumbnail_url).split('.')[-1]
    thumbnail_name = sp_name + '_' + sha1 + '.thumbnail.' + thumbnail_suffix
    info_name = sp_name + '_' + sha1 + '.dict'
    info['img_url'] = img_real_url
    info['verifier'] = img_verifier
    info['img_name'] = images_dir + img_name
    info['index'] = sha1
    info['thumbnail_url'] = thumbnail_url
    info['thumbnail_name'] = thumbs_dir + thumbnail_name
    info['info_name'] = infos_dir + info_name
    info['country'] = country_abbrev
    info['location'] = location
    # some image file names are not numeric; fall back to -1 for the sequence
    try:
        info['seq'] = int(os.path.basename(img_real_url).split('.')[0])
    except Exception:
        info['seq'] = -1
    return info
def get_img_urls(locsp_url,
                 img_xpath='//div[@class="thumbnail"]/div/a[@href]'):
    '''Return (img_urls, thumbnail_urls) for a location/species page,
    following its pagination bar.'''
    info_container, records_container = taxonomy_init()
    info_container['url'] = locsp_url
    # retry forever, backing off 10 s per failed attempt (capped at 300 s)
    sleep_cnt = 0
    while True:
        sleep_cnt = min(sleep_cnt + 1, 30)
        try:
            info_container = nvsoli.walkon(info_container,
                                           records_container=records_container)
            info_container = nvsoli.auto_redireced(info_container,
                                                   records_container)
        except Exception:
            time.sleep(10 * sleep_cnt)
        else:
            break
    root = get_etree_root(info_container)
    eles = root.xpath(img_xpath)
    img_urls = []
    thumbnail_urls = []
    for ele in eles:
        url = nudipix_base_url + ele.attrib['href']
        if ('photo' in url) and ('photographer' not in url):
            img_urls.append(url)
            thumbnail_urls.append(nudipix_base_url +
                                  ele.xpath('img')[0].attrib['src'])
    # follow the pagination bar, if any
    nav_xpath = '//p[@class="nav"]/a[@href]'
    eles = root.xpath(nav_xpath)
    if len(eles) != 0:
        max_page = int(eles[-2].text)
        tem = os.path.dirname(eles[-2].attrib['href'].rstrip('/'))
        for i in range(2, max_page + 1):
            nav_url = nudipix_base_url + tem + '/' + str(i)
            info_container, records_container = taxonomy_init()
            info_container['url'] = nav_url
            sleep_cnt = 0
            while True:
                sleep_cnt = min(sleep_cnt + 1, 30)
                try:
                    info_container = nvsoli.walkon(
                        info_container, records_container=records_container)
                    info_container = nvsoli.auto_redireced(
                        info_container, records_container)
                except Exception:
                    time.sleep(10 * sleep_cnt)
                else:
                    break
            root = get_etree_root(info_container)
            eles = root.xpath(img_xpath)
            for ele in eles:
                url = nudipix_base_url + ele.attrib['href']
                if ('photo' in url) and ('photographer' not in url):
                    img_urls.append(url)
                    thumbnail_urls.append(nudipix_base_url +
                                          ele.xpath('img')[0].attrib['src'])
    return (img_urls, thumbnail_urls)
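# Pipeline sketch (editor's addition, not in the original module): how the
# nudipixel helpers above chain together. 'locations_url' is a hypothetical
# placeholder for the site's location index page, and the country name is
# passed as country_abbrev purely for illustration.
def _example_crawl_one_country(locations_url):
    '''Walk the first listed country: locations -> nav pages -> location/species
    pages -> photo pages, yielding the info dict built by get_img_info() for
    each photo.'''
    country_urls, country_names = get_country_urls(locations_url)
    country_url, country_name = country_urls[0], country_names[0]
    location_urls, location_names = get_location_urls(country_url)
    for loc_url, loc_name in zip(location_urls, location_names):
        for nav_url in get_nav_urls(loc_url):
            for locsp_url in get_locsp_urls(nav_url):
                img_urls, thumbnail_urls = get_img_urls(locsp_url)
                for img_url, thumb_url in zip(img_urls, thumbnail_urls):
                    yield get_img_info(img_url, thumb_url, country_name,
                                       loc_name)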
                       op='w+')
    info_container, records_container = taxonomy_init()
    info_container['url'] = info['img_url']
    sleep_cnt = 0
    while True:
        sleep_cnt = min(sleep_cnt + 1, 30)
        try:
            info_container = nvsoli.walkon(info_container,
                                           records_container=records_container)
            info_container = nvsoli.auto_redireced(info_container,
                                                   records_container)
        except Exception:
            time.sleep(10 * sleep_cnt)
        else:
            break
    nvft.write_to_file(fn=info['img_name'],