def main():
    """Crawl the Shenzhen ``ch35`` listings on dianping.com area by area.

    The area codes are shuffled, then for each one the first listing page is
    fetched to work out how many result pages exist. Every result page is
    handed to ``tour_from_url``; the parsed result is not used any further
    here. A random pause precedes every request.
    """
    # Category codes kept for reference (not used by the crawl below):
    # ['g33831', 'g2916', 'g2926', 'g2834', 'g5672', 'g27852', 'g20038',
    #  'g33832']
    areas = [
        'r29', 'r31', 'r30', 'r32', 'r12036', 'r12033', 'r34', 'r33',
        'r12035',
    ]
    random.shuffle(areas)
    print('tourarea {}'.format(areas))

    for area_code in areas:
        base_url = 'http://www.dianping.com/shenzhen/ch35/%s' % area_code
        time.sleep(random.randint(2, 5))
        doc = pq(html_from_url(base_url))

        page_count = 0
        if doc('.PageLink').text():
            # The last pager link carries the highest page number.
            last_link = doc('.PageLink:last')
            page_count = int(last_link.attr('data-ga-page'))
        else:
            # No pager: treat as one page only when 1..15 entries are listed.
            shown = doc('#shop-all-list li').length
            if shown < 1 or shown > 15:
                print('maxpage in else: {}'.format(page_count))
                continue
            page_count = 1
        print('maxpage: {}'.format(page_count))

        for page_no in range(1, page_count + 1):
            page_url = '%sp%d' % (base_url, page_no)
            time.sleep(random.randint(5, 10))
            shops = tour_from_url(page_url)
def main():
    """Crawl the Shenzhen ``ch30`` listings for every category/location pair.

    Category codes are shuffled; locations come from the module-level
    ``dz_location`` in its existing order. For each pair the first listing
    page determines the number of result pages, and each page is passed to
    ``life_from_url`` (result unused here). Requests are spaced out with
    random sleeps.
    """
    categories = [
        'g141', 'g133', 'g2636', 'g20042', 'g142', 'g134', 'g135', 'g140',
        'g144', 'g32732', 'g137', 'g20038', 'g156', 'g20039', 'g20040',
        'g6694', 'g2754', 'g20041', 'g33857', 'g34089', 'g34090',
    ]
    random.shuffle(categories)
    print('lifetype {}'.format(categories))

    for category in categories:
        for location in dz_location:
            base_url = 'http://www.dianping.com/shenzhen/ch30/%s%s' % (
                category, location)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('data-ga-page'))
            else:
                # Without a pager, only 1..15 listed entries count as a page.
                shown = doc('#shop-all-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = life_from_url(page_url)
def main():
    """Crawl the Shenzhen ``ch50`` listings for every category/location pair.

    Category codes are shuffled; locations are taken from the local
    ``locations`` list (the module-level ``dz_location`` is only printed).
    The first listing page of each pair determines the page count and every
    page is passed to ``beauty_from_url`` (result unused here).
    """
    categories = ['g157', 'g158', 'g33761', 'g183', 'g148', 'g149', 'g2898',
                  'g159', 'g493', 'g2572', 'g123', 'g2790']
    locations = ['r29', 'r1949', 'r7475', 'r1560', 'r12322', 'r1556',
                 'r1951', 'r12321', 'r1559', 'r12225', 'r1557', 'r1573',
                 'r12324', 'r12226', 'r12323', 'r3138', 'r12320', 'r12319',
                 'r1950']
    random.shuffle(categories)
    print('beautytype {}'.format(categories))
    print('dz_location {}'.format(dz_location))

    for category in categories:
        for location in locations:
            base_url = 'http://www.dianping.com/shenzhen/ch50/%s%s' % (
                category, location)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('data-ga-page'))
            else:
                # Without a pager, only 1..15 listed entries count as a page.
                shown = doc('#shop-all-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = beauty_from_url(page_url)
def main():
    """Crawl the Shenzhen ``ch25`` listings for every category/area pair.

    Both code lists are shuffled before use. For each pair the first listing
    page determines the page count, and every page is passed to
    ``film_from_url`` (result unused here). Random sleeps precede requests.
    """
    categories = [
        'g136', 'g25461', 'g33880', 'g33877', 'g33879', 'g33878', 'g33881',
        'g33882',
    ]
    areas = ['r29', 'r31', 'r30', 'r32', 'r12033', 'r12035', 'r34', 'r33']
    random.shuffle(categories)
    random.shuffle(areas)
    print('filmtype {}'.format(categories))
    print('area {}'.format(areas))

    for category in categories:
        for area_code in areas:
            base_url = 'http://www.dianping.com/shenzhen/ch25/%s%s' % (
                category, area_code)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('data-ga-page'))
            else:
                # Without a pager, only 1..15 listed entries count as a page.
                shown = doc('#shop-all-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = film_from_url(page_url)
def pet_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url`` and each
    ``#shop-all-list>ul>li`` element is converted by ``pet_from_li``.
    Returns the converted entries as a list, in document order.
    """
    document = pq(html_from_url(url))
    shops = []
    for node in document('#shop-all-list>ul>li'):
        shops.append(pet_from_li(node))
    return shops
def home_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``; the direct ``li`` children
    of ``.shop-list`` are each converted by ``home_from_li``.
    Returns the converted entries as a list, in document order.
    """
    document = pq(html_from_url(url))
    shop_nodes = document('.shop-list').children('li')
    return list(map(home_from_li, shop_nodes))
def tour_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``; every ``li`` found under
    ``#shop-all-list`` is converted by ``tour_from_li``.
    Returns the converted entries as a list, in document order.
    """
    document = pq(html_from_url(url))
    shops = []
    for node in document('#shop-all-list').find('li'):
        shops.append(tour_from_li(node))
    return shops
def life_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``; every ``li`` found under
    ``#shop-all-list`` is converted by ``life_from_li``.
    Returns the converted entries as a list, in document order.
    """
    html = html_from_url(url)
    shop_nodes = pq(html)('#shop-all-list').find('li')
    return list(map(life_from_li, shop_nodes))
def wedding_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``; the direct ``li`` children
    of ``.shop-list`` are each converted by ``wedding_from_li``.
    Returns the converted entries as a list, in document order.
    """
    document = pq(html_from_url(url))
    shops = []
    for node in document('.shop-list').children('li'):
        shops.append(wedding_from_li(node))
    return shops
def film_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``; every ``li`` found under
    ``#shop-all-list`` is converted by ``film_from_li``.
    Returns the converted entries as a list, in document order.
    """
    document = pq(html_from_url(url))
    shop_nodes = document('#shop-all-list').find('li')
    return list(map(film_from_li, shop_nodes))
def education_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url`` and each
    ``#shop-all-list>ul>li`` element is converted by ``education_from_li``.
    Returns the converted entries as a list, in document order.
    """
    document = pq(html_from_url(url))
    shops = []
    for node in document('#shop-all-list>ul>li'):
        shops.append(education_from_li(node))
    return shops
def sports_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``; every ``li`` found under
    ``#shop-all-list`` is converted by ``sports_from_li``.
    Returns the converted entries as a list, in document order.
    """
    html = html_from_url(url)
    shop_nodes = pq(html)('#shop-all-list').find('li')
    return list(map(sports_from_li, shop_nodes))
def baby_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``; the direct ``li`` children
    of ``.shop-list`` are each converted by ``baby_from_li``.
    Returns the converted entries as a list, in document order.
    """
    document = pq(html_from_url(url))
    shops = []
    for node in document('.shop-list').children('li'):
        shops.append(baby_from_li(node))
    return shops
def car_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url`` and each
    ``#shop-all-list>ul>li`` element is converted by ``car_from_li``.
    Returns the converted entries as a list, in document order.

    Example listing URL:
    http://www.dianping.com/shenzhen/ch65/g180r12320p2
    """
    document = pq(html_from_url(url))
    shop_nodes = document('#shop-all-list>ul>li')
    return list(map(car_from_li, shop_nodes))
def home_from_url_decoration(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``; only the direct children of
    ``.shop-list`` that match ``.shop-list-item`` are converted, each by
    ``home_from_item``. Returns the converted entries as a list.
    """
    document = pq(html_from_url(url))
    shops = []
    for node in document('.shop-list').children('.shop-list-item'):
        shops.append(home_from_item(node))
    return shops
def hotel_from_url(url):
    """Parse every hotel entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``. When the
    ``.hotelshop-list .no-hotel-block`` element carries text, the page is
    treated as having no hotels and an empty list is returned. Otherwise the
    direct ``li`` children of ``.hotelshop-list`` are each converted by
    ``hotel_from_li``.

    Returns:
        A list of converted entries; always a list, possibly empty.

    Example listing URL:
    http://www.dianping.com/shenzhen/hotel/r12036p4
    """
    page = html_from_url(url)
    e = pq(page)
    # Previously this path left the result unbound, so the function either
    # raised or returned None; return an empty list like the sibling parsers.
    # NOTE(review): presumably the block is shown when no hotel matches.
    if e('.hotelshop-list .no-hotel-block').text():
        return []
    items = e('.hotelshop-list').children('li')
    return [hotel_from_li(i) for i in items]
def food_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``. Each ``li`` under
    ``#shop-all-list`` is checked for a title link with an ``href``; if one
    is missing the whole page is fetched and parsed again by calling this
    function recursively. Otherwise the entry is converted by
    ``food_from_li``.

    Returns the converted entries as a list, except on the not-found path
    described in the body.
    """
    document = pq(html_from_url(url))
    if document('#not-found-tip'):
        # NOTE(review): this returns whatever html_from_url gives back, not
        # a list of shops -- confirm callers expect that.
        return html_from_url(url)

    shops = []
    for node in document('#shop-all-list').find('li'):
        href = pq(node)('.txt>.tit>a:first').attr('href')
        print("before e('.txt>.tit>a:first')('href') : {}".format(href))
        if not href:
            print("after e('.txt>.tit>a:first')('href') : {}".format(href))
            # Entry without a link: start over with a fresh fetch.
            return food_from_url(url)
        shops.append(food_from_li(node))
    return shops
def main():
    """Crawl the Shenzhen ``ch20`` listings for every category/location pair.

    Category codes and the module-level ``dz_location`` are shuffled, but
    the crawl iterates the local ``locations`` list in reverse order. The
    first listing page of each pair determines the page count and every page
    is passed to ``shopping_from_url`` (result unused here).
    """
    categories = [
        'g120', 'g33943', 'g33944', 'g33906', 'g33905', 'g33904', 'g119',
        'g122', 'g121', 'g130', 'g32739', 'g187', 'g235', 'g123', 'g128',
        'g125', 'g27809', 'g27810', 'g27811', 'g27812', 'g26085', 'g124',
        'g127', 'g126', 'g6826', 'g32705', 'g6829', 'g6827', 'g32700',
        'g6830', 'g34124', 'g129', 'g184', 'g33760', 'g33759', 'g2714',
        'g26101', 'g33858', 'g2776', 'g32698', 'g34114', 'g131',
    ]
    # Shorter category list kept for reference:
    # ['g120', 'g33943', 'g33944', 'g33906', 'g33905', 'g33904', 'g119',
    #  'g122', 'g121', 'g130', 'g32739', 'g187', 'g235', 'g123', 'g128',
    #  'g125', 'g26085', 'g124', 'g127', 'g126', 'g34124', 'g129', 'g184',
    #  'g2714', 'g26101', 'g33858', 'g2776', 'g32698', 'g34114', 'g131']
    locations = [
        'r34', 'r8646', 'r1957', 'r1570', 'r12335', 'r8647', 'r8357',
        'r8355', 'r12334', 'r8648', 'r3141', 'r70631',
    ]
    random.shuffle(categories)
    random.shuffle(dz_location)
    print('shoppingtype : {}'.format(categories))
    print('dz_location : {}'.format(dz_location))

    for category in categories:
        for location in reversed(locations):
            base_url = 'http://www.dianping.com/shenzhen/ch20/%s%s' % (
                category, location)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('data-ga-page'))
            else:
                # Without a pager, only 1..15 listed entries count as a page.
                shown = doc('#shop-all-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = shopping_from_url(page_url)
def shopping_from_url(url):
    """Parse every shop entry from the HTML listing page at ``url``.

    The page is fetched with ``html_from_url``. Each ``li`` under
    ``#shop-all-list`` is checked for a title link with an ``href``; if one
    is missing the page is fetched and parsed again by calling this function
    recursively. Otherwise the entry is converted by ``shopping_from_li``.
    Returns the converted entries as a list.
    """
    document = pq(html_from_url(url))
    shops = []
    for node in document('#shop-all-list').find('li'):
        href = pq(node)('.txt>.tit>a:first').attr('href')
        print("before e('.txt>.tit>a:first')('href') : {}".format(href))
        if not href:
            print("after e('.txt>.tit>a:first')('href') : {}".format(href))
            # Entry without a link: start over with a fresh fetch.
            return shopping_from_url(url)
        shops.append(shopping_from_li(node))
    return shops
def beauty_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``. Each element matching
    ``#shop-all-list ul > li`` is checked for a title link with an ``href``;
    if one is missing the page is fetched and parsed again by calling this
    function recursively. Otherwise the entry is converted by
    ``beauty_from_li``. Returns the converted entries as a list.

    Example listing URLs:
    http://www.dianping.com/shenzhen/ch50/g159r64846p1
    http://www.dianping.com/shenzhen/ch50/g33761r8351p3
    """
    document = pq(html_from_url(url))
    shops = []
    for node in document('#shop-all-list ul > li'):
        href = pq(node)('.txt>.tit>a:first').attr('href')
        print("before e('.txt>.tit>a:first')('href') : {}".format(href))
        if not href:
            print("after e('.txt>.tit>a:first')('href') : {}".format(href))
            # Entry without a link: start over with a fresh fetch.
            return beauty_from_url(url)
        shops.append(beauty_from_li(node))
    return shops
def main():
    """Crawl the Shenzhen ``ch10`` listings for every category/location pair.

    Category codes are shuffled; the local ``locations`` list is walked in
    reverse order. The first listing page of each pair determines the page
    count and every page is passed to ``food_from_url`` (result unused
    here). Random sleeps precede requests.
    """
    # Longer category list kept for reference:
    # ['g103', 'g205', 'g733', 'g1947', 'g32728', 'g1953', 'g111', 'g117',
    #  'g1833', 'g241', 'g132', 'g113', 'g33924', 'g225', 'g226', 'g34041',
    #  'g34040', 'g110', 'g32731', 'g3027', 'g3023', 'g34060', 'g3017',
    #  'g4477', 'g32730', 'g208', 'g34061', 'g34063', 'g32729', 'g34065',
    #  'g34062', 'g34064', 'g34066', 'g116', 'g238', 'g24340', 'g254',
    #  'g232', 'g231', 'g253', 'g219', 'g251', 'g508', 'g114', 'g102',
    #  'g4467', 'g4473', 'g4469', 'g115', 'g109', 'g104', 'g112', 'g210',
    #  'g217', 'g1881', 'g221', 'g4509', 'g222', 'g223', 'g4557', 'g118',
    #  'g134', 'g133', 'g247', 'g246', 'g311', 'g6743', 'g1387', 'g26483',
    #  'g26482', 'g26484', 'g252', 'g34014', 'g101', 'g34055', 'g3243',
    #  'g207', 'g106', 'g250', 'g34032', 'g1338', 'g26481', 'g1959',
    #  'g2714', 'g25474', 'g107', 'g34059', 'g1783']
    categories = ['g103', 'g111', 'g117', 'g132', 'g113', 'g110', 'g116',
                  'g219', 'g251', 'g508', 'g114', 'g102', 'g115', 'g109',
                  'g104', 'g112', 'g118', 'g34014', 'g101', 'g34055',
                  'g3243', 'g207', 'g106', 'g250', 'g34032', 'g1338',
                  'g26481', 'g1959', 'g2714', 'g25474', 'g107', 'g34059',
                  'g1783']
    locations = ['r29', 'r1949', 'r7475', 'r1560', 'r12322', 'r1556',
                 'r1951', 'r12321', 'r1559', 'r12225', 'r1557', 'r1573',
                 'r12324', 'r12226', 'r12323', 'r3138', 'r12320', 'r12319',
                 'r1950']
    random.shuffle(categories)
    print('foodtype {}'.format(categories))

    for category in categories:
        for location in reversed(locations):
            base_url = 'http://www.dianping.com/shenzhen/ch10/%s%s' % (
                category, location)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('data-ga-page'))
            else:
                # Without a pager, only 1..15 listed entries count as a page.
                shown = doc('#shop-all-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = food_from_url(page_url)
def service_from_url(url):
    """Parse every shop entry on the listing page at ``url``.

    The page is fetched with ``html_from_url``. Each
    ``#shop-all-list>ul>li`` element is checked for a title link with an
    ``href``; if one is missing the page is fetched and parsed again by
    calling this function recursively. Otherwise the entry is converted by
    ``service_from_li``. Returns the converted entries as a list.

    Example listing URL:
    http://www.dianping.com/shenzhen/ch80/g26085r34p39
    """
    document = pq(html_from_url(url))
    shops = []
    for node in document('#shop-all-list>ul>li'):
        href = pq(node)('.txt>.tit>a:first').attr('href')
        print("before e('.txt>.tit>a:first')('href') : {}".format(href))
        if not href:
            print("after e('.txt>.tit>a:first')('href') : {}".format(href))
            # Entry without a link: start over with a fresh fetch.
            return service_from_url(url)
        shops.append(service_from_li(node))
    return shops
def get_address():
    """Store an ``address`` value on every document in ``db.Wedding``.

    For each stored document the page at its ``url`` is fetched (after a
    random pause), the text of ``.road-addr`` is read, and the document with
    the same ``number`` is updated via ``$set`` with ``upsert=True``. When
    the element has no text an empty string is stored.
    """
    for record in db.Wedding.find():
        detail_url = record['url']
        shop_number = record['number']
        print('item url : {}'.format(detail_url))

        time.sleep(random.randint(2, 5))
        html = html_from_url(detail_url)
        print('html : {}'.format(html))

        road_text = pq(html)('.road-addr').text()
        address = road_text.strip() if road_text else ''
        print('address : {}'.format(address))

        selector = {"number": shop_number}
        change = {"$set": {"address": address}}
        db.Wedding.update(selector, change, upsert=True)
def sports_from_baseurl(baseurl):
    """Walk every result page below ``baseurl`` and parse each one.

    After a random pause the page at ``baseurl`` is fetched to determine the
    number of result pages: the ``data-ga-page`` value of the last
    ``.PageLink`` if a pager is present, one page if 1..15 entries are
    listed without a pager, otherwise zero. Each page URL
    (``baseurl`` + ``'p'`` + page number) is passed to ``sports_from_url``;
    the parsed result is not used and nothing is returned.
    """
    time.sleep(random.randint(2, 5))
    doc = pq(html_from_url(baseurl))

    page_count = 0
    if doc('.PageLink').text():
        last_link = doc('.PageLink:last')
        page_count = int(last_link.attr('data-ga-page'))
    else:
        shown = doc('#shop-all-list li').length
        if 1 <= shown <= 15:
            page_count = 1
        else:
            # Nothing usable: fall through with zero pages.
            print('maxpage in else: {}'.format(page_count))
    print('maxpage: {}'.format(page_count))

    for page_no in range(1, page_count + 1):
        page_url = '%sp%d' % (baseurl, page_no)
        time.sleep(random.randint(5, 10))
        shops = sports_from_url(page_url)
def main():
    """Crawl the Shenzhen ``ch65`` listings for every category/location pair.

    Category and area codes are shuffled. For each category an area list is
    selected and printed, but the crawl itself iterates the fixed local
    ``locations`` list. The first listing page of each pair determines the
    page count and every page is passed to ``car_from_url`` (result unused
    here).
    """
    categories = [
        'g175', 'g178', 'g34072', 'g176', 'g34087', 'g34073', 'g180',
        'g34088', 'g34074', 'g34075', 'g34076', 'g177', 'g34077',
    ]
    areas = [
        'r29', 'r31', 'r30', 'r32', 'r12036', 'r12033', 'r34', 'r33',
        'r12035',
    ]
    locations = ['r8348', 'r8352', 'r33', 'r34']
    random.shuffle(categories)
    random.shuffle(areas)
    print('cartype {}'.format(categories))
    print('area {}'.format(areas))

    for category in categories:
        # Only printed; the inner loop below does not use this selection.
        if category in ('g175', 'g176', 'g180', 'g177'):
            selected_areas = dz_location
        else:
            selected_areas = areas
        print('cararea {}'.format(selected_areas))

        for location in locations:
            base_url = 'http://www.dianping.com/shenzhen/ch65/%s%s' % (
                category, location)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('data-ga-page'))
            else:
                # Without a pager, only 1..15 listed entries count as a page.
                shown = doc('#shop-all-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = car_from_url(page_url)
def main():
    """Crawl the Shenzhen ``ch75`` listings for every category/location pair.

    Category and area codes are shuffled. Categories ``g2876`` and ``g179``
    are crawled over the module-level ``dz_location``; all others over the
    local area list. The first listing page of each pair determines the page
    count and every page is passed to ``education_from_url`` (result unused
    here).
    """
    categories = [
        'g2872', 'g2873', 'g2877', 'g2876', 'g2874', 'g2878', 'g179',
        'g260', 'g32722', 'g34105', 'g33897', 'g33899', 'g33898', 'g34106',
        'g34107', 'g2882',
    ]
    areas = [
        'r29', 'r31', 'r30', 'r32', 'r12036', 'r12033', 'r34', 'r33',
        'r12035',
    ]
    random.shuffle(categories)
    random.shuffle(areas)
    print('educationtype {}'.format(categories))
    print('area {}'.format(areas))

    for category in categories:
        if category in ('g2876', 'g179'):
            locations = dz_location
        else:
            locations = areas

        for location in locations:
            base_url = 'http://www.dianping.com/shenzhen/ch75/%s%s' % (
                category, location)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('data-ga-page'))
            else:
                # Without a pager, only 1..15 listed entries count as a page.
                shown = doc('#shop-all-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = education_from_url(page_url)
def main():
    """Crawl the Shenzhen ``ch70`` listings for every category/location pair.

    Category and area codes are shuffled. Category ``g125`` is crawled over
    the module-level ``dz_location``; all others over the local area list.
    The page count comes from the ``title`` attribute of the last
    ``.PageLink``, or is one when ``.shop-list`` holds 1..15 ``li`` entries
    without a pager. Every page is passed to ``baby_from_url`` (result
    unused here).
    """
    categories = [
        'g193', 'g27761', 'g161', 'g27767', 'g188', 'g27762', 'g27763',
        'g2784', 'g258', 'g27768', 'g34117', 'g257', 'g34116', 'g27814',
        'g33797', 'g125', 'g20009', 'g189', 'g33803', 'g33808', 'g27769',
    ]
    areas = [
        'r29', 'r30', 'r31', 'r32', 'r33', 'r34', 'r12033', 'r12035',
        'r12036',
    ]
    random.shuffle(categories)
    random.shuffle(areas)
    print('babytype {}'.format(categories))
    print('area {}'.format(areas))

    for category in categories:
        if category == 'g125':
            locations = dz_location
        else:
            locations = areas

        for location in locations:
            base_url = 'http://www.dianping.com/shenzhen/ch70/%s%s' % (
                category, location)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('title'))
            else:
                shown = doc('.shop-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = baby_from_url(page_url)
def main():
    """Refresh stored addresses, then crawl the Shenzhen ``ch55`` listings.

    Category and area codes are shuffled and printed, ``get_address()`` is
    run once, and then every category/area pair is crawled. The page count
    comes from the ``title`` attribute of the last ``.PageLink``, or is one
    when ``.shop-list`` holds 1..15 ``li`` entries without a pager. Every
    page is passed to ``wedding_from_url`` (result unused here).
    """
    # Longer category list kept for reference:
    # ['g25410', 'g33888', 'g34057', 'g163', 'g6699', 'g6698', 'g162',
    #  'g983', 'g1016', 'g25411', 'g167', 'g1039', 'g27943', 'g34108',
    #  'g191', 'g2814', 'g2816', 'g2818', 'g166', 'g185', 'g6700', 'g164',
    #  'g25412', 'g186', 'g192', 'g6844']
    categories = [
        'g25410', 'g33888', 'g34057', 'g163', 'g162', 'g167', 'g191',
        'g166', 'g185', 'g6700', 'g164', 'g25412', 'g186', 'g192', 'g6844',
    ]
    areas = [
        'r29', 'r31', 'r30', 'r32', 'r12036', 'r12033', 'r34', 'r33',
        'r12035',
    ]
    random.shuffle(categories)
    random.shuffle(areas)
    print('weddingtype {}'.format(categories))
    print('area {}'.format(areas))

    get_address()

    for category in categories:
        for area_code in areas:
            base_url = 'http://www.dianping.com/shenzhen/ch55/%s%s' % (
                category, area_code)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('title'))
            else:
                shown = doc('.shop-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = wedding_from_url(page_url)
def main():
    """Crawl the Shenzhen ``ch85`` listings for every category/location pair.

    Category and area codes are shuffled. Categories ``g181`` and ``g235``
    are crawled over the module-level ``dz_location``; all others over the
    local area list. The first listing page of each pair determines the page
    count and every page is passed to ``medical_from_url`` (result unused
    here).
    """
    categories = [
        'g183', 'g181', 'g182', 'g2914', 'g612', 'g235', 'g25148', 'g34050',
        'g34053', 'g257', 'g34051', 'g34046', 'g34052', 'g34054', 'g34049',
        'g34048', 'g2912',
    ]
    areas = [
        'r29', 'r31', 'r30', 'r32', 'r12036', 'r12033', 'r34', 'r33',
        'r12035',
    ]
    random.shuffle(categories)
    random.shuffle(areas)
    print('medicaltype {}'.format(categories))
    print('area {}'.format(areas))

    for category in categories:
        if category in ('g181', 'g235'):
            locations = dz_location
        else:
            locations = areas

        for location in locations:
            base_url = 'http://www.dianping.com/shenzhen/ch85/%s%s' % (
                category, location)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.PageLink').text():
                last_link = doc('.PageLink:last')
                page_count = int(last_link.attr('data-ga-page'))
            else:
                # Without a pager, only 1..15 listed entries count as a page.
                shown = doc('#shop-all-list li').length
                if shown < 1 or shown > 15:
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
            print('maxpage: {}'.format(page_count))

            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                shops = medical_from_url(page_url)
def main():
    """Crawl the Shenzhen ``ch90`` listings for every category/location pair.

    Categories are used in their listed order and locations come from the
    module-level ``dz_location``. The page count is the ``title`` attribute
    of the last ``.pageLink``; without a pager it is one, provided
    ``.pages-num>.pages`` has no text. Pages of categories ``g25475`` and
    ``g32704`` go to ``home_from_url_decoration``, all others to
    ``home_from_url`` (results unused here).
    """
    categories = ['g32704', 'g25475', 'g33867', 'g33876', 'g34035', 'g6827',
                  'g6826', 'g32702', 'g32705']
    print('hometype {}'.format(categories))

    for category in categories:
        for location in dz_location:
            base_url = 'http://www.dianping.com/shenzhen/ch90/%s%s' % (
                category, location)
            time.sleep(random.randint(2, 5))
            doc = pq(html_from_url(base_url))

            page_count = 0
            if doc('.pageLink').text():
                last_link = doc('.pageLink:last')
                page_count = int(last_link.attr('title'))
                print('maxpage in first if: {}'.format(page_count))
            else:
                if doc('.pages-num>.pages').text() != '':
                    print('maxpage in else: {}'.format(page_count))
                    continue
                page_count = 1
                print('maxpage in elif: {}'.format(page_count))
            print('maxpage: {}'.format(page_count))

            # These two categories use a different page layout parser.
            uses_item_layout = category in ('g25475', 'g32704')
            for page_no in range(1, page_count + 1):
                page_url = '%sp%d' % (base_url, page_no)
                time.sleep(random.randint(5, 10))
                if uses_item_layout:
                    print('tp in if : {}'.format(category))
                    shops = home_from_url_decoration(page_url)
                else:
                    print('tp in else : {}'.format(category))
                    shops = home_from_url(page_url)