def get_shop_info_list(self):
    """Search piao.qunar.com for ``self.data_region`` and paginate the results.

    The landing page is opened with ``fast_get_page``, the region text is
    sent to ``#searchValue`` and ``#searchBtn`` is clicked. Pagination is
    then handed to ``until_click_no_next_page_by_css_selector`` with
    ``self.driver.refresh`` as the pre-page function and
    ``self.get_shop_info`` as the main page function.
    """
    # The first page is opened with fast_get_page().
    time.sleep(1)
    self.fast_get_page(url='http://piao.qunar.com/',
                       is_scroll_to_bottom=False)
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector="#searchValue", text=self.data_region)
    time.sleep(3)
    self.until_scroll_to_center_click_by_css_selector(
        css_selector='#searchBtn')

    refresh_step = PageFunc(func=self.driver.refresh)
    scrape_step = PageFunc(func=self.get_shop_info)
    pager = NextPageCssSelectorSetup(
        css_selector='#pager-container > div > a.next',
        pre_pagefunc=refresh_step,
        main_pagefunc=scrape_step)
    self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
def get_shop_comment(self):
    """Visit every stored ``shop_url`` and collect its comment pages.

    Shops are read from the Mongo shop collection (filtered by
    ``self.get_data_key()``); only documents with a truthy ``shop_url``
    are kept. Each shop page is re-opened until the window title no longer
    contains '验证中心', then pagination over ``#remark_page`` is delegated to
    ``until_click_no_next_page_by_css_selector``.
    """
    self.fast_new_page(url='http://www.baidu.com')
    collection = Mongodb(db=TravelDriver.db,
                         collection=TravelDriver.shop_collection,
                         host='10.1.17.15').get_collection()
    targets = [(doc.get('shop_name'), doc.get('shop_url'))
               for doc in collection.find(self.get_data_key())
               if doc.get('shop_url')]

    next_css = '#remark_page > a.page-next'
    for position, (shop_name, shop_url) in enumerate(targets, start=1):
        self.info_log(data='第%s个,%s' % (position, shop_name))
        # Original note: verification happens on the first open only and
        # is not needed afterwards.
        while True:
            self.is_ready_by_proxy_ip()
            self.switch_window_by_index(index=-1)
            self.deal_with_failure_page()
            self.fast_new_page(url=shop_url)
            time.sleep(1)
            self.switch_window_by_index(index=-1)  # select the newest window
            if '验证中心' not in self.driver.title:
                break
            # Landed on the verification page: close it and retry.
            self.info_log(data='关闭验证页面!!!')
            self.close_curr_page()

        pager = NextPageCssSelectorSetup(
            css_selector=next_css,
            stop_css_selector=next_css + '.hidden',
            main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                   page=page_comment_1))
        self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
        self.close_curr_page()
def get_shop_comment(self):
    """Visit every stored ``shop_comment_url`` and collect its comment pages.

    Shops come from the Mongo shop collection (filtered by
    ``self.get_data_key()``); documents without a truthy
    ``shop_comment_url`` are skipped. Each URL is re-opened until the
    window title no longer contains '验证中心', after which pagination via
    the '下一页' link is delegated to
    ``until_click_no_next_page_by_partial_link_text``.
    """
    collection = Mongodb(db=TravelDriver.db,
                         collection=TravelDriver.shop_collection,
                         host='10.1.17.15').get_collection()
    targets = [(doc.get('shop_name'), doc.get('shop_comment_url'))
               for doc in collection.find(self.get_data_key())
               if doc.get('shop_comment_url')]

    for position, (shop_name, comment_url) in enumerate(targets, start=1):
        self.info_log(data='第%s个,%s' % (position, shop_name))
        while True:
            self.is_ready_by_proxy_ip()
            self.switch_window_by_index(index=-1)
            self.deal_with_failure_page()
            self.fast_new_page(url=comment_url)
            time.sleep(1)
            self.switch_window_by_index(index=-1)  # select the newest window
            if '验证中心' not in self.driver.title:
                break
            # Landed on the verification page: close it and retry.
            self.info_log(data='关闭验证页面!!!')
            self.close_curr_page()

        pager = NextPageLinkTextSetup(
            link_text='下一页',
            main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                   page=page_comment_1))
        self.until_click_no_next_page_by_partial_link_text(
            nextpagesetup=pager)
        self.close_curr_page()
def get_comment_list(self):
    """Open each stored ``shop_url`` and paginate through its comments.

    Shops are read from the local Mongo shop collection (filtered by
    ``self.get_data_key()``). For every shop, ``self.shop_name`` is set, a
    Baidu page and then the shop page are opened, and pagination over
    ``#allCmtComment`` is delegated to
    ``until_click_no_next_page_by_css_selector``.
    """
    self.fast_new_page(url="http://www.baidu.com")
    collection = Mongodb(db=TravelDriver.db,
                         collection=TravelDriver.shop_collection,
                         host='localhost').get_collection()
    targets = [(doc.get('shop_name'), doc.get('shop_url'))
               for doc in collection.find(self.get_data_key())
               if doc.get('shop_url')]

    next_css = '#allCmtComment > div.paging.orangestyle > div > a.nextpage'
    for position, (shop_name, shop_url) in enumerate(targets, start=1):
        self.info_log(data='第%s个,%s' % (position, shop_name))
        self.shop_name = shop_name
        self.fast_new_page("http://www.baidu.com")
        self.fast_new_page(url=shop_url)
        pager = NextPageCssSelectorSetup(
            css_selector=next_css,
            stop_css_selector=next_css + '.hidden',
            main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                   page=page_comment_1),
            pause_time=2)
        self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
        self.close_curr_page()
def get_shop_info_list(self):
    """Walk dianping's Qiandaohu food listing per category and district link.

    For each category in a hard-coded list (currently only '水果生鲜') the
    category link is clicked, then the 1st..9th anchors under ``#bussi-nav``
    are clicked in turn. The anchor text is stored in ``self.shop_site``
    and the listing is paginated through the '下一页' link. Any exception
    raised while handling one anchor is caught and reported as "无标签".
    """
    self.fast_new_page(url='http://www.dianping.com/qiandaohu/food')
    # Original note: a loop over more categories can simply be added here.
    categories = ['水果生鲜']
    for category in categories:
        self.shop_cook_style = category
        for nth in range(1, 10):
            self.fast_click_first_item_same_page_by_partial_link_text(
                link_text=category)
            time.sleep(2)
            try:
                nav_css = '#bussi-nav > a:nth-child(%d)' % nth
                self.driver.find_element_by_css_selector(
                    css_selector=nav_css).click()
                time.sleep(2)
                nav_xpath = '//*[@id="bussi-nav"]/a[%d]' % nth
                self.shop_site = self.driver.find_element_by_xpath(
                    nav_xpath).text
                pager = NextPageLinkTextSetup(
                    link_text='下一页',
                    is_proxy=False,
                    main_pagefunc=PageFunc(self.from_page_get_data_list,
                                           page=page_shop_1))
                self.until_click_no_next_page_by_partial_link_text(pager)
            except Exception:
                # Best-effort: any failure for this anchor is reported as
                # "no tag" and the loop moves on.
                print("无标签")
def get_shop_info_list(self):
    """Collect shops for every sub-category listed under ``#classfy``.

    The '美食' link and ``#J_qs-btn`` are clicked, then every ``#classfy > a``
    element with non-empty text other than '更多' becomes a sub-category
    (name + href). Each sub-category URL is opened in a new page and
    paginated through the '下一页' link; ``get_shop_list`` is supplied as the
    main page function and saves the page's shops merged with the
    sub-category dict.
    """

    def get_shop_list(subtype):
        """Save every shop of the current page, merged with ``subtype``."""
        for shop_data in self.from_page_get_data_list(page=page_shop_1):
            self.save_data_to_mongodb(
                fieldlist=fl_shop1,
                mongodb=Mongodb(db=TravelDriver.db,
                                collection=TravelDriver.shop_collection,
                                host='10.1.17.15'),
                data=self.merge_dict(shop_data, subtype),
                external_key_name=[FieldName.SUBTYPE_NAME])

    self.fast_click_first_item_page_by_partial_link_text(link_text='美食')
    time.sleep(2)
    self.until_scroll_to_center_click_by_css_selector(
        css_selector='#J_qs-btn')
    time.sleep(2)

    anchors = self.until_presence_of_all_elements_located_by_css_selector(
        css_selector='#classfy > a')
    subtype_list = [
        {
            FieldName.SUBTYPE_NAME: anchor.text,
            FieldName.SUBTYPE_URL: anchor.get_attribute('href'),
        }
        for anchor in anchors
        if anchor.text and anchor.text != '更多'
    ]

    for subtype in subtype_list:
        self.fast_new_page(url=subtype.get(FieldName.SUBTYPE_URL))
        pager = NextPageLinkTextSetup(
            link_text='下一页',
            is_proxy=False,
            main_pagefunc=PageFunc(get_shop_list, subtype=subtype))
        self.until_click_no_next_page_by_partial_link_text(pager)
        self.close_curr_page()
def get_shop_info_list(self):
    """Search lvmama hotels for ``self.data_region`` and paginate results.

    The city derived by ``get_city_from_region_CHN`` is entered into
    ``#js_destination`` and the region into ``#js_keyword``; ``#btn_search``
    is then clicked and pagination via the '下一页' link is delegated to
    ``until_click_no_next_page_by_partial_link_text`` with
    ``self.get_shop_info`` as the main page function.
    """
    search_url = (
        'http://s.lvmama.com/hotel/U69C20180919O20180920'
        '?mdd=%E8%AF%B7%E8%BE%93%E5%85%A5%E7%9B%AE%E7%9A%84%E5%9C%B0#list')
    self.fast_get_page(search_url, is_max=False)
    time.sleep(2)

    city = self.get_city_from_region_CHN(self.data_region)
    self.until_scroll_to_center_send_enter_by_css_selector(
        css_selector='#js_destination', text=city)
    self.until_scroll_to_center_send_enter_by_css_selector(
        css_selector='#js_keyword', text=self.data_region)
    time.sleep(1)
    self.fast_click_page_by_css_selector('#btn_search')
    # Original note: the follow-up click differs per target field (e.g.
    # Qiandao Lake vs. Qiandao Lake forest park); those variants were
    # commented out in the source and are not executed.
    time.sleep(1)

    pager = NextPageLinkTextSetup(
        link_text="下一页",
        main_pagefunc=PageFunc(func=self.get_shop_info))
    self.until_click_no_next_page_by_partial_link_text(nextpagesetup=pager)
def get_comment_info_list(self):
    """Open each stored ``shop_comment_url`` and paginate through comments.

    Shops are read from the local Mongo shop collection (filtered by
    ``self.get_data_key()``). For every shop a Baidu page is opened first,
    then the comment URL; ``self.shop_name`` is set, and after a 5 second
    pause pagination via the '下一页' link is delegated to
    ``until_click_no_next_page_by_partial_link_text``.

    Fix: the original filtered documents on ``shop_url`` while navigating
    to ``shop_comment_url``, so a document lacking the comment URL caused
    ``fast_new_page(url=None)``. The filter now tests the key that is
    actually used.
    """
    self.fast_new_page(url='http://www.baidu.com')
    collection = Mongodb(db=TravelDriver.db,
                         collection=TravelDriver.shop_collection,
                         host='localhost').get_collection()
    targets = [(doc.get('shop_name'), doc.get('shop_comment_url'))
               for doc in collection.find(self.get_data_key())
               if doc.get('shop_comment_url')]

    for position, (shop_name, comment_url) in enumerate(targets, start=1):
        # Original note: the site may apply anti-scraping measures, hence
        # the intermediate Baidu page.
        self.fast_new_page(url="https://www.baidu.com")
        self.info_log(data='第%s个,%s' % (position, shop_name))
        self.fast_new_page(url=comment_url)
        self.shop_name = shop_name
        time.sleep(5)
        pager = NextPageLinkTextSetup(
            link_text='下一页',
            main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                   page=page_comment_1),
            pause_time=2)
        self.until_click_no_next_page_by_partial_link_text(
            nextpagesetup=pager)
        self.close_curr_page()
def get_shop_info(self):
    """Read the shop list page and enrich each shop from its detail page.

    ``page_shop_1`` data is fetched first; ``from_page_add_data_to_data_list``
    is then called for ``page_shop_2`` with an extra page function
    (``get_newest_comment_data_by_css_selector``) configured with a comment
    pager whose pre-step is ``self.more_comment`` and whose after-step is
    ``self.close_curr_page``. Any exception is logged via ``error_log``.
    """
    next_comment_css = (
        '#review-list > div.review-list-container > div.review-list-main'
        ' > div.reviews-wrapper > div.bottom-area.clearfix > div > a.NextPage')
    shop_title_css = (
        '#poi-detail > div.container > div.base-info > div.main-detail.clearfix'
        ' > div.main-detail-left > div.main-detail-left-top.clearfix'
        ' > div.hotel-detail-info > div > h1')
    try:
        shops = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector=next_comment_css,
            page=page_comment_1,
            pause_time=2,
            pre_pagefunc=PageFunc(func=self.more_comment),
            after_pagefunc=PageFunc(func=self.close_curr_page))
        comment_step = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager,
            shop_name_css_selector=shop_title_css,
            is_effective=False)
        self.from_page_add_data_to_data_list(page=page_shop_2,
                                             pre_page=page_shop_1,
                                             data_list=shops,
                                             extra_pagefunc=comment_step)
    except Exception as exc:
        self.error_log(e=str(exc))
def get_shop_info_list(self):
    """Search piao.ctrip.com for ``self.data_region`` and paginate results.

    After loading Baidu in the driver, the ticket site is opened in a new
    page and refreshed; the region text is sent to ``#mainInput`` followed
    by Enter. Pagination is delegated to
    ``until_click_no_next_page_by_css_selector`` with ``self.driver.refresh``
    as pre-step and ``self.get_shop_info`` as the main page function.
    """
    self.driver.get('https://www.baidu.com')
    self.fast_new_page(url='http://piao.ctrip.com/',
                       is_scroll_to_bottom=False)
    self.driver.refresh()
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector="#mainInput", text=self.data_region)
    time.sleep(3)
    self.until_scroll_to_center_send_enter_by_css_selector(
        css_selector="#mainInput")

    next_css = '#searchResultContainer > div.pkg_page.basefix > a.down'
    pager = NextPageCssSelectorSetup(
        css_selector=next_css,
        stop_css_selector=next_css + '.down_nocurrent',
        pre_pagefunc=PageFunc(func=self.driver.refresh),
        main_pagefunc=PageFunc(func=self.get_shop_info))
    self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
def get_shop_info_list(self):
    """Click the '酒店' link and paginate the listing via the '下一页' link.

    ``from_page_get_data_list`` with ``page_shop_1`` is supplied as the
    main page function; the pager is configured with ``is_proxy=False``.
    """
    self.fast_click_first_item_page_by_partial_link_text(link_text='酒店')
    time.sleep(2)
    list_step = PageFunc(self.from_page_get_data_list, page=page_shop_1)
    pager = NextPageLinkTextSetup(link_text='下一页',
                                  is_proxy=False,
                                  main_pagefunc=list_step)
    self.until_click_no_next_page_by_partial_link_text(pager)
def get_shop_info(self):
    """Read the shop list page and enrich each shop from its detail page.

    ``page_shop_1`` data is fetched, then ``from_page_add_data_to_data_list``
    is called for ``page_shop_2`` with ``get_newest_comment_data_by_css_selector``
    as the extra page function. Its comment pager targets
    ``#divCtripComment`` and runs ``self.change_index_sort`` as pre-step.
    Any exception is logged via ``error_log``.
    """
    try:
        shops = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector='#divCtripComment > div.c_page_box > div > a.c_down',
            page=page_comment_1,
            pause_time=2,
            pre_pagefunc=PageFunc(func=self.change_index_sort))
        comment_step = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager)
        self.from_page_add_data_to_data_list(page=page_shop_2,
                                             pre_page=page_shop_1,
                                             data_list=shops,
                                             extra_pagefunc=comment_step)
    except Exception as exc:
        self.error_log(e=str(exc))
def get_shop_info(self):
    """Read the shop list page and enrich each shop from its detail page.

    ``page_shop_1`` data is fetched, then ``from_page_add_data_to_data_list``
    is called for ``page_shop_2`` with ``self.get_comment`` wrapped as the
    extra page function. Any exception is logged via ``error_log``.
    """
    try:
        shops = self.from_page_get_data_list(page=page_shop_1)
        # A CSS-selector based comment pager (ul.pkg_page > a.down) existed
        # here in commented-out form; comments are fetched by get_comment.
        comment_step = PageFunc(func=self.get_comment)
        self.from_page_add_data_to_data_list(page=page_shop_2,
                                             pre_page=page_shop_1,
                                             data_list=shops,
                                             extra_pagefunc=comment_step)
    except Exception as exc:
        self.error_log(e=str(exc))
def get_shop_info(self):
    """Read the shop list page and enrich each shop from its detail page.

    ``page_shop_1`` data is fetched, then ``from_page_add_data_to_data_list``
    is called for ``page_shop_2`` with ``self.shop_detail_page_unfold`` as
    the pre-page function. No comment pager is configured here. Any
    exception is logged via ``error_log``.
    """
    try:
        shops = self.from_page_get_data_list(page=page_shop_1)
        unfold_step = PageFunc(func=self.shop_detail_page_unfold)
        self.from_page_add_data_to_data_list(page=page_shop_2,
                                             pre_page=page_shop_1,
                                             data_list=shops,
                                             pre_pagefunc=unfold_step)
    except Exception as exc:
        self.error_log(e=str(exc))
def get_shop_info_list(self):
    """Search mafengwo hotels for ``self.data_region`` and paginate results.

    The region is typed into ``#_j_search_input`` and submitted with Enter;
    after scrolling, pagination via the '后一页' link is delegated to
    ``until_click_no_next_page_by_partial_link_text`` with
    ``self.get_shop_info`` as the main page function.
    """
    search_box = '#_j_search_input'
    self.fast_get_page('http://www.mafengwo.cn/hotel/', is_max=True)
    time.sleep(2)
    self.until_send_text_by_css_selector(css_selector=search_box,
                                         text=self.data_region)
    self.until_send_enter_by_css_selector(css_selector=search_box)
    time.sleep(1)
    # Original note: scroll to the bottom so the whole page gets loaded.
    self.vertical_scroll_to()

    pager = NextPageLinkTextSetup(
        link_text="后一页",
        main_pagefunc=PageFunc(func=self.get_shop_info))
    self.until_click_no_next_page_by_partial_link_text(nextpagesetup=pager)
def get_shop_info_list(self):
    """Open the qiandaohu.cc listing, wait 60 s, then paginate via '下一页'.

    ``from_page_get_data_list`` with ``page_shop_1`` is supplied as the
    main page function; the pager is configured with ``is_proxy=False``.
    """
    self.fast_new_page(
        url='http://www.qiandaohu.cc/qdhfg/jqjd/xcyjd/#wzqdh')
    time.sleep(60)
    list_step = PageFunc(self.from_page_get_data_list, page=page_shop_1)
    pager = NextPageLinkTextSetup(link_text='下一页',
                                  is_proxy=False,
                                  main_pagefunc=list_step)
    self.until_click_no_next_page_by_partial_link_text(pager)
def get_shop_info_list(self):
    """Search mafengwo for ``self.data_region`` and paginate the results.

    The region is typed into ``#_j_index_search_input_all`` and submitted
    with Enter, the '景点' link and the fourth ``s-nav`` anchor are clicked,
    and pagination via the '下一页' link is delegated to
    ``until_click_no_next_page_by_partial_link_text`` with
    ``self.get_shop_info`` as the main page function.
    """
    search_box = '#_j_index_search_input_all'
    self.fast_get_page('http://www.mafengwo.cn/', is_max=False)
    time.sleep(2)
    self.until_send_text_by_css_selector(css_selector=search_box,
                                         text=self.data_region)
    self.until_send_enter_by_css_selector(css_selector=search_box)
    self.fast_click_first_item_same_page_by_partial_link_text('景点')
    self.fast_click_page_by_css_selector(
        '#_j_mfw_search_main > div.s-nav > div > div > a:nth-child(4)')
    time.sleep(1)
    self.vertical_scroll_to()  # scroll to the bottom of the page

    pager = NextPageLinkTextSetup(
        link_text="下一页",
        main_pagefunc=PageFunc(func=self.get_shop_info))
    self.until_click_no_next_page_by_partial_link_text(nextpagesetup=pager)
def get_shop_info(self):
    """Read the shop list page and enrich each shop from its detail page.

    ``page_shop_1`` data is fetched, then ``from_page_add_data_to_data_list``
    is called for ``page_shop_2`` with ``get_newest_comment_data_by_css_selector``
    as the extra page function, configured with a comment pager over
    ``div._j_commentlist``. Any exception is logged via ``error_log``.
    """
    next_css = 'div._j_commentlist > div.m-pagination > a.pi.pg-next'
    try:
        shops = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector=next_css,
            stop_css_selector=next_css + '.hidden',
            page=page_comment_1,
            pause_time=2)
        comment_step = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager)
        self.from_page_add_data_to_data_list(page=page_shop_2,
                                             pre_page=page_shop_1,
                                             data_list=shops,
                                             extra_pagefunc=comment_step)
    except Exception as exc:
        self.error_log(e=str(exc))
def get_shop_info_list(self):
    """Search hotels.ctrip.com for ``self.data_region`` and paginate results.

    After loading Baidu in the driver, the hotel site is opened in a new
    page; the region is sent to ``#txtCity`` followed by Enter and
    ``#btnSearch`` is clicked. Pagination over ``#downHerf.c_down`` is
    delegated to ``until_click_no_next_page_by_css_selector`` with
    ``self.get_shop_info`` as the main page function.
    """
    city_box = "#txtCity"
    self.driver.get('https://www.baidu.com')
    self.fast_new_page('http://hotels.ctrip.com/',
                       is_scroll_to_bottom=False)
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector=city_box, text=self.data_region)
    time.sleep(3)
    self.until_scroll_to_center_send_enter_by_css_selector(
        css_selector=city_box)
    time.sleep(2)
    self.fast_click_same_page_by_css_selector(
        click_css_selector='#btnSearch')

    pager = NextPageCssSelectorSetup(
        css_selector='#downHerf.c_down',
        main_pagefunc=PageFunc(func=self.get_shop_info))
    self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
def get_shop_info(self):
    """Read the shop list page and enrich each shop from its detail page.

    ``page_shop_1`` data is fetched, then ``from_page_add_data_to_data_list``
    is called for ``page_shop_2`` with ``get_newest_comment_data_by_css_selector``
    as the extra page function, configured with a comment pager over
    ``#remarksPage``. Any exception is logged via ``error_log``.

    The leftover debugging output (``print(1234567890)`` on entry and
    ``print(0000000000000000)`` on failure) has been removed; failures are
    still reported through ``error_log``.
    """
    next_css = '#remarksPage > a.page-next'
    try:
        shops = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector=next_css,
            stop_css_selector=next_css + '.hidden',
            page=page_comment_1,
            pause_time=2)
        comment_step = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager)
        self.from_page_add_data_to_data_list(page=page_shop_2,
                                             pre_page=page_shop_1,
                                             data_list=shops,
                                             extra_pagefunc=comment_step)
    except Exception as exc:
        self.error_log(e=str(exc))
def get_shop_good_middle_bad_comment(self):
    """Paginate the review list of one hard-coded dianping shop.

    The dianping home page and then the ``review_all`` page of shop
    19139636 are opened (with 20 s and 30 s pauses), and pagination via the
    '下一页' link is delegated to
    ``until_click_no_next_page_by_partial_link_text``. On any exception
    "无元素" is printed and the current page is closed.

    NOTE(review): the source contained a commented-out loop over shops
    read from Mongo and a commented-out "sort by newest" click; neither is
    executed.
    """
    home_url = "http://www.dianping.com"
    review_url = "http://www.dianping.com/shop/19139636/review_all"
    try:
        self.fast_new_page(url=home_url, is_scroll_to_bottom=False)
        time.sleep(20)
        self.fast_new_page(url=review_url, is_scroll_to_bottom=False)
        time.sleep(30)
        # Original note: afterwards click through the tags and the reviews.
        pager = NextPageLinkTextSetup(
            link_text='下一页',
            main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                   page=page_comment_1),
            pause_time=2)
        self.until_click_no_next_page_by_partial_link_text(
            nextpagesetup=pager)
        self.close_curr_page()
    except Exception:
        print("无元素")
        self.close_curr_page()
def get_shop_info_list(self):
    """Search piao.qunar.com for ``self.data_region`` and paginate results.

    The region is typed into ``#searchValue`` and ``#searchBtn`` is clicked;
    after a 10 s pause pagination over ``#pager-container`` is delegated to
    ``until_click_no_next_page_by_css_selector`` with
    ``from_page_get_data_list`` (``page_shop_1``) as the main page function.
    """
    self.fast_get_page('http://piao.qunar.com/', is_max=False)
    time.sleep(2)
    self.until_send_text_by_css_selector(css_selector='#searchValue',
                                         text=self.data_region)
    self.fast_click_page_by_css_selector('#searchBtn')
    time.sleep(10)

    next_css = '#pager-container > div > a.next'
    pager = NextPageCssSelectorSetup(
        css_selector=next_css,
        stop_css_selector=next_css + '.hidden',
        main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                               page=page_shop_1))
    self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
def get_shop_info(self):
    """Read the shop list page and enrich each shop from its detail page.

    ``page_shop_1`` data is fetched, then ``from_page_add_data_to_data_list``
    is called for ``page_shop_2`` with ``get_newest_comment_data_by_css_selector``
    as the extra page function, configured with a comment pager over
    ``#allCmtComment``. Any exception is logged via ``error_log``.
    """
    next_css = '#allCmtComment > div.paging.orangestyle > div > a.nextpage'
    try:
        shops = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector=next_css,
            stop_css_selector=next_css + '.hidden',
            page=page_comment_1,
            pause_time=2)
        comment_step = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager)
        self.from_page_add_data_to_data_list(page=page_shop_2,
                                             pre_page=page_shop_1,
                                             data_list=shops,
                                             extra_pagefunc=comment_step)
    except Exception as exc:
        self.error_log(e=str(exc))
def get_shop_info_list(self):
    """Search ticket.lvmama.com for ``self.data_region`` and paginate results.

    After loading Baidu in the driver, the ticket site is opened in a new
    page; the region is typed into the banner search input and submitted
    with Enter. After a 5 s pause pagination via the '下一页' link is
    delegated to ``until_click_no_next_page_by_partial_link_text`` with
    ``from_page_get_data_list`` (``page_shop_1``) as the main page function.
    """
    search_input = ('body > div.banWrap > div > div.lv_s_box > div.lv_s_all'
                    ' > div.lv_s_input_box > input')
    self.driver.get('https://www.baidu.com')
    self.fast_new_page(url='http://ticket.lvmama.com/',
                       is_scroll_to_bottom=False)
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector=search_input, text=self.data_region)
    self.until_send_enter_by_css_selector(css_selector=search_input)
    time.sleep(5)

    list_step = PageFunc(self.from_page_get_data_list, page=page_shop_1)
    pager = NextPageLinkTextSetup(link_text='下一页',
                                  is_proxy=False,
                                  main_pagefunc=list_step)
    self.until_click_no_next_page_by_partial_link_text(pager)
def get_shop_info_list(self):
    """Search you.ctrip.com for ``self.data_region`` and paginate results.

    The region is typed into ``#gsSearch``, ``#SearchBtn`` is clicked, then
    the span inside the current search-content tab is clicked. Pagination
    via the '下一页' link is delegated to
    ``until_click_no_next_page_by_partial_link_text`` with
    ``from_page_get_data_list`` (``page_shop_1``) as the main page function.
    """
    current_tab_css = ('body > div.content.cf > div.main'
                       ' > div.search-content.cf > ul > li.current > a > span')
    self.fast_get_page(url='http://you.ctrip.com/')
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector="#gsSearch", text=self.data_region)
    time.sleep(5)
    self.fast_click_page_by_css_selector(click_css_selector='#SearchBtn')
    time.sleep(10)
    self.fast_click_page_by_css_selector(click_css_selector=current_tab_css)

    list_step = PageFunc(self.from_page_get_data_list, page=page_shop_1)
    pager = NextPageLinkTextSetup(link_text='下一页',
                                  is_proxy=False,
                                  main_pagefunc=list_step)
    self.until_click_no_next_page_by_partial_link_text(pager)
def get_shop_info_list(self):
    """Search touch.piao.qunar.com for ``self.data_region`` and paginate.

    The mobile list page is opened, the region is typed into
    ``#search-input-bind`` and ``#search-form-submit`` is clicked.
    Pagination via the '下一页' link (``pause_time=5``) is delegated to
    ``until_click_no_next_page_by_partial_link_text`` with
    ``from_page_get_data_list`` (``page_shop_1``) as the main page function.
    """
    list_url = ('http://touch.piao.qunar.com/touch/list_%E5%8C%97%E4%BA%AC.html'
                '?isSearch=1&cityName=%E5%8C%97%E4%BA%AC')
    self.fast_get_page(url=list_url)
    time.sleep(5)
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector='#search-input-bind', text=self.data_region)
    # Original note: sleep a little longer so the whole page gets loaded.
    time.sleep(4)
    self.fast_click_page_by_css_selector(
        click_css_selector='#search-form-submit')
    time.sleep(8)

    pager = NextPageLinkTextSetup(
        link_text='下一页',
        pause_time=5,
        main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                               page=page_shop_1))
    self.until_click_no_next_page_by_partial_link_text(nextpagesetup=pager)
def get_shop_info_list(self):
    """Search hotel.qunar.com for ``self.data_region`` and paginate results.

    The city page URL is built from ``get_city_from_region_ENG``; the region
    is typed into ``#jxQ`` and submitted with Enter. After two scroll calls
    and a 5 s pause, pagination via the '下一页' link is delegated to
    ``until_click_no_next_page_by_partial_link_text`` with
    ``self.get_shop_info`` as the main page function.
    """
    city = self.get_city_from_region_ENG(self.data_region)
    city_url = 'http://hotel.qunar.com/city' + city
    self.fast_get_page(city_url, is_max=False, is_scroll_to_bottom=False)
    time.sleep(2)
    self.until_send_text_by_css_selector(css_selector='#jxQ',
                                         text=self.data_region)
    self.until_send_enter_by_css_selector(css_selector='#jxQ')
    # Both calls are annotated "scroll to the bottom of the page" in the
    # original.
    self.vertical_scroll_to(offset=1000000000)
    self.vertical_scroll_to()
    time.sleep(5)

    pager = NextPageLinkTextSetup(
        link_text="下一页",
        main_pagefunc=PageFunc(func=self.get_shop_info))
    self.until_click_no_next_page_by_partial_link_text(nextpagesetup=pager)
def get_shop_info(self):
    """Read the shop list page and enrich each shop from its detail page.

    ``page_shop_1`` data is fetched, then ``from_page_add_data_to_data_list``
    is called for ``page_shop_2`` with ``get_newest_comment_data_by_css_selector``
    as the extra page function, configured with a comment pager over
    ``#comment_main``. Any exception is logged via ``error_log``.
    """
    next_css = ('#comment_main > div > div.wrapper'
                ' > div.b_ugcpager.clrfix.js-pager-container'
                ' > div.js-pager > div > a.next > span')
    try:
        shops = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector=next_css,
            stop_css_selector=next_css + '.hidden',
            page=page_comment_1,
            pause_time=2)
        comment_step = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager)
        self.from_page_add_data_to_data_list(page=page_shop_2,
                                             pre_page=page_shop_1,
                                             data_list=shops,
                                             extra_pagefunc=comment_step)
    except Exception as exc:
        self.error_log(e=str(exc))
def get_shop_info_list(self):
    """Search hotel.tuniu.com for ``self.data_region`` and paginate results.

    The city from ``get_city_from_region_CHN`` is typed into ``#txtCity`` and
    ``#search_hotel`` is clicked; the region is then typed into ``#keyWord``
    and ``#search_hotel`` is clicked again. After a 10 s pause pagination
    over ``span.next`` is delegated to
    ``until_click_no_next_page_by_css_selector`` with ``self.get_shop_info``
    as the main page function.
    """
    search_button = '#search_hotel'
    self.fast_get_page(url='http://hotel.tuniu.com/',
                       is_scroll_to_bottom=False)
    city = self.get_city_from_region_CHN(self.data_region)

    self.until_send_text_by_css_selector(css_selector='#txtCity', text=city)
    time.sleep(5)
    self.fast_click_page_by_css_selector(search_button)

    self.until_send_text_by_css_selector(css_selector='#keyWord',
                                         text=self.data_region)
    time.sleep(5)
    self.fast_click_page_by_css_selector(search_button)
    time.sleep(10)

    pager = NextPageCssSelectorSetup(
        css_selector='#main > div.pagination.clearfix > div > span.next',
        stop_css_selector='span.next > i.hidden',
        main_pagefunc=PageFunc(func=self.get_shop_info))
    self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
def get_shop_comment(self):
    """Paginate the review list of one hard-coded dianping shop.

    The Mongo shop collection is still queried for ``shop_comment_url``
    entries, but the resulting list is not used: the per-shop loop was
    commented out in the source. Instead the ``review_all`` page of shop
    110833569 is opened, the ``text`` attribute of the first review-tag
    span is read (value unused), and pagination via the '下一页' link is
    delegated to ``until_click_no_next_page_by_partial_link_text``.
    """
    collection = Mongodb(db=TravelDriver.db,
                         collection=TravelDriver.shop_collection,
                         host='10.1.17.15').get_collection()
    # Built as in the original; currently unused.
    shop_name_url_list = [
        (doc.get('shop_name'), doc.get('shop_comment_url'))
        for doc in collection.find(self.get_data_key())
        if doc.get('shop_comment_url')
    ]

    self.fast_new_page(
        url='http://www.dianping.com/shop/110833569/review_all')

    tags_container = (
        '#review-list > div.review-list-container > div.review-list-main'
        ' > div.reviews-wrapper > div.reviews-tags > div.content')
    first_tag_css = '%s > span:nth-child(%d)' % (tags_container, 1)
    # The lookup is kept even though its value is unused: indexing [0]
    # raises when no such span exists, exactly as in the original.
    first_tag_text = self.driver.find_elements_by_css_selector(
        first_tag_css)[0].get_attribute('text')

    pager = NextPageLinkTextSetup(
        link_text='下一页',
        main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                               page=page_comment_1))
    self.until_click_no_next_page_by_partial_link_text(nextpagesetup=pager)
    self.close_curr_page()