def get_shop_comment(self):
    """Open every stored shop URL and hand its comment pages to the pager.

    Shop documents are fetched from ``TravelDriver.shop_collection`` (MongoDB
    host ``10.1.17.15``) with ``self.get_data_key()`` as the query; documents
    whose ``shop_url`` is missing or falsy are ignored.  For each remaining
    shop the page is reopened until its title no longer contains '验证中心',
    then pagination is delegated to
    ``until_click_no_next_page_by_css_selector`` with
    ``from_page_get_data_list`` / ``page_comment_1`` as the per-page function.
    """
    self.fast_new_page(url='http://www.baidu.com')
    shop_collcetion = Mongodb(db=TravelDriver.db,
                              collection=TravelDriver.shop_collection,
                              host='10.1.17.15').get_collection()
    shop_name_url_list = [
        (doc.get('shop_name'), doc.get('shop_url'))
        for doc in shop_collcetion.find(self.get_data_key())
        if doc.get('shop_url')
    ]
    for position, (shop_name, shop_url) in enumerate(shop_name_url_list,
                                                     start=1):
        self.info_log(data='第%s个,%s' % (position, shop_name))
        # Original note: verification happens the first time the page is
        # opened; it is not needed afterwards.
        while True:
            self.is_ready_by_proxy_ip()
            self.switch_window_by_index(index=-1)
            self.deal_with_failure_page()
            self.fast_new_page(url=shop_url)
            time.sleep(1)
            self.switch_window_by_index(index=-1)  # select the newest window
            if '验证中心' not in self.driver.title:
                break
            # A verification page was served: close it and try again.
            self.info_log(data='关闭验证页面!!!')
            self.close_curr_page()
        self.until_click_no_next_page_by_css_selector(
            nextpagesetup=NextPageCssSelectorSetup(
                css_selector='#remark_page > a.page-next',
                stop_css_selector='#remark_page > a.page-next.hidden',
                main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                       page=page_comment_1)))
        self.close_curr_page()
def get_comment_list(self):
    """Open every stored shop URL and hand its comment pages to the pager.

    Shop documents are fetched from ``TravelDriver.shop_collection`` (MongoDB
    host ``localhost``) with ``self.get_data_key()`` as the query; documents
    whose ``shop_url`` is missing or falsy are ignored.  ``self.shop_name`` is
    set to the current shop's name before its page is opened.  Pagination is
    delegated to ``until_click_no_next_page_by_css_selector`` with
    ``from_page_get_data_list`` / ``page_comment_1`` as the per-page function.
    """
    self.fast_new_page(url="http://www.baidu.com")
    shop_collcetion = Mongodb(db=TravelDriver.db,
                              collection=TravelDriver.shop_collection,
                              host='localhost').get_collection()
    shop_name_url_list = [
        (doc.get('shop_name'), doc.get('shop_url'))
        for doc in shop_collcetion.find(self.get_data_key())
        if doc.get('shop_url')
    ]
    for position, (shop_name, shop_url) in enumerate(shop_name_url_list,
                                                     start=1):
        self.info_log(data='第%s个,%s' % (position, shop_name))
        self.shop_name = shop_name
        # NOTE(review): a Baidu page is opened before each shop page and only
        # one close_curr_page() follows — confirm this is intentional.
        self.fast_new_page("http://www.baidu.com")
        self.fast_new_page(url=shop_url)
        self.until_click_no_next_page_by_css_selector(
            nextpagesetup=NextPageCssSelectorSetup(
                css_selector='#allCmtComment > div.paging.orangestyle > div > a.nextpage',
                stop_css_selector='#allCmtComment > div.paging.orangestyle > div > a.nextpage.hidden',
                main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                       page=page_comment_1),
                pause_time=2))
        self.close_curr_page()
def get_shop_info(self):
    """Collect list-page shop data and enrich it from each detail page.

    Reads ``page_shop_1`` through ``from_page_get_data_list`` and passes the
    result to ``from_page_add_data_to_data_list`` for ``page_shop_2``, with a
    ``PageFunc`` wrapping ``get_newest_comment_data_by_css_selector``.  Any
    exception is logged through ``error_log`` and not re-raised.
    """
    next_comment_selector = '#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.bottom-area.clearfix > div > a.NextPage'
    shop_title_selector = '#poi-detail > div.container > div.base-info > div.main-detail.clearfix > div.main-detail-left > div.main-detail-left-top.clearfix > div.hotel-detail-info > div > h1'
    try:
        collected = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector=next_comment_selector,
            page=page_comment_1,
            pause_time=2,
            pre_pagefunc=PageFunc(func=self.more_comment),
            after_pagefunc=PageFunc(func=self.close_curr_page),
        )
        newest_comment_func = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager,
            shop_name_css_selector=shop_title_selector,
            is_effective=False,
        )
        self.from_page_add_data_to_data_list(
            page=page_shop_2,
            pre_page=page_shop_1,
            data_list=collected,
            extra_pagefunc=newest_comment_func,
        )
    except Exception as err:
        # Best effort: a failure here is logged and swallowed.
        self.error_log(e=str(err))
def get_shop_info(self):
    """Collect list-page shop data and enrich it from each detail page.

    Reads ``page_shop_1`` through ``from_page_get_data_list`` and passes the
    result to ``from_page_add_data_to_data_list`` for ``page_shop_2``.  No
    ``extra_pagefunc`` is supplied.  Any exception is logged through
    ``error_log`` and not re-raised.
    """
    try:
        collected = self.from_page_get_data_list(page=page_shop_1)
        # NOTE: this pagination setup is built but not consumed; the PageFunc
        # around get_newest_comment_data_by_css_selector that would take it
        # as ``nextpagesetup`` is currently disabled.
        comment_pager = NextPageCssSelectorSetup(
            css_selector='div._j_commentlist > div.m-pagination > a.pi.pg-next',
            stop_css_selector='div._j_commentlist > div.m-pagination > a.pi.pg-last',
            page=page_comment_1,
            pause_time=2,
        )
        self.from_page_add_data_to_data_list(
            page=page_shop_2,
            pre_page=page_shop_1,
            data_list=collected,
        )
    except Exception as err:
        # Best effort: a failure here is logged and swallowed.
        self.error_log(e=str(err))
def get_shop_info(self):
    """Collect list-page shop data and enrich it from each detail page.

    Reads ``page_shop_1`` through ``from_page_get_data_list`` and passes the
    result to ``from_page_add_data_to_data_list`` for ``page_shop_2``, with a
    ``PageFunc`` wrapping ``get_newest_comment_data_by_css_selector``.  Any
    exception is logged through ``error_log`` and not re-raised.
    """
    try:
        collected = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector='#comment_paging > a.page_next',
            stop_css_selector='#comment_paging > a.page_next.hidden',
            page=page_comment_1,
            pause_time=2,
        )
        newest_comment_func = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager,
        )
        self.from_page_add_data_to_data_list(
            page=page_shop_2,
            pre_page=page_shop_1,
            data_list=collected,
            extra_pagefunc=newest_comment_func,
        )
    except Exception as err:
        # Best effort: a failure here is logged and swallowed.
        self.error_log(e=str(err))
def get_shop_info_list(self):
    """Search piao.qunar.com for ``self.data_region`` and page the results.

    Sends the region text to '#searchValue', clicks '#searchBtn', then
    delegates pagination to ``until_click_no_next_page_by_css_selector`` with
    ``self.driver.refresh`` as the pre-page function and
    ``self.get_shop_info`` as the main page function.
    """
    # Original note: the first page is opened with fast_get_page().
    time.sleep(1)
    self.fast_get_page(url='http://piao.qunar.com/',
                       is_scroll_to_bottom=False)
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector="#searchValue", text=self.data_region)
    time.sleep(3)
    self.until_scroll_to_center_click_by_css_selector(
        css_selector='#searchBtn')

    result_pager = NextPageCssSelectorSetup(
        css_selector='#pager-container > div > a.next',
        pre_pagefunc=PageFunc(func=self.driver.refresh),
        main_pagefunc=PageFunc(func=self.get_shop_info),
    )
    self.until_click_no_next_page_by_css_selector(nextpagesetup=result_pager)
def get_shop_info_list(self):
    """Search hotels.ctrip.com for ``self.data_region`` and page the results.

    Sends the region text and Enter to '#txtCity', clicks '#btnSearch', then
    delegates pagination to ``until_click_no_next_page_by_css_selector`` with
    ``self.get_shop_info`` as the main page function.
    """
    city_input = "#txtCity"

    self.driver.get('https://www.baidu.com')
    self.fast_new_page('http://hotels.ctrip.com/', is_scroll_to_bottom=False)
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector=city_input, text=self.data_region)
    time.sleep(3)
    self.until_scroll_to_center_send_enter_by_css_selector(
        css_selector=city_input)
    time.sleep(2)
    self.fast_click_same_page_by_css_selector(
        click_css_selector='#btnSearch')

    result_pager = NextPageCssSelectorSetup(
        css_selector='#downHerf.c_down',
        main_pagefunc=PageFunc(func=self.get_shop_info),
    )
    self.until_click_no_next_page_by_css_selector(nextpagesetup=result_pager)
def get_shop_info_list(self):
    """Reach the target site via a Baidu search, load cookies, page results.

    Searches Baidu for ``self.data_region + self.data_website``, follows the
    first link whose text contains ``self.data_website``, adds every cookie
    from ``./cookies/dianping_cookies.json`` to the driver, closes that page
    and follows the link again.  It then follows the '酒店' link, pauses, and
    delegates pagination to ``until_click_no_next_page_by_css_selector`` with
    ``self.get_shop_info`` as the main page function.

    Raises:
        OSError: if the cookie file cannot be opened.
        json.JSONDecodeError: if the cookie file is not valid JSON.
    """
    self.fast_get_page(url='https://www.baidu.com')
    time.sleep(2)
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector='#kw', text=self.data_region + self.data_website)
    self.until_scroll_to_center_send_enter_by_css_selector(css_selector='#kw')
    self.fast_click_first_item_page_by_partial_link_text(
        link_text=self.data_website)

    with open('./cookies/dianping_cookies.json', 'r', encoding='utf-8') as f:
        saved_cookies = json.load(f)
    for cookie in saved_cookies:
        self.driver.add_cookie(cookie)

    # Close the page and follow the link again after the cookies were added.
    self.close_curr_page()
    self.fast_click_first_item_page_by_partial_link_text(
        link_text=self.data_website)
    time.sleep(2)
    self.fast_click_first_item_page_by_partial_link_text(link_text='酒店')

    # The log used to announce 20 seconds while the code slept for 5; the
    # message is now built from the value that is actually slept.
    pause_seconds = 5
    self.debug_log(data='暂停%s秒......' % pause_seconds)
    time.sleep(pause_seconds)

    self.until_click_no_next_page_by_css_selector(
        nextpagesetup=NextPageCssSelectorSetup(
            css_selector='#poi-list > div.content-wrap > div > div.page > a.next',
            main_pagefunc=PageFunc(func=self.get_shop_info),
            is_next=False))
def get_shop_info(self):
    """Collect list-page shop data and enrich it from each detail page.

    Reads ``page_shop_1`` through ``from_page_get_data_list`` and passes the
    result to ``from_page_add_data_to_data_list`` for ``page_shop_2``, with a
    ``PageFunc`` wrapping ``get_newest_comment_data_by_css_selector``.  The
    comment pager is given ``self.change_index_sort`` as its pre-page
    function.  Any exception is logged through ``error_log`` and not
    re-raised.
    """
    try:
        collected = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector='#divCtripComment > div.c_page_box > div > a.c_down',
            page=page_comment_1,
            pause_time=2,
            pre_pagefunc=PageFunc(func=self.change_index_sort),
        )
        newest_comment_func = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager,
        )
        self.from_page_add_data_to_data_list(
            page=page_shop_2,
            pre_page=page_shop_1,
            data_list=collected,
            extra_pagefunc=newest_comment_func,
        )
    except Exception as err:
        # Best effort: a failure here is logged and swallowed.
        self.error_log(e=str(err))
def get_shop_info_list(self):
    """Search piao.ctrip.com for ``self.data_region`` and page the results.

    Sends the region text and Enter to '#mainInput', then delegates
    pagination to ``until_click_no_next_page_by_css_selector`` with
    ``self.driver.refresh`` as the pre-page function and
    ``self.get_shop_info`` as the main page function.
    """
    search_input = "#mainInput"

    self.driver.get('https://www.baidu.com')
    self.fast_new_page(url='http://piao.ctrip.com/',
                       is_scroll_to_bottom=False)
    self.driver.refresh()
    self.until_scroll_to_center_send_text_by_css_selector(
        css_selector=search_input, text=self.data_region)
    time.sleep(3)
    self.until_scroll_to_center_send_enter_by_css_selector(
        css_selector=search_input)

    result_pager = NextPageCssSelectorSetup(
        css_selector='#searchResultContainer > div.pkg_page.basefix > a.down',
        stop_css_selector='#searchResultContainer > div.pkg_page.basefix > a.down.down_nocurrent',
        pre_pagefunc=PageFunc(func=self.driver.refresh),
        main_pagefunc=PageFunc(func=self.get_shop_info),
    )
    self.until_click_no_next_page_by_css_selector(nextpagesetup=result_pager)
def get_shop_info_list(self):
    """Search piao.qunar.com for ``self.data_region`` and page the results.

    Sends the region text to '#searchValue', clicks '#searchBtn', waits, then
    delegates pagination to ``until_click_no_next_page_by_css_selector`` with
    ``from_page_get_data_list`` / ``page_shop_1`` as the main page function.
    """
    self.fast_get_page('http://piao.qunar.com/', is_max=False)
    time.sleep(2)
    # Disabled step: clicking the nav entry
    # '#js_nva_cgy > li.c_piao.js-searchnav.cur > a' before searching.
    self.until_send_text_by_css_selector(css_selector='#searchValue',
                                         text=self.data_region)
    # Disabled alternatives: submitting with Enter on '#searchValue' or on
    # '#js-piao-ticket > div.qcbox > div.qunar-qcbox > input'; the search
    # button is clicked instead.
    self.fast_click_page_by_css_selector('#searchBtn')
    time.sleep(10)

    result_pager = NextPageCssSelectorSetup(
        css_selector='#pager-container > div > a.next',
        stop_css_selector='#pager-container > div > a.next.hidden',
        main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                               page=page_shop_1),
    )
    self.until_click_no_next_page_by_css_selector(nextpagesetup=result_pager)
def get_shop_info(self):
    """Collect list-page shop data and enrich it from each detail page.

    Reads ``page_shop_1`` through ``from_page_get_data_list`` and passes the
    result to ``from_page_add_data_to_data_list`` for ``page_shop_2``, with a
    ``PageFunc`` wrapping ``get_newest_comment_data_by_css_selector``.  Any
    exception is logged through ``error_log`` and not re-raised.
    """
    next_comment_selector = '#comment_main > div > div.wrapper > div.b_ugcpager.clrfix.js-pager-container > div.js-pager > div > a.next > span'
    last_comment_selector = '#comment_main > div > div.wrapper > div.b_ugcpager.clrfix.js-pager-container > div.js-pager > div > a.next > span.hidden'
    try:
        collected = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector=next_comment_selector,
            stop_css_selector=last_comment_selector,
            page=page_comment_1,
            pause_time=2,
        )
        newest_comment_func = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager,
        )
        self.from_page_add_data_to_data_list(
            page=page_shop_2,
            pre_page=page_shop_1,
            data_list=collected,
            extra_pagefunc=newest_comment_func,
        )
    except Exception as err:
        # Best effort: a failure here is logged and swallowed.
        self.error_log(e=str(err))
def get_shop_info(self):
    """Collect list-page shop data and enrich it from each detail page.

    Reads ``page_shop_1`` through ``from_page_get_data_list`` and passes the
    result to ``from_page_add_data_to_data_list`` for ``page_shop_2``, with a
    ``PageFunc`` wrapping ``get_newest_comment_data_by_css_selector``.  Any
    exception is logged through ``error_log`` and not re-raised.
    """
    # Full selector paths kept for reference (shortened forms are used below):
    # root > div > div > div > div > div:nth-child(3) > div.main-bd > div > div:nth-child(5) > div.detail-left > div.content-wrapper.clearfix > ul.pkg_page > a.down
    # root > div > div > div > div > div:nth-child(3) > div.main-bd > div > div:nth-child(5) > div.detail-left > div.content-wrapper.clearfix > ul.pkg_page > a.down.disabled.nocurrent
    next_comment_selector = 'div.detail-left > div.content-wrapper.clearfix > ul.pkg_page > a.down'
    last_comment_selector = 'div.detail-left > div.content-wrapper.clearfix > ul.pkg_page > a.down.disabled.nocurrent'
    try:
        collected = self.from_page_get_data_list(page=page_shop_1)
        comment_pager = NextPageCssSelectorSetup(
            css_selector=next_comment_selector,
            stop_css_selector=last_comment_selector,
            page=page_comment_1,
            pause_time=2,
        )
        newest_comment_func = PageFunc(
            func=self.get_newest_comment_data_by_css_selector,
            nextpagesetup=comment_pager,
        )
        self.from_page_add_data_to_data_list(
            page=page_shop_2,
            pre_page=page_shop_1,
            data_list=collected,
            extra_pagefunc=newest_comment_func,
        )
    except Exception as err:
        # Best effort: a failure here is logged and swallowed.
        self.error_log(e=str(err))
def get_shop_info_list(self):
    """Search hotel.tuniu.com for ``self.data_region`` and page the results.

    The city returned by ``get_city_from_region_CHN(self.data_region)`` is
    sent to '#txtCity' and the search button is clicked; the region itself is
    then sent to '#keyWord' and the button is clicked again.  Pagination is
    delegated to ``until_click_no_next_page_by_css_selector`` with
    ``self.get_shop_info`` as the main page function.
    """
    search_button = '#search_hotel'

    self.fast_get_page(url='http://hotel.tuniu.com/',
                       is_scroll_to_bottom=False)
    city = self.get_city_from_region_CHN(self.data_region)

    # First search: by city.
    self.until_send_text_by_css_selector(css_selector='#txtCity', text=city)
    time.sleep(5)
    self.fast_click_page_by_css_selector(search_button)

    # Second search: by the region keyword.
    self.until_send_text_by_css_selector(css_selector='#keyWord',
                                         text=self.data_region)
    time.sleep(5)
    self.fast_click_page_by_css_selector(search_button)

    # Disabled step: clicking
    # '#topContainer > div > div > div.box.clearfix.topnav.common > a:nth-child(14)'
    # with is_scroll_to_bottom=True.
    time.sleep(10)

    result_pager = NextPageCssSelectorSetup(
        css_selector='#main > div.pagination.clearfix > div > span.next',
        stop_css_selector='span.next > i.hidden',
        main_pagefunc=PageFunc(func=self.get_shop_info),
    )
    self.until_click_no_next_page_by_css_selector(nextpagesetup=result_pager)
def get_shop_comment(self):
    """Open every stored comment URL, sort by newest, and page the reviews.

    Shop documents are fetched from ``TravelDriver.shop_collection`` (no
    MongoDB host is passed) with ``self.get_data_key()`` as the query;
    documents whose ``shop_comment_url`` is missing or falsy are ignored.
    For each remaining shop the comment page is opened, the '默认排序' and
    '最新点评' links are clicked in turn, and pagination is delegated to
    ``until_click_no_next_page_by_css_selector`` with
    ``from_page_get_data_list`` / ``page_comment_1`` as the per-page function.
    """
    self.fast_new_page(url='http://www.baidu.com')
    shop_collcetion = Mongodb(db=TravelDriver.db,
                              collection=TravelDriver.shop_collection,
                              ).get_collection()
    shop_name_url_list = [
        (doc.get('shop_name'), doc.get('shop_comment_url'))
        for doc in shop_collcetion.find(self.get_data_key())
        if doc.get('shop_comment_url')
    ]
    for position, (shop_name, comment_url) in enumerate(shop_name_url_list,
                                                        start=1):
        self.info_log(data='第%s个,%s' % (position, shop_name))
        self.fast_new_page(url=comment_url, is_scroll_to_bottom=False)
        time.sleep(3)
        # Click the sort links in turn: '默认排序' first, then '最新点评'.
        self.driver.find_element_by_link_text(link_text='默认排序').click()
        time.sleep(2)
        self.driver.find_element_by_link_text(link_text='最新点评').click()
        time.sleep(5)
        # NOTE: a retry loop used to run here (proxy check, failure-page
        # handling, reopening the URL while the title contained '验证中心');
        # it is disabled in this variant.
        self.until_click_no_next_page_by_css_selector(
            nextpagesetup=NextPageCssSelectorSetup(
                css_selector='#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.bottom-area.clearfix > div > a.NextPage',
                stop_css_selector='#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.bottom-area.clearfix > div > a.NextPage.hidden',
                main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                       page=page_comment_1),
                pause_time=5))
        self.close_curr_page()
def get_comment_list(self):
    """Open every stored shop URL and hand its comment pages to the pager.

    Shop documents are fetched from ``TravelDriver.shop_collection`` (MongoDB
    host ``localhost``) with ``self.get_data_key()`` as the query; documents
    whose ``shop_url`` is missing or falsy are ignored.  Pagination is
    delegated to ``until_click_no_next_page_by_css_selector`` with
    ``from_page_get_data_list`` / ``page_comment_1`` as the per-page function.
    """
    self.fast_new_page(url="http://www.baidu.com")
    shop_collcetion = Mongodb(db=TravelDriver.db,
                              collection=TravelDriver.shop_collection,
                              host='localhost').get_collection()
    shop_name_url_list = [
        (doc.get('shop_name'), doc.get('shop_url'))
        for doc in shop_collcetion.find(self.get_data_key())
        if doc.get('shop_url')
    ]
    for position, (shop_name, shop_url) in enumerate(shop_name_url_list,
                                                     start=1):
        self.info_log(data='第%s个,%s' % (position, shop_name))
        # NOTE: a retry loop used to run here (proxy check, failure-page
        # handling, reopening the URL while the title contained
        # '请求数据错误'); it is disabled and the page is opened once.
        self.fast_new_page(url=shop_url)
        self.until_click_no_next_page_by_css_selector(
            nextpagesetup=NextPageCssSelectorSetup(
                css_selector='#allCmtComment > div.paging.orangestyle > div > a.nextpage',
                stop_css_selector='#allCmtComment > div.paging.orangestyle > div > a.nextpage.hidden',
                main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                       page=page_comment_1),
                pause_time=5))
        self.close_curr_page()