예제 #1
0
    def get_shop_comment(self):
        """Visit every stored shop that has a ``shop_url`` and page through its comments.

        Shop documents come from the ``TravelDriver.shop_collection`` Mongo
        collection, filtered by ``self.get_data_key()``.  For each shop the
        page is (re)opened until its title no longer contains '验证中心'; then
        pagination is handed to ``until_click_no_next_page_by_css_selector``
        with ``from_page_get_data_list`` / ``page_comment_1`` as the per-page
        function, and the shop page is closed.
        """
        self.fast_new_page(url='http://www.baidu.com')
        shop_collection = Mongodb(db=TravelDriver.db,
                                  collection=TravelDriver.shop_collection,
                                  host='10.1.17.15').get_collection()
        # Only shops that actually carry a URL can be visited.
        shops = [
            (doc.get('shop_name'), doc.get('shop_url'))
            for doc in shop_collection.find(self.get_data_key())
            if doc.get('shop_url')
        ]
        for position, (shop_name, shop_url) in enumerate(shops, start=1):
            self.info_log(data='第%s个,%s' % (position, shop_name))

            # Original author's note: verification happens the first time a
            # page is opened and is not needed afterwards.
            # NOTE(review): the loop has no retry limit - confirm that is intended.
            while True:
                self.is_ready_by_proxy_ip()
                self.switch_window_by_index(index=-1)
                self.deal_with_failure_page()
                self.fast_new_page(url=shop_url)
                time.sleep(1)
                self.switch_window_by_index(index=-1)  # select the page
                if '验证中心' not in self.driver.title:
                    break
                # The title marks a verification page: close it and try again.
                self.info_log(data='关闭验证页面!!!')
                self.close_curr_page()

            collect_comments = PageFunc(func=self.from_page_get_data_list,
                                        page=page_comment_1)
            pager = NextPageCssSelectorSetup(
                css_selector='#remark_page > a.page-next',
                stop_css_selector='#remark_page > a.page-next.hidden',
                main_pagefunc=collect_comments)
            self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
            self.close_curr_page()
예제 #2
0
 def get_comment_list(self):
     """Visit every stored shop that has a ``shop_url`` and page through its comments.

     Shop documents come from the ``TravelDriver.shop_collection`` Mongo
     collection on ``localhost``, filtered by ``self.get_data_key()``.  For
     each shop, ``self.shop_name`` is set, the shop page is opened, and
     pagination is handed to ``until_click_no_next_page_by_css_selector``
     with ``from_page_get_data_list`` / ``page_comment_1`` as the per-page
     function.
     """
     self.fast_new_page(url="http://www.baidu.com")
     shop_collection = Mongodb(db=TravelDriver.db,
                               collection=TravelDriver.shop_collection,
                               host='localhost').get_collection()
     # Only shops that actually carry a URL can be visited.
     shops = [
         (doc.get('shop_name'), doc.get('shop_url'))
         for doc in shop_collection.find(self.get_data_key())
         if doc.get('shop_url')
     ]
     for position, (shop_name, shop_url) in enumerate(shops, start=1):
         self.info_log(data='第%s个,%s' % (position, shop_name))
         self.shop_name = shop_name
         # A Baidu page is opened before the shop page on every iteration.
         # NOTE(review): only one page is closed per iteration - confirm the
         # extra page is not leaked.
         self.fast_new_page("http://www.baidu.com")
         self.fast_new_page(url=shop_url)

         collect_comments = PageFunc(func=self.from_page_get_data_list,
                                     page=page_comment_1)
         pager = NextPageCssSelectorSetup(
             css_selector=(
                 '#allCmtComment > div.paging.orangestyle > div > a.nextpage'),
             stop_css_selector=(
                 '#allCmtComment > div.paging.orangestyle > div > '
                 'a.nextpage.hidden'),
             main_pagefunc=collect_comments,
             pause_time=2)
         self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
         self.close_curr_page()
 def get_shop_info(self):
     """Read shop data from the current page and extend it via ``page_shop_2``.

     The ``page_shop_1`` data list is fetched first; it is then passed to
     ``from_page_add_data_to_data_list`` together with an extra page function
     built around ``get_newest_comment_data_by_css_selector``.  Any exception
     is reported through ``self.error_log`` and not re-raised.
     """
     next_review_selector = (
         '#review-list > div.review-list-container > '
         'div.review-list-main > div.reviews-wrapper > '
         'div.bottom-area.clearfix > div > a.NextPage')
     shop_title_selector = (
         '#poi-detail > div.container > div.base-info > '
         'div.main-detail.clearfix > div.main-detail-left > '
         'div.main-detail-left-top.clearfix > '
         'div.hotel-detail-info > div > h1')
     try:
         shop_rows = self.from_page_get_data_list(page=page_shop_1)

         before_paging = PageFunc(func=self.more_comment)
         after_paging = PageFunc(func=self.close_curr_page)
         comment_pager = NextPageCssSelectorSetup(
             css_selector=next_review_selector,
             page=page_comment_1,
             pause_time=2,
             pre_pagefunc=before_paging,
             after_pagefunc=after_paging)

         newest_comments = PageFunc(
             func=self.get_newest_comment_data_by_css_selector,
             nextpagesetup=comment_pager,
             shop_name_css_selector=shop_title_selector,
             is_effective=False)

         self.from_page_add_data_to_data_list(
             page=page_shop_2,
             pre_page=page_shop_1,
             data_list=shop_rows,
             extra_pagefunc=newest_comments)
     except Exception as exc:
         self.error_log(e=str(exc))
예제 #4
0
 def get_shop_info(self):
     """Read shop data from the current page and extend it via ``page_shop_2``.

     The ``page_shop_1`` data list is fetched and passed to
     ``from_page_add_data_to_data_list``.  Any exception is reported through
     ``self.error_log`` and not re-raised.
     """
     try:
         shop_rows = self.from_page_get_data_list(page=page_shop_1)

         # NOTE(review): this pager is built but never used, because the extra
         # page function that consumed it
         # (PageFunc(func=self.get_newest_comment_data_by_css_selector,
         # nextpagesetup=...)) is disabled.  The construction is kept so that
         # behaviour stays unchanged - confirm whether it can be removed.
         comment_pager = NextPageCssSelectorSetup(
             css_selector=(
                 'div._j_commentlist > div.m-pagination > a.pi.pg-next'),
             stop_css_selector=(
                 'div._j_commentlist > div.m-pagination > a.pi.pg-last'),
             page=page_comment_1,
             pause_time=2)

         self.from_page_add_data_to_data_list(
             page=page_shop_2,
             pre_page=page_shop_1,
             data_list=shop_rows)
     except Exception as exc:
         self.error_log(e=str(exc))
예제 #5
0
    def get_shop_info(self):
        """Read shop data from the current page and extend it via ``page_shop_2``.

        The ``page_shop_1`` data list is fetched first; it is then passed to
        ``from_page_add_data_to_data_list`` together with an extra page
        function built around ``get_newest_comment_data_by_css_selector`` and
        a ``#comment_paging`` pager.  Any exception is reported through
        ``self.error_log`` and not re-raised.
        """
        try:
            shop_rows = self.from_page_get_data_list(page=page_shop_1)

            comment_pager = NextPageCssSelectorSetup(
                css_selector='#comment_paging > a.page_next',
                stop_css_selector='#comment_paging > a.page_next.hidden',
                page=page_comment_1,
                pause_time=2)
            newest_comments = PageFunc(
                func=self.get_newest_comment_data_by_css_selector,
                nextpagesetup=comment_pager)

            self.from_page_add_data_to_data_list(
                page=page_shop_2,
                pre_page=page_shop_1,
                data_list=shop_rows,
                extra_pagefunc=newest_comments)
        except Exception as exc:
            self.error_log(e=str(exc))
예제 #6
0
 def get_shop_info_list(self):
     """Search piao.qunar.com for ``self.data_region`` and walk the result pages.

     The region text is typed into ``#searchValue``, ``#searchBtn`` is
     clicked, and pagination is handed to
     ``until_click_no_next_page_by_css_selector`` with ``self.get_shop_info``
     as the main page function and ``self.driver.refresh`` as the pre-page
     function.
     """
     # The first page is opened with fast_get_page().
     time.sleep(1)
     self.fast_get_page(url='http://piao.qunar.com/',
                        is_scroll_to_bottom=False)

     self.until_scroll_to_center_send_text_by_css_selector(
         css_selector="#searchValue", text=self.data_region)
     time.sleep(3)
     self.until_scroll_to_center_click_by_css_selector(
         css_selector='#searchBtn')

     refresh_first = PageFunc(func=self.driver.refresh)
     collect_shop = PageFunc(func=self.get_shop_info)
     result_pager = NextPageCssSelectorSetup(
         css_selector='#pager-container > div > a.next',
         pre_pagefunc=refresh_first,
         main_pagefunc=collect_shop)
     self.until_click_no_next_page_by_css_selector(nextpagesetup=result_pager)
예제 #7
0
 def get_shop_info_list(self):
     """Search hotels.ctrip.com for ``self.data_region`` and walk the result pages.

     The region text is typed into ``#txtCity`` and confirmed with Enter,
     ``#btnSearch`` is clicked, and pagination is handed to
     ``until_click_no_next_page_by_css_selector`` with ``self.get_shop_info``
     as the main page function.
     """
     self.driver.get('https://www.baidu.com')
     self.fast_new_page('http://hotels.ctrip.com/',
                        is_scroll_to_bottom=False)

     city_input = "#txtCity"
     self.until_scroll_to_center_send_text_by_css_selector(
         css_selector=city_input, text=self.data_region)
     time.sleep(3)
     self.until_scroll_to_center_send_enter_by_css_selector(
         css_selector=city_input)
     time.sleep(2)

     self.fast_click_same_page_by_css_selector(
         click_css_selector='#btnSearch')

     collect_shop = PageFunc(func=self.get_shop_info)
     result_pager = NextPageCssSelectorSetup(
         css_selector='#downHerf.c_down',
         main_pagefunc=collect_shop)
     self.until_click_no_next_page_by_css_selector(nextpagesetup=result_pager)
 def get_shop_info_list(self):
     """Reach the target site through a Baidu search, load cookies, and walk the hotel list.

     Searches Baidu for ``self.data_region + self.data_website``, opens the
     first link whose text contains ``self.data_website``, adds the cookies
     stored in ``./cookies/dianping_cookies.json`` to the driver, then closes
     and re-opens that link.  It then follows the '酒店' link and hands
     pagination to ``until_click_no_next_page_by_css_selector`` with
     ``self.get_shop_info`` as the main page function.

     Raises:
         OSError / ValueError: propagated from reading or parsing the cookie
             file (nothing is caught here).
     """
     self.fast_get_page(url='https://www.baidu.com')
     time.sleep(2)
     self.until_scroll_to_center_send_text_by_css_selector(
         css_selector='#kw', text=self.data_region + self.data_website)
     self.until_scroll_to_center_send_enter_by_css_selector(css_selector='#kw')
     self.fast_click_first_item_page_by_partial_link_text(
         link_text=self.data_website)

     with open('./cookies/dianping_cookies.json', 'r', encoding='utf-8') as f:
         cookies = json.load(f)
     for cookie in cookies:
         self.driver.add_cookie(cookie)

     # The page is closed and opened again after the cookies are added,
     # presumably so that they take effect.
     self.close_curr_page()
     self.fast_click_first_item_page_by_partial_link_text(
         link_text=self.data_website)
     time.sleep(2)
     self.fast_click_first_item_page_by_partial_link_text(link_text='酒店')

     # The log message used to announce a 20 second pause while the code
     # slept for 5; the message is now derived from the real duration.
     pause_seconds = 5
     self.debug_log(data='暂停%s秒......' % pause_seconds)
     time.sleep(pause_seconds)

     result_pager = NextPageCssSelectorSetup(
         css_selector='#poi-list > div.content-wrap > div > div.page > a.next',
         main_pagefunc=PageFunc(func=self.get_shop_info),
         is_next=False)
     self.until_click_no_next_page_by_css_selector(nextpagesetup=result_pager)
예제 #9
0
 def get_shop_info(self):
     """Read shop data from the current page and extend it via ``page_shop_2``.

     The ``page_shop_1`` data list is fetched first; it is then passed to
     ``from_page_add_data_to_data_list`` together with an extra page function
     built around ``get_newest_comment_data_by_css_selector``, whose pager
     runs ``self.change_index_sort`` as its pre-page function.  Any exception
     is reported through ``self.error_log`` and not re-raised.
     """
     next_comment_selector = (
         '#divCtripComment > div.c_page_box > div > a.c_down')
     try:
         shop_rows = self.from_page_get_data_list(page=page_shop_1)

         sort_first = PageFunc(func=self.change_index_sort)
         comment_pager = NextPageCssSelectorSetup(
             css_selector=next_comment_selector,
             page=page_comment_1,
             pause_time=2,
             pre_pagefunc=sort_first)
         newest_comments = PageFunc(
             func=self.get_newest_comment_data_by_css_selector,
             nextpagesetup=comment_pager)

         self.from_page_add_data_to_data_list(
             page=page_shop_2,
             pre_page=page_shop_1,
             data_list=shop_rows,
             extra_pagefunc=newest_comments)
     except Exception as exc:
         self.error_log(e=str(exc))
예제 #10
0
 def get_shop_info_list(self):
     """Search piao.ctrip.com for ``self.data_region`` and walk the result pages.

     The region text is typed into ``#mainInput`` and confirmed with Enter;
     pagination is then handed to ``until_click_no_next_page_by_css_selector``
     with ``self.get_shop_info`` as the main page function and
     ``self.driver.refresh`` as the pre-page function.
     """
     self.driver.get('https://www.baidu.com')
     self.fast_new_page(url='http://piao.ctrip.com/',
                        is_scroll_to_bottom=False)
     self.driver.refresh()

     search_box = "#mainInput"
     self.until_scroll_to_center_send_text_by_css_selector(
         css_selector=search_box, text=self.data_region)
     time.sleep(3)
     self.until_scroll_to_center_send_enter_by_css_selector(
         css_selector=search_box)

     refresh_first = PageFunc(func=self.driver.refresh)
     collect_shop = PageFunc(func=self.get_shop_info)
     result_pager = NextPageCssSelectorSetup(
         css_selector=(
             '#searchResultContainer > div.pkg_page.basefix > a.down'),
         stop_css_selector=(
             '#searchResultContainer > div.pkg_page.basefix > '
             'a.down.down_nocurrent'),
         pre_pagefunc=refresh_first,
         main_pagefunc=collect_shop)
     self.until_click_no_next_page_by_css_selector(nextpagesetup=result_pager)
예제 #11
0
    def get_shop_info_list(self):
        """Search piao.qunar.com for ``self.data_region`` and walk the result pages.

        The region text is typed into ``#searchValue`` and ``#searchBtn`` is
        clicked; pagination is then handed to
        ``until_click_no_next_page_by_css_selector`` with
        ``from_page_get_data_list`` / ``page_shop_1`` as the main page function.
        """
        self.fast_get_page('http://piao.qunar.com/', is_max=False)
        time.sleep(2)

        # Earlier variants (now disabled) clicked the ticket navigation tab
        # first and submitted the search with Enter instead of the button.
        self.until_send_text_by_css_selector(css_selector='#searchValue',
                                             text=self.data_region)
        self.fast_click_page_by_css_selector('#searchBtn')

        time.sleep(10)

        collect_shops = PageFunc(func=self.from_page_get_data_list,
                                 page=page_shop_1)
        result_pager = NextPageCssSelectorSetup(
            css_selector='#pager-container > div > a.next',
            stop_css_selector='#pager-container > div > a.next.hidden',
            main_pagefunc=collect_shops)
        self.until_click_no_next_page_by_css_selector(
            nextpagesetup=result_pager)
예제 #12
0
    def get_shop_info(self):
        """Read shop data from the current page and extend it via ``page_shop_2``.

        The ``page_shop_1`` data list is fetched first; it is then passed to
        ``from_page_add_data_to_data_list`` together with an extra page
        function built around ``get_newest_comment_data_by_css_selector`` and
        a ``#comment_main`` pager.  Any exception is reported through
        ``self.error_log`` and not re-raised.
        """
        next_comment_selector = (
            '#comment_main > div > div.wrapper > '
            'div.b_ugcpager.clrfix.js-pager-container > '
            'div.js-pager > div > a.next > span')
        # The stop selector is the same element carrying the "hidden" class.
        stop_comment_selector = next_comment_selector + '.hidden'
        try:
            shop_rows = self.from_page_get_data_list(page=page_shop_1)

            comment_pager = NextPageCssSelectorSetup(
                css_selector=next_comment_selector,
                stop_css_selector=stop_comment_selector,
                page=page_comment_1,
                pause_time=2)
            newest_comments = PageFunc(
                func=self.get_newest_comment_data_by_css_selector,
                nextpagesetup=comment_pager)

            self.from_page_add_data_to_data_list(
                page=page_shop_2,
                pre_page=page_shop_1,
                data_list=shop_rows,
                extra_pagefunc=newest_comments)
        except Exception as exc:
            self.error_log(e=str(exc))
예제 #13
0
 def get_shop_info(self):
     """Read shop data from the current page and extend it via ``page_shop_2``.

     The ``page_shop_1`` data list is fetched first; it is then passed to
     ``from_page_add_data_to_data_list`` together with an extra page function
     built around ``get_newest_comment_data_by_css_selector`` and a
     ``ul.pkg_page`` pager.  Any exception is reported through
     ``self.error_log`` and not re-raised.
     """
     # Full selector paths recorded by the original author, for reference:
     # root > div > div > div > div > div:nth-child(3) > div.main-bd > div > div:nth-child(5) > div.detail-left > div.content-wrapper.clearfix > ul.pkg_page > a.down
     # root > div > div > div > div > div:nth-child(3) > div.main-bd > div > div:nth-child(5) > div.detail-left > div.content-wrapper.clearfix > ul.pkg_page > a.down.disabled.nocurrent
     next_comment_selector = (
         'div.detail-left > div.content-wrapper.clearfix > '
         'ul.pkg_page > a.down')
     stop_comment_selector = (
         'div.detail-left > div.content-wrapper.clearfix > '
         'ul.pkg_page > a.down.disabled.nocurrent')
     try:
         shop_rows = self.from_page_get_data_list(page=page_shop_1)

         comment_pager = NextPageCssSelectorSetup(
             css_selector=next_comment_selector,
             stop_css_selector=stop_comment_selector,
             page=page_comment_1,
             pause_time=2)
         newest_comments = PageFunc(
             func=self.get_newest_comment_data_by_css_selector,
             nextpagesetup=comment_pager)

         self.from_page_add_data_to_data_list(
             page=page_shop_2,
             pre_page=page_shop_1,
             data_list=shop_rows,
             extra_pagefunc=newest_comments)
     except Exception as exc:
         self.error_log(e=str(exc))
예제 #14
0
    def get_shop_info_list(self):
        """Search hotel.tuniu.com by city and region, then walk the result pages.

        The city returned by ``get_city_from_region_CHN(self.data_region)`` is
        typed into ``#txtCity`` and ``#search_hotel`` is clicked; then
        ``self.data_region`` is typed into ``#keyWord`` and ``#search_hotel``
        is clicked again.  Pagination is handed to
        ``until_click_no_next_page_by_css_selector`` with ``self.get_shop_info``
        as the main page function.
        """
        search_button = '#search_hotel'

        self.fast_get_page(url='http://hotel.tuniu.com/',
                           is_scroll_to_bottom=False)
        city = self.get_city_from_region_CHN(self.data_region)

        # First search: by city.
        self.until_send_text_by_css_selector(css_selector='#txtCity',
                                             text=city)
        time.sleep(5)
        self.fast_click_page_by_css_selector(search_button)

        # Second search: by region keyword.
        self.until_send_text_by_css_selector(css_selector='#keyWord',
                                             text=self.data_region)
        time.sleep(5)
        self.fast_click_page_by_css_selector(search_button)
        # A click on a top-navigation link used to follow here; it is disabled.

        time.sleep(10)

        collect_shop = PageFunc(func=self.get_shop_info)
        result_pager = NextPageCssSelectorSetup(
            css_selector='#main > div.pagination.clearfix > div > span.next',
            stop_css_selector='span.next > i.hidden',
            main_pagefunc=collect_shop)
        self.until_click_no_next_page_by_css_selector(
            nextpagesetup=result_pager)
예제 #15
0
    def get_shop_comment(self):
        """Visit every stored shop that has a ``shop_comment_url`` and page through its reviews.

        Shop documents come from the ``TravelDriver.shop_collection`` Mongo
        collection, filtered by ``self.get_data_key()``.  For each shop the
        comment page is opened, the '默认排序' link and then the '最新点评'
        link are clicked, and pagination is handed to
        ``until_click_no_next_page_by_css_selector`` with
        ``from_page_get_data_list`` / ``page_comment_1`` as the per-page
        function.
        """
        next_review_selector = (
            '#review-list > div.review-list-container > '
            'div.review-list-main > div.reviews-wrapper > '
            'div.bottom-area.clearfix > div > a.NextPage')
        # The stop selector is the same element carrying the "hidden" class.
        stop_review_selector = next_review_selector + '.hidden'

        self.fast_new_page(url='http://www.baidu.com')
        shop_collection = Mongodb(
            db=TravelDriver.db,
            collection=TravelDriver.shop_collection).get_collection()
        # Only shops that actually carry a comment URL can be visited.
        shops = [
            (doc.get('shop_name'), doc.get('shop_comment_url'))
            for doc in shop_collection.find(self.get_data_key())
            if doc.get('shop_comment_url')
        ]
        for position, (shop_name, comment_url) in enumerate(shops, start=1):
            self.info_log(data='第%s个,%s' % (position, shop_name))

            self.fast_new_page(url=comment_url, is_scroll_to_bottom=False)
            time.sleep(3)
            # Click the '默认排序' link, then the '最新点评' link (presumably
            # switching the sort order to the newest reviews).
            # NOTE(review): find_element_by_link_text is deprecated in
            # Selenium 4 and removed in later 4.x releases - confirm the
            # pinned Selenium version before upgrading.
            self.driver.find_element_by_link_text(link_text='默认排序').click()
            time.sleep(2)
            self.driver.find_element_by_link_text(link_text='最新点评').click()
            time.sleep(5)
            # A retry loop that re-opened the page while its title contained
            # '验证中心' used to run here; it is currently disabled.

            collect_comments = PageFunc(func=self.from_page_get_data_list,
                                        page=page_comment_1)
            pager = NextPageCssSelectorSetup(
                css_selector=next_review_selector,
                stop_css_selector=stop_review_selector,
                main_pagefunc=collect_comments,
                pause_time=5)
            self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
            self.close_curr_page()
예제 #16
0
    def get_comment_list(self):
        """Visit every stored shop that has a ``shop_url`` and page through its comments.

        Shop documents come from the ``TravelDriver.shop_collection`` Mongo
        collection on ``localhost``, filtered by ``self.get_data_key()``.  For
        each shop the page is opened and pagination is handed to
        ``until_click_no_next_page_by_css_selector`` with
        ``from_page_get_data_list`` / ``page_comment_1`` as the per-page
        function.
        """
        self.fast_new_page(url="http://www.baidu.com")
        shop_collection = Mongodb(db=TravelDriver.db,
                                  collection=TravelDriver.shop_collection,
                                  host='localhost').get_collection()
        # Only shops that actually carry a URL can be visited.
        shops = [
            (doc.get('shop_name'), doc.get('shop_url'))
            for doc in shop_collection.find(self.get_data_key())
            if doc.get('shop_url')
        ]
        for position, (shop_name, shop_url) in enumerate(shops, start=1):
            self.info_log(data='第%s个,%s' % (position, shop_name))
            # A retry loop that re-opened the page while its title contained
            # '请求数据错误' used to run here; it is currently disabled.

            self.fast_new_page(url=shop_url)

            collect_comments = PageFunc(func=self.from_page_get_data_list,
                                        page=page_comment_1)
            pager = NextPageCssSelectorSetup(
                css_selector=(
                    '#allCmtComment > div.paging.orangestyle > div > '
                    'a.nextpage'),
                stop_css_selector=(
                    '#allCmtComment > div.paging.orangestyle > div > '
                    'a.nextpage.hidden'),
                main_pagefunc=collect_comments,
                pause_time=5)
            self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
            self.close_curr_page()