def get_shop_comment(self):
    """Visit every stored shop that has a ``shop_url`` and page through its comments.

    A Baidu page is opened first, then the shop documents matching
    ``self.get_data_key()`` are read from the shop collection.  For each
    shop the page is reopened until its title no longer contains
    '验证中心' (the verification page), after which the CSS-selector pager
    helper is started with ``self.from_page_get_data_list`` /
    ``page_comment_1`` as its page function.
    """
    self.fast_new_page(url='http://www.baidu.com')
    collection = Mongodb(db=TravelDriver.db,
                         collection=TravelDriver.shop_collection,
                         host='10.1.17.15').get_collection()
    # Read all (name, url) pairs up front so the cursor is consumed
    # before any browsing starts.
    shops = [(doc.get('shop_name'), doc.get('shop_url'))
             for doc in collection.find(self.get_data_key())
             if doc.get('shop_url')]
    for position, (shop_name, shop_url) in enumerate(shops, start=1):
        self.info_log(data='第%s个,%s' % (position, shop_name))
        # Original author's note: verification is only expected on the
        # first open, not afterwards.
        while True:
            self.is_ready_by_proxy_ip()
            self.switch_window_by_index(index=-1)
            self.deal_with_failure_page()
            self.fast_new_page(url=shop_url)
            time.sleep(1)
            self.switch_window_by_index(index=-1)  # select the newest window
            if '验证中心' not in self.driver.title:
                break
            # Landed on the verification page: close it and try again.
            self.info_log(data='关闭验证页面!!!')
            self.close_curr_page()
        pager = NextPageCssSelectorSetup(
            css_selector='#remark_page > a.page-next',
            stop_css_selector='#remark_page > a.page-next.hidden',
            main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                   page=page_comment_1))
        self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
        self.close_curr_page()
Beispiel #2
0
    def get_comment_info_list(self):
        """Page through the comment page of every stored shop.

        Reads the shop documents matching ``self.get_data_key()`` and, for
        each one that has a ``shop_comment_url``, opens that URL and runs
        the "next page" link pager with ``self.from_page_get_data_list`` /
        ``page_comment_1`` as its page function.

        The filter tests ``shop_comment_url`` because that is the URL that
        is actually opened.  The previous filter tested ``shop_url``, which
        let documents without a comment URL through with ``None`` as URL.
        """
        self.fast_new_page(url='http://www.baidu.com')
        shop_collection = Mongodb(db=TravelDriver.db,
                                  collection=TravelDriver.shop_collection,
                                  host='localhost').get_collection()
        shops = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                 for doc in shop_collection.find(self.get_data_key())
                 if doc.get('shop_comment_url')]

        for position, (shop_name, comment_url) in enumerate(shops, start=1):
            self.fast_new_page(url="https://www.baidu.com")
            # Anti-crawling measures may be in place on the target site.
            self.info_log(data='第%s个,%s' % (position, shop_name))
            self.fast_new_page(url=comment_url)

            # Expose the current shop name on the instance.
            # NOTE(review): presumably read by the page/field callbacks - confirm.
            self.shop_name = shop_name

            time.sleep(5)

            self.until_click_no_next_page_by_partial_link_text(
                nextpagesetup=NextPageLinkTextSetup(
                    link_text='下一页',
                    main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                           page=page_comment_1),
                    pause_time=2))
            self.close_curr_page()
Beispiel #3
0
    def get_comment_info_list(self):
        """Open each shop's comment URL and extract ``page_comment_1`` data once.

        Only documents with a truthy ``shop_comment_url`` are visited.  The
        extracted list is not used further inside this method.
        """
        # Open the Baidu home page first.
        self.fast_new_page('http://www.baidu.com')
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        shops = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                 for doc in collection.find(self.get_data_key())
                 if doc.get('shop_comment_url')]

        for position, (shop_name, comment_url) in enumerate(shops, start=1):
            # Anti-crawling measures may be in place on the target site.
            self.info_log(data='第%s个,%s' % (position, shop_name))
            self.fast_new_page('http://www.baidu.com')
            self.fast_new_page(url=comment_url)
            self.shop_name = shop_name
            # A disabled variant used to stamp every extracted comment with
            # the shop name and save it through save_data_to_mongodb with
            # fl_comment1; only the extraction call remains.
            self.from_page_get_data_list(page=page_comment_1)
            self.close_curr_page()
Beispiel #4
0
 def get_shop_comment(self):
     """Page through the comment page of every shop with a ``shop_comment_url``.

     Each comment URL is reopened until the page title no longer contains
     '验证中心' (the verification page).  The "next page" link pager is then
     run with ``self.from_page_get_data_list`` / ``page_comment_1`` as its
     page function.
     """
     collection = Mongodb(db=TravelDriver.db,
                          collection=TravelDriver.shop_collection,
                          host='10.1.17.15').get_collection()
     shops = [(doc.get('shop_name'), doc.get('shop_comment_url'))
              for doc in collection.find(self.get_data_key())
              if doc.get('shop_comment_url')]
     for position, (shop_name, comment_url) in enumerate(shops, start=1):
         self.info_log(data='第%s个,%s' % (position, shop_name))
         while True:
             self.is_ready_by_proxy_ip()
             self.switch_window_by_index(index=-1)
             self.deal_with_failure_page()
             self.fast_new_page(url=comment_url)
             time.sleep(1)
             self.switch_window_by_index(index=-1)  # select the newest window
             if '验证中心' not in self.driver.title:
                 break
             # Verification page shown: close it and retry.
             self.info_log(data='关闭验证页面!!!')
             self.close_curr_page()
         pager = NextPageLinkTextSetup(
             link_text='下一页',
             main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                    page=page_comment_1))
         self.until_click_no_next_page_by_partial_link_text(
             nextpagesetup=pager)
         self.close_curr_page()
    def get_shop_des(self):
        """Open every stored shop URL and write ``detail_shop_2`` fields back.

        All documents matching ``self.get_data_key()`` are visited with no
        filtering; a stricter filter on ``shop_flag``, ``shop_comment_num``
        and ``shop_score`` exists only as disabled code in the original.
        The update is keyed by the data key merged with the shop URL.
        """
        self.fast_new_page(url="http://www.baidu.com")
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        shops = [(doc.get('shop_name'), doc.get('shop_url'))
                 for doc in collection.find(self.get_data_key())]
        for position, (shop_name, shop_url) in enumerate(shops, start=1):
            self.info_log(data='第%s个,%s' % (position, shop_name))
            self.fast_new_page(url=shop_url, is_scroll_to_bottom=True)
            time.sleep(5)
            detail = self.from_fieldlist_get_data(page=detail_shop_2)
            selector = self.merge_dict(self.get_data_key(),
                                       {FieldName.SHOP_URL: shop_url})
            self.update_data_to_mongodb(collection, selector, detail)
            self.close_curr_page()
Beispiel #6
0
 def get_comment_list(self):
     """Page through the comments shown on every shop page with a ``shop_url``.

     For each shop the name is stored on ``self.shop_name``, a Baidu page
     and then the shop page are opened, and the CSS-selector pager is run
     with ``self.from_page_get_data_list`` / ``page_comment_1`` as its
     page function.
     """
     self.fast_new_page(url="http://www.baidu.com")
     collection = Mongodb(db=TravelDriver.db,
                          collection=TravelDriver.shop_collection,
                          host='localhost').get_collection()
     shops = [(doc.get('shop_name'), doc.get('shop_url'))
              for doc in collection.find(self.get_data_key())
              if doc.get('shop_url')]
     next_selector = '#allCmtComment > div.paging.orangestyle > div > a.nextpage'
     stop_selector = '#allCmtComment > div.paging.orangestyle > div > a.nextpage.hidden'
     for position, (shop_name, shop_url) in enumerate(shops, start=1):
         self.info_log(data='第%s个,%s' % (position, shop_name))
         self.shop_name = shop_name
         self.fast_new_page("http://www.baidu.com")
         self.fast_new_page(url=shop_url)
         pager = NextPageCssSelectorSetup(
             css_selector=next_selector,
             stop_css_selector=stop_selector,
             main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                    page=page_comment_1),
             pause_time=2)
         self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
         self.close_curr_page()
Beispiel #7
0
 def get_shop_detail(self):
     """Open every distinct stored shop URL and write ``page_shop_2`` fields back.

     URLs are de-duplicated through a set.  Each page is reopened until its
     title no longer contains '验证中心' (the verification page).  A running
     zero-based counter is printed before each URL is processed.
     """
     collection = Mongodb(db=TravelDriver.db,
                          collection=TravelDriver.shop_collection,
                          host='10.1.17.15').get_collection()
     shop_urls = {doc.get(FieldName.SHOP_URL)
                  for doc in collection.find(self.get_data_key())}
     for counter, shop_url in enumerate(shop_urls):
         print(counter)
         while True:
             self.is_ready_by_proxy_ip()
             self.switch_window_by_index(index=-1)
             self.deal_with_failure_page()
             self.fast_new_page(url=shop_url)
             time.sleep(1)
             self.switch_window_by_index(index=-1)  # select the newest window
             if '验证中心' not in self.driver.title:
                 break
             # Verification page shown: close it and retry.
             self.info_log(data='关闭验证页面!!!')
             self.close_curr_page()
         detail = self.from_fieldlist_get_data(page=page_shop_2)
         selector = self.merge_dict(self.get_data_key(),
                                    {FieldName.SHOP_URL: shop_url})
         self.update_data_to_mongodb(collection, selector, detail)
         self.close_curr_page()
    def get_comment_info_list(self):
        """Open each shop's comment page, prepare it, and extract comments.

        For every document with a ``shop_comment_url`` the page is opened
        and three best-effort UI steps are attempted: click the header tab,
        click the tag with ``data-tagtype="44"``, and touch-scroll down
        from the add-comment button.  A failure in any of these steps is
        logged through ``self.info_log`` together with the exception and
        the crawl continues.  Afterwards the go-to-top button is clicked
        and ``page_comment_1`` data is extracted.

        NOTE(review): unlike the sibling crawlers this method never calls
        ``close_curr_page`` - confirm whether that is intentional.
        """
        shop_collection = Mongodb(db=TravelDriver.db,
                                  collection=TravelDriver.shop_collection,
                                  host='localhost').get_collection()
        shops = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                 for doc in shop_collection.find(self.get_data_key())
                 if doc.get('shop_comment_url')]

        header_tab_selector = '#main-page > header > h2 > div:nth-child(2)'
        add_comment_selector = (
            '#main-page > div.mp-comment-mpcon > div.mp-addcomment.mp-border-top > a > div'
        )

        for position, (shop_name, comment_url) in enumerate(shops, start=1):
            # Anti-crawling measures may be in place on the target site.
            self.info_log(data='第%s个,%s' % (position, shop_name))
            self.shop_name = shop_name
            self.fast_new_page(url=comment_url)
            time.sleep(5)

            # If the header button exists, click it.  The lookup raises
            # when the element is missing, which skips the click.
            try:
                self.driver.find_element_by_css_selector(
                    css_selector=header_tab_selector)
                self.fast_click_same_page_by_css_selector(
                    click_css_selector=header_tab_selector)
                time.sleep(6)
            except Exception as exc:
                self.info_log(
                    data='header tab click skipped: %s' % (exc,))

            # Click the "latest" tag.
            try:
                latest_tag = self.driver.find_element_by_xpath(
                    '//li[@data-tagtype="44"]')
                ActionChains(self.driver).click(latest_tag).perform()
                time.sleep(5)
            except Exception as exc:
                self.info_log(
                    data='latest-tag click skipped: %s' % (exc,))

            # Scroll downwards, starting from the add-comment button.
            try:
                button = self.driver.find_element_by_css_selector(
                    css_selector=add_comment_selector)
                touch = TouchActions(self.driver)
                touch.scroll_from_element(on_element=button,
                                          xoffset=0,
                                          yoffset=8000).perform()
                time.sleep(5)
            except Exception as exc:
                self.info_log(
                    data='scroll from add-comment button skipped: %s' % (exc,))

            self.fast_click_same_page_by_css_selector(
                click_css_selector='#main-page > div.mp-gotop > div')
            time.sleep(6)
            self.from_page_get_data_list(page=page_comment_1)
Beispiel #9
0
    def get_shop_detail(self):
        """Open every distinct stored shop URL and write ``page_shop_2`` fields back.

        URLs are de-duplicated through a set.  Each update is keyed by
        ``self.get_data_key()`` merged with the shop URL.
        """
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        shop_urls = {doc.get(FieldName.SHOP_URL)
                     for doc in collection.find(self.get_data_key())}

        for shop_url in shop_urls:
            self.fast_new_page(url=shop_url)
            time.sleep(5)
            detail = self.from_fieldlist_get_data(page=page_shop_2)
            selector = self.merge_dict(self.get_data_key(),
                                       {FieldName.SHOP_URL: shop_url})
            self.update_data_to_mongodb(collection, selector, detail)
            self.close_curr_page()
    def get_comment_info_list(self):
        """Open each shop's comment URL and extract ``page_comment_1`` data once.

        Only documents with a truthy ``shop_comment_url`` are visited.  The
        extracted list is not used further inside this method.
        """
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        shops = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                 for doc in collection.find(self.get_data_key())
                 if doc.get('shop_comment_url')]

        for position, (shop_name, comment_url) in enumerate(shops, start=1):
            # Anti-crawling measures may be in place on the target site.
            self.info_log(data='第%s个,%s' % (position, shop_name))
            self.fast_new_page(url=comment_url)
            time.sleep(5)

            self.from_page_get_data_list(page=page_comment_1)
            self.close_curr_page()
Beispiel #11
0
 def get_shop_list(subtype):
     """Extract the ``page_shop_1`` list and save each entry merged with ``subtype``.

     NOTE(review): ``self`` is not a parameter, so it has to come from an
     enclosing scope that is outside this excerpt - confirm.
     """
     for entry in self.from_page_get_data_list(page=page_shop_1):
         # A new Mongodb handle is built for every saved entry, as before.
         target = Mongodb(db=TravelDriver.db,
                          collection=TravelDriver.shop_collection,
                          host='10.1.17.15')
         record = self.merge_dict(entry, subtype)
         self.save_data_to_mongodb(fieldlist=fl_shop1,
                                   mongodb=target,
                                   data=record,
                                   external_key_name=[FieldName.SUBTYPE_NAME])
Beispiel #12
0
    def get_comment_info_list(self):
        """Open each shop page, switch to its third tab, and extract comments.

        For every document with a ``shop_url`` a Baidu page and then the
        shop page are opened, the page is refreshed after a pause, the
        third entry of the first tab list is clicked, and ``page_comment_1``
        data is extracted.  The extracted list is not used further here.
        """
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        shops = [(doc.get('shop_name'), doc.get('shop_url'))
                 for doc in collection.find(self.get_data_key())
                 if doc.get('shop_url')]
        comment_tab_selector = '#detail-placeholder > div.main > div.tabs-box > div:nth-child(1) > ul > li:nth-child(3)'

        for position, (shop_name, shop_url) in enumerate(shops, start=1):
            self.fast_new_page(url="https://www.baidu.com")
            # Anti-crawling measures may be in place on the target site.
            self.info_log(data='第%s个,%s' % (position, shop_name))
            self.fast_new_page(url=shop_url)
            self.shop_name = shop_name
            time.sleep(5)
            self.driver.refresh()
            self.fast_click_same_page_by_css_selector(
                click_css_selector=comment_tab_selector)

            self.from_page_get_data_list(page=page_comment_1)
            self.close_curr_page()
Beispiel #13
0
    def get_shop_comment(self):
        """Debug-state crawler: pages one hard-coded review page per stored shop.

        NOTE(review): for every document returned by the query the same
        hard-coded Dianping review URL is opened; the collected
        (name, comment URL) pairs are never used.  The original carried a
        disabled per-shop loop with the '验证中心' verification retry, which
        suggests this is an experiment in progress - confirm before use.
        """
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='10.1.17.15').get_collection()
        shops = []
        tag_selector_prefix = '#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.reviews-tags > div.content > span:nth-child('
        for doc in collection.find(self.get_data_key()):
            if doc.get('shop_comment_url'):
                shops.append(
                    (doc.get('shop_name'), doc.get('shop_comment_url')))
            self.fast_new_page(
                url='http://www.dianping.com/shop/110833569/review_all')
            tag_selector = tag_selector_prefix + str(1) + ')'
            # The attribute value is read but not used; indexing [0] raises
            # IndexError when no element matches, exactly as before.
            self.driver.find_elements_by_css_selector(
                tag_selector)[0].get_attribute('text')

            pager = NextPageLinkTextSetup(
                link_text='下一页',
                main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                       page=page_comment_1))
            self.until_click_no_next_page_by_partial_link_text(
                nextpagesetup=pager)
            self.close_curr_page()
    def get_shop_comment(self):
        """Page through each shop's reviews after switching to the latest-review order.

        For every document with a ``shop_comment_url`` the page is opened
        without scrolling to the bottom, the '默认排序' link and then the
        '最新点评' link are clicked, and the CSS-selector pager is run with
        ``self.from_page_get_data_list`` / ``page_comment_1`` as its page
        function.  A '验证中心' verification retry loop exists only as
        disabled code in the original.
        """
        self.fast_new_page(url='http://www.baidu.com')
        collection = Mongodb(
            db=TravelDriver.db,
            collection=TravelDriver.shop_collection).get_collection()
        shops = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                 for doc in collection.find(self.get_data_key())
                 if doc.get('shop_comment_url')]
        next_selector = '#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.bottom-area.clearfix > div > a.NextPage'
        stop_selector = '#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.bottom-area.clearfix > div > a.NextPage.hidden'
        for position, (shop_name, comment_url) in enumerate(shops, start=1):
            self.info_log(data='第%s个,%s' % (position, shop_name))

            self.fast_new_page(url=comment_url, is_scroll_to_bottom=False)
            time.sleep(3)
            sort_menu = self.driver.find_element_by_link_text(
                link_text='默认排序')
            sort_menu.click()
            time.sleep(2)
            latest_first = self.driver.find_element_by_link_text(
                link_text='最新点评')
            latest_first.click()
            time.sleep(5)

            pager = NextPageCssSelectorSetup(
                css_selector=next_selector,
                stop_css_selector=stop_selector,
                main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                       page=page_comment_1),
                pause_time=5)
            self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
            self.close_curr_page()
    def get_comment_list(self):
        """Page through the comments shown on every shop page with a ``shop_url``.

        Each shop page is opened directly and the CSS-selector pager is run
        with ``self.from_page_get_data_list`` / ``page_comment_1`` as its
        page function.  A retry loop checking the title for '请求数据错误'
        exists only as disabled code in the original.
        """
        self.fast_new_page(url="http://www.baidu.com")
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        shops = [(doc.get('shop_name'), doc.get('shop_url'))
                 for doc in collection.find(self.get_data_key())
                 if doc.get('shop_url')]
        next_selector = '#allCmtComment > div.paging.orangestyle > div > a.nextpage'
        stop_selector = '#allCmtComment > div.paging.orangestyle > div > a.nextpage.hidden'
        for position, (shop_name, shop_url) in enumerate(shops, start=1):
            self.info_log(data='第%s个,%s' % (position, shop_name))

            self.fast_new_page(url=shop_url)
            pager = NextPageCssSelectorSetup(
                css_selector=next_selector,
                stop_css_selector=stop_selector,
                main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                       page=page_comment_1),
                pause_time=5)
            self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
            self.close_curr_page()
    def get_shop_detial(self):
        """Open every shop page with a ``shop_url`` and write ``page_shop_2`` fields back.

        For each shop a Baidu page and then the shop page are opened, the
        first '只看文字' link is clicked, and the extracted fields are written
        to the shop collection keyed by ``self.get_data_key()`` merged with
        the shop URL.  A retry loop checking the title for '请求数据错误'
        exists only as disabled code in the original.
        """
        self.fast_new_page(url="http://www.baidu.com")
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        shops = [(doc.get('shop_name'), doc.get('shop_url'))
                 for doc in collection.find(self.get_data_key())
                 if doc.get('shop_url')]
        for position, (shop_name, shop_url) in enumerate(shops, start=1):
            self.fast_new_page(url='http://www.baidu.com')
            self.info_log(data='第%s个,%s' % (position, shop_name))

            self.fast_new_page(url=shop_url)
            self.fast_click_first_item_same_page_by_partial_link_text(
                link_text='只看文字')
            detail = self.from_fieldlist_get_data(page=page_shop_2)
            selector = self.merge_dict(self.get_data_key(),
                                       {FieldName.SHOP_URL: shop_url})
            self.update_data_to_mongodb(collection, selector, detail)
            self.close_curr_page()
Beispiel #17
0
    for i in p('div.htl_info_table > table > tbody').items('tr'):
        item = (lambda x: x if x else '')(i.text()).split('\n')
        if len(item) >= 2:
            around.setdefault(item[0], (lambda x: x[1:] if len(x) >= 2 else [''])(item))
    return json.dumps(around, ensure_ascii=False)

# Field list used by page_shop_2 (the Ctrip hotel shop detail page).
# Every field reads the `attr` of the element matched by `css_selector`;
# most of them pass the value through a `filter_func` defined elsewhere.
fl_shop2 = Fieldlist(
    Field(fieldname=FieldName.SHOP_ROOM_RECOMMEND_ALL,css_selector='#hotelRoomBox', attr='innerHTML', filter_func=get_recommend_all_room_dict, pause_time=1, is_focus=True),
    Field(fieldname=FieldName.SHOP_ROOM_FAVOURABLE,css_selector='#divDetailMain > div.htl_room_table',attr='innerHTML', filter_func=get_favourable_room, is_focus=True),
    Field(fieldname=FieldName.SHOP_INTRO, css_selector='#hotel_info_comment > div',attr='innerHTML', filter_func=get_hotel_intro, is_focus=True),
    # The phone field uses a regex/repl pair instead of a filter function:
    # it keeps everything before the first '<' of the `data-real` attribute.
    Field(fieldname=FieldName.SHOP_PHONE, css_selector='#J_realContact', attr='data-real', regex='^([^<]*).*$', repl=r'\1', is_focus=True),
    Field(fieldname=FieldName.SHOP_STATISTICS, css_selector='#commentList > div.detail_cmt_box',attr='innerHTML',filter_func=get_shop_statistics, is_focus=True),
    Field(fieldname=FieldName.SHOP_AROUND_FACILITIES, css_selector='#hotel_info_comment > div', attr='innerHTML',filter_func=get_around_facilities, is_focus=True),
)

# Ctrip hotel shop list page (name: '携程酒店店铺列表页面'); uses fl_shop1, which
# is defined outside this excerpt.
page_shop_1 = Page(name='携程酒店店铺列表页面', fieldlist=fl_shop1, listcssselector=ListCssSelector(list_css_selector='#hotel_list > div.hotel_new_list', item_css_selector='ul.hotel_item'), mongodb=Mongodb(db=TravelDriver.db, collection=TravelDriver.shop_collection))

# Ctrip hotel shop detail page (name: '携程酒店店铺详情页面'); uses fl_shop2.
# NOTE(review): tabsetup presumably names the link clicked to reach the
# detail page from a list item - confirm against TabSetup.
page_shop_2 = Page(name='携程酒店店铺详情页面', fieldlist=fl_shop2, tabsetup=TabSetup(click_css_selector='li.hotel_price_icon > div.action_info > p > a'), mongodb=Mongodb(db=TravelDriver.db,collection=TravelDriver.shop_collection), is_save=True)

fl_comment1 = Fieldlist(
    Field(fieldname=FieldName.COMMENT_USER_NAME, css_selector='div.user_info.J_ctrip_pop > p.name'),
    Field(fieldname=FieldName.COMMENT_TIME, css_selector='div.comment_main > div.comment_txt > div.comment_bar > p > span', regex=r'[^\d-]*'),
    Field(fieldname=FieldName.SHOP_NAME, css_selector='#J_htl_info > div.name > h2.cn_n', is_isolated=True),
    Field(fieldname=FieldName.COMMENT_CONTENT, css_selector='div.comment_main > div.comment_txt > div.J_commentDetail'),
    Field(fieldname=FieldName.COMMENT_USER_IMG, css_selector='div.user_info.J_ctrip_pop > p.head > span > img', attr='src'),
    Field(fieldname=FieldName.COMMENT_USER_CHECK_IN, css_selector='div.comment_main > p > span.date'),
    Field(fieldname=FieldName.COMMENT_USER_ROOM, css_selector='div.comment_main > p > a'),
    Field(fieldname=FieldName.COMMENT_TYPE, css_selector='div.comment_main > p > span.type'),
    Field(fieldname=FieldName.COMMENT_SCORE, css_selector='div.comment_main > p > span.score', regex=r'[^\d.]*'),
    Field(fieldname=FieldName.COMMENT_SCORE_TEXT, css_selector='div.comment_main > p > span.small_c', attr='data-value'),
    Field(fieldname=FieldName.COMMENT_USER_NUM, css_selector='div.user_info.J_ctrip_pop > p.num'),
Beispiel #18
0
    Field(fieldname=FieldName.SHOP_TAG,
          css_selector='div.txt > span.comment-list',
          attr='innerHTML',
          filter_func=get_shop_tag,
          pause_time=1),
    Field(fieldname=FieldName.SUBTYPE_NAME,
          css_selector='div.txt > div.tag-addr > a:nth-child(1)',
          filter_func=get_shop_subtype_name),
    Field(fieldname=FieldName.SHOP_ADDRESS,
          css_selector='div.txt > div.tag-addr > span.addr'))

# Dianping car-service shop list page (name: '大众点评爱车店铺列表页面').
# List items are the <li> elements under '#shop-all-list > ul'; the fields
# come from fl_shop1 and the page is bound to the shop collection.
page_shop_1 = Page(name='大众点评爱车店铺列表页面',
                   fieldlist=fl_shop1,
                   listcssselector=ListCssSelector(
                       list_css_selector='#shop-all-list > ul > li'),
                   mongodb=Mongodb(db=TravelDriver.db,
                                   collection=TravelDriver.shop_collection),
                   is_save=True)


def get_shop_time(self, _str):
    """Return the text of the last ``p.info.info-indent`` node mentioning '营业时间'.

    ``_str`` is parsed with PyQuery.  An empty string is returned when no
    node matches or when anything raises while parsing or scanning.
    """
    try:
        matches = [node.text()
                   for node in PyQuery(_str)('p.info.info-indent').items()
                   if '营业时间' in node.text()]
        # When several nodes match, the last one wins.
        return matches[-1] if matches else ''
    except Exception:
        return ''

        if len(star) == 2:
            stars.setdefault(star[0], re.sub(r'[^\d]*', '', star[1]))
    tag_star_dict.setdefault('star', stars)
    return json.dumps(tag_star_dict, ensure_ascii=False)

# Field list used by page_shop_2 (the Dianping hotel shop detail page).
# The filter functions referenced here are defined outside this excerpt.
fl_shop2 = Fieldlist(
    Field(fieldname=FieldName.SHOP_GRADE, css_selector='#poi-detail > div.container > div.base-info > div.main-detail.clearfix > div.main-detail-right > div.hotel-appraise > div.hotel-scope > span', pause_time=5, is_focus=True),
    Field(fieldname=FieldName.SHOP_PHONE, css_selector='#poi-detail > div.container > div.base-info > div.main-detail.clearfix > div.main-detail-left > div.main-detail-left-top.clearfix > div.hotel-detail-info > div > div.call-info > div > span.call-number', is_focus=True),
    Field(fieldname=FieldName.SHOP_ADDRESS, css_selector='#poi-detail > div.container > div.base-info > div.main-detail.clearfix > div.main-detail-left > div.main-detail-left-top.clearfix > div.hotel-detail-price > div.hotel-address-box.clearfix > span.hotel-address', is_focus=True),
    Field(fieldname=FieldName.SHOP_ROOM_RECOMMEND_ALL, css_selector='#deal', attr='innerHTML', filter_func=get_shop_room_all, is_focus=True),
    Field(fieldname=FieldName.SHOP_INTRO, css_selector='#poi-detail > div.container > div.sub-content.clearfix > div.main > div> div.hotel-info', attr='innerHTML', filter_func=get_shop_intro, is_focus=True),
    Field(fieldname=FieldName.SHOP_STATISTICS, css_selector='#poi-detail > div.container > div.sub-content.clearfix > div.main > div.user-comment-info', attr='innerHTML', filter_func=get_shop_statistics, is_focus=True),
    # The regex matches runs of non-digit characters.
    # NOTE(review): presumably those are stripped so only the count remains - confirm.
    Field(fieldname=FieldName.SHOP_COMMENT_NUM, css_selector='#comment > div > h2 > a > span.count', regex=r'[^\d]*'),
)

# Dianping hotel shop list page (name: '大众点评酒店店铺列表页面').
# NOTE(review): item_start=11 / item_end=12 restrict the list to a narrow
# item range, which looks like a debugging leftover - confirm.
page_shop_1 = Page(name='大众点评酒店店铺列表页面', fieldlist=fl_shop1, listcssselector=ListCssSelector(list_css_selector='#poi-list > div.content-wrap > div > div.list-wrapper > div.content > ul > li',item_start=11,item_end=12), mongodb=Mongodb(db=TravelDriver.db, collection=TravelDriver.shop_collection))

# Dianping hotel shop detail page (name: '大众点评酒店店铺详情页面'); uses fl_shop2.
page_shop_2 = Page(name='大众点评酒店店铺详情页面', fieldlist=fl_shop2, tabsetup=TabSetup(click_css_selector='div.hotel-info-ctn > div.hotel-info-main > h2 > a.hotel-name-link'),mongodb=Mongodb(db=TravelDriver.db, collection=TravelDriver.shop_collection), is_save=True)

def get_rate(self, _str):
    """Return the digits contained in ``_str``, divided by ten, as a string.

    All non-digit characters are removed and the remaining digits are read
    as one integer, so ``'sml-str45'`` becomes ``'4.5'``.

    Raises:
        ValueError: if ``_str`` contains no digits at all.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    digits = re.sub(r'[^\d]*', '', _str)
    return str(int(digits) / 10)

def get_comment_rate_tag(self, _str):
    """Return a JSON array with the stripped text of every ``span.item`` in ``_str``.

    ``_str`` is parsed with PyQuery; non-ASCII characters are kept as-is
    in the JSON output.
    """
    document = PyQuery(_str)
    tags = [node.text().strip() for node in document('span.item').items()]
    return json.dumps(tags, ensure_ascii=False)

def get_comment_content(self, _str):
    """Return the text content of ``_str`` with every '收起评论' occurrence removed."""
    plain_text = PyQuery(_str).text()
    return plain_text.replace('收起评论', '')
          attr='innerHTML',
          filter_func=get_shop_facilities,
          offset=6,
          try_times=10,
          pause_time=1),
    Field(fieldname=FieldName.SHOP_STATISTICS,
          css_selector='#_j_comment',
          attr='innerHTML',
          filter_func=get_shop_stattistics),
)

# Mafengwo hotel shop list page (name: '马蜂窝酒店店铺列表页面').
# List items are the <div> children of '#_j_hotel_list'; the fields come
# from fl_shop1, which is defined outside this excerpt.
page_shop_1 = Page(
    name='马蜂窝酒店店铺列表页面',
    fieldlist=fl_shop1,
    listcssselector=ListCssSelector(list_css_selector='#_j_hotel_list > div'),
    mongodb=Mongodb(db=TravelDriver.db,
                    collection=TravelDriver.shop_collection))

# Mafengwo hotel shop detail page (name: '马蜂窝酒店店铺详情页面'); uses fl_shop2.
# NOTE(review): tabsetup presumably names the link clicked to reach the
# detail page from a list item - confirm against TabSetup.
page_shop_2 = Page(name='马蜂窝酒店店铺详情页面',
                   fieldlist=fl_shop2,
                   tabsetup=TabSetup(click_css_selector='div.hotel-pic > a'),
                   mongodb=Mongodb(db=TravelDriver.db,
                                   collection=TravelDriver.shop_collection),
                   is_save=True)


class MafengwoHotelSpider(TravelDriver):
    def get_shop_info(self):
        try:
            shop_data_list = self.from_page_get_data_list(page=page_shop_1)
            self.from_page_add_data_to_data_list(page=page_shop_2,
                                                 data_list=shop_data_list,
    Field(fieldname=FieldName.SHOP_IMG, css_selector=' div > div.flt1 > a > img', attr='src',is_info=True),
    Field(fieldname=FieldName.SHOP_ADDRESS, css_selector='div > div.ct-text > ul > li:nth-child(1) > a',
          is_info=True),

    Field(fieldname=FieldName.SHOP_GRADE,css_selector='',filter_func=get_shop_grade),
    #正则表达式不一样
    Field(fieldname=FieldName.SHOP_COMMENT_NUM,css_selector='div > div.ct-text > ul > li:nth-child(2) > a',filter_func=get_comment_num, is_info=True),

    Field(fieldname=FieldName.SHOP_FEATURE, css_selector='div > div.ct-text > p',is_info=True),
    Field(fieldname=FieldName.SHOP_PRICE,css_selector= '',filter_func=get_shop_price, is_info=True)
)


# Detail-page field list; it is declared here with no fields.
fl_shop2 = Fieldlist(
)
# Mafengwo scenic-spot shop list page (name: '马蜂窝景点店铺列表页面'); uses fl_shop1.
page_shop_1 = Page(name='马蜂窝景点店铺列表页面', fieldlist=fl_shop1, listcssselector=ListCssSelector(list_css_selector='#_j_search_result_left > div:nth-child(1) > div > ul > li',), mongodb=Mongodb(db=TravelDriver.db, collection=TravelDriver.shop_collection),is_save=True)
# NOTE(review): this bare Page() is overwritten by the very next statement;
# it looks like a leftover - confirm Page() has no needed side effect
# before removing it.
page_shop_2 = Page()
# Mafengwo scenic-spot shop detail page (name: '马蜂窝景点店铺详情页面'); uses the
# empty fl_shop2 above.
page_shop_2 = Page(name='马蜂窝景点店铺详情页面', fieldlist=fl_shop2, tabsetup=TabSetup(click_css_selector='div > div.ct-text > h3 > a'), mongodb=Mongodb(db=TravelDriver.db,collection=TravelDriver.shop_collection))




def get_comment_grade(self, _str):
    """Return the last element of ``_str`` converted to ``str``.

    ``self`` is accepted but not used.
    """
    last_item = _str[-1]
    return str(last_item)
def get_comment_time(self, _str):
    """Return the first ten characters of ``_str``.

    The time format is unified as ``2018-12-08``, which is ten characters
    long. ``self`` is accepted but not used.
    """
    date_length = 10
    return _str[:date_length]
def get_comment_year(self, _str):
    """Return the first four characters of the ten-character prefix of ``_str``.

    ``self`` is accepted but not used.
    """
    date_part = _str[:10]
    return date_part[:4]
    return json.dumps({'grade_list': p(
        'div:nth-child(3) > div.review-filter > ul.ta-list.clearfix > li.taService').text().split('\n'),
                      'comment_num_list': p(
                          'div:nth-child(3) > div.review-filter > ul.filter-list.clearfix').text().split('\n')[1:]},
                     ensure_ascii=False)

# Fields read from the Fliggy hotel detail page. Apart from the address,
# each field takes an element's innerHTML and hands it to a filter function.
fl_shop2 = Fieldlist(
    Field(
        fieldname=FieldName.SHOP_ADDRESS,
        css_selector='#hotel-page > div > div.hotel-box.hotel-baseinfo > div.info > div.base > p.address',
        offset=6,
        try_times=10,
        pause_time=5,
    ),
    Field(
        fieldname=FieldName.SHOP_ROOM_RECOMMEND_ALL,
        css_selector='#J_RoomList',
        attr='innerHTML',
        filter_func=get_room_all,
    ),
    Field(
        fieldname=FieldName.SHOP_INTRO,
        css_selector='#hotel-desc',
        attr='innerHTML',
        filter_func=get_shop_intro,
    ),
    Field(
        fieldname=FieldName.SHOP_FACILITIES,
        css_selector='#hotel-facility',
        attr='innerHTML',
        filter_func=get_shop_facility,
    ),
    Field(
        fieldname=FieldName.SHOP_TRAFFIC,
        css_selector='#rich-map-wrap',
        attr='innerHTML',
        filter_func=get_shop_traffic,
    ),
    Field(
        fieldname=FieldName.SHOP_STATISTICS,
        css_selector='#hotel-review',
        attr='innerHTML',
        filter_func=get_shop_statistics,
    ),
)

# Fliggy hotel shop list page.
page_shop_1 = Page(
    name='飞猪酒店店铺列表页面',
    fieldlist=fl_shop1,
    listcssselector=ListCssSelector(list_css_selector='#J_List > div'),
    mongodb=Mongodb(
        db=TravelDriver.db,
        collection=TravelDriver.shop_collection,
    ),
)

# Fliggy hotel shop detail page, opened through the shop title link.
page_shop_2 = Page(
    name='飞猪酒店店铺详情页面',
    fieldlist=fl_shop2,
    tabsetup=TabSetup(click_css_selector='div > div.row-center > div > h5 > a'),
    mongodb=Mongodb(
        db=TravelDriver.db,
        collection=TravelDriver.shop_collection,
    ),
    is_save=True,
)

class FliggyHotelSpider(TravelDriver):
    """Fliggy hotel spider built on ``TravelDriver``."""

    def page_shop_2_func(self):
        """Click every tab of the rich-map widget, then wait three seconds.

        The tab bar is moved to and the page is scrolled by an offset of -200
        before each ``li.J_Tab`` element is clicked. Any exception raised
        while doing so is reported through ``self.error_log`` and does not
        propagate; the final sleep always runs.
        """
        tab_bar_selector = '#rich-map-wrap > div.J_RichCon > div.tabs > ul'
        tab_item_selector = '#rich-map-wrap > div.J_RichCon > div.tabs > ul > li.J_Tab'
        try:
            self.move_to_element_by_css_selector(css_selector=tab_bar_selector)
            self.vertical_scroll_by(offset=-200)
            tab_items = self.until_presence_of_all_elements_located_by_css_selector(
                css_selector=tab_item_selector)
            for tab_item in tab_items:
                tab_item.click()
        except Exception:
            self.error_log(e='找不到元素')
        time.sleep(3)
from selenium.webdriver.remote.webelement import WebElement

from spider.driver.base.tabsetup import TabSetup
from spider.driver.base.field import Field, Fieldlist
from spider.driver.base.page import Page, PageGroup
from spider.driver.base.listcssselector import ListCssSelector
from spider.driver.base.mongodb import Mongodb

# Building blocks shared by both pages below; all values are placeholders.
fl = Fieldlist(
    Field(fieldname=12),
    Field(fieldname=13),
)
mongo = Mongodb(db='122', collection='12')
lcs = ListCssSelector(list_css_selector=12)
tab = TabSetup(url_name=12)

# Two pages that differ only in their name.
p = Page(name=122, fieldlist=fl, mongodb=mongo, listcssselector=lcs, tabsetup=tab)
p1 = Page(name=123, fieldlist=fl, mongodb=mongo, listcssselector=lcs, tabsetup=tab)

# Group the pages and print the first item the group yields.
pg = PageGroup(p, p1)
print(next(pg))
Beispiel #24
0
    def get_shop_address(self):
        """Search every stored shop on the Baidu map ``getpoint`` page and save the result.

        Flow:
        1. Open the ``getpoint`` page and switch the city to '淳安'.
        2. Load ``(shop_name, shop_url)`` pairs from the shop collection for
           ``self.get_data_key()``; documents without a ``shop_url`` are skipped.
        3. For each shop: re-select the city, search for the shop name
           (prefixed with '千岛湖' unless the name already contains it), click
           the ``#no_0 > a`` element (presumably the first search hit), read
           the ``page_shop_2`` fields and update the Mongo document matched by
           the data key plus the shop URL.

        Any exception raised while clicking, scraping or updating is swallowed
        and a fixed message is printed instead.
        """

        def choose_city():
            # Open the city switcher, type the city name and confirm,
            # pausing after typing and after confirming.
            self.fast_click_first_item_same_page_by_partial_link_text(
                link_text='更换城市')
            self.until_scroll_to_center_send_text_by_css_selector(
                css_selector='#selCityInput', text='淳安')
            time.sleep(2)
            self.fast_click_same_page_by_css_selector(
                click_css_selector='#selCityButton')
            time.sleep(2)

        self.fast_new_page(
            url='http://api.map.baidu.com/lbsapi/getpoint/index.html')
        choose_city()

        shop_collcetion = Mongodb(db=TravelDriver.db,
                                  collection=TravelDriver.shop_collection,
                                  host='localhost').get_collection()

        # Only shops that have a URL can be matched back to their document.
        shop_name_url_list = [
            (doc.get('shop_name'), doc.get('shop_url'))
            for doc in shop_collcetion.find(self.get_data_key())
            if doc.get('shop_url')
        ]

        for position, (shop_name, shop_url) in enumerate(shop_name_url_list,
                                                         start=1):
            self.info_log(data='第%s个,%s' % (position, shop_name))

            # The city is selected again before every search.
            choose_city()

            # Make sure the search text carries the region name.
            if '千岛湖' in shop_name:
                search_text = shop_name
            else:
                search_text = '千岛湖' + shop_name
            self.until_scroll_to_center_send_text_by_css_selector(
                css_selector='#localvalue', text=search_text)
            time.sleep(2)
            self.fast_click_same_page_by_css_selector(
                click_css_selector='#localsearch')
            time.sleep(2)

            try:
                self.driver.find_element_by_css_selector(
                    css_selector='#no_0 > a').click()
                time.sleep(2)

                data = self.from_fieldlist_get_data(page=page_shop_2)
                document_key = self.merge_dict(
                    self.get_data_key(), {FieldName.SHOP_URL: shop_url})
                self.update_data_to_mongodb(shop_collcetion, document_key,
                                            data)
            except Exception:
                print("改地址无经纬度")
Beispiel #25
0
# -*- coding:utf-8 -*-
import os,signal
from .params import *
from .models import Project
from spider.driver.base.field import FieldName
from spider.driver.travel.core.traveldriver import WEBSITE_NAME_LIST,DataSourceName,TravelDriver
from spider.driver.base.mongodb import Mongodb

# Module-level collection handles, created when this module is imported.
# Both use the host, port and database configured on TravelDriver.
shops_collection = Mongodb(
    host=TravelDriver.host,
    port=TravelDriver.port,
    db=TravelDriver.db,
    collection=TravelDriver.shop_collection,
).get_collection()
comments_collection = Mongodb(
    host=TravelDriver.host,
    port=TravelDriver.port,
    db=TravelDriver.db,
    collection=TravelDriver.comments_collection,
).get_collection()

def ProjectStatistics():
    """Accumulate per-website shop and comment statistics as an HTML list.

    For every website in ``WEBSITE_NAME_LIST`` the visible code counts the
    scenic-spot (``DataSourceName.SPOT``) shops and comments, sums the
    ``SHOP_COMMENT_NUM`` value of each shop, and appends one ``<li>`` entry
    to ``project_statistics_data``.

    NOTE(review): the visible body stops right after the first hotel query;
    it never closes the ``<ul>`` and has no ``return`` statement. The
    function appears to be truncated here - confirm against the full file.
    """
    # The markup starts with a literal double quote followed by '<ul>'.
    project_statistics_data = '\"<ul>'
    # Totals accumulated across all websites.
    comment_count_sum = 0
    predict_comment_count_sum = 0
    for website in WEBSITE_NAME_LIST:
        # Number of scenic-spot shops and comments stored for this website.
        shop_count = shops_collection.find({FieldName.DATA_WEBSITE:website,FieldName.DATA_SOURCE:DataSourceName.SPOT}).count()
        comment_count = comments_collection.find({FieldName.DATA_WEBSITE:website,FieldName.DATA_SOURCE:DataSourceName.SPOT}).count()
        # Sum of the comment numbers recorded on the shops themselves.
        comment_sum = 0
        for shop in shops_collection.find({FieldName.DATA_WEBSITE:website,FieldName.DATA_SOURCE:DataSourceName.SPOT}):
            # NOTE(review): a shop without SHOP_COMMENT_NUM makes .get() return
            # None, and the += below would then raise TypeError - confirm that
            # the field is always present and numeric.
            comment_sum += shop.get(FieldName.SHOP_COMMENT_NUM)
            predict_comment_count_sum += shop.get(FieldName.SHOP_COMMENT_NUM)
        project_statistics_data += '<li>{}-景点 店铺:{}家,评论预计{}条,当前{}条</li>'.format(website,shop_count,comment_sum,comment_count)
        comment_count_sum += comment_count
        # Hotel shop count for the same website; shop_count is reused.
        shop_count = shops_collection.find({FieldName.DATA_WEBSITE: website, FieldName.DATA_SOURCE: DataSourceName.HOTEL}).count()
def get_comment_url(self, _str):
    """Return ``_str`` with ``"/review_all"`` appended.

    ``self`` is accepted but not used.
    """
    review_suffix = "/review_all"
    return _str + review_suffix
# Fields read from each item of the Dianping scenic-spot shop list.
fl_shop1 = Fieldlist(
    Field(
        fieldname=FieldName.SHOP_NAME,
        css_selector='div.txt > div.tit > a > h4',
    ),
    Field(
        fieldname=FieldName.SHOP_URL,
        css_selector='div.txt > div.tit > a',
        attr='href',
    ),
    Field(
        fieldname=FieldName.SHOP_COMMENT_NUM,
        css_selector='div.txt > div.comment > a.review-num',
    ),
    Field(
        fieldname=FieldName.SHOP_PRICE,
        css_selector='div.txt > div.comment > a.mean-price',
    ),
    Field(
        fieldname=FieldName.SHOP_RATE,
        css_selector='div.txt > div.comment > span',
        attr='class',
        regex=r'[^\d]*',
        filter_func=get_shop_rate,
    ),
    Field(
        fieldname=FieldName.SHOP_ADDRESS,
        css_selector='div.txt > div.tag-addr > span.addr',
    ),
    Field(
        fieldname=FieldName.SHOP_IMG,
        css_selector='div.pic > a > img',
        is_info=True,
    ),
    # No selector here: the value comes from the filter function.
    Field(
        fieldname=FieldName.SHOP_FEATURE,
        css_selector='',
        filter_func=get_shop_feature,
        is_info=True,
    ),
    Field(
        fieldname=FieldName.SHOP_GRADE,
        css_selector='div.txt > span > span:nth-child(1) > b',
        is_info=True,
    ),
    # Same link as SHOP_URL, passed through get_comment_url.
    Field(
        fieldname=FieldName.SHOP_COMMENT_URL,
        css_selector='div.txt > div.tit > a',
        attr='href',
        filter_func=get_comment_url,
        is_info=True,
    ),
)

# Dianping scenic-spot shop list page.
page_shop_1 = Page(
    name='大众点评景点店铺列表页面',
    fieldlist=fl_shop1,
    listcssselector=ListCssSelector(list_css_selector='#shop-all-list > ul > li'),
    mongodb=Mongodb(
        db=TravelDriver.db,
        collection=TravelDriver.shop_collection,
    ),
    is_save=True,
)

def get_shop_time(self, _str):
    """Return the text of the last ``p.info.info-indent`` node containing '营业时间'.

    ``_str`` is parsed with ``PyQuery``. An empty string is returned when no
    node matches or when any exception is raised along the way.
    """
    try:
        document = PyQuery(_str)
        matching_texts = [
            node.text()
            for node in document('p.info.info-indent').items()
            if '营业时间' in node.text()
        ]
        # The last matching node wins.
        return matching_texts[-1] if matching_texts else ''
    except Exception:
        return ''

def get_shop_promotion(self, _str):
    try:
        p = PyQuery(_str)
    Field(fieldname=FieldName.SHOP_NAME,css_selector=' div > div.h_info > div.h_info_text > div.h_info_base > p.h_info_b1 > a > span.info_cn',attr='innerHTML', is_info=True),

    Field(fieldname=FieldName.SHOP_URL,css_selector='div > div.h_info_text > div.h_info_base > p.h_info_b1 > a',attr='href',is_info=True),
    Field(fieldname=FieldName.SHOP_IMG, css_selector='div.h_info_pic > a > img', attr='big-src',is_info=True),
    #有些问题
    Field(fieldname=FieldName.SHOP_ADDRESS, css_selector='div > div.h_info_text > div.h_info_base > p.h_info_b2',is_info=True),
    Field(fieldname=FieldName.SHOP_PRICE,css_selector='div > div.h_info_text > div.h_info_pri > p:nth-child(1) > a > span.h_pri_num',is_info=True),
    #稍许有些问题
    Field(fieldname=FieldName.SHOP_COMMENT_NUM,css_selector='div > div.h_info_text > div.h_info_comt > a > span.c555.block.mt5'),
    Field(fieldname=FieldName.SHOP_GRADE, css_selector=' div > div.h_info_text > div.h_info_comt > a > span.h_info_comt_bg > i.c37e',is_info=True),
    Field(fieldname=FieldName.SHOP_RATE,css_selector='',filter_func=get_shop_rate, is_info=True),
    Field(fieldname=FieldName.SHOP_FEATURE,css_selector='',filter_func=get_shop_feature, is_info=True)

)
# Field list for the Elong hotel detail page; it declares no fields.
fl_shop2 = Fieldlist()

# Elong hotel shop list page.
page_shop_1 = Page(
    name='艺龙酒店店铺列表页面',
    fieldlist=fl_shop1,
    listcssselector=ListCssSelector(list_css_selector='#hotelContainer > div > div'),
    mongodb=Mongodb(
        db=TravelDriver.db,
        collection=TravelDriver.shop_collection,
    ),
    is_save=True,
)

# Elong hotel shop detail page, opened through the hotel title link.
page_shop_2 = Page(
    name='艺龙酒店店铺详情页面',
    fieldlist=fl_shop2,
    tabsetup=TabSetup(
        click_css_selector='div > div.h_info_text > div.h_info_base > p.h_info_b1 > a',
    ),
    mongodb=Mongodb(
        db=TravelDriver.db,
        collection=TravelDriver.shop_collection,
    ),
)

# Fields read from each item of the Elong hotel comment list.
fl_comment1 = Fieldlist(
    Field(
        fieldname=FieldName.COMMENT_USER_NAME,
        css_selector=' div.cmt_userinfo > div > p.cmt_un',
        is_info=True,
    ),
    Field(
        fieldname=FieldName.COMMENT_TIME,
        css_selector='div.cmt_info_mn > div > div.if_hd_r > span.cmt_con_time',
        is_info=True,
    ),
    # The selector starts at 'body' and the field is flagged is_isolated.
    Field(
        fieldname=FieldName.SHOP_NAME,
        css_selector='body > div.hdetail_rela_wrap > div > div.hrela_ns_wrap.clearfix > div.hdetail_main.hrela_name > div > h1',
        is_isolated=True,
        is_info=True,
    ),
    Field(
        fieldname=FieldName.COMMENT_CONTENT,
        css_selector='div.cmt_info_mn > p.cmt_txt',
        is_info=True,
    ),
    # Marked by the original author as problematic.
    Field(
        fieldname=FieldName.COMMENT_SCORE,
        css_selector='div.cmt_info_mn > div > div.if_hd > b',
        is_info=True,
    ),
)

# Elong hotel comment list page, stored in the comments collection.
page_comment_1 = Page(
    name='艺龙酒店评论列表',
    fieldlist=fl_comment1,
    listcssselector=ListCssSelector(list_css_selector='#review > ul > li'),
    mongodb=Mongodb(
        db=TravelDriver.db,
        collection=TravelDriver.comments_collection,
    ),
    is_save=True,
)
    return _str;

# Fields read from the Baidu catering shop detail page.
fl_shop2 = Fieldlist(
    # Selector notes kept from the original author:
    #phoenix_dom_3_0 > div > div.head-wrapper.c-title.c-color.c-flexbox.c-line-bottom > div.left > span
    #phoenix_dom_3_1 > div > div.head-wrapper.c-title.c-color.c-flexbox.c-line-bottom > div.left > span
    Field(
        fieldname=FieldName.SHOP_COMMENT_NUM,
        css_selector='div.card-box.special2-box.c-container >div.head-wrapper.c-title.c-color.c-flexbox.c-line-bottom > div.left > span',
        is_info=True,
    ),
    Field(
        fieldname=FieldName.SHOP_SCORE,
        css_selector='span.left-header-visit',
        is_info=True,
    ),
    Field(
        fieldname=FieldName.SHOP_CATEGORY_NAME,
        css_selector='span.left-header-stdtag',
        is_info=True,
    ),
    Field(
        fieldname=FieldName.SHOP_PRICE,
        css_selector='span.left-header-reference-price',
        is_info=True,
    ),
    Field(
        fieldname=FieldName.SHOP_NAME_SEARCH_KEY,
        css_selector='div.generalHead-left-header-title > span',
        filter_func=get_shop_name,
        is_info=True,
    ),
    Field(
        fieldname=FieldName.SHOP_PHONE,
        css_selector='#generalinfo > div.generalInfo-address-telnum > div.generalInfo-telnum.item > span.clampword.generalInfo-telnum-text',
        is_info=True,
    ),
    #generalheader > div.generalHead-left-header.animation-common > div.generalHead-left-header-title > span
)

#card-1 > div > ul > li:nth-child(1) > div.cf.mb_5 > div.ml_30.mr_85 > div:nth-child(2)
# Baidu catering shop list page.
page_shop_1 = Page(
    name='百度餐饮店铺列表页面',
    fieldlist=fl_shop1,
    listcssselector=ListCssSelector(list_css_selector='ul.poilist > li'),
    mongodb=Mongodb(
        db=TravelDriver.db,
        collection=TravelDriver.shop_collection,
    ),
    is_save=True,
)

# Baidu catering shop detail page, opened through the shop link in a list item.
page_shop_2 = Page(
    name='百度餐饮店铺详情页面',
    fieldlist=fl_shop2,
    tabsetup=TabSetup(
        click_css_selector='div.cf > div.ml_30 > div:nth-child(1) > span > a',
    ),
    mongodb=Mongodb(
        db=TravelDriver.db,
        collection=TravelDriver.shop_collection,
    ),
    is_save=True,
)


def get_shop_lng(self, _str):
    """Return the longitude part of a comma-separated coordinate string.

    The text before the first comma is taken as the longitude. It is
    returned unchanged (as a string) when it parses to a float inside the
    accepted range 118.650908..119.243071, both bounds included. Otherwise
    the fixed fallback longitude 119.051491 (a float) is returned.

    An empty or non-numeric longitude used to raise ``ValueError``; it now
    takes the same fallback as an out-of-range value. ``self`` is accepted
    but not used.

    Note that the two outcomes have different types: ``str`` for an
    accepted value, ``float`` for the fallback.
    """
    min_lng = 118.650908
    max_lng = 119.243071
    fallback_lng = 119.051491

    lng_text = _str.split(',')[0]
    try:
        # Parse once; the original converted the same text twice.
        lng_value = float(lng_text)
    except ValueError:
        # Unparsable text is treated like an out-of-range longitude.
        return fallback_lng

    if min_lng <= lng_value <= max_lng:
        return lng_text
    return fallback_lng
# -*- coding:utf-8 -*-
from spider.driver.base.field import *
from spider.driver.travel.core.traveldriver import WebsiteName, DataSourceName, TravelDriver
from spider.driver.base.mongodb import Mongodb

# The shop collection is read from host 10.1.17.15; no host argument is
# passed for the comments collection.
shops = Mongodb(
    db=TravelDriver.db,
    collection=TravelDriver.shop_collection,
    host='10.1.17.15',
).get_collection()
comments = Mongodb(db=TravelDriver.db,
                   collection=TravelDriver.comments_collection).get_collection()

# Query: Qunar hotel records for the '千岛湖' region.
key = {
    FieldName.DATA_SOURCE: DataSourceName.HOTEL,
    FieldName.DATA_WEBSITE: WebsiteName.QUNAR,
    FieldName.DATA_REGION: '千岛湖',
}

# Collect every shop name, then print the total count followed by the
# distinct count so that duplicated names show up as a difference.
shop_name_list = [doc.get(FieldName.SHOP_NAME) for doc in shops.find(key)]
print(len(shop_name_list))
print(len(set(shop_name_list)))
Beispiel #30
0
#\33 6822720 > div:nth-child(1) > div
    Field(fieldname=FieldName.SHOP_IMG, css_selector='a > div.img-container.lazy-img-box.fl > img', attr='src', is_info=True),
    Field(fieldname=FieldName.SHOP_ADDRESS, css_selector= '',filter_func=get_shop_address, is_info=True),
    #这里应该做一个转换
#\34 187 > div:nth-child(2) > div:nth-child(3) > div > div:nth-child(2) > span:nth-child(1)
    Field(fieldname=FieldName.SHOP_GRADE,css_selector='',filter_func=get_shop_grade, is_info=True),
    #正则表达式的使用有问题
    Field(fieldname=FieldName.SHOP_COMMENT_NUM,css_selector='a > div.search-scenic-content > div.search-scenic-wrapper > div.search-scenic-detail > p',is_info=True),
    #无shop_feature

    Field(fieldname=FieldName.SHOP_FEATURE, css_selector='',filter_func=get_shop_feature, is_info=True),

    Field(fieldname=FieldName.SHOP_RATE,css_selector='',filter_func=get_shop_rate, is_info=True),
Field(fieldname=FieldName.SHOP_COMMENT_URL,css_selector='a',attr='href',filter_func=get_shop_comment_url, is_info=True)
)
# Tuniu scenic-spot shop list page.
page_shop_1 = Page(
    name='途牛景点店铺列表页面',
    fieldlist=fl_shop1,
    listcssselector=ListCssSelector(
        list_css_selector='#search-container > section > div > ul > li',
    ),
    mongodb=Mongodb(
        db=TravelDriver.db,
        collection=TravelDriver.shop_collection,
    ),
    is_save=True,
)

# Fields read from the shop detail page: the shop name and the link to its comments.
fl_shop2 = Fieldlist(
    Field(
        fieldname=FieldName.SHOP_NAME,
        css_selector='#main-page > div.mp-main > div.mp-headfigure > div.mp-headfeagure-info > div',
    ),
    Field(
        fieldname=FieldName.SHOP_COMMENT_URL,
        css_selector='#main-page > div.mp-main > div.mp-baseinfo > div.mpg-flexbox.mp-flex-card > div:nth-child(1) > a',
        attr='href',
        is_info=True,
    ),
)

# Tuniu scenic-spot shop detail page; only a name and a field list are configured.
page_shop_2 = Page(name='途牛景点店铺详情页面', fieldlist=fl_shop2)

def get_comment_user_name(self, _str):
    """Return the part of ``_str`` before its first space.

    The whole string is returned when it contains no space. ``self`` is
    accepted but not used.
    """
    first_token, *_ = _str.split(' ')
    return first_token