def get_shop_comment(self):
    """Walk the comment pages of every stored shop that has a ``shop_url``.

    Shops are read from the shop collection using ``self.get_data_key()``
    as the query. Each shop page is re-opened until its title no longer
    contains '验证中心' (verification center); pagination is then delegated
    to ``until_click_no_next_page_by_css_selector`` with
    ``from_page_get_data_list`` as the per-page callback.
    """
    self.fast_new_page(url='http://www.baidu.com')
    collection = Mongodb(db=TravelDriver.db,
                         collection=TravelDriver.shop_collection,
                         host='10.1.17.15').get_collection()
    # Keep only the records that carry a shop URL.
    targets = [(doc.get('shop_name'), doc.get('shop_url'))
               for doc in collection.find(self.get_data_key())
               if doc.get('shop_url')]

    for position, (name, url) in enumerate(targets, start=1):
        self.info_log(data='第%s个,%s' % (position, name))

        # Retry until the page that loads is not the verification page.
        while True:
            self.is_ready_by_proxy_ip()
            self.switch_window_by_index(index=-1)
            self.deal_with_failure_page()
            self.fast_new_page(url=url)
            time.sleep(1)
            self.switch_window_by_index(index=-1)  # focus the newest window
            if '验证中心' not in self.driver.title:
                break
            self.info_log(data='关闭验证页面!!!')
            self.close_curr_page()

        per_page = PageFunc(func=self.from_page_get_data_list,
                            page=page_comment_1)
        pager = NextPageCssSelectorSetup(
            css_selector='#remark_page > a.page-next',
            stop_css_selector='#remark_page > a.page-next.hidden',
            main_pagefunc=per_page)
        self.until_click_no_next_page_by_css_selector(nextpagesetup=pager)
        self.close_curr_page()
Example #2
0
    def get_comment_info_list(self):
        """Open each stored shop's comment page and extract its comments.

        Only shop records with a ``shop_comment_url`` are visited. For each
        one a Baidu page is opened first, then the comment URL;
        ``self.shop_name`` is set before ``from_page_get_data_list`` runs
        (presumably so the extracted rows can be tagged with the shop).
        """
        # Open a landing page before touching the target site.
        self.fast_new_page('http://www.baidu.com')
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        targets = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                   for doc in collection.find(self.get_data_key())
                   if doc.get('shop_comment_url')]

        for position, (name, comment_url) in enumerate(targets, start=1):
            # The target site may apply anti-crawling measures.
            self.info_log(data='第%s个,%s' % (position, name))
            self.fast_new_page('http://www.baidu.com')
            self.fast_new_page(url=comment_url)
            self.shop_name = name
            # The returned list is not used here; the call is made for its
            # side effects.
            self.from_page_get_data_list(page=page_comment_1)
            self.close_curr_page()
Example #3
0
    def get_comment_info_list(self):
        """Crawl the paginated comments of every stored shop.

        Shop records are read from the shop collection using
        ``self.get_data_key()`` as the query. For each record that has a
        ``shop_comment_url`` the comment page is opened and pagination is
        delegated to ``until_click_no_next_page_by_partial_link_text``
        (link text '下一页', i.e. "next page") with
        ``from_page_get_data_list`` as the per-page callback.
        """
        self.fast_new_page(url='http://www.baidu.com')
        shop_collcetion = Mongodb(db=TravelDriver.db,
                                  collection=TravelDriver.shop_collection,
                                  host='localhost').get_collection()
        # Filter on the same field that is opened below, so that no entry
        # carries a None URL into fast_new_page (a record may have a
        # 'shop_url' without a 'shop_comment_url').
        shop_name_url_list = [
            (doc.get('shop_name'), doc.get('shop_comment_url'))
            for doc in shop_collcetion.find(self.get_data_key())
            if doc.get('shop_comment_url')
        ]

        for position, (name, comment_url) in enumerate(shop_name_url_list,
                                                       start=1):
            self.fast_new_page(url="https://www.baidu.com")
            # The target site may apply anti-crawling measures.
            self.info_log(data='第%s个,%s' % (position, name))
            self.fast_new_page(url=comment_url)

            self.shop_name = name

            # Give the comment page time to finish loading.
            time.sleep(5)

            self.until_click_no_next_page_by_partial_link_text(
                nextpagesetup=NextPageLinkTextSetup(
                    link_text='下一页',
                    main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                           page=page_comment_1),
                    pause_time=2))
            self.close_curr_page()
    def get_shop_des(self):
        """Refresh the stored detail fields of every shop record.

        Every record matching ``self.get_data_key()`` is visited (no
        filtering on ``shop_url`` is applied). The fields extracted with
        ``detail_shop_2`` are written back to the record identified by the
        data key plus the shop URL.
        """
        self.fast_new_page(url="http://www.baidu.com")
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        targets = [(doc.get('shop_name'), doc.get('shop_url'))
                   for doc in collection.find(self.get_data_key())]

        for position, (name, url) in enumerate(targets, start=1):
            self.info_log(data='第%s个,%s' % (position, name))
            self.fast_new_page(url=url, is_scroll_to_bottom=True)
            time.sleep(5)
            data = self.from_fieldlist_get_data(page=detail_shop_2)
            # Address the record by data key + shop URL.
            record_filter = self.merge_dict(self.get_data_key(),
                                            {FieldName.SHOP_URL: url})
            self.update_data_to_mongodb(collection, record_filter, data)
            self.close_curr_page()
Example #5
0
 def get_comment_list(self):
     """Crawl the paginated comment list of every stored shop with a URL.

     For each shop, ``self.shop_name`` is set, a Baidu page and then the
     shop page are opened, and pagination is delegated to
     ``until_click_no_next_page_by_css_selector`` with
     ``from_page_get_data_list`` as the per-page callback.
     """
     self.fast_new_page(url="http://www.baidu.com")
     collection = Mongodb(db=TravelDriver.db,
                          collection=TravelDriver.shop_collection,
                          host='localhost').get_collection()
     targets = [(doc.get('shop_name'), doc.get('shop_url'))
                for doc in collection.find(self.get_data_key())
                if doc.get('shop_url')]

     next_selector = '#allCmtComment > div.paging.orangestyle > div > a.nextpage'
     stop_selector = '#allCmtComment > div.paging.orangestyle > div > a.nextpage.hidden'
     for position, (name, url) in enumerate(targets, start=1):
         self.info_log(data='第%s个,%s' % (position, name))
         self.shop_name = name
         self.fast_new_page("http://www.baidu.com")
         self.fast_new_page(url=url)
         per_page = PageFunc(func=self.from_page_get_data_list,
                             page=page_comment_1)
         self.until_click_no_next_page_by_css_selector(
             nextpagesetup=NextPageCssSelectorSetup(
                 css_selector=next_selector,
                 stop_css_selector=stop_selector,
                 main_pagefunc=per_page,
                 pause_time=2))
         self.close_curr_page()
Example #6
0
 def get_shop_comment(self):
     """Crawl the comment pages of every stored shop with a comment URL.

     Each comment page is re-opened until its title no longer contains
     '验证中心' (verification center); pagination is then delegated to
     ``until_click_no_next_page_by_partial_link_text`` using the link
     text '下一页' ("next page").
     """
     collection = Mongodb(db=TravelDriver.db,
                          collection=TravelDriver.shop_collection,
                          host='10.1.17.15').get_collection()
     targets = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                for doc in collection.find(self.get_data_key())
                if doc.get('shop_comment_url')]

     for position, (name, comment_url) in enumerate(targets, start=1):
         self.info_log(data='第%s个,%s' % (position, name))

         # Retry until the page that loads is not the verification page.
         while True:
             self.is_ready_by_proxy_ip()
             self.switch_window_by_index(index=-1)
             self.deal_with_failure_page()
             self.fast_new_page(url=comment_url)
             time.sleep(1)
             self.switch_window_by_index(index=-1)  # focus the newest window
             if '验证中心' not in self.driver.title:
                 break
             self.info_log(data='关闭验证页面!!!')
             self.close_curr_page()

         per_page = PageFunc(func=self.from_page_get_data_list,
                             page=page_comment_1)
         pager = NextPageLinkTextSetup(link_text='下一页',
                                       main_pagefunc=per_page)
         self.until_click_no_next_page_by_partial_link_text(
             nextpagesetup=pager)
         self.close_curr_page()
Example #7
0
 def get_shop_detail(self):
     """Re-extract the detail fields of every distinct stored shop URL.

     Shop URLs are de-duplicated through a set. Each page is re-opened
     until its title no longer contains '验证中心' (verification center);
     the fields extracted with ``page_shop_2`` are then written back to
     the record identified by the data key plus the shop URL.
     """
     collection = Mongodb(db=TravelDriver.db,
                          collection=TravelDriver.shop_collection,
                          host='10.1.17.15').get_collection()
     distinct_urls = {doc.get(FieldName.SHOP_URL)
                      for doc in collection.find(self.get_data_key())}

     for counter, url in enumerate(distinct_urls):
         print(counter)  # zero-based progress indicator

         # Retry until the page that loads is not the verification page.
         while True:
             self.is_ready_by_proxy_ip()
             self.switch_window_by_index(index=-1)
             self.deal_with_failure_page()
             self.fast_new_page(url=url)
             time.sleep(1)
             self.switch_window_by_index(index=-1)  # focus the newest window
             if '验证中心' not in self.driver.title:
                 break
             self.info_log(data='关闭验证页面!!!')
             self.close_curr_page()

         data = self.from_fieldlist_get_data(page=page_shop_2)
         record_filter = self.merge_dict(self.get_data_key(),
                                         {FieldName.SHOP_URL: url})
         self.update_data_to_mongodb(collection, record_filter, data)
         self.close_curr_page()
    def get_comment_info_list(self):
        """Open each shop's comment page, expand it, and extract comments.

        For every stored shop with a ``shop_comment_url`` the page is
        opened and three best-effort steps are attempted: click the header
        button, click the ``data-tagtype="44"`` tab, and touch-scroll down
        from the add-comment button. A failure in any of these steps only
        prints a marker (111 / 222 / 333). The "go to top" control is then
        clicked and ``from_page_get_data_list`` is run. The page is not
        closed by this method.
        """
        header_button = '#main-page > header > h2 > div:nth-child(2)'
        add_comment_button = (
            '#main-page > div.mp-comment-mpcon > div.mp-addcomment.mp-border-top > a > div'
        )

        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        targets = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                   for doc in collection.find(self.get_data_key())
                   if doc.get('shop_comment_url')]

        for position, (name, comment_url) in enumerate(targets, start=1):
            # The target site may apply anti-crawling measures.
            self.info_log(data='第%s个,%s' % (position, name))
            self.shop_name = name
            self.fast_new_page(url=comment_url)
            time.sleep(5)

            try:
                # Probe for the header button; the lookup raises when it is
                # absent, which skips the click.
                self.driver.find_element_by_css_selector(
                    css_selector=header_button)
                self.fast_click_same_page_by_css_selector(
                    click_css_selector=header_button)
                time.sleep(6)
            except Exception:
                print(111)

            # Click the tab with data-tagtype="44".
            try:
                tab = self.driver.find_element_by_xpath(
                    '//li[@data-tagtype="44"]')
                ActionChains(self.driver).click(tab).perform()
                time.sleep(5)
            except Exception:
                print(222)

            # Touch-scroll down starting from the add-comment button.
            try:
                anchor = self.driver.find_element_by_css_selector(
                    css_selector=add_comment_button)
                touch = TouchActions(self.driver)
                touch.scroll_from_element(on_element=anchor,
                                          xoffset=0,
                                          yoffset=8000).perform()
                time.sleep(5)
            except Exception:
                print(333)

            self.fast_click_same_page_by_css_selector(
                click_css_selector='#main-page > div.mp-gotop > div')
            time.sleep(6)
            # The returned list is not used here; the call is made for its
            # side effects.
            self.from_page_get_data_list(page=page_comment_1)
Example #9
0
    def get_shop_detail(self):
        """Re-extract the detail fields of every distinct stored shop URL.

        Each URL is opened, the fields described by ``page_shop_2`` are
        extracted after a five-second wait, and the result is written back
        to the record identified by the data key plus the shop URL.
        """
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        # A set removes duplicate URLs.
        distinct_urls = {doc.get(FieldName.SHOP_URL)
                         for doc in collection.find(self.get_data_key())}

        for shop_url in distinct_urls:
            self.fast_new_page(url=shop_url)
            time.sleep(5)
            data = self.from_fieldlist_get_data(page=page_shop_2)
            record_filter = self.merge_dict(self.get_data_key(),
                                            {FieldName.SHOP_URL: shop_url})
            self.update_data_to_mongodb(collection, record_filter, data)
            self.close_curr_page()
    def get_comment_info_list(self):
        """Open each stored shop's comment page and extract its comments.

        Only records with a ``shop_comment_url`` are visited. After a
        five-second wait ``from_page_get_data_list`` is run once per page
        and the page is closed.
        """
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        targets = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                   for doc in collection.find(self.get_data_key())
                   if doc.get('shop_comment_url')]

        for position, (name, comment_url) in enumerate(targets, start=1):
            # The target site may apply anti-crawling measures.
            self.info_log(data='第%s个,%s' % (position, name))
            self.fast_new_page(url=comment_url)
            time.sleep(5)

            # The returned list is not used here; the call is made for its
            # side effects.
            self.from_page_get_data_list(page=page_comment_1)
            self.close_curr_page()
Example #11
0
    def get_comment_info_list(self):
        """Open each stored shop page, switch to its third tab, and extract comments.

        Only records with a ``shop_url`` are visited. The page is refreshed
        after a five-second wait, the third item of the first tab list is
        clicked, and ``from_page_get_data_list`` is run once.
        """
        third_tab = '#detail-placeholder > div.main > div.tabs-box > div:nth-child(1) > ul > li:nth-child(3)'

        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        targets = [(doc.get('shop_name'), doc.get('shop_url'))
                   for doc in collection.find(self.get_data_key())
                   if doc.get('shop_url')]

        for position, (name, url) in enumerate(targets, start=1):
            self.fast_new_page(url="https://www.baidu.com")
            # The target site may apply anti-crawling measures.
            self.info_log(data='第%s个,%s' % (position, name))
            self.fast_new_page(url=url)
            self.shop_name = name
            time.sleep(5)
            self.driver.refresh()
            self.fast_click_same_page_by_css_selector(
                click_css_selector=third_tab)

            # The returned list is not used here; the call is made for its
            # side effects.
            self.from_page_get_data_list(page=page_comment_1)
            self.close_curr_page()
Example #12
0
    def get_shop_comment(self):
        """Crawl the paginated reviews of one hard-coded dianping shop page.

        NOTE(review): the crawl statements below are indented into the
        ``for`` loop that reads the shop collection, so the same hard-coded
        URL is opened once per shop record, and the collected
        ``shop_name_url_list`` is never used. This looks like leftover
        debugging code -- confirm the intended behavior before relying on it.
        """
        shop_collcetion = Mongodb(db=TravelDriver.db,
                                  collection=TravelDriver.shop_collection,
                                  host='10.1.17.15').get_collection()
        shop_name_url_list = list()
        for i in shop_collcetion.find(self.get_data_key()):
            # Remember every shop that has a comment URL (not consumed below).
            if i.get('shop_comment_url'):
                shop_name_url_list.append(
                    (i.get('shop_name'), i.get('shop_comment_url')))
        # Disabled per-shop loop with verification-page retry, kept for reference:
        # for i in range(len(shop_name_url_list)):
        #     self.info_log(data='第%s个,%s'%(i+1, shop_name_url_list[i][0]))
        # while (True):
        #     self.is_ready_by_proxy_ip()
        #     self.switch_window_by_index(index=-1)
        #     self.deal_with_failure_page()
        #     self.fast_new_page(url=shop_name_url_list[i][1])
        #     time.sleep(1)
        #     self.switch_window_by_index(index=-1)  # select the page
        #     if '验证中心' in self.driver.title:
        #         self.info_log(data='关闭验证页面!!!')
        #         self.close_curr_page()
        #     else:
        #         break
        # dianping = self.driver.find_element_by_css_selector(
        #     css_selector='#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.reviews-tags > div.content')
        # self.fast_click_same_page_by_css_selector(click_css_selector='#main-page > header > h2 > div:nth-child(2)')
            # Still inside the ``for`` loop: open the fixed review page.
            self.fast_new_page(
                url='http://www.dianping.com/shop/110833569/review_all')
            # CSS selector for the first <span> in the review-tags area.
            span = '#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.reviews-tags > div.content > span:nth-child(' + str(
                1) + ')'
            # ``links`` is not used afterwards; the [0] index raises
            # IndexError when no element matches the selector.
            links = self.driver.find_elements_by_css_selector(
                span)[0].get_attribute('text')

            # Delegate pagination via the '下一页' ("next page") link, with
            # from_page_get_data_list as the per-page callback.
            self.until_click_no_next_page_by_partial_link_text(
                nextpagesetup=NextPageLinkTextSetup(
                    link_text='下一页',
                    main_pagefunc=PageFunc(func=self.from_page_get_data_list,
                                           page=page_comment_1)))
            self.close_curr_page()
    def get_shop_comment(self):
        """Crawl each stored shop's reviews after switching the sort order.

        Only records with a ``shop_comment_url`` are visited. On each page
        the '默认排序' (default sort) link and then the '最新点评' (latest
        reviews) link are clicked before pagination is delegated to
        ``until_click_no_next_page_by_css_selector`` with
        ``from_page_get_data_list`` as the per-page callback.
        """
        next_selector = '#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.bottom-area.clearfix > div > a.NextPage'
        stop_selector = '#review-list > div.review-list-container > div.review-list-main > div.reviews-wrapper > div.bottom-area.clearfix > div > a.NextPage.hidden'

        self.fast_new_page(url='http://www.baidu.com')
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             ).get_collection()
        targets = [(doc.get('shop_name'), doc.get('shop_comment_url'))
                   for doc in collection.find(self.get_data_key())
                   if doc.get('shop_comment_url')]

        for position, (name, comment_url) in enumerate(targets, start=1):
            self.info_log(data='第%s个,%s' % (position, name))

            self.fast_new_page(url=comment_url, is_scroll_to_bottom=False)
            time.sleep(3)
            # Click the sort control, then pick the "latest reviews" option.
            self.driver.find_element_by_link_text(link_text='默认排序').click()
            time.sleep(2)
            self.driver.find_element_by_link_text(link_text='最新点评').click()
            time.sleep(5)

            per_page = PageFunc(func=self.from_page_get_data_list,
                                page=page_comment_1)
            self.until_click_no_next_page_by_css_selector(
                nextpagesetup=NextPageCssSelectorSetup(
                    css_selector=next_selector,
                    stop_css_selector=stop_selector,
                    main_pagefunc=per_page,
                    pause_time=5))
            self.close_curr_page()
    def get_comment_list(self):
        """Crawl the paginated comment list of every stored shop with a URL.

        Each shop page is opened and pagination is delegated to
        ``until_click_no_next_page_by_css_selector`` with
        ``from_page_get_data_list`` as the per-page callback.
        """
        next_selector = '#allCmtComment > div.paging.orangestyle > div > a.nextpage'
        stop_selector = '#allCmtComment > div.paging.orangestyle > div > a.nextpage.hidden'

        self.fast_new_page(url="http://www.baidu.com")
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        targets = [(doc.get('shop_name'), doc.get('shop_url'))
                   for doc in collection.find(self.get_data_key())
                   if doc.get('shop_url')]

        for position, (name, url) in enumerate(targets, start=1):
            self.info_log(data='第%s个,%s' % (position, name))

            self.fast_new_page(url=url)
            per_page = PageFunc(func=self.from_page_get_data_list,
                                page=page_comment_1)
            self.until_click_no_next_page_by_css_selector(
                nextpagesetup=NextPageCssSelectorSetup(
                    css_selector=next_selector,
                    stop_css_selector=stop_selector,
                    main_pagefunc=per_page,
                    pause_time=5))
            self.close_curr_page()
    def get_shop_detial(self):
        """Re-extract the detail fields of every stored shop with a URL.

        For each shop a Baidu page and then the shop page are opened, the
        first link containing '只看文字' (text only) is clicked, and the
        fields extracted with ``page_shop_2`` are written back to the
        record identified by the data key plus the shop URL.
        """
        self.fast_new_page(url="http://www.baidu.com")
        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        targets = [(doc.get('shop_name'), doc.get('shop_url'))
                   for doc in collection.find(self.get_data_key())
                   if doc.get('shop_url')]

        for position, (name, url) in enumerate(targets, start=1):
            self.fast_new_page(url='http://www.baidu.com')
            self.info_log(data='第%s个,%s' % (position, name))

            self.fast_new_page(url=url)
            self.fast_click_first_item_same_page_by_partial_link_text(
                link_text='只看文字')
            data = self.from_fieldlist_get_data(page=page_shop_2)
            record_filter = self.merge_dict(self.get_data_key(),
                                            {FieldName.SHOP_URL: url})
            self.update_data_to_mongodb(collection, record_filter, data)
            self.close_curr_page()
Example #16
0
def CommentsResults(request):
    """
    Render the stored comments of one shop as a table.

    The project is looked up from ``request.GET['id']`` and the shop from
    ``request.GET[FieldName.SHOP_NAME]``. Matching comment documents are
    loaded from MongoDB, their bookkeeping fields are dropped, and the
    remaining fields become table columns (first-seen order; any field whose
    name contains 'shop' is excluded).

    Each table cell is a two-item list ``[display_value, full_value]``:

    * ``['nonexistent', 'nonexistent']`` when the document lacks the field,
    * ``['0', '0']`` / ``['null', 'null']`` for falsy numeric / other fields,
    * otherwise the value, with long strings cut to 10 characters + '...'.

    :param request: request object exposing ``GET`` (presumably a Django
        ``HttpRequest``).
    :return: the response produced by ``render`` for
        ``spider/commentresults.html``.
    :raises KeyError: if a required GET parameter is missing.
    """
    project_id = request.GET['id']
    shop_name = request.GET[FieldName.SHOP_NAME]
    project = Project.objects.get(id=project_id)
    comments = Mongodb(
        host=TravelDriver.host,
        port=TravelDriver.port,
        db=TravelDriver.db,
        collection=TravelDriver.comments_collection).get_collection()
    comments_data_list = list(
        comments.find({
            FieldName.DATA_WEBSITE: str(project.data_website),
            FieldName.DATA_REGION: str(project.data_region),
            FieldName.DATA_SOURCE: str(project.data_source),
            FieldName.SHOP_NAME: shop_name,
        }))

    # Bookkeeping fields that must not show up as table columns.
    hidden_keys = (FieldName.ID_, FieldName.DATA_WEBSITE,
                   FieldName.DATA_REGION, FieldName.DATA_SOURCE,
                   FieldName.SHOP_NAME)

    # Collect column names in first-seen order so the column layout is
    # stable between requests. Filtering happens while building the list:
    # removing items from a list that is being iterated skips the element
    # following each removal, which left some 'shop' columns in place.
    thead_list = list()
    seen_fields = set()
    for comment_data in comments_data_list:
        for hidden_key in hidden_keys:
            comment_data.pop(hidden_key, None)
        for field in comment_data:
            if field in seen_fields:
                continue
            seen_fields.add(field)
            if 'shop' not in field:
                thead_list.append(field)

    text_types = (str, type(u''))

    def format_cell(key, raw):
        """Return ``[display_value, full_value]`` for one present field."""
        if not raw:
            if FIELD_NAME_TYPE.get(key) in (FieldType.FLOAT, FieldType.INT):
                return ['0', '0']
            return ['null', 'null']
        shown = raw
        if isinstance(raw, list):  # e.g. a list of photos
            shown = json.dumps(raw)[:10]
        try:
            # Numeric values and numeric strings are shown untruncated.
            int(shown)
        except (TypeError, ValueError, OverflowError):
            # Only text can be sliced; other types are passed through.
            if isinstance(shown, text_types) and len(shown) > 10:
                shown = shown[0:10] + u'...'
        return [shown, raw]

    tbody_list = list()
    for comment_data in comments_data_list:
        td_list = list()
        for key in thead_list:
            if key not in comment_data:
                td_list.append(['nonexistent', 'nonexistent'])
            else:
                td_list.append(format_cell(key, comment_data.get(key)))
        tbody_list.append(td_list)

    # Column titles are the display names registered in FIELD_NAME_ZH.
    thead_chinese_list = [FIELD_NAME_ZH.get(thead) for thead in thead_list]
    return render(request,
                  'spider/commentresults.html',
                  context={
                      'thead_list': thead_chinese_list,
                      'tbody_list': tbody_list
                  })
Example #17
0
def ShopResults(request):
    """
    Render the stored shops of one project as a table.

    The project is looked up from ``request.GET['id']``. Matching shop
    documents are loaded from MongoDB, their bookkeeping fields are dropped,
    and the remaining fields become table columns (first-seen order, with
    the shop name moved to the first position and the comment count to the
    second when present).

    Every row starts with a dict ``{FieldName.SHOP_NAME: <name>, 'id': <id>}``
    followed by one ``[display_value, full_value]`` list per remaining
    column:

    * ``['nonexistent', 'nonexistent']`` when the document lacks the field,
    * ``['0', '0']`` / ``['null', 'null']`` for falsy numeric / other fields,
    * otherwise the value, with long strings cut to 10 characters + '...'.

    :param request: request object exposing ``GET`` (presumably a Django
        ``HttpRequest``).
    :return: the response produced by ``render`` for
        ``spider/shopresults.html``.
    :raises KeyError: if the ``id`` GET parameter is missing.
    """
    project_id = request.GET['id']
    project = Project.objects.get(id=project_id)
    shops = Mongodb(host=TravelDriver.host,
                    port=TravelDriver.port,
                    db=TravelDriver.db,
                    collection=TravelDriver.shop_collection).get_collection()
    shops_data_list = list(
        shops.find({
            FieldName.DATA_WEBSITE: str(project.data_website),
            FieldName.DATA_REGION: str(project.data_region),
            FieldName.DATA_SOURCE: str(project.data_source),
        }))

    # Bookkeeping fields that must not show up as table columns.
    hidden_keys = (FieldName.ID_, FieldName.DATA_WEBSITE,
                   FieldName.DATA_REGION, FieldName.DATA_SOURCE)

    # Collect column names in first-seen order so the column layout is
    # stable between requests.
    thead_list = list()
    seen_fields = set()
    for shop_data in shops_data_list:
        for hidden_key in hidden_keys:
            shop_data.pop(hidden_key, None)
        for field in shop_data:
            if field not in seen_fields:
                seen_fields.add(field)
                thead_list.append(field)

    # Pin the shop name to column 0 and the comment count to column 1.
    if FieldName.SHOP_NAME in thead_list:
        thead_list.remove(FieldName.SHOP_NAME)
        thead_list.insert(0, FieldName.SHOP_NAME)
    if FieldName.SHOP_COMMENT_NUM in thead_list:
        thead_list.remove(FieldName.SHOP_COMMENT_NUM)
        thead_list.insert(1, FieldName.SHOP_COMMENT_NUM)

    # (display name, field name) pairs for the table header.
    thead_tuple_list = [(FIELD_NAME_ZH.get(thead), thead)
                        for thead in thead_list]

    text_types = (str, type(u''))

    def format_cell(key, raw):
        """Return ``[display_value, full_value]`` for one present field."""
        if not raw:
            if FIELD_NAME_TYPE.get(key) in (FieldType.FLOAT, FieldType.INT):
                return ['0', '0']
            return ['null', 'null']
        shown = raw
        if isinstance(raw, list):  # e.g. a list of photos -> JSON text
            shown = json.dumps(raw)[:10]
        try:
            # Numeric values and numeric strings are shown untruncated.
            int(shown)
        except (TypeError, ValueError, OverflowError):
            # Only text can be sliced; other types are passed through.
            if isinstance(shown, text_types) and len(shown) > 10:
                shown = shown[0:10] + u'...'
        return [shown, raw]

    tbody_list = list()
    for shop_data in shops_data_list:
        td_list = [{
            FieldName.SHOP_NAME: shop_data.get(FieldName.SHOP_NAME),
            'id': project_id
        }]
        # NOTE(review): skipping column 0 assumes it is the shop name, which
        # holds only when at least one document has that field -- confirm.
        for key in thead_list[1:]:
            if key not in shop_data:
                td_list.append(['nonexistent', 'nonexistent'])
            else:
                td_list.append(format_cell(key, shop_data.get(key)))
        tbody_list.append(td_list)

    return render(request,
                  'spider/shopresults.html',
                  context={
                      'thead_tuple_list': thead_tuple_list,
                      'tbody_list': tbody_list,
                  })
Example #18
0
    def get_shop_address(self):
        """Look up every stored shop on the Baidu map point-picker page.

        The picker page is opened once and the city is switched to '淳安'.
        For each shop with a ``shop_url`` the city is selected again, the
        shop name (prefixed with '千岛湖' unless it already contains it) is
        searched, and the first result is clicked. The fields extracted
        with ``page_shop_2`` are then written back to the record identified
        by the data key plus the shop URL. Any failure from clicking the
        result onwards is swallowed and only prints a message.
        """

        def select_city():
            # Open the city switcher, type the city name and confirm.
            self.fast_click_first_item_same_page_by_partial_link_text(
                link_text='更换城市')
            self.until_scroll_to_center_send_text_by_css_selector(
                css_selector='#selCityInput', text='淳安')
            time.sleep(2)
            self.fast_click_same_page_by_css_selector(
                click_css_selector='#selCityButton')
            time.sleep(2)

        self.fast_new_page(
            url='http://api.map.baidu.com/lbsapi/getpoint/index.html')
        select_city()

        collection = Mongodb(db=TravelDriver.db,
                             collection=TravelDriver.shop_collection,
                             host='localhost').get_collection()
        targets = [(doc.get('shop_name'), doc.get('shop_url'))
                   for doc in collection.find(self.get_data_key())
                   if doc.get('shop_url')]

        for position, (name, url) in enumerate(targets, start=1):
            self.info_log(data='第%s个,%s' % (position, name))
            select_city()

            # Search for the shop, making sure the query mentions '千岛湖'.
            query = name if '千岛湖' in name else '千岛湖' + name
            self.until_scroll_to_center_send_text_by_css_selector(
                css_selector='#localvalue', text=query)
            time.sleep(2)
            self.fast_click_same_page_by_css_selector(
                click_css_selector='#localsearch')
            time.sleep(2)

            try:
                # Click the first search result; this raises when there is
                # none.
                self.driver.find_element_by_css_selector(
                    css_selector='#no_0 > a').click()
                time.sleep(2)
                data = self.from_fieldlist_get_data(page=page_shop_2)
                record_filter = self.merge_dict(self.get_data_key(),
                                                {FieldName.SHOP_URL: url})
                self.update_data_to_mongodb(collection, record_filter, data)
            except Exception:
                print("改地址无经纬度")
Example #19
0
from spider.driver.base.field import *
from spider.driver.travel.core.traveldriver import WebsiteName, DataSourceName, TravelDriver
from spider.driver.base.mongodb import Mongodb

# Print the records of the shop collection that match the HOTEL /
# XIECHENG / '千岛湖' key, requesting only the 'shop_url' field.
key = {
    FieldName.DATA_SOURCE: DataSourceName.HOTEL,
    FieldName.DATA_WEBSITE: WebsiteName.XIECHENG,
    FieldName.DATA_REGION: '千岛湖',
}
shops = Mongodb(db=TravelDriver.db,
                collection=TravelDriver.shop_collection,
                host='127.0.0.1').get_collection()
# The comments collection is opened but not queried in this script.
comments = Mongodb(
    db=TravelDriver.db,
    collection=TravelDriver.comments_collection).get_collection()

projection = {'shop_url': 1}
for shop_doc in shops.find(key, projection):
    print(shop_doc)
Example #20
0
# -*- coding:utf-8 -*-
from spider.driver.base.field import *
from spider.driver.travel.core.traveldriver import WebsiteName, DataSourceName, TravelDriver
from spider.driver.base.mongodb import Mongodb

# Count the shop names stored for the HOTEL / QUNAR / '千岛湖' key and
# compare the total with the number of distinct names.
key = {
    FieldName.DATA_SOURCE: DataSourceName.HOTEL,
    FieldName.DATA_WEBSITE: WebsiteName.QUNAR,
    FieldName.DATA_REGION: '千岛湖',
}
shops = Mongodb(db=TravelDriver.db,
                collection=TravelDriver.shop_collection,
                host='10.1.17.15').get_collection()
# The comments collection is opened but not queried in this script.
comments = Mongodb(
    db=TravelDriver.db,
    collection=TravelDriver.comments_collection).get_collection()

shop_name_list = [shop_doc.get(FieldName.SHOP_NAME)
                  for shop_doc in shops.find(key)]
total_names = len(shop_name_list)
distinct_names = len(set(shop_name_list))
print(total_names)      # all records
print(distinct_names)   # unique shop names