Beispiel #1
0
    def search(self, fans_driver):
        """Crawl the profile currently loaded in ``fans_driver`` and recurse.

        Steps, in order:

        1. Return immediately if the driver's current URL was already
           visited; otherwise record it in ``self.visited_url``.
        2. Read the profile's address and user name; return if the first
           address entry does not contain the province filter string.
        3. Follow the user when ``self.focus_check`` says so.
        4. Collect URLs from the user's weibo pages while
           ``self.last_weibo_in_time`` stays truthy, paging forward until
           paging fails.
        5. Collect the hrefs of followed users and fans that pass the same
           province filter.
        6. Persist the pending URL set, visit each URL recursively, then
           persist the visited URL set.

        Any exception raised along the way is printed and swallowed, so the
        method always returns ``None`` (best-effort crawling).

        :param fans_driver: browser driver positioned on a user's home page;
            must expose ``current_url``, ``get`` and ``implicitly_wait``.
        """
        print(self.visited_url)
        try:
            current_url = fans_driver.current_url
            if current_url in self.visited_url:
                print(current_url)
                print('存在')
                return
            self.visited_url.add(current_url)

            address = FriendHomePage.address(fans_driver)
            username = FriendHomePage.user_name(fans_driver)
            # Only profiles whose first address entry mentions the target
            # province are crawled any further.
            if '江苏' not in address[0]:
                print('不在江苏')
                print(address[0])
                print(fans_driver.current_url)
                return

            if self.focus_check(fans_driver, address):
                FriendHomePage.focus_friend_action(fans_driver)

            # Walk the weibo pages. NOTE(review): last_weibo_in_time is
            # presumably refreshed by weibo_list -- confirm against that
            # method; it is not assigned anywhere in this block.
            fans_url = []
            while self.last_weibo_in_time:
                fans_url.extend(self.weibo_list(fans_driver, username))
                try:
                    WeiboPageCommon.next_page_action(fans_driver)
                except Exception as e:
                    # Failing to page forward ends the walk.
                    print(e)
                    break

            print('')
            # Parse the list of users this profile follows.
            FriendHomePage.focus_action(fans_driver)
            focus_fans_list = FriendFoucsPage.fans_list_parsed(fans_driver)
            # Parse the list of this profile's fans.
            FriendFoucsPage.fans_action(fans_driver)
            fans_list = FriendFoucsPage.fans_list_parsed(fans_driver)
            focus_fans_list.extend(fans_list)

            fans_url.extend(
                fans.href
                for fans in focus_fans_list
                if '江苏' in fans.address[0]
            )
            fans_url = set(fans_url)

            # Keep the bookkeeping set separate from the set being iterated
            # below: removing from the very set a for-loop walks raises
            # "RuntimeError: Set changed size during iteration".
            self.un_visited_url = set(fans_url)
            # Pickle streams are bytes; binary mode is required on Python 3
            # and is also valid on Python 2.
            with open(self.un_visited_url_file_name, 'wb') as fh:
                pickle.dump(self.un_visited_url, fh)
                print('保存未访问url')

            for url in fans_url:
                fans_driver.get(url)
                fans_driver.implicitly_wait(10)
                self.search(fans_driver)
                self.visited_url.add(url)
                # A nested search() rebinds self.un_visited_url to its own
                # set, so this url may no longer be present; discard()
                # tolerates that where remove() would raise KeyError.
                self.un_visited_url.discard(url)

            with open(self.visited_url_file_name, 'wb') as fh:
                pickle.dump(self.visited_url, fh)
                print('保存访问url')

        except Exception as e:
            # Best-effort crawl: report the failure and give up on this
            # profile instead of aborting the whole run.
            print(e)
            return