def __open_browser(self):
    """
    Start a Chrome session and load the Weibo home page.

    The driver is built from ``self.options``, stored on ``self.browser``,
    resized to 1800x1000 and then pointed at https://weibo.com/.

    :return: None
    """
    self.browser = webdriver.Chrome(chrome_options=self.options)
    driver = self.browser
    driver.set_window_size(1800, 1000)
    sleep_time(1)
    driver.get('https://weibo.com/')
    sleep_time(1)
    log.info('打开浏览器,准备登陆')
def __clear_info(self):
    """
    Empty the username input and then the password input of the login form.

    :return: None
    """
    sleep_time(1)
    input_xpaths = (
        './/div[@class="W_login_form"]/div[contains(@class,"username")]//input',
        './/div[@class="W_login_form"]/div[contains(@class,"password")]//input',
    )
    for input_xpath in input_xpaths:
        self.browser.find_element_by_xpath(input_xpath).clear()
def __has_verify_code(self):
    """
    Report whether the login form is currently showing a verification code.

    The current page source is parsed and the first
    ``div[@node-type="verifycode_box"]`` is inspected: the code counts as
    hidden when that box carries the inline style ``display: none;``.

    :return: False when the box is missing from the page or hidden,
             True when the box is present and not hidden
    """
    sleep_time(1)
    html = etree.HTML(self.browser.page_source)
    boxes = html.xpath('.//div[@node-type="verifycode_box"]')
    # Guard before indexing: a page without the box (for instance a page
    # reached after submitting the form) has no code to solve, and must not
    # abort the login loop with an IndexError.
    if not boxes or boxes[0].get('style') == 'display: none;':
        log.info('没有验证码')
        return False
    log.info('有验证码')
    return True
def __is_succeed(self):
    """
    Check whether the login went through.

    Success is detected by the presence of the fifth entry of the
    ``gn_nav`` navigation list on the current page.

    :return: True if the element is found, False if the lookup fails
    """
    sleep_time(2)
    try:
        self.browser.find_element_by_xpath(
            './/div[@class="gn_nav"]/ul/li[5]')
    except Exception:
        # Best-effort probe: any lookup failure means "not logged in".
        # ``Exception`` (not a bare except) keeps Ctrl-C / SystemExit working.
        log.info('登录失败')
        return False
    log.info('登录成功!')
    return True
def is_into_recommend(self):
    """
    Try to click the link that the recommend page exposes.

    The link is addressed by an absolute xpath; finding and clicking it is
    taken as proof that the browser is on the recommend page.

    :return: True if the link was found and clicked, False otherwise
    """
    sleep_time(2)
    try:
        self.browser.find_element_by_xpath(
            '/html/body/div/div[1]/div/div/div[2]/div[1]/a').click()
    except Exception:
        # Best-effort probe: a failed lookup or click means "not on that
        # page". ``Exception`` keeps Ctrl-C / SystemExit propagating.
        log.info('没有进入推荐页')
        return False
    log.info('进入了推荐页')
    return True
def follow(self, follow_page):
    """
    Open the home page of the account to follow and follow it from there.

    Nothing is clicked when the account is already reported as followed.

    :param follow_page: link to the home page of the account to follow
    :return: None
    """
    sleep_time(2)
    self.__open_page(follow_page)
    if self.__is_followed():
        return
    self.__click_follow()
def __inter_info(self, acc, pwd):
    """
    Type the account name and then the password into the login form.

    :param acc: text sent to the username input
    :param pwd: text sent to the password input
    :return: None
    """
    sleep_time(1)
    log.info('输入用户名')
    username_input = self.browser.find_element_by_xpath(
        './/div[@class="W_login_form"]/div[contains(@class,"username")]//input')
    username_input.send_keys(acc)

    sleep_time(1)
    log.info('输入密码')
    password_input = self.browser.find_element_by_xpath(
        './/div[@class="W_login_form"]/div[contains(@class,"password")]//input')
    password_input.send_keys(pwd)
def __is_followed(self):
    """
    Read the label of the follow button on the currently opened page.

    :return: True when the label is '已关注', False when it is '关注',
             None for any other label
    :raises IndexError: when the button has no text node in the page source
    """
    sleep_time(1)
    page = etree.HTML(self.browser.page_source)
    button_texts = page.xpath(
        './/div[contains(@class,"opt_box")]/div[1]/a[1]/text()')
    label = button_texts[0]
    if label == '已关注':
        log.info('已关注')
        return True
    if label == '关注':
        log.info('未关注')
        return False
    return None
def is_into_verify(self):
    """
    Determine whether the browser landed on the verification page.

    The page is recognised by a ``div`` whose class contains
    ``geetest_holder``.

    :return: True if that element is found, False if the lookup fails
    """
    sleep_time(2)
    try:
        self.browser.find_element_by_xpath(
            './/div[contains(@class,"geetest_holder")]')
    except Exception:
        # Best-effort probe: a failed lookup means no verification is shown.
        # ``Exception`` keeps Ctrl-C / SystemExit propagating.
        log.info('账号正常,不需要验证身份')
        return False
    log.warning('为了解除帐号异常,请点击按钮进行验证')
    return True
def is_follow_failed(self):
    """
    Determine whether the "Sorry, follow failed (>_<), try again later."
    popup appeared, and click its button if so.

    That popup means this machine has not yet been established as a
    commonly used device.

    :return: True if the popup button was found and clicked, False otherwise
    """
    sleep_time(2)
    try:
        self.browser.find_element_by_xpath(
            './/div[contains(@class,"W_layer")]/div[contains(@class,"content")]/div[contains(@class,"W_layer_btn")]/a'
        ).click()
    except Exception:
        # Best-effort probe: a failed lookup or click means no popup.
        # ``Exception`` keeps Ctrl-C / SystemExit propagating.
        log.info('没有出现关注失败')
        return False
    log.warning('抱歉,关注失败(>_<) ,稍后再试啦。')
    return True
def is_account_abnormal(self):
    """
    Determine whether the page shows the popup "Your account is abnormal and
    temporarily cannot post, comment, follow, etc.; please verify your
    identity to lift the restriction. (100003)".

    The popup is recognised by the first link inside a ``W_layer_btn``
    container of a ``W_layer`` element; the link is only looked up, not
    clicked.

    :return: True if that link is found, False if the lookup fails
    """
    sleep_time(2)
    try:
        self.browser.find_element_by_xpath(
            './/div[contains(@class,"W_layer")]//div[contains(@class,"W_layer_btn")]/a[1]'
        )
    except Exception:
        # Best-effort probe: a failed lookup means no popup is shown.
        # ``Exception`` keeps Ctrl-C / SystemExit propagating.
        log.info('账号正常')
        return False
    log.warning('你的帐号存在异常,暂时无法发博、发评论、加关注等,请先验证身份解除异常。(100003)')
    return True
def __to_login(self, acc, pwd):
    """
    Submit the login form with the given credentials, making up to 10 attempts.

    Each attempt clears and refills the form, handles a verification code if
    one is showing, and clicks login. If that does not succeed and a
    verification code is showing afterwards, the code is handled and the form
    is submitted once more within the same attempt.

    :param acc: account name typed into the username input
    :param pwd: password typed into the password input
    :return: True as soon as a login is detected; False when
             ``self.login_abnormal.is_into_inter_phone_num()`` is truthy or
             when all attempts are used up
    """
    for i in range(10):
        # Start every attempt from empty inputs.
        self.__clear_info()
        self.__inter_info(acc, pwd)
        # A code that is already showing must be dealt with before submitting.
        if self.__has_verify_code():
            self.__deal_with_code()
        self.__click_login()
        if self.__is_succeed():
            return True
        elif self.__has_verify_code():
            # The submit did not log in and a code is showing: handle it and
            # resubmit once.
            self.__deal_with_code()
            self.__click_login()
            if self.__is_succeed():
                return True
        # NOTE(review): presumably detects a page asking for a phone number;
        # the helper is defined elsewhere - confirm. Retrying stops here.
        if self.login_abnormal.is_into_inter_phone_num():
            return False
        sleep_time(2)
    return False
def do_forward(self):
    """
    Fill in and submit the forward form of the currently opened post.

    A comment obtained from ``self.__get_comment()`` is typed into the
    publish textarea, the ``ipt11`` input is clicked, and finally the
    ``W_btn_a`` button is clicked.

    :return: None
    """
    sleep_time(2)
    comment = self.__get_comment()

    comment_box = self.browser.find_element_by_xpath(
        './/div[contains(@class,"WB_feed_publish")]//textarea[@node-type]')
    comment_box.send_keys(comment)
    sleep_time(1)

    # NOTE(review): presumably an option checkbox of the form - confirm.
    option_input = self.browser.find_element_by_xpath(
        './/div[contains(@class,"WB_feed_publish")]//input[@id="ipt11"]')
    option_input.click()
    sleep_time(1)

    submit_button = self.browser.find_element_by_xpath(
        './/div[contains(@class,"WB_feed_publish")]//a[contains(@class,"W_btn_a")]')
    submit_button.click()
    sleep_time(1)
def __into_main_page(self):
    """
    Click the fifth entry of the ``gn_nav`` navigation list.

    Per the log message this opens the personal main page, where the
    account's own forwarded posts can be read.

    :return: None
    """
    sleep_time(1)
    nav_entry = self.browser.find_element_by_xpath(
        './/div[@class="gn_nav"]/ul/li[5]')
    nav_entry.click()
    log.info("进入个人主页,查看自己转发的微博内容")
def into_host(self):
    """
    Click the side-menu link with page id ``102803_ctg1_1760_-_ctg1_1760``.

    Per the log message this is the "hot" section.

    :return: None
    """
    sleep_time(2)
    hot_link = self.browser.find_element_by_xpath(
        './/div[@class="lev"]/a[@page_id="102803_ctg1_1760_-_ctg1_1760"]//span[2]')
    hot_link.click()
    log.info("点击进入热门")
def __click_login(self):
    """
    Click the login button of the login form.

    :return: None
    """
    sleep_time(1)
    log.info('点击登录')
    login_button = self.browser.find_element_by_xpath(
        './/div[@class="login_innerwrap"]/div[3]/div[contains(@class,"login_btn")]')
    login_button.click()
def into_new_page(self, url):
    """
    Navigate the browser to another page.

    :param url: address handed to ``self.browser.get``
    :return: None
    """
    sleep_time(2)
    driver = self.browser
    driver.get(url)
def __click_follow(self):
    """
    Click the follow button (first link of the first ``opt_box`` child).

    :return: None
    """
    sleep_time(2)
    follow_button = self.browser.find_element_by_xpath(
        './/div[contains(@class,"opt_box")]/div[1]/a[1]')
    follow_button.click()
    log.info('点击了关注')
def __open_page(self, follow_page):
    """
    Load the home page of the account that is going to be followed.

    :param follow_page: address handed to ``self.browser.get``
    :return: None
    """
    sleep_time(1)
    self.browser.get(follow_page)
    message = '打开被关注者主页:{}'.format(follow_page)
    log.info(message)
def get_self_forward(self):
    """
    Open the personal main page and collect the posts listed in its feed.

    Every ``WB_detail`` element inside ``div[@node-type="feed_list"]``
    becomes one dict with the keys ``ActionTyp`` (always 3), ``RobotName``,
    ``Comment``, ``ActionUrl``, ``PlatformId`` (always 2), ``ClientAccId``,
    ``ClientPostUrl``, ``ClientPostId`` and ``ClientPostTitle``. A field
    whose source element is missing from the page is set to ''.

    :return: list of dicts, one per feed item; empty when the feed has none
    """

    def first_match(node, expression, prefix=None):
        """Return the first xpath hit (optionally prefixed), debug-logging it; '' when there is none."""
        hits = node.xpath(expression)
        if not hits:
            return ''
        value = hits[0] if prefix is None else prefix + hits[0]
        log.debug(value)
        return value

    def joined_text(node, expression):
        """Return every text hit stripped and joined with single spaces, debug-logging the result."""
        text = ' '.join(piece.strip() for piece in node.xpath(expression))
        log.debug(text)
        return text

    self.__into_main_page()
    sleep_time(2)
    html = etree.HTML(self.browser.page_source)
    item_list = html.xpath(
        './/div[@node-type="feed_list"]//div[contains(@class,"WB_detail")]')

    res = []
    for item in item_list:
        # The post timestamp is only debug-logged; the record has no field for it.
        first_match(item, './div[contains(@class,"WB_from")]/a[1]/@date')
        action_url = first_match(
            item,
            './div[contains(@class,"WB_from")]/a[1]/@href',
            prefix='https://weibo.com')
        robot_name = first_match(
            item, './div[contains(@class,"WB_info")]/a/text()')
        robot_comment = joined_text(
            item, './div[contains(@class,"WB_text")]/text()')
        # The original author's name is only debug-logged as well.
        first_match(
            item,
            './div[contains(@class,"WB_feed_expand")]//div[contains(@class,"WB_info")]/a/@title')
        client_post_title = joined_text(
            item,
            './div[contains(@class,"WB_feed_expand")]//div[contains(@class,"WB_text")]/text()')

        client_hrefs = item.xpath(
            './div[contains(@class,"WB_feed_expand")]//div[contains(@class,"WB_func")]/div[contains(@class,"WB_from")]/a[1]/@href')
        # The href is expected to look like '/<account id>/<post id>'; any
        # other shape leaves all three client fields blank.
        id_parts = client_hrefs[0].split('/')[1:] if client_hrefs else []
        if len(id_parts) == 2:
            client_acc_id, client_post_id = id_parts
            client_post_url = 'https://weibo.com' + client_hrefs[0]
            log.debug("客户ID: {} 文章ID: {}".format(
                client_acc_id, client_post_id))
        else:
            client_acc_id = ''
            client_post_id = ''
            client_post_url = ''

        res.append({
            'ActionTyp': 3,
            'RobotName': robot_name,
            'Comment': robot_comment,
            'ActionUrl': action_url,
            'PlatformId': 2,
            'ClientAccId': client_acc_id,
            'ClientPostUrl': client_post_url,
            'ClientPostId': client_post_id,
            'ClientPostTitle': client_post_title,
        })
    return res
def __inter_code(self, verify_code):
    """
    Type a verification code into the ``verifycode`` input.

    :param verify_code: text sent to the input
    :return: None
    """
    sleep_time(2)
    code_input = self.browser.find_element_by_xpath(
        './/input[@node-type="verifycode"]')
    code_input.send_keys(verify_code)