Ejemplo n.º 1
0
def get_article(driver, biz):
    """Read the history-article payload from the WeChat webview and parse it.

    The driver is switched into the ``WEBVIEW_com.tencent.mm:toolsmp``
    context and each window handle is tried until one contains a
    ``body > pre`` element (if none does, the last handle visited stays
    selected). The page source of the selected window is dumped through
    ``utils.write_page_soure`` and the text of its first ``<pre>`` element is
    handed to ``parse_article``.

    The context is always switched back with ``utils.switch_to_context(driver)``
    (presumably the native app context -- confirm against ``utils``), even
    when reading or parsing fails, so a failure no longer leaves the session
    stuck inside the webview.

    :param driver: driver session controlling the app.
    :param biz: value passed through unchanged to ``parse_article``.
    :return: the value returned by ``parse_article(biz, text)``.
    :raises AttributeError: if the page source contains no ``<pre>`` element.
    """
    logging.info('获取历史文章内容')
    time_start = time.time()
    driver.implicitly_wait(15)
    time.sleep(4)  # fixed pause before touching the webview
    utils.switch_to_context(driver, 'WEBVIEW_com.tencent.mm:toolsmp')
    try:
        # Try every open window until one exposes the <pre> payload.
        for handle in driver.window_handles:
            try:
                driver.switch_to.window(handle)
                driver.find_element_by_css_selector("body > pre")
                logging.info('定位成功')
                break
            except Exception as e:
                logging.info("切换窗口:" + format(e))
        content = driver.page_source
        utils.write_page_soure("data_analysis", content)
        soup = bs4(content, "html.parser")
        basic_soup = soup.find('pre').text
        can_continue = parse_article(biz, basic_soup)
        print('能否继续:' + str(can_continue))
        sum_time = int(time.time() - time_start)
        logging.info('花费时间: %s s', sum_time)
    finally:
        # Leave the webview context on every path, including errors.
        utils.switch_to_context(driver)

    time.sleep(1)
    # Key code 4 is the Android BACK key: leave the page that was just read.
    driver.keyevent(4)
    return can_continue
Ejemplo n.º 2
0
def get_content_and_comment(driver):
    """Return the page source of the article currently shown in the webview.

    The driver is moved into the ``WEBVIEW_com.tencent.mm:toolsmp`` context,
    the page source is read, the context is switched back with
    ``utils.switch_to_context(driver)`` and key event 4 is sent to go back.

    Despite its name, this function neither parses comments nor writes to a
    database; it only returns the raw page source.

    :param driver: driver session controlling the app.
    :return: the page source read while in the webview context.
    """
    webview_context = 'WEBVIEW_com.tencent.mm:toolsmp'

    # Allow time for page load and request responses; with a 2 second wait
    # the user comments may not be extractable.
    driver.implicitly_wait(3)

    # The h5 page is only reachable after switching to the webview context.
    utils.switch_to_context(driver, webview_context)
    page_html = driver.page_source

    # The context has to be switched back for the key event to take effect.
    utils.switch_to_context(driver)
    # Go back to the conversation.
    driver.keyevent(4)
    return page_html
Ejemplo n.º 3
0
def _select_follow_window(driver):
    """Visit the open windows in order, stopping at the first with the follow button."""
    for window in driver.window_handles:
        try:
            driver.switch_to.window(window)
            driver.find_element_by_id('js_btn_add_contact')
        except Exception as err:
            print("已关注公众号或者关注异常:" + format(err))
        else:
            # This window has the button; keep it selected.
            return


def add_contact(driver, flag=True):
    """Click the follow button on the history-message page.

    Any failure to find or click the button (for example because the account
    is already followed) is printed and otherwise ignored. The function then
    navigates back with ``driver.back()``.

    :param driver: driver session controlling the app.
    :param flag: True when the page is an h5 page (the webview context and
        the right window are selected first, and the context is switched
        back afterwards); False when it is a native page.
    :return: None
    """
    # Allow time for page load and request responses.
    driver.implicitly_wait(10)

    if flag:
        # h5 pages need both the webview context and the right window (tab).
        utils.switch_to_context(driver, 'WEBVIEW_com.tencent.mm:toolsmp')
        _select_follow_window(driver)

    try:
        follow_button = driver.find_element_by_id("js_btn_add_contact")
        follow_button.click()
        print('关注成功')
        time.sleep(4)  # wait for the follow action to complete
    except Exception as err:
        print("已关注公众号或者关注异常:" + format(err))

    if flag:
        utils.switch_to_context(driver)

    # Go back to the conversation.
    driver.back()
    time.sleep(1)
Ejemplo n.º 4
0
 def getBizInfo(self, driver, biz):
     """
     Collect the official account's profile fields from the h5 profile page.

     Switches to the ``WEBVIEW_com.tencent.mm:toolsmp`` context, selects the
     first window containing the profile container and fills
     ``self.biz_info`` with the keys ``biz``, ``nickname``, ``icon``,
     ``operator``, ``data_url`` and ``profile_desc``.

     :param driver: driver session controlling the app.
     :param biz: value stored unchanged under ``self.biz_info['biz']``.
     :raises AttributeError: if no window contains the profile container;
         ``owner_info`` then keeps its ``''`` placeholder, which has no
         element-lookup methods.
     """
     driver.implicitly_wait(6)
     time.sleep(10)  # fixed pause before touching the webview
     utils.switch_to_context(driver, 'WEBVIEW_com.tencent.mm:toolsmp')
     handles = driver.window_handles  # all currently open window handles
     owner_info = ''
     for handle in handles:
         try:
             driver.switch_to.window(handle)
             owner_info = driver.find_element_by_css_selector(
                 "body>div.profile_container>div.profile_info.appmsg")
             break
         except Exception as e:
             logging.info("切换窗口:" + format(e))
     # Read the account owner's information from the profile container.
     self.biz_info['biz'] = biz
     self.biz_info['nickname'] = owner_info.find_element_by_id(
         "nickname").text
     self.biz_info['icon'] = owner_info.find_element_by_id(
         "icon").get_attribute("src")
     # The verification span is optional. Catch Exception rather than using a
     # bare except so KeyboardInterrupt/SystemExit are not swallowed.
     try:
         # Keep only the last space-separated token of the span text.
         self.biz_info[
             'operator'] = owner_info.find_element_by_css_selector(
                 "#js_verify_info>span").text.split(' ')[-1]
     except Exception:
         # Fallback value; the literal means "individual".
         self.biz_info['operator'] = '个人'
     try:
         self.biz_info[
             'data_url'] = owner_info.find_element_by_css_selector(
                 "#js_verify_info>span").get_attribute("data-url")
     except Exception:
         self.biz_info['data_url'] = ''
     self.biz_info['profile_desc'] = owner_info.find_element_by_class_name(
         "profile_desc").text