Ejemplo n.º 1
0
def crawler():
    """Print the room ID and subscription count for each listed account page.

    Account URLs are read, one per line, from ``huya_account_list.txt`` via
    ``Utils.io.load_string``. Each URL is opened in the browser and the line
    ``account_url text_id text_subscribe`` is printed. A value whose element
    cannot be found on the page is printed as an empty string.

    Blank lines in the list (including the one produced by a trailing
    newline) are skipped, and surrounding whitespace such as a stray ``\\r``
    is stripped from each URL before it is loaded.
    """
    browser = Chrome(cache_path=r"E:\temp")
    # NOTE(review): the browser is never shut down in this function. If the
    # Chrome wrapper exposes quit()/close(), consider calling it in a
    # try/finally around the loop -- confirm against the wrapper's API.

    account_list = Utils.io.load_string("huya_account_list.txt")
    for raw_url in account_list.splitlines():
        account_url = raw_url.strip()
        if not account_url:
            # Nothing to load; passing an empty URL to the browser would fail.
            continue

        browser.get(account_url)

        # Read the live room's subscription count.
        text_subscribe = ""
        try:
            label_subscribe = browser.find_element_by_xpath(
                '//*[@id="activityCount"]')
            if label_subscribe is not None:
                text_subscribe = label_subscribe.text
        except NoSuchElementException:
            # Best effort: leave the value empty when the element is missing.
            pass

        # Read the live room's ID.
        text_id = ""
        try:
            label_id = browser.find_element_by_css_selector(
                '#J_roomHeader > div.room-hd-l > div.host-info > div.host-detail.J_roomHdDetail > span.host-rid'
            )
            if label_id is not None:
                text_id = label_id.text
        except NoSuchElementException:
            # Best effort: leave the value empty when the element is missing.
            pass

        print(account_url, text_id, text_subscribe)

        # Pause between page loads.
        time.sleep(3)
Ejemplo n.º 2
0
def crawler(live_list_path):
    """Print the subscription text found on each listed live page.

    Args:
        live_list_path: Path passed to ``Utils.io.load_string``; the loaded
            text is expected to contain one page URL per line.

    For every URL the page is opened, the function waits 3 seconds, and then
    polls the target element up to 10 times (1 second apart) until it has
    non-empty text. ``account_url text_subscribe`` is printed afterwards;
    ``text_subscribe`` is an empty string if no text was obtained.

    Blank lines in the list (including the one produced by a trailing
    newline) are skipped, and surrounding whitespace such as a stray ``\\r``
    is stripped from each URL before it is loaded.
    """
    driver = Chrome(cache_path=r"E:\Temp")
    # NOTE(review): the driver is never shut down in this function. If the
    # Chrome wrapper exposes quit()/close(), consider calling it in a
    # try/finally around the loop -- confirm against the wrapper's API.

    account_list = Utils.io.load_string(live_list_path)

    for raw_url in account_list.splitlines():
        account_url = raw_url.strip()
        if not account_url:
            # Nothing to load; passing an empty URL to the browser would fail.
            continue

        driver.get(account_url)

        # Give the page time to start rendering before polling.
        time.sleep(3)

        text_subscribe = ""

        for _ in range(10):
            try:
                label_subscribe = driver.find_element_by_xpath(
                    '//*[@id="js-player-title"]/div/div[4]/div/span')
                # Read the text once; each access queries the browser.
                current_text = label_subscribe.text
                if current_text:
                    text_subscribe = current_text
                    break
            except NoSuchElementException:
                # Element not present yet; retry after the pause below.
                pass
            time.sleep(1)

        print(account_url, text_subscribe)