import html
import time

from bs4 import BeautifulSoup
from tqdm import tqdm

# Browser, makeDir, writeToFile, downloadImage, and the extract* helpers are
# project-local utilities assumed to be defined (or imported) elsewhere in
# this module.


def runCrawl(limitNum=0, queryList=None, is_all_comments=False):
    # Avoid a mutable default argument: a shared list would leak state
    # between calls.
    if queryList is None:
        queryList = []
    browser = Browser("driver/chromedriver")
    for query in queryList:
        browser.clearLink()
        makeDir("data")
        makeDir("data/" + query)
        # Queries starting with "#" are hashtag pages; anything else is
        # treated as a username.
        if query[0] == "#":
            mUrl = "https://www.instagram.com/explore/tags/" + query[1:] + "/?hl=en"
        else:
            mUrl = "https://www.instagram.com/" + query + "/?hl=en"
        browser.goToPage(mUrl)

        print("collecting url of " + query + "...")
        browser.scrollPageToBottomUntilEnd(browser.collectDpageUrl, limitNum)
        print("finish scroll collecting!")

        print("collecting data...")
        slist = list(set(browser.urlList))  # deduplicate collected post URLs
        for url in tqdm(slist):
            # Post URLs look like https://www.instagram.com/p/<shortcode>/,
            # so index 4 of the split is the post shortcode.
            dirName = url.split("/")[4]
            # skip if already crawled
            if not makeDir("data/" + query + "/" + dirName):
                continue
            browser.goToPage(url)
            if is_all_comments:
                browser.expandComments()
            cur = browser.getPageSource()
            writeToFile("data/" + query + "/" + dirName + "/raw.html", [cur])
            infoData = cur.split("<meta content=")[1].split(" ")

            # extract data
            lang = extractLang(cur)
            # likes = extractLikes(infoData, lang)
            likes = extractLikes_cur(cur)
            comments = extractComments(infoData, lang)
            caption = extractCaption(cur)
            dateTime = extractDateTime(cur)
            commentMessages = extractCommentsMessage(cur)
            # print("likes:", likes, " comments:", comments, " caption:", caption,
            #       "commentMessages:", commentMessages, "dateTime:", dateTime)
            writeToFile("data/" + query + "/" + dirName + "/info.txt", [
                "likes: ", likes, "",
                "comments: ", comments, "",
                "caption: ", caption, "",
                "commentMessages: ", commentMessages, "",
                "dateTime: ", dateTime, "",
            ])

            # download image via the og:image meta tag
            imageUrl = html.unescape(
                cur.split('meta property="og:image" content="')[1].split('"')[0])
            downloadImage(imageUrl, "data/" + query + "/" + dirName + "/image.jpg")
            time.sleep(1)

        print("query " + query + " collecting finish")
        time.sleep(2)

    browser.driver.quit()
    print("FINISH!")
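# Usage sketch (illustrative, not part of the original source): one way to
# drive the anonymous variant above. The query strings and limit are
# hypothetical; "#cat" is crawled as a hashtag page and "natgeo" as a
# profile page. Assumes chromedriver sits at driver/chromedriver, as the
# function expects.
def example_anonymous_crawl():
    runCrawl(limitNum=100, queryList=["#cat", "natgeo"], is_all_comments=True)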
# Variant with optional login support: same crawl loop, but parses the page
# with BeautifulSoup instead of raw string splitting.
def runCrawl(limitNum=0, queryList=None, is_all_comments=False, userinfo=None):
    # Avoid mutable default arguments; None stands in for "not provided".
    if queryList is None:
        queryList = []
    browser = Browser("driver/chromedriver")

    # Log in first when credentials are supplied; otherwise crawl anonymously.
    if userinfo:
        print("Start logging in")
        browser.goToPage("https://www.instagram.com/accounts/login/?hl=en")
        if browser.log_in(userinfo):
            print("Logged in successfully")
        else:
            print("Failed to log in")
            return
    else:
        print("Continuing without logging in")

    for query in queryList:
        browser.clearLink()
        makeDir("data")
        makeDir("data/" + query)
        # Queries starting with "#" are hashtag pages; anything else is
        # treated as a username.
        if query[0] == "#":
            mUrl = "https://www.instagram.com/explore/tags/" + query[1:] + "/?hl=en"
        else:
            mUrl = "https://www.instagram.com/" + query + "/?hl=en"
        browser.goToPage(mUrl)

        print("collecting url of " + query + "...")
        browser.scrollPageToBottomUntilEnd(browser.collectDpageUrl, limitNum)
        print("finish scroll collecting!")

        print("collecting data...")
        slist = list(set(browser.urlList))  # deduplicate collected post URLs
        for url in tqdm(slist):
            dirName = url.split("/")[4]
            # skip if already crawled
            if not makeDir("data/" + query + "/" + dirName):
                continue
            browser.goToPage(url)
            if is_all_comments:
                browser.expandComments()
            cur = browser.getPageSource()
            writeToFile("data/" + query + "/" + dirName + "/raw.html", [cur])
            infoData = BeautifulSoup(cur, "lxml")
            # "FFVAD" is Instagram's obfuscated (and change-prone) CSS class
            # for the post image element.
            imageData = infoData.find("img", class_="FFVAD")

            # extract data
            likes = extractLikes(infoData)
            comments_list = extractComments(infoData)
            comments = len(comments_list)
            caption = extractCaption(imageData)
            dateTime = extractDateTime(infoData)
            commentMessages = extractCommentsMessage(comments_list)
            # print("likes:", likes, " comments:", comments, " caption:", caption,
            #       "commentMessages:", commentMessages, "dateTime:", dateTime)
            writeToFile("data/" + query + "/" + dirName + "/info.txt", [
                "likes: ", likes, "",
                "comments: ", comments, "",
                "caption: ", caption, "",
                "commentMessages: ", commentMessages, "",
                "dateTime: ", dateTime, "",
            ])

            # download image; srcset lists candidate URLs at several widths,
            # which downloadImage is assumed to handle
            imageUrl = imageData.get("srcset")
            downloadImage(imageUrl, "data/" + query + "/" + dirName + "/image.jpg")
            time.sleep(1)

        print("query " + query + " collecting finish")
        time.sleep(2)

    browser.driver.quit()
    print("FINISH!")
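# Usage sketch (illustrative, not part of the original source): drive the
# login-enabled variant. The credential dict keys are an assumption about
# what Browser.log_in() expects; substitute a real account, or omit userinfo
# to crawl anonymously.
def example_logged_in_crawl():
    credentials = {"username": "your_username", "password": "your_password"}
    runCrawl(limitNum=50, queryList=["#sunset"], is_all_comments=False,
             userinfo=credentials)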