Example #1
def get_user_video_viewkeys(user):
    """Scrape viewkeys from the user's user/videos route."""
    is_premium = os.path.exists("premium")
    if is_premium:
        url = f"https://www.pornhubpremium.com/{user.user_type}/{user.key}/videos/premium"
    else:
        url = f"https://www.pornhub.com/{user.user_type}/{user.key}/videos"

    soup = get_soup(url)
    if soup is None:
        logger.info(f"Nothing on {url}")
        return []

    pages = 1
    hasNavigation = False
    hasEndlessScrolling = False

    # Some pages have a pagination bar at the bottom
    navigation = soup.find("div", {"class": "pagination3"})
    if navigation is not None:
        children = navigation.findChildren("li", {"class": "page_number"})
        pages = len(children) + 1
        hasNavigation = True
    # Others have a button for "endless scrolling"
    # In that case we have to keep crawling for as long as the button shows up
    elif soup.find(id="moreDataBtnStream"):
        hasEndlessScrolling = True

    keys = []
    current_page = 1
    next_url = url
    while current_page <= pages:
        # Check if the current page has another "endless scrolling" button as well
        # If so, increase the page counter
        if hasEndlessScrolling and soup.find(id="moreDataBtnStream"):
            pages += 1

        logger.info(f"Crawling {next_url}")
        # The user's "most recent videos" section
        videos = soup.find("div", {"class": "mostRecentVideosSection"})

        if videos is None:
            return []

        for video in videos.find_all("li"):
            if video.has_attr("data-video-vkey"):
                keys.append(video["data-video-vkey"])

        current_page += 1
        next_url = url + f"?page={current_page}"

        time.sleep(4)

        soup = get_soup(next_url)
        # We couldn't fetch the next page.
        if soup is None:
            break

    return keys
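
All of these scrapers lean on a get_soup helper that isn't shown in the examples. Below is a minimal sketch of what it plausibly looks like, assuming requests and BeautifulSoup; the second parameter (called as get_soup(url, False) in Example #2) is assumed here to toggle redirect-following, and returning None on any failure matches how every caller treats the result:

import requests
from bs4 import BeautifulSoup


def get_soup(url, allow_redirects=True):
    """Fetch a page and parse it, returning None on any failure.

    A sketch only: the real helper is not part of these examples, and
    the redirect flag is a guess based on the get_soup(url, False)
    call in Example #2.
    """
    try:
        response = requests.get(url, allow_redirects=allow_redirects, timeout=30)
    except requests.RequestException:
        return None

    # Callers interpret None as "this page doesn't exist or isn't reachable"
    if response.status_code != 200:
        return None

    return BeautifulSoup(response.text, "html.parser")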
Example #2
def get_user_type_and_url(key):
    """Detect the user type and the respective url for this user."""
    possible_urls = {}
    for user_type in [User.MODEL, User.USER, User.PORNSTAR]:
        possible_urls[user_type] = get_user_video_url(user_type, key)

    for user_type, url in possible_urls.items():
        soup = get_soup(url, False)

        if soup is not None:
            return user_type, url, soup

    raise Exception(f"Couldn't detect type for user {key}")
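
get_user_video_url and the User constants are not shown either. Here is a hypothetical reconstruction, inferred from the URL patterns spelled out in Example #1; the exact constant values are assumptions:

class User:
    # Assumed path segments; the real values are not visible in these examples
    MODEL = "model"
    USER = "users"
    PORNSTAR = "pornstar"


def get_user_video_url(user_type, key):
    # Mirrors the non-premium URL built in Example #1
    return f"https://www.pornhub.com/{user_type}/{key}/videos"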
Example #3
def get_playlist_video_viewkeys(playlist):
    """Scrape all viewkeys of the playlist's videos."""
    url = get_playlist_video_url(playlist.id)
    soup = get_soup(url)
    if soup is None:
        logger.error(f"Couldn't find site for playlist {playlist.id}")
        sys.exit(1)

    videos = soup.find(id="videoPlaylist")
    if videos is None:
        logger.error(f"Couldn't find video list for playlist {playlist.id}")
        sys.exit(1)

    keys = []
    for video in videos.find_all("li"):
        # Only take entries that carry a data-video-vkey attribute;
        # some <li> elements exist purely for programmatic purposes
        if video.has_attr("data-video-vkey"):
            keys.append(video["data-video-vkey"])

    return keys
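
Usage is straightforward, since the function only touches the playlist's id attribute. A hypothetical invocation:

from types import SimpleNamespace

# Hypothetical playlist object; the real Playlist type isn't shown here
playlist = SimpleNamespace(id="12345")
keys = get_playlist_video_viewkeys(playlist)
print(f"Playlist contains {len(keys)} videos")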
Example #4
def get_playlist_info(playlist_id):
    """Get meta information from playlist website."""
    url = get_playlist_video_url(playlist_id)
    soup = get_soup(url)
    if soup is None:
        logger.error("Got invalid response for playlist: {url}")
        sys.exit(1)

    header = soup.find(id="playlistTopHeader")
    if header is None:
        logger.info(f"Couldn't get info for playlist: {url}")
        check_logged_out(soup)
        sys.exit(1)

    title = header.find("span", {"id": "watchPlaylist"})
    name = title.text.strip()

    name = name.replace(" ", "_")
    name = re.sub(r"[\W]+", "_", name)

    return {"name": name}
Example #5
def get_channel_info(channel_id):
    """Get meta information from channel website."""
    url = get_channel_video_url(channel_id)
    soup = get_soup(url)
    if soup is None:
        logger.error("Got invalid response for channel: {url}")
        sys.exit(1)

    profile = soup.find(id="channelsProfile")
    if profile is None:
        logger.info(f"Couldn't get info for channel: {url}")
        check_logged_out(soup)
        sys.exit(1)

    title = profile.find("div", {"class": "title"})
    name = title.find("h1").text.strip()

    name = name.replace(" ", "_")
    name = re.sub(r"[\W]+", "_", name)

    return {"name": name}
Example #6
def get_channel_viewkeys(channel):
    """Scrape all public viewkeys of the channel's videos."""
    is_premium = os.path.exists("http_cookie_file")
    if is_premium:
        url = f"https://www.pornhubpremium.com/channels/{channel.id}/videos"
    else:
        url = f"https://www.pornhub.com/channels/{channel.id}/videos"

    soup = get_soup(url)
    if soup is None:
        logger.error(f"Failed to find video page for channel {channel.id}")
        check_logged_out(soup)
        sys.exit(1)

    pages = 1
    hasNavigation = False
    hasEndlessScrolling = False

    # Some pages have a pagination bar at the bottom
    navigation = soup.find("div", {"class": "pagination3"})
    if navigation is not None:
        children = navigation.findChildren("li", {"class": "page_number"})
        pages = len(children) + 1
        hasNavigation = True
    # Others have a button for "endless scrolling"
    # In that case we have to keep crawling for as long as the button shows up
    elif soup.find(id="moreDataBtnStream"):
        hasEndlessScrolling = True

    keys = []
    current_page = 1
    next_url = url
    while current_page <= pages:
        # Check if the current page has another "endless scrolling" button as well
        # If so, increase the page counter
        if hasEndlessScrolling and soup.find(id="moreDataBtnStream"):
            pages += 1

        logger.info(f"Crawling {next_url}")
        # Channel with normal video upload list
        videos = soup.find(id="showAllChanelVideos")

        if videos is None:
            logger.error(f"Couldn't find channel videos in site: {url}")
            check_logged_out(soup)
            sys.exit(1)

        for video in videos.find_all("li"):
            if video.has_attr("_vkey"):
                keys.append(video["_vkey"])

        current_page += 1
        next_url = url + f"?page={current_page}"

        time.sleep(4)

        soup = get_soup(next_url)
        # We couldn't fetch the next page.
        if soup is None:
            break

    return keys
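
The pagination-detection block at the top of Examples #1, #6 and #7 is duplicated verbatim. As a refactoring sketch (not part of the original code), it could be pulled into a helper:

def detect_pagination(soup):
    """Return (pages, has_endless_scrolling) for a first results page."""
    # Some pages have a pagination bar at the bottom
    navigation = soup.find("div", {"class": "pagination3"})
    if navigation is not None:
        children = navigation.findChildren("li", {"class": "page_number"})
        return len(children) + 1, False
    # Others expose a "load more" button for endless scrolling
    if soup.find(id="moreDataBtnStream"):
        return 1, True
    return 1, False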
Example #7
def get_video_upload_viewkeys(user, public=False):
    """Scrape viewkeys from the user's user/videos/upload route."""
    is_premium = os.path.exists("premium")
    if is_premium:
        url = (
            f"https://www.pornhubpremium.com/{user.user_type}/{user.key}/videos/premium"
        )
    else:
        url = f"https://www.pornhub.com/{user.user_type}/{user.key}/videos/upload"

    if public:
        if is_premium:
            url = f"https://www.pornhubpremium.com/{user.user_type}/{user.key}/videos/upload"
        else:
            url = f"https://www.pornhub.com/{user.user_type}/{user.key}/videos/public"

    soup = get_soup(url)
    if soup is None:
        logger.info(f"Nothing on {url}")
        return []

    pages = 1
    hasNavigation = False
    hasEndlessScrolling = False

    # Some pages have a pagination bar at the bottom
    navigation = soup.find("div", {"class": "pagination3"})
    if navigation is not None:
        children = navigation.findChildren("li", {"class": "page_number"})
        pages = len(children) + 1
        hasNavigation = True
    # Others have a button for "endless scrolling"
    # In that case we have to keep crawling for as long as the button shows up
    elif soup.find(id="moreDataBtnStream"):
        hasEndlessScrolling = True

    keys = []
    current_page = 1
    next_url = url
    while current_page <= pages:
        # Check if the current page has another "endless scrolling" button as well
        # If so, increase the page counter
        if hasEndlessScrolling and soup.find(id="moreDataBtnStream"):
            pages += 1

        logger.info(f"Crawling {next_url}")
        videoSection = soup.find("div", {"class": "videoUList"})
        pornstarVideoSection = soup.find(id="pornstarsVideoSection")
        claimedUploadedVideoSection = soup.find(
            id="claimedUploadedVideoSection")

        # Users with normal video upload list
        if videoSection is not None:
            videos = videoSection.find(id="moreData")
        # Users with pornstarVideoSection
        elif pornstarVideoSection is not None:
            videos = pornstarVideoSection
        # Not clear what this section is for, but it also holds video entries
        elif claimedUploadedVideoSection is not None:
            videos = claimedUploadedVideoSection
        else:
            logger.error(
                f"Couldn't find video section on {next_url}. Did we log out?")
            if check_logged_out(soup):
                sys.exit(1)
            return []

        for video in videos.find_all("li"):
            if video.has_attr("data-video-vkey"):
                keys.append(video["data-video-vkey"])

        current_page += 1
        next_url = url + f"?page={current_page}"

        time.sleep(4)

        soup = get_soup(next_url)
        # We couldn't fetch the next page.
        if soup is None:
            break

    return keys
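
Putting the pieces together, a hypothetical driver might look like this; User is assumed to be a simple record with user_type and key attributes, which is all the functions above touch:

from types import SimpleNamespace

key = "example-user"  # placeholder, not a real account
user_type, url, soup = get_user_type_and_url(key)
user = SimpleNamespace(user_type=user_type, key=key)

viewkeys = get_video_upload_viewkeys(user, public=True)
logger.info(f"Collected {len(viewkeys)} viewkeys for {key}")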