Python get_soup Exemples, pornhub.scraping.get_soup Python Exemples

Exemple #1

0

Afficher le fichier

def get_public_user_video_viewkeys(user):
    """Scrape all public viewkeys of the user's videos.

    The listing URL is resolved from ``user.user_type`` and ``user.key``.
    The number of pages is derived from the pagination block of the first
    page, then every page is crawled in order.

    Args:
        user: Object exposing ``user_type`` and ``key`` attributes.

    Returns:
        A list with the ``_vkey`` attribute of every video entry found.
        The list is empty when no listing URL exists or the first page
        cannot be fetched. If a later page cannot be fetched or lacks a
        video section, the keys collected so far are returned.
    """
    url = get_secondary_user_video_url(user.user_type, user.key)

    # Couldn't find a public/upload video site
    if url is None:
        return []

    soup = get_soup(url)
    # get_soup signals a failed fetch with None; nothing to scrape then.
    if soup is None:
        return []

    navigation = soup.find('div', {'class': 'pagination3'})
    if navigation is not None:
        children = navigation.findChildren('li', {'class': 'page_number'})
        pages = len(children) + 1
    else:
        pages = 1

    keys = []
    current_page = 1
    next_url = url
    while True:
        print(f'Crawling {next_url}')
        # Videos for normal users/models
        wrapper = soup.find('div', {'class': 'videoUList'})

        # Videos for pornstars
        if wrapper is None:
            videos = soup.find(id='pornstarsVideoSection')
        else:
            videos = wrapper.find('ul')

        # No video section on this page: stop, but keep what earlier
        # pages already yielded instead of discarding it.
        if videos is None:
            break

        # Some <li> elements are not videos and carry no _vkey attribute.
        keys.extend(
            video['_vkey']
            for video in videos.find_all('li')
            if video.has_attr('_vkey')
        )

        current_page += 1
        # Stop before sleeping/fetching a page beyond the last one.
        if current_page > pages:
            break

        next_url = url + f'?page={current_page}'

        # Pause between page requests.
        time.sleep(4)

        soup = get_soup(next_url)
        # We couldn't get the next url.
        if soup is None:
            break

    return keys

Exemple #2

0

Afficher le fichier

def get_recent_video_viewkeys(user):
    """Scrape all viewkeys of the user's videos.

    The listing URL is resolved from ``user.user_type`` and ``user.key``.
    The number of pages is derived from the pagination block of the first
    page, then every page is crawled in order.

    Args:
        user: Object exposing ``user_type`` and ``key`` attributes.

    Returns:
        A list with the ``_vkey`` attribute of every video entry found.
        The list is empty when the first page cannot be fetched or has no
        video section. If a later page cannot be fetched or lacks a video
        section, the keys collected so far are returned.
    """
    url = get_user_video_url(user.user_type, user.key)
    soup = get_soup(url)
    # get_soup signals a failed fetch with None; nothing to scrape then.
    if soup is None:
        return []

    navigation = soup.find('div', {'class': 'pagination3'})
    if navigation is not None:
        children = navigation.findChildren('li', {'class': 'page_number'})
        pages = len(children) + 1
    else:
        pages = 1

    keys = []
    current_page = 1
    next_url = url
    while True:
        print(f'Crawling {next_url}')
        # Videos for normal users/models
        videos = soup.find(id='mostRecentVideosSection')

        # Videos for pornstars
        if videos is None:
            videos = soup.find(id='pornstarsVideoSection')

        # No recent-videos section on this page: stop, but keep what
        # earlier pages already yielded instead of discarding it.
        if videos is None:
            break

        # Some <li> elements are not videos and carry no _vkey attribute.
        keys.extend(
            video['_vkey']
            for video in videos.find_all('li')
            if video.has_attr('_vkey')
        )

        current_page += 1
        # Stop before sleeping/fetching a page beyond the last one.
        if current_page > pages:
            break

        next_url = url + f'?page={current_page}'

        # Pause between page requests.
        time.sleep(4)
        soup = get_soup(next_url)
        # We couldn't get the next url.
        if soup is None:
            break

    return keys

Exemple #3

0

Afficher le fichier

Fichier : playlist.py Projet : joeaney/pornhub-dl

def get_playlist_video_viewkeys(playlist):
    """Scrape all viewkeys of the playlist's videos.

    Args:
        playlist: Object exposing an ``id`` attribute used to build the
            playlist URL.

    Returns:
        A list with the ``_vkey`` attribute of every playlist entry that
        has one. Empty when the page cannot be fetched or contains no
        ``videoPlaylist`` element.
    """
    url = f'https://www.pornhub.com/playlist/{playlist.id}'
    soup = get_soup(url)
    # NOTE(review): get_soup appears to return None when the fetch fails;
    # treat that as "no videos" rather than crashing on soup.find.
    if soup is None:
        return []

    videos = soup.find(id='videoPlaylist')
    # Page has no playlist container (e.g. removed or private playlist).
    if videos is None:
        return []

    # Not every <li> is a video entry; indexing one without the _vkey
    # attribute would raise KeyError, so filter those out.
    return [
        video['_vkey']
        for video in videos.find_all('li')
        if video.has_attr('_vkey')
    ]

Exemple #4

0

Afficher le fichier

Fichier : playlist.py Projet : philippemarlin/pornhub-dl

def get_playlist_video_viewkeys(playlist):
    """Scrape all viewkeys of the playlist's videos.

    Args:
        playlist: Object exposing an ``id`` attribute used to build the
            playlist URL.

    Returns:
        A list with the ``_vkey`` attribute of every playlist entry that
        has one. Empty when the page cannot be fetched or contains no
        ``videoPlaylist`` element.
    """
    url = f'https://www.pornhub.com/playlist/{playlist.id}'
    soup = get_soup(url)
    # NOTE(review): get_soup appears to return None when the fetch fails;
    # treat that as "no videos" rather than crashing on soup.find.
    if soup is None:
        return []

    videos = soup.find(id='videoPlaylist')
    # Page has no playlist container (e.g. removed or private playlist).
    if videos is None:
        return []

    # Only keep entries with a _vkey attribute: some <li> elements exist
    # purely for programmatic purposes and are not videos.
    return [
        video['_vkey']
        for video in videos.find_all('li')
        if video.has_attr('_vkey')
    ]