def get_playlist_info(playlist_id):
    """Get meta information from playlist website."""
    url = get_playlist_video_url(playlist_id)
    soup = get_soup(url)
    if soup is None:
        logger.error(f"Got invalid response for playlist: {url}")
        sys.exit(1)

    header = soup.find(id="playlistTopHeader")
    if header is None:
        logger.info(f"Couldn't get info for playlist: {url}")
        check_logged_out(soup)
        sys.exit(1)

    # Extract the playlist title and turn it into a filesystem-safe name.
    title = header.find("span", {"id": "watchPlaylist"})
    name = title.text.strip()
    name = name.replace(" ", "_")
    name = re.sub(r"[\W]+", "_", name)

    return {"name": name}
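# `get_soup` and `get_playlist_video_url` are used above but defined elsewhere
# in the project. For orientation, a minimal sketch of what `get_soup` might
# look like, assuming `requests` plus BeautifulSoup and that any non-200
# response maps to `None`; the real helper may differ (session handling,
# cookies, retries). `get_soup_sketch` is a hypothetical name to avoid
# clashing with the real function.
import requests
from bs4 import BeautifulSoup


def get_soup_sketch(url):
    """Hypothetical stand-in for get_soup: fetch and parse a page, or return None."""
    response = requests.get(url)
    if response.status_code != 200:
        return None
    return BeautifulSoup(response.text, "html.parser")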
def get_channel_info(channel_id):
    """Get meta information from channel website."""
    url = get_channel_video_url(channel_id)
    soup = get_soup(url)
    if soup is None:
        logger.error(f"Got invalid response for channel: {url}")
        sys.exit(1)

    profile = soup.find(id="channelsProfile")
    if profile is None:
        logger.info(f"Couldn't get info for channel: {url}")
        check_logged_out(soup)
        sys.exit(1)

    # Extract the channel title and turn it into a filesystem-safe name.
    title = profile.find("div", {"class": "title"})
    name = title.find("h1").text.strip()
    name = name.replace(" ", "_")
    name = re.sub(r"[\W]+", "_", name)

    return {"name": name}
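# Both info functions above sanitize the scraped title with the same two-step
# cleanup: spaces to underscores, then every run of non-word characters to a
# single underscore. A small self-contained illustration of that logic
# (`slugify` is a hypothetical name, not part of the scraper):
import re


def slugify(name):
    """Replicate the name cleanup from get_playlist_info/get_channel_info."""
    name = name.strip().replace(" ", "_")
    return re.sub(r"[\W]+", "_", name)


assert slugify("My Channel: Best Videos!") == "My_Channel__Best_Videos_"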
def get_channel_viewkeys(channel):
    """Scrape all public viewkeys of the channel's videos."""
    is_premium = os.path.exists("http_cookie_file")
    if is_premium:
        url = f"https://www.pornhubpremium.com/channels/{channel.id}/videos"
    else:
        url = f"https://www.pornhub.com/channels/{channel.id}/videos"

    soup = get_soup(url)
    if soup is None:
        logger.error(f"Failed to find video page for channel {channel.id}")
        sys.exit(1)

    pages = 1
    hasEndlessScrolling = False

    # Some sites have a pagination navigation at the bottom.
    navigation = soup.find("div", {"class": "pagination3"})
    if navigation is not None:
        children = navigation.findChildren("li", {"class": "page_number"})
        pages = len(children) + 1
    # Others have a button for "endless scrolling".
    # In that case we have to keep crawling for as long as the button shows up.
    elif soup.find(id="moreDataBtnStream"):
        hasEndlessScrolling = True

    keys = []
    current_page = 1
    next_url = url
    while current_page <= pages:
        # Check if the next site has another "endless scrolling" button as well.
        # If that's the case, increase the page counter.
        if hasEndlessScrolling and soup.find(id="moreDataBtnStream"):
            pages += 1

        logger.info(f"Crawling {next_url}")

        # Channel with a normal video upload list.
        videos = soup.find(id="showAllChanelVideos")
        if videos is None:
            logger.error(f"Couldn't find channel videos in site: {next_url}")
            check_logged_out(soup)
            sys.exit(1)

        for video in videos.find_all("li"):
            if video.has_attr("_vkey"):
                keys.append(video["_vkey"])

        current_page += 1
        next_url = url + f"?page={current_page}"
        time.sleep(4)
        soup = get_soup(next_url)
        # We couldn't get the next url.
        if soup is None:
            break

    return keys
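# `check_logged_out` is called in several error paths above but is not part of
# this section. A loose sketch of what such a check could look like, assuming
# the premium site serves a login form once the session expires; the marker
# element is an assumption, and `check_logged_out_sketch` is a hypothetical
# name:
def check_logged_out_sketch(soup):
    """Hypothetical: return True if the page looks like a logged-out view."""
    if soup is None:
        return True
    # Assumption: a login form only shows up when the session has expired.
    return soup.find("form", {"id": "loginForm"}) is not None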
def get_video_upload_viewkeys(user, public=False):
    """Scrape viewkeys from the user's user/videos/upload route."""
    is_premium = os.path.exists("premium")
    if is_premium:
        url = f"https://www.pornhubpremium.com/{user.user_type}/{user.key}/videos/premium"
    else:
        url = f"https://www.pornhub.com/{user.user_type}/{user.key}/videos/upload"

    if public:
        if is_premium:
            url = f"https://www.pornhubpremium.com/{user.user_type}/{user.key}/videos/upload"
        else:
            url = f"https://www.pornhub.com/{user.user_type}/{user.key}/videos/public"

    soup = get_soup(url)
    if soup is None:
        logger.info(f"Nothing on {url}")
        return []

    pages = 1
    hasEndlessScrolling = False

    # Some sites have a pagination navigation at the bottom.
    navigation = soup.find("div", {"class": "pagination3"})
    if navigation is not None:
        children = navigation.findChildren("li", {"class": "page_number"})
        pages = len(children) + 1
    # Others have a button for "endless scrolling".
    # In that case we have to keep crawling for as long as the button shows up.
    elif soup.find(id="moreDataBtnStream"):
        hasEndlessScrolling = True

    keys = []
    current_page = 1
    next_url = url
    while current_page <= pages:
        # Check if the next site has another "endless scrolling" button as well.
        # If that's the case, increase the page counter.
        if hasEndlessScrolling and soup.find(id="moreDataBtnStream"):
            pages += 1

        logger.info(f"Crawling {next_url}")

        videoSection = soup.find("div", {"class": "videoUList"})
        pornstarVideoSection = soup.find(id="pornstarsVideoSection")
        claimedUploadedVideoSection = soup.find(id="claimedUploadedVideoSection")

        # Users with a normal video upload list.
        if videoSection is not None:
            videos = videoSection.find(id="moreData")
        # Users with a pornstarVideoSection.
        elif pornstarVideoSection is not None:
            videos = pornstarVideoSection
        # Unclear what this section represents.
        elif claimedUploadedVideoSection is not None:
            videos = claimedUploadedVideoSection
        else:
            logger.error(f"Couldn't find video section on {next_url}. Did we log out?")
            if check_logged_out(soup):
                sys.exit(1)
            return []

        for video in videos.find_all("li"):
            if video.has_attr("data-video-vkey"):
                keys.append(video["data-video-vkey"])

        current_page += 1
        next_url = url + f"?page={current_page}"
        time.sleep(4)
        soup = get_soup(next_url)
        # We couldn't get the next url.
        if soup is None:
            break

    return keys
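# get_channel_viewkeys and get_video_upload_viewkeys share the same pagination
# pattern: count pages via the "pagination3" footer when present, otherwise
# keep extending the page count while the "moreDataBtnStream" endless-scrolling
# button reappears. A condensed sketch of that shared loop, with the per-page
# extraction factored into a hypothetical `extract_keys` callback; this is a
# possible refactor, not code from the project:
def scrape_paginated_viewkeys(url, extract_keys, delay=4):
    """Hypothetical refactor: walk all pages of `url` and collect viewkeys."""
    soup = get_soup(url)
    if soup is None:
        return []

    pages = 1
    endless_scrolling = False
    navigation = soup.find("div", {"class": "pagination3"})
    if navigation is not None:
        pages = len(navigation.findChildren("li", {"class": "page_number"})) + 1
    elif soup.find(id="moreDataBtnStream"):
        endless_scrolling = True

    keys = []
    current_page = 1
    while current_page <= pages:
        # Each page that still shows the button implies at least one more page.
        if endless_scrolling and soup.find(id="moreDataBtnStream"):
            pages += 1
        keys.extend(extract_keys(soup))
        current_page += 1
        time.sleep(delay)
        soup = get_soup(url + f"?page={current_page}")
        if soup is None:
            break
    return keys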