def video_config_url(url):
    """Return the absolute URL of the player config file for a video page.

    The page at ``url`` is obtained through ``_get_soup`` (with ``cache=True``)
    and searched for the first ``<div>`` carrying the ``player-jv`` class. Its
    ``data-src`` attribute is handed to ``utils.get_absolute_url``.

    Returns ``None`` when no such player element is found.
    """
    player_div = _get_soup(url, cache=True).find("div", class_="player-jv")
    if player_div:
        # the player element points at its config file through data-src
        config_src = player_div.get('data-src')
        return utils.get_absolute_url(config_src)
    return None
def video_pages(url):
    """Return the absolute URLs of the video links found in a page's articles.

    Every ``<article>`` element of the page at ``url`` is passed to
    ``_get_video_link``; falsy results (articles without a usable link) are
    dropped, and the ``href`` of each remaining link is converted with
    ``utils.get_absolute_url``.

    Returns an empty list when the page contains no ``<article>`` element.
    """
    article_tags = _get_soup(url).find_all("article")
    if not article_tags:
        return []

    # look up one candidate link per article, keeping only the usable ones
    anchors = [
        anchor for anchor in map(_get_video_link, article_tags) if anchor
    ]

    # turn each remaining link target into an absolute URL
    return [utils.get_absolute_url(anchor.get('href')) for anchor in anchors]
def _filename_to_url(filename):
    """Percent-decode ``filename`` and return it as an absolute URL.

    The decoding is done with ``urllib.parse.unquote``; the result is then
    passed through ``utils.get_absolute_url``.
    """
    return utils.get_absolute_url(urllib.parse.unquote(filename))
def test_get_absolute_url_relative(self):
    """A site-relative path must come back prefixed with the site's host."""
    self.assertEqual(
        get_absolute_url('/videos/chroniques/434958/speed-game-live-any-majora-s-mask-fini-en-moins-de-1h35.htm'),
        'http://www.jeuxvideo.com/videos/chroniques/434958/speed-game-live-any-majora-s-mask-fini-en-moins-de-1h35.htm',
    )
def test_get_relative_url_homepage_noslash(self):
    """The homepage URL without a trailing slash must come back unchanged.

    NOTE: despite "relative" in the test name, the function exercised here
    is ``get_absolute_url``.
    """
    self.assertEqual(
        get_absolute_url('http://www.jeuxvideo.com'),
        'http://www.jeuxvideo.com',
    )