def generate_m3u(track_file):
    """Generate an .m3u playlist file from the tracks listed in *track_file*.

    Each track is matched against YouTube metadata; matched tracks are
    written as ``#EXTINF`` entries, unmatched tracks are skipped with a
    warning.  The playlist is written next to *track_file* with an
    ``.m3u`` extension.

    Returns:
        list: the YouTube watch URLs of all matched tracks, in order.
    """
    import os

    tracks = internals.get_unique_tracks(track_file)
    # os.path.splitext strips only the final extension, so paths that
    # contain dots elsewhere (e.g. "./my.songs.txt") still resolve to the
    # right location — track_file.split(".")[0] would truncate them.
    target_file = "{}.m3u".format(os.path.splitext(track_file)[0])
    total_tracks = len(tracks)
    log.info("Generating {0} from {1} YouTube URLs".format(
        target_file, total_tracks))

    videos = []
    # Keep a single file handle open for the whole run instead of
    # re-opening the playlist in append mode for every matched track.
    with open(target_file, "w") as output_file:
        output_file.write("#EXTM3U\n\n")
        for n, track in enumerate(tracks, 1):
            content, _ = match_video_and_metadata(track)
            if content is None:
                log.warning("Skipping {}".format(track))
                continue

            log.info("Matched track {0}/{1} ({2})".format(
                n, total_tracks, content.watchv_url))
            log.debug(track)
            m3u_key = "#EXTINF:{duration},{title}\n{youtube_url}\n".format(
                duration=internals.get_sec(content.duration),
                title=content.title,
                youtube_url=content.watchv_url,
            )
            log.debug(m3u_key)
            output_file.write(m3u_key)
            videos.append(content.watchv_url)

    return videos
def scrape(self, bestmatch=True, tries_remaining=5):
    """ Search and scrape YouTube to return a list of matching videos. """

    # Guard against infinite recursion: give up once the retry budget
    # is spent.
    if tries_remaining == 0:
        log.debug("No tries left. I quit.")
        sys.stdout.flush()
        return

    search_url = generate_search_url(self.search_query)
    log.debug("Opening URL: {0}".format(search_url))
    sys.stdout.flush()

    raw_page = self._fetch_response(search_url).read()
    soup = BeautifulSoup(raw_page, "html.parser")

    results = []
    tiles = soup.find_all(
        "div", {"class": "yt-lockup-dismissable yt-uix-tile"}
    )
    for tile in tiles:
        if not is_video(tile):
            continue

        lockup = tile.find("div", class_="yt-lockup-content")
        anchor = lockup.find("a")
        video_id = anchor["href"][-11:]
        video_title = anchor["title"]
        try:
            duration = tile.find("span", class_="video-time").get_text()
        except AttributeError:
            # Duration missing from the markup — retry the whole search
            # with one fewer attempt remaining.
            log.debug("Could not find video duration on YouTube, retrying..")
            sys.stdout.flush()
            return self.scrape(
                bestmatch=bestmatch, tries_remaining=tries_remaining - 1
            )

        results.append({
            "link": video_id,
            "title": video_title,
            "videotime": duration,
            "seconds": internals.get_sec(duration),
        })

    if bestmatch:
        return self._best_match(results)
    return results
def scrape(self, bestmatch=True, tries_remaining=5):
    """ Search and scrape YouTube to return a list of matching videos. """

    # Stop recursing once all retry attempts have been used up.
    if tries_remaining == 0:
        log.debug('No tries left. I quit.')
        return

    search_url = generate_search_url(self.search_query)
    log.debug('Opening URL: {0}'.format(search_url))

    page_bytes = urllib.request.urlopen(search_url).read()
    soup = BeautifulSoup(page_bytes, "html.parser")

    matches = []
    for tile in soup.find_all(
            'div', {'class': 'yt-lockup-dismissable yt-uix-tile'}):
        if not is_video(tile):
            continue

        lockup = tile.find('div', class_='yt-lockup-content')
        anchor = lockup.find('a')
        video_id = anchor['href'][-11:]
        video_title = anchor['title']
        try:
            duration = tile.find('span', class_="video-time").get_text()
        except AttributeError:
            # No duration in the markup — re-run the search with one
            # fewer attempt remaining.
            log.debug(
                'Could not find video duration on YouTube, retrying..')
            return self.scrape(bestmatch=bestmatch,
                               tries_remaining=tries_remaining - 1)

        matches.append({
            'link': video_id,
            'title': video_title,
            'videotime': duration,
            'seconds': internals.get_sec(duration)
        })

    if bestmatch:
        return self._best_match(matches)
    return matches
def test_raise_error(self):
    """get_sec rejects durations with unsupported separators."""
    for bad_duration in ("10*05", "02,28,46"):
        with pytest.raises(ValueError):
            internals.get_sec(bad_duration)
def test_from_hours(self):
    """Both '.'- and ':'-separated H.M.S durations convert to seconds."""
    expected = 5405  # 1 h 30 m 5 s
    for duration in ("1.30.05", "1:30:05"):
        assert internals.get_sec(duration) == expected
def test_from_minutes(self):
    """Both '.'- and ':'-separated M.S durations convert to seconds."""
    expected = 213  # 3 m 33 s
    for duration in ("3.33", "3:33"):
        assert internals.get_sec(duration) == expected
def test_from_seconds(self):
    """Both ':'- and '.'-separated 0.S durations convert to seconds."""
    expected = 45
    for duration in ("0:45", "0.45"):
        assert internals.get_sec(duration) == expected
def test_raise_error(self):
    """get_sec rejects durations with unsupported separators."""
    bad_durations = ('10*05', '02,28,46')
    for duration in bad_durations:
        with pytest.raises(ValueError):
            internals.get_sec(duration)
def test_get_seconds_from_video_time(str_duration, sec_duration):
    """Parametrized: each duration string converts to the expected seconds."""
    converted = internals.get_sec(str_duration)
    assert converted == sec_duration