def test_ordered_set(self):
    # orderedSet must drop repeated items while preserving the order in
    # which each item was first seen. Each case is (input, expected).
    cases = (
        ([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5], [1, 2, 3, 4, 5, 6, 7]),
        ([], []),
        ([1], [1]),
        # keep the list ordered
        ([135, 1, 1, 1], [135, 1]),
    )
    for given, expected in cases:
        self.assertEqual(orderedSet(given), expected)
def test_ordered_set(self):
    # Duplicates scattered through the input collapse to their first
    # occurrence.
    with_repeats = orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5])
    self.assertEqual(with_repeats, [1, 2, 3, 4, 5, 6, 7])

    # Degenerate inputs come back unchanged.
    from_empty = orderedSet([])
    self.assertEqual(from_empty, [])
    from_single = orderedSet([1])
    self.assertEqual(from_single, [1])

    # keep the list ordered
    unsorted_input = orderedSet([135, 1, 1, 1])
    self.assertEqual(unsorted_input, [135, 1])
def _get_n_results(self, query, n, search_str=""):
    """Get a specified number of results for a query.

    Result pages are downloaded one after another until at least ``n``
    video ids have been collected or a page yields no matches. Only the
    first video seen for each user/channel link is kept.

    :param query: search terms; encoded as UTF-8 before being sent.
    :param n: maximum number of video ids to return.
    :param search_str: value for the ``sp`` URL parameter. When empty, a
        built-in filter is used (Video, Short (<4 minutes), Creative
        Commons, Upload Date).
    :returns: whatever ``self.playlist_result`` builds from the collected
        ids and ``query``.
    :raises Exception: when a downloaded page contains the
        ``search-message`` marker, i.e. the site reports no video results.
    """
    # The filter does not depend on the page, so resolve the default once.
    if not search_str:
        # Video, Short (<4 minutes), Creative Commons, Upload Date
        search_str = "CAISBhABGAEwAQ%253D%253D"

    videos = []
    channels = set()

    for pagenum in itertools.count(1):
        url_query = {
            "search_query": query.encode("utf-8"),
            "page": pagenum,
            "spf": "navigate",
        }
        url_query.update(self._EXTRA_QUERY_ARGS)
        result_url = ("https://www.youtube.com/results?sp=" + search_str +
                      "&" + compat_urllib_parse.urlencode(url_query))
        data = self._download_json(
            result_url,
            video_id='query "%s"' % query,
            note="Downloading page %s" % pagenum,
            errnote="Unable to download API page",
        )
        html_content = data[1]["body"]["content"]

        if 'class="search-message' in html_content:
            raise Exception("[youtube] No video results", result_url)

        # Each match pairs an 11-character video id with the user/channel
        # link that follows it on the same line of markup.
        vids_and_channels = re.findall(
            r'href="\/watch\?v=(.{11}).+(?:user|channel)\/(.+?")',
            html_content)
        if not vids_and_channels:
            break

        for vid, channel in vids_and_channels:
            # Keep only the first video encountered for every channel.
            if channel in channels:
                continue
            channels.add(channel)
            videos.append(vid)

        # Stop as soon as enough ids are available. Using ">" here would
        # fetch one extra page when exactly n ids were found, and that
        # extra page could raise "No video results" for no reason.
        if len(videos) >= n:
            break

    videos = self._ids_to_results(orderedSet(videos[:n]))
    return self.playlist_result(videos, query)
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    :param ie_result: info dict produced by an extractor; its ``_type``
        key ('video' when absent) selects how it is processed.
    :param download: forwarded to the video/URL processing calls.
    :param extra_info: optional dict of additional fields handed on to
        nested results; ``None`` is treated as an empty dict.
    :raises Exception: if ``_type`` is not one of the handled kinds.
    """
    # Avoid a shared mutable default: build a fresh dict for each call.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = youtube_dl.utils.sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(ie_result['url'],
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info,
                                 download=False,
                                 process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key',
                  'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(new_result,
                                      download=download,
                                      extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:

            def iter_playlistitems(spec):
                # Expand "2,5-7" into 2, 5, 6, 7 (1-based positions).
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)

            playlistitems = orderedSet(
                iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Pick the requested 1-based positions, silently skipping any
            # that fall outside the list.
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries
            ]

        def report_download(num_entries):
            self.to_screen('[%s] playlist %s: Downloading %d videos' %
                           (ie_result['extractor'], playlist, num_entries))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d '
                'video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries,
                 n_entries))
        elif isinstance(ie_entries, youtube_dl.utils.PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(item - 1, item))
            else:
                entries = ie_entries.getslice(playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                entries = make_playlistitems_entries(
                    list(
                        itertools.islice(ie_entries, 0,
                                         max(playlistitems))))
            else:
                entries = list(
                    itertools.islice(ie_entries, playliststart,
                                     playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' %
                           (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                # With an explicit playlist_items selection, report the
                # position that was requested rather than the running
                # counter; otherwise offset the counter by playliststart.
                'playlist_index': (playlistitems[i - 1]
                                   if playlistitems
                                   else i + playliststart),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename':
                youtube_dl.utils.url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            # Best effort: one failing entry must not abort the playlist.
            # Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still stop the run.
            try:
                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
            except Exception as e:
                print("Problem occured downloading a file: {}".format(e))
                continue
            playlist_results.append(entry_result)

        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' %
                       playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning('Extractor %s returned a compat_list result. '
                            'It needs to be updated.' %
                            ie_result.get('extractor'))

        def _fixup(r):
            # Copy the parent's extractor/webpage fields onto the entry.
            self.add_extra_info(
                r, {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': youtube_dl.utils.url_basename(
                        ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
            return r

        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
def extractPlayListDetail(self, ie_result, max_downloads, path='', download=False):
    """Resolve the entries of a playlist-type result one by one.

    A YoutubeDL instance is created with an output template under
    ``path``, subtitle and thumbnail writing enabled, and the given
    ``max_downloads``. Each selected entry is passed through that
    instance's ``process_ie_result``; processing stops early once
    ``MaxDownloadsReached`` is raised.

    :param ie_result: info dict; handled only when its ``_type`` is
        'playlist' or 'multi_video'.
    :param max_downloads: value for the ``max_downloads`` option.
    :param path: directory part of the output template.
    :param download: forwarded to ``process_ie_result`` for every entry.
    :returns: ``ie_result`` with ``entries`` replaced by the processed
        entries.
        NOTE(review): block structure was reconstructed from flattened
        source; for any other ``_type`` this appears to fall through and
        return None -- confirm against the original file.
    """
    options = {
        'outtmpl': os.path.join(path, "%(title)s.%(ext)s"),
        'writesubtitles': True,
        'writethumbnail': True,
        "max_downloads": max_downloads,
    }
    # Module-wide header table: this affects every later youtube_dl request.
    youtube_dl.utils.std_headers['User-Agent'] = self._user_agent

    with youtube_dl.YoutubeDL(options) as ydl:
        if ie_result.get('_type', 'video') not in ('playlist', 'multi_video'):
            return None

        # From here on every entry of the playlist is handled in turn.
        playlist = ie_result.get('title') or ie_result.get('id')
        ydl.to_screen('[Extract Information] Extracting playlist: %s' %
                      playlist)

        first_index = ydl.params.get('playliststart', 1) - 1
        last_index = ydl.params.get('playlistend')
        if last_index == -1:
            # Backwards compatibility: -1 means "up to the end".
            last_index = None

        def expand_item_spec(spec):
            # "2,5-7" -> 2, 5, 6, 7 (1-based positions).
            for chunk in spec.split(','):
                if '-' not in chunk:
                    yield int(chunk)
                    continue
                low, high = chunk.split('-')
                for position in range(int(low), int(high) + 1):
                    yield position

        wanted_spec = ydl.params.get('playlist_items')
        if wanted_spec is None:
            playlistitems = None
        else:
            playlistitems = orderedSet(expand_item_spec(wanted_spec))

        ie_entries = ie_result['entries']

        def pick_wanted(candidates):
            # Select the requested positions, ignoring out-of-range ones.
            total = len(candidates)
            picked = []
            for position in playlistitems:
                index = position - 1
                if -total <= index < total:
                    picked.append(candidates[index])
            return picked

        def announce(count):
            ydl.to_screen('[%s] playlist %s: Extracting %d videos' %
                          (ie_result['extractor'], playlist, count))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            entries = (pick_wanted(ie_entries) if playlistitems
                       else ie_entries[first_index:last_index])
            n_entries = len(entries)
            ydl.to_screen(
                '[%s] playlist %s: Collected %d video ids (extractng %d of them)'
                % (ie_result['extractor'], playlist, n_all_entries,
                   n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                # Fetch each requested position as a one-element slice.
                entries = [
                    found
                    for position in playlistitems
                    for found in ie_entries.getslice(position - 1, position)
                ]
            else:
                entries = ie_entries.getslice(first_index, last_index)
            n_entries = len(entries)
            announce(n_entries)
        else:
            # Any other iterable: consume only as much as is needed.
            if playlistitems:
                head = itertools.islice(ie_entries, 0, max(playlistitems))
                entries = pick_wanted(list(head))
            else:
                entries = list(
                    itertools.islice(ie_entries, first_index, last_index))
            n_entries = len(entries)
            announce(n_entries)

        if ydl.params.get('playlistreverse', False):
            entries = entries[::-1]
        if ydl.params.get('playlistrandom', False):
            random.shuffle(entries)

        forwarded_ip = ie_result.get('__x_forwarded_for_ip')
        processed = []

        for number, entry in enumerate(entries, 1):
            ydl.to_screen('[extract information] Extracting video %s of %s' %
                          (number, n_entries))
            # Carry the parent's __x_forwarded_for_ip over to the entry.
            if forwarded_ip:
                entry['__x_forwarded_for_ip'] = forwarded_ip

            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': (playlistitems[number - 1]
                                   if playlistitems
                                   else number + first_index),
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename':
                url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            skip_reason = ydl._match_entry(entry, incomplete=True)
            if skip_reason is not None:
                ydl.to_screen('[Extract] ' + skip_reason)
                continue

            try:
                entry_result = ydl.process_ie_result(entry,
                                                     download=download,
                                                     extra_info=extra)
            except MaxDownloadsReached:
                ydl.to_screen(
                    '[info] Maximum number of downloaded files reached.')
                break
            else:
                processed.append(entry_result)

        ie_result['entries'] = processed
        ydl.to_screen('[Extract] Finished extracting playlist: %s' % playlist)
        return ie_result