def read_playlist(url, n, cw=None):
    """Read a flat playlist/channel listing and attach watch URLs to it.

    URLs containing ``/channel/``, ``/user/`` or ``/c/`` are rewritten to
    the matching ``/videos`` page before extraction.

    :param url: playlist or channel URL.
    :param n: passed to the extractor as ``playlistend``.
    :param cw: handed to ``get_print`` to obtain the logger.
    :return: the extractor's info dict with ``urls`` added, and ``uploader``
        derived from the title when the extractor did not supply one.
    """
    print_ = get_print(cw)

    for header in ('channel', 'user', 'c'):
        marker = '/{}/'.format(header)
        if marker not in url.lower():
            continue
        username = re.find(r'/{}/([^/\?]+)'.format(header), url, re.IGNORECASE)
        url = urljoin(url, '/{}/{}/videos'.format(header, username))

    ydl = ytdl.YoutubeDL({
        'extract_flat': True,
        'playlistend': n,
    })
    info = ydl.extract_info(url)

    info['urls'] = [
        'https://www.youtube.com/watch?v={}'.format(entry['id'])
        for entry in info['entries']
    ]

    if 'uploader' not in info:
        # Fall back to the listing title, minus its " - videos" suffix.
        suffix = ' - videos'
        title = info['title']
        if title.lower().endswith(suffix):
            title = title[:-len(suffix)]
        info['uploader'] = title
        print_('⚠️ Fix uploader: None -> {}'.format(title))

    return info
def get_video(url, cw=None):
    """Pick the best MP4 format of a single video and collect VTT subtitles.

    :param url: video page URL (``noplaylist`` is set for the extractor).
    :param cw: forwarded to the extractor and to ``Video``.
    :return: a ``Video`` built from the chosen format, the info dict and a
        ``{language: subtitle_url}`` mapping.
    :raises Exception: ``'No videos'`` when no MP4 format is offered.
    """
    def quality_rank(f):
        # ``quality`` is either ``vbr`` (a number) or whatever ``re.find``
        # captured from a label such as "720p" (presumably a string, or None
        # when nothing matched).  Rank numerically so mixed types do not
        # raise and "1080" does not sort below "720".
        try:
            return float(f['quality'])
        except (TypeError, ValueError):
            return 0.0

    options = {
        'noplaylist': True,
    }
    ydl = ytdl.YoutubeDL(options, cw=cw)
    info = ydl.extract_info(url)

    fs = []
    for f in info['formats']:
        if f['ext'] != 'mp4':
            continue
        f['quality'] = f.get('vbr') or re.find('([0-9]+)p', f['format'], re.IGNORECASE)
        print(f['format'], f['quality'])
        fs.append(f)

    if not fs:
        raise Exception('No videos')

    # sorted() is stable, so among equal ranks the later format still wins.
    f = sorted(fs, key=quality_rank)[-1]

    subs = {}
    # Tolerate an info dict whose ``subtitles`` entry is missing or None.
    for sub, items in (info.get('subtitles') or {}).items():
        sub = sub.split('_')[0]
        for item in items:
            if item['ext'] != 'vtt':
                continue
            subs[sub] = item['url']

    video = Video(f, info, subs, cw)
    return video
def get(self, _):
    """Resolve and cache the downloadable URL for ``self._url``.

    Extraction is attempted ``self.try_n`` times with a 10 second sleep
    between attempts; once every attempt has failed the last error is
    re-raised.  A result whose extension is ``.m3u8`` is wrapped in
    ``M3u8_stream``.

    :param _: ignored.
    :return: the cached value, a plain URL, or an ``M3u8_stream``.
    """
    if self._url_cache:
        return self._url_cache

    print_ = get_print(self.cw)

    for attempt in range(self.try_n):
        try:
            info = ytdl.YoutubeDL().extract_info(self._url)
            url = info['url']
            self.ext = ext = get_ext(url)
            print_('get_video: {} {}'.format(url, ext))
            if ext.lower() == '.m3u8':
                url = M3u8_stream(url, n_thread=self.n_thread, post_processing=True)
            self._url_cache = url
            return url
        except Exception as e:
            # Keep a reference: the name bound by ``except ... as`` is
            # cleared when the handler exits.
            last_error = e
            msg = print_error(e)[-1]
            print_('\nTwitter video Error:\n{}'.format(msg))
            if attempt < self.try_n - 1:
                sleep(10, self.cw)

    # Only reached when no attempt returned.
    raise last_error
def get(self, url):
    """Resolve the stream URL for ``url`` and fill in title/filename/thumb.

    The last entry of the extractor's ``formats`` list is used.  An
    ``.m3u8`` stream is wrapped in ``M3u8_stream`` and saved as ``.mp4``.
    The result is cached in ``self._url``.

    :param url: page URL handed to the extractor.
    :return: a plain URL or an ``M3u8_stream``.
    """
    print_ = get_print(self.cw)
    if self._url:
        return self._url

    ydl = ytdl.YoutubeDL(cw=self.cw)
    try:
        info = ydl.extract_info(url)
    except Exception:
        # Diagnostics only: log what the extractor's ``_download_info``
        # returns (when it has one), then re-raise the original error.
        extractor = type(ytdl.get_extractor(url))(ydl)
        download_info = getattr(extractor, '_download_info', None)
        if download_info is not None:
            print_(download_info(extractor._match_id(url)))
        raise

    stream = info['formats'][-1]['url']
    ext = get_ext(stream)
    self.title = info['title']
    display_id = info['display_id']

    if ext.lower() == '.m3u8':
        stream = M3u8_stream(stream, n_thread=4, alter=alter)
        ext = '.mp4'
    self.filename = format_filename(self.title, display_id, ext)

    self.url_thumb = info['thumbnail']
    self.thumb = BytesIO()
    downloader.download(self.url_thumb, buffer=self.thumb)

    self._url = stream
    return self._url
def get(self, url):
    """Resolve the widest format of ``url`` and fill in thumb/title/filename.

    The result is cached in ``self._url``.  Formats whose protocol contains
    ``m3u8`` are wrapped in ``M3u8_stream``.

    :param url: page URL handed to the extractor (also used as referer).
    :return: a plain URL or an ``M3u8_stream``.
    :raises Exception: ``'No videos'`` when the extractor returns no formats.
    """
    if self._url:
        return self._url

    ydl = ytdl.YoutubeDL()
    info = ydl.extract_info(url)

    # get best video
    fs = info['formats']
    if not fs:
        raise Exception('No videos')
    # ``width`` can be missing or None on some formats; rank those as 0
    # instead of crashing in int().
    fs = sorted(fs, key=lambda x: int(x.get('width') or 0), reverse=True)
    f = fs[0]
    url_video = f['url']

    # thumb
    self.thumb_url = info['thumbnails'][0]['url']
    self.thumb = BytesIO()
    downloader.download(self.thumb_url, buffer=self.thumb)

    # m3u8
    print(f['protocol'])
    if 'm3u8' in f['protocol']:
        url_video = M3u8_stream(url_video, referer=url)

    # title & filename
    self.title = info['title']
    self.filename = format_filename(self.title, info['id'], '.mp4')

    self._url = url_video
    return self._url
def get_cid(force=False):
    """Return the module-level ``CLIENT_ID``, refreshing it when needed.

    :param force: refresh even when a client id is already stored.
    :return: the current value of ``CLIENT_ID``.
    """
    global CLIENT_ID

    if CLIENT_ID is not None and not force:
        return CLIENT_ID

    print('update cid...')
    extractor = ytdl.extractor.soundcloud.SoundcloudIE(ytdl.YoutubeDL())
    extractor._update_client_id()
    CLIENT_ID = extractor._CLIENT_ID
    return CLIENT_ID
def get(self, url):
    """Resolve the best stream not exceeding the configured resolution.

    Formats that report a height are ranked by ``(height, tbr)``.  The
    highest one whose height is within ``get_resolution()`` is chosen, or
    the lowest one when none fits.  ``.m3u8`` streams are wrapped in
    ``M3u8_stream`` and saved as ``.mp4``.  The result is cached in
    ``self._url``.

    :param url: page URL handed to the extractor.
    :return: a plain URL or an ``M3u8_stream``.
    :raises errors.LoginRequired: when extraction fails with a message
        containing ``'HTTPError 403'``.
    :raises Exception: ``'No videos'`` when no format reports a height.
    """
    print_ = get_print(self.cw)
    if self._url:
        return self._url

    ydl = ytdl.YoutubeDL(cw=self.cw)
    try:
        info = ydl.extract_info(url)
    except Exception as e:
        # Diagnostics: log what the extractor's ``_download_info`` returns
        # (when it has one) before surfacing the failure.
        ex = type(ytdl.get_extractor(url))(ydl)
        _download_info = getattr(ex, '_download_info', None)
        if _download_info is not None:
            vod_id = ex._match_id(url)
            info = _download_info(vod_id)
            print_(info)
        if 'HTTPError 403' in str(e):
            raise errors.LoginRequired()
        raise

    def print_video(video):
        print_('[{}] [{}] [{}] {}'.format(video['format_id'], video.get('height'), video.get('tbr'), video['url']))

    def rank(video):
        # ``tbr`` may be present but None; treat it as 0 so the tuple
        # comparison never mixes None with numbers.
        return (video.get('height') or 0, video.get('tbr') or 0)

    videos = sorted(
        (video for video in info['formats'] if video.get('height')),
        key=rank,
        reverse=True,
    )
    if not videos:
        # Previously surfaced as a bare IndexError from ``videos[-1]``.
        raise Exception('No videos')

    for video in videos:
        print_video(video)

    res = get_resolution()
    for video in videos:
        if video['height'] <= res: #3723
            video_best = video
            break
    else:
        # Nothing fits the limit: take the smallest available.
        video_best = videos[-1]
    print_video(video_best)

    video = video_best['url']
    ext = get_ext(video)
    self.title = info['title']
    id = info['display_id']

    if ext.lower() == '.m3u8':
        video = M3u8_stream(video, n_thread=4, alter=alter)
        ext = '.mp4'
    self.filename = format_filename(self.title, id, ext)

    self.url_thumb = info['thumbnail']
    self.thumb = BytesIO()
    downloader.download(self.url_thumb, buffer=self.thumb)

    self._url = video
    return self._url
def __init__(self, url):
    """Extract ``url`` eagerly and populate url, thumbnail, title, filename.

    The last entry of the extractor's ``formats`` list supplies the media
    URL; the first thumbnail is downloaded into ``self.thumb``.

    :param url: page URL handed to the extractor.
    """
    info = ytdl.YoutubeDL().extract_info(url)

    url_video = info['formats'][-1]['url']
    self.url = LazyUrl(url, lambda _: url_video, self)

    self.url_thumb = info['thumbnails'][0]['url']
    self.thumb = BytesIO()
    downloader.download(self.url_thumb, buffer=self.thumb)

    self.title = info['title']
    ext = get_ext(url_video)
    self.filename = format_filename(self.title, info['id'], ext)
def get(self, url):
    """Resolve and cache the direct media URL for ``url``.

    The id captured by ``PATTERN_VID`` is used both as the title and in the
    ``.mp4`` filename.

    :param url: page URL; must match ``PATTERN_VID``.
    :return: the ``url`` field of the extractor's info dict.
    """
    if self._url:
        return self._url

    video_id = re.search(PATTERN_VID, url).group('id')
    self.title = video_id
    self.filename = format_filename(self.title, video_id, '.mp4')

    info = ytdl.YoutubeDL().extract_info(url)
    self._url = info['url']
    return self._url
def extract_info(url, cw=None):
    """Run the extractor on ``url`` and return its info dict.

    On failure, the extractor's ``_download_info`` result (when the
    extractor has such a method) is logged for diagnosis before the error
    is surfaced.

    :param url: page URL handed to the extractor.
    :param cw: forwarded to the extractor and used to obtain the logger.
    :return: the info dict.
    :raises errors.LoginRequired: when the failure message contains
        ``'HTTPError 403'``.
    """
    ydl = ytdl.YoutubeDL(cw=cw)
    try:
        info = ydl.extract_info(url)
    except Exception as e:
        ex = type(ytdl.get_extractor(url))(ydl)
        _download_info = getattr(ex, '_download_info', None)
        if _download_info is not None:
            vod_id = ex._match_id(url)
            # ``print_`` was never bound inside this function, so this
            # branch used to die with NameError and hide the real error.
            # Obtain the logger here, only when it is actually needed.
            print_ = get_print(cw)
            print_(_download_info(vod_id))
        if 'HTTPError 403' in str(e):
            raise errors.LoginRequired()
        raise
    return info
def get(self, url):
    """Resolve the tallest MP4 format of ``url`` and fill in the metadata.

    Sets ``thumb_url``, ``thumb``, ``title`` and ``filename``, then caches
    the media URL in ``self._url``.

    :param url: page URL handed to the extractor.
    :return: the chosen format's URL.
    :raises Exception: ``'No MP4 videos'`` when no MP4 format is offered.
    """
    if self._url:
        return self._url

    ydl = ytdl.YoutubeDL()
    info = ydl.extract_info(url)

    fs = [f for f in info['formats'] if f['ext'] == 'mp4']
    if not fs:
        # Previously surfaced as a bare IndexError from ``[-1]``.
        raise Exception('No MP4 videos')
    # ``height`` may be missing or None; rank such formats lowest instead of
    # failing the comparison.
    f = sorted(fs, key=lambda f: f.get('height') or 0)[-1]
    url_video = f['url']

    self.thumb_url = info['thumbnails'][0]['url']
    self.thumb = IO()
    downloader.download(self.thumb_url, buffer=self.thumb)

    self.title = info['title']
    ext = get_ext(url_video)
    self.filename = format_filename(self.title, info['id'], ext)

    # Cache last: if anything above fails, a later call must redo the work
    # rather than return a URL with no title/filename set.
    self._url = url_video
    return self._url
def get(self, _=None):
    """Resolve and cache the best reachable stream for ``self.referer``.

    Formats are tried from the greatest height down, with non-HTTP
    protocols ranked slightly below HTTP ones of the same height; the first
    one accepted by ``downloader.ok_url`` wins.  An ``.m3u8`` result is
    wrapped in ``ffmpeg.Stream``.  The whole procedure is attempted
    ``self.try_n`` times with a 10 second sleep in between.

    :param _: ignored.
    :return: the cached value, a plain URL, or an ``ffmpeg.Stream``.
    :raises Exception: the last error once every attempt has failed.
    """
    if self._url_cache:
        return self._url_cache

    print_ = get_print(self.cw)

    for try_ in range(self.try_n):
        try:
            d = ytdl.YoutubeDL(cw=self.cw)
            info = d.extract_info(self.referer)

            fs = info['formats']
            for f in fs:
                print_('{} {} - {}'.format(f.get('height'), f['protocol'], f['url']))

            def key(f):
                # ``height`` may be present but None; treat it as 0 so the
                # arithmetic and the comparison below cannot raise.
                h = f.get('height') or 0
                if not f['protocol'].startswith('http'):
                    h -= .1
                return h

            for f in sorted(fs, key=key, reverse=True):
                if downloader.ok_url(f['url'], self.referer): #4185
                    break
                print_('invalid video: {}'.format(f['url']))
            else:
                raise Exception('no valid videos')

            url = f['url']
            ext = get_ext(url)
            self.ext = ext
            print_('get_video: {} {}'.format(url, ext))
            if ext.lower() == '.m3u8':
                url = ffmpeg.Stream(url)
                url._live = False
            self._url_cache = url
            return url
        except Exception as e:
            # Keep a reference: ``e`` is cleared when the handler exits.
            e_ = e
            msg = print_error(e)[0]
            print_('\nTwitter video Error:\n{}'.format(msg))
            if try_ < self.try_n - 1:
                sleep(10, self.cw)
    else:
        raise e_
def get(self, url):
    """Resolve the widest plain-HTTP(S) format of ``url`` and its metadata.

    Sets ``thumb_url``, ``thumb``, ``title`` and ``filename``, then caches
    the media URL in ``self._url``.

    :param url: page URL handed to the extractor.
    :return: the chosen format's URL.
    :raises Exception: ``'No MP4 videos'`` when no HTTP(S) format exists.
    """
    if self._url:
        return self._url

    ydl = ytdl.YoutubeDL()
    info = ydl.extract_info(url)

    fs = [f for f in info['formats'] if f['protocol'] in ['http', 'https']]
    # ``.get('width', 0)`` still yields None when the key exists with a None
    # value, which made int() raise; fall back to 0 in that case too.
    fs = sorted(fs, key=lambda f: int(f.get('width') or 0), reverse=True)
    if not fs:
        raise Exception('No MP4 videos')
    f = fs[0]
    url_video = f['url']

    self.thumb_url = info['thumbnails'][0]['url']
    self.thumb = IO()
    downloader.download(self.thumb_url, buffer=self.thumb)

    self.title = info['title']
    ext = get_ext(url_video)
    self.filename = format_filename(self.title, info['id'], ext)

    # Cache last: if anything above fails, a later call must redo the work
    # rather than return a URL with no title/filename set.
    self._url = url_video
    return self._url
def get_audios(url, cw, album_art):
    """Extract ``url`` and wrap every track entry in an ``Audio``.

    A URL with exactly three slashes (after stripping trailing ones) gets
    ``/tracks`` appended.  For listings, a ``(Kind)`` marker found in the
    title is removed and the title is rewritten as ``[Kind] title``.
    Entries whose ``webpage_url`` contains ``/sets/`` are skipped.

    :param url: track, playlist or profile URL.
    :param cw: forwarded to the extractor, the logger and ``Audio``.
    :param album_art: forwarded to ``Audio``.
    :return: the info dict with an ``audios`` list added.
    """
    print_ = get_print(cw)

    url = url.rstrip('/')
    if url.count('/') == 3:
        url += '/tracks'

    info = ytdl.YoutubeDL(cw=cw).extract_info(url)

    if 'entries' not in info:
        # Single track: treat the info dict itself as the only entry.
        entries = [info]
    else:
        entries = info['entries']
        title = info['title']
        kind = 'Playlist'
        for _type in ('All', 'Tracks', 'Albums', 'Sets', 'Reposts', 'Likes', 'Spotlight'):
            tag = '({})'.format(_type)
            if tag in title:
                title = title.replace(tag, '')
                kind = _type
                break
        print_(u'kind: {}'.format(kind))
        info['title'] = u'[{}] {}'.format(kind.capitalize(), title)

    info['audios'] = [
        Audio(entry, album_art, cw=cw)
        for entry in entries
        if '/sets/' not in entry['webpage_url']
    ]
    return info
def get(self, url):
    """Resolve the stream URL for ``url`` and fill in title/filename/thumb.

    The last entry of the extractor's ``formats`` list is used.  An
    ``.m3u8`` stream is wrapped in ``M3u8_stream`` and saved as ``.mp4``.
    The result is cached in ``self._url``.

    :param url: page URL handed to the extractor.
    :return: a plain URL or an ``M3u8_stream``.
    """
    if self._url:
        return self._url

    info = ytdl.YoutubeDL().extract_info(url)

    stream = info['formats'][-1]['url']
    ext = get_ext(stream)
    self.title = info['title']
    display_id = info['display_id']

    if ext.lower() == '.m3u8':
        stream = M3u8_stream(stream, n_thread=4)
        ext = '.mp4'
    self.filename = format_filename(self.title, display_id, ext)

    self.url_thumb = info['thumbnail']
    self.thumb = BytesIO()
    downloader.download(self.url_thumb, buffer=self.thumb)

    self._url = stream
    return self._url
def get_videos(url, cw=None):
    """List the videos behind ``url`` as ``Video`` objects, newest id first.

    :param url: listing URL handed to the extractor in flat mode.
    :param cw: forwarded to the logger, ``get_max_range``, the extractor
        and ``Video``.
    :return: the info dict with ``videos`` (sorted by descending integer
        id) and, when the extractor gave no ``name``, a ``name`` taken from
        the ``creator`` of the first entry.
    :raises Exception: ``'no videos'`` when the listing is empty.
    """
    print_ = get_print(cw)
    print_(f'get_videos: {url}')

    ydl = ytdl.YoutubeDL(
        {
            'extract_flat': True,
            'playlistend': get_max_range(cw),
        },
        cw=cw,
    )
    info = ydl.extract_info(url)

    videos = []
    for entry in info['entries']:
        video = Video(entry['url'], cw)
        video.id = int(entry['id'])
        videos.append(video)
        if 'name' not in info:
            # The flat listing carries no name; ask the entry itself.
            info['name'] = ydl.extract_info(entry['url'])['creator']

    if not videos:
        raise Exception('no videos')

    videos.sort(key=lambda video: video.id, reverse=True)
    info['videos'] = videos
    return info
def get_video(url):
    """Pick the best MP4 format of a single video.

    :param url: video page URL (``noplaylist`` is set for the extractor).
    :return: a ``Video`` built from the chosen format and the info dict.
    :raises Exception: ``'No videos'`` when no MP4 format is offered.
    """
    def quality_rank(f):
        # ``quality`` is either ``vbr`` (a number) or whatever ``re.find``
        # captured from a label such as "720p" (presumably a string, or None
        # when nothing matched).  Rank numerically so mixed types do not
        # raise and "1080" does not sort below "720".
        try:
            return float(f['quality'])
        except (TypeError, ValueError):
            return 0.0

    options = {
        'noplaylist': True,
    }
    ydl = ytdl.YoutubeDL(options)
    info = ydl.extract_info(url)

    fs = []
    for f in info['formats']:
        if f['ext'] != 'mp4':
            continue
        f['quality'] = f.get('vbr') or re.find('([0-9]+)p', f['format'], re.IGNORECASE)
        print(f['format'], f['quality'])
        fs.append(f)

    if not fs:
        raise Exception('No videos')

    # sorted() is stable, so among equal ranks the later format still wins.
    f = sorted(fs, key=quality_rank)[-1]

    video = Video(f, info)
    return video
def get(self, url):
    '''
    Resolve the media behind ``url`` and cache its URL in ``self._url``.

    Handles two page kinds: GIF pages (a ``gifImageSection`` div is
    present) and regular video pages, where the format is chosen with the
    extractor according to ``get_resolution()``.  Also sets ``self.title``,
    ``self.filename`` and ``self.thumb`` from the resulting ``File``.

    Raises ``Exception`` for removed pages, a missing id, a GIF without an
    mp4 URL, or an empty format list.
    '''
    cw = self.cw
    session = self.session
    print_ = get_print(cw)
    if self._url:
        return self._url

    # The id comes from either "viewkey=<id>" or "/embed/<id>".
    id_ = re.find(r'viewkey=(\w+)', url, re.IGNORECASE) or \
          re.find(r'/embed/(\w+)', url, re.IGNORECASE, err='no id')
    print_('id: {}'.format(id_))
    # Anything that is neither a viewkey URL nor a GIF URL is normalised to
    # the canonical view_video.php form.
    if 'viewkey=' not in url.lower() and '/gif/' not in url.lower():
        url = urljoin(url, '/view_video.php?viewkey={}'.format(id_))

    # Try the non-premium host first; fall back to the premium host when
    # the page cannot be read or shows a locked player.
    url_test = url.replace('pornhubpremium.com', 'pornhub.com')
    try:
        html = downloader.read_html(url_test, session=session)
        soup = Soup(html)
        if soup.find('div', id='lockedPlayer'):
            print_('Locked player')
            raise Exception('Locked player')
        url = url_test
    except: #3511
        # NOTE(review): the bare except also swallows KeyboardInterrupt and
        # SystemExit; "#3511" is presumably an issue reference - confirm.
        url = url.replace('pornhub.com', 'pornhubpremium.com')
        html = downloader.read_html(url, session=session)

    soup = Soup(html)
    soup = fix_soup(soup, url, session, cw)
    html = str(soup)

    # The page carries a "removed" marker.
    if soup.find('div', class_='removed'):
        raise Exception('removed')

    gif = soup.find('div', {'id': 'gifImageSection'})
    if gif:
        print_('GIF')
        # For GIFs the id is the first alphanumeric run after "/gif/".
        id_ = url.split('/gif/')[1]
        id_ = re.findall('[0-9a-zA-Z]+', id_)[0]

        # Find the first child element that has a "data-mp4" attribute;
        # ``js`` keeps pointing at it after the loop.
        jss = list(gif.children)
        for js in jss:
            if 'data-mp4' in getattr(js, 'attrs', {}):
                break
        else:
            raise Exception('gif mp4 url not found')

        title = js['data-gif-title']
        url = js['data-mp4']
        url_thumb = re.find(r'https?://.+?.phncdn.com/pics/gifs/.+?\.jpg', html, err='no thumb')
        file = File('gif_{}'.format(id_), title, url, url_thumb)
    else:
        if id_ is None:
            raise Exception('no id')

        print_('Video')

        # 1968 (presumably an issue reference): the title is read from the
        # page heading; the previous source is kept below for reference.
        #title = j['video_title']
        title = soup.find('h1', class_='title').text.strip()

        ydl = ytdl.YoutubeDL(cw=self.cw)
        info = ydl.extract_info(url)
        url_thumb = info['thumbnail']
        videos = []
        for f in info['formats']:
            video = {}
            video['height'] = f['height']
            video['quality'] = f['height'] or 0
            video['protocol'] = f['protocol']
            video['videoUrl'] = f['url']
            # Rank an m3u8 format just below the same height in another
            # protocol.
            if f['protocol'] == 'm3u8':
                video['quality'] -= 1
            print_('[{}p] {} {}'.format(video['height'], video['protocol'], video['videoUrl']))
            videos.append(video)

        if not videos:
            raise Exception('No videos')

        # Ascending by quality: best candidate last, worst first.
        videos = sorted(videos, key=lambda video: video['quality'])

        res = get_resolution()

        # Best format within the resolution limit, else the smallest one.
        videos_good = [video for video in videos if video['quality'] <= res]
        if videos_good:
            video = videos_good[-1]
        else:
            video = videos[0]
        print_('\n[{}p] {} {}'.format(video['height'], video['protocol'], video['videoUrl']))

        file = File(id_, title, video['videoUrl'].strip(), url_thumb)

    self._url = file.url
    self.title = file.title
    self.filename = file.filename
    self.thumb = file.thumb
    return self._url
def get_video(url, session, cw, ie_key=None):
    """Extract ``url`` and build a ``Video`` from its best video/audio formats.

    When the extractor returns no formats but does return entries (or an
    info dict without a title), the function calls itself on the first
    entry's URL, provided that URL differs from ``url``.

    :param url: page URL handed to the extractor.
    :param session: its headers are updated with the extractor's
        ``http_headers``; also passed on to ``Video``.
    :param cw: used to obtain the logger and passed on to ``Video``.
    :param ie_key: value stored in ``info['ie_key']``; looked up with
        ``ytdl.get_extractor_name`` when falsy.
    :return: a ``Video`` built from the selected video format, the optional
        separate audio format, the info dict, ``session`` and ``url``.
    :raises Exception: ``'No videos'`` when there is no format to pick from.
    """
    print_ = get_print(cw)
    options = {
        'noplaylist': True,
        #'extract_flat': True,
        'playlistend': 1,
    }
    ydl = ytdl.YoutubeDL(options)
    info = ydl.extract_info(url)
    if not ie_key:
        ie_key = ytdl.get_extractor_name(url)
    info['ie_key'] = ie_key
    url_new = info.get('url')
    print('url: {} -> {}'.format(url, url_new))
    formats = info.get('formats', [])
    print(info.keys())

    # No formats, but entries (or no title): follow the first entry, or the
    # info dict's own ``url``, unless that would loop back to ``url``.
    if not formats and (info.get('entries') or 'title' not in info):
        if 'entries' in info:
            entry = info['entries'][0]
            url_new = entry.get('url') or entry['webpage_url']
        if url_new != url:
            return get_video(url_new, session, cw, ie_key=get_ie_key(info))

    session.headers.update(info.get('http_headers', {}))
    #session.cookies.update(ydl.cookiejar)

    # Still no formats: synthesise a single format from the direct URL.
    if not formats:
        print('no formats')
        if url_new:
            f = {'url': url_new, 'format': ''}
            formats.append(f)

    fs = []
    for i, f in enumerate(formats):
        f['_index'] = i
        # First truthy of: vbr, the "<n>p" number in the format label,
        # height, width, and finally 1/0 for "has a video codec".
        f['_resolution'] = f.get('vbr') or int_or_none(
            re.find(
                '([0-9]+)p', f['format'], re.IGNORECASE)) or f.get('height') or f.get('width') or int(
            f.get('vcodec', 'none') != 'none')
        # First truthy of: abr, asr, and finally 1/0 for "has an audio codec".
        f['_audio'] = f.get('abr') or f.get('asr') or int(
            f.get('acodec', 'none') != 'none')
        print_(format_(f))
        fs.append(f)

    if not fs:
        raise Exception('No videos')

    # Highest (_resolution, _index) wins.
    f = sorted(fs, key=lambda f: (f['_resolution'], f['_index']))[-1]
    if f['_audio']:
        # The chosen format already carries audio.
        f_audio = None
    else:
        # Look for an audio-only companion (no resolution, some audio).
        fs_audio = sorted([
            f_audio for f_audio in fs
            if (not f_audio['_resolution'] and f_audio['_audio'])
        ], key=lambda f: (f['_audio'], f['_index']))
        if fs_audio:
            f_audio = fs_audio[-1]
        else:
            # No companion: switch to the best format that has audio, or
            # keep the silent pick when none has.
            try:
                f = sorted([f for f in fs if f['_audio']],
                           key=lambda f: (f['_resolution'], f['_index']))[-1]
            except IndexError:
                pass
            f_audio = None
    print_('video: {}'.format(format_(f)))
    print_('audio: {}'.format(format_(f_audio)))
    video = Video(f, f_audio, info, session, url, cw=cw)
    return video
def _get_video(url, session, cw, ie_key=None, allow_m3u8=True):
    """Extract ``url`` and build a ``Video`` from its best video/audio formats.

    When the extractor returns no formats but does return entries (or an
    info dict without a title), the first entry's URL is followed instead,
    provided it differs from ``url``.

    :param url: page URL handed to the extractor.
    :param session: its headers are updated with the extractor's
        ``http_headers``; also passed on to ``Video`` and ``get_ext_``.
    :param cw: used for the logger, the extractor and ``Video``.
    :param ie_key: value stored in ``info['ie_key']``; looked up with
        ``ytdl.get_extractor_name`` when falsy.
    :param allow_m3u8: when false, formats whose extension resolves to
        ``.m3u8`` are skipped if any other candidate exists.
    :return: a ``Video`` built from the selected video format, the optional
        separate audio format, the info dict, ``session`` and ``url``.
    :raises Exception: ``'No videos'`` when there is no format to pick from.
    """
    print_ = get_print(cw)
    print_('get_video: {}, {}'.format(allow_m3u8, url))
    options = {
        'noplaylist': True,
        #'extract_flat': True,
        'playlistend': 1,
    }
    ydl = ytdl.YoutubeDL(options, cw=cw)
    info = ydl.extract_info(url)
    if not ie_key:
        ie_key = ytdl.get_extractor_name(url)
    info['ie_key'] = ie_key
    url_new = info.get('url')
    print('url: {} -> {}'.format(url, url_new))
    formats = info.get('formats', [])
    print(info.keys())

    if not formats and (info.get('entries') or 'title' not in info):
        if 'entries' in info:
            entry = info['entries'][0]
            url_new = entry.get('url') or entry['webpage_url']
        if url_new != url:
            # NOTE(review): this goes through ``get_video`` and therefore
            # does not forward ``allow_m3u8`` - confirm that is intended.
            return get_video(url_new, session, cw, ie_key=get_ie_key(info))

    session.headers.update(info.get('http_headers', {}))
    #session.cookies.update(ydl.cookiejar)

    # Still no formats: synthesise a single format from the direct URL.
    if not formats:
        print('no formats')
        if url_new:
            f = {'url': url_new, 'format': ''}
            formats.append(f)

    fs = []
    for i, f in enumerate(formats):
        f['_index'] = i
        # First truthy of: vbr, the "<n>p" number in the format label,
        # height, width, and finally 1/0 for "has a video codec".
        f['_resolution'] = f.get('vbr') or int_or_none(
            re.find(
                '([0-9]+)p', f['format'], re.IGNORECASE)) or f.get('height') or f.get('width') or int(
            f.get('vcodec', 'none') != 'none')
        # First truthy of: abr, asr, and finally 1/0 for "has an audio codec".
        f['_audio'] = f.get('abr') or f.get('asr') or int(
            f.get('acodec', 'none') != 'none')
        print_(format_(f))
        fs.append(f)

    if not fs:
        raise Exception('No videos')

    def filter_f(fs):
        # Callers pass a one-shot ``reversed`` iterator.  Materialise it so
        # the fallback below can still return the first candidate after the
        # loop has walked through all of them; the old ``list(fs)[0]`` saw an
        # exhausted iterator and always raised IndexError.
        fs = list(fs)
        for f in fs:
            if allow_m3u8:
                return f
            ext = get_ext_(f['url'], session, url)
            if ext.lower() != '.m3u8':
                return f
            print_('invalid url: {}'.format(f['url']))
        # Every candidate was m3u8: fall back to the best one.  An empty
        # candidate list still raises IndexError, as before.
        return fs[0]

    f_video = filter_f(
        reversed(sorted(fs, key=lambda f: (f['_resolution'], f['_index']))))
    print_('video0: {}'.format(format_(f_video)))

    if f_video['_audio']:
        # The chosen format already carries audio.
        f_audio = None
    else:
        # Look for an audio-only companion (no resolution, some audio).
        fs_audio = sorted([
            f_audio for f_audio in fs
            if (not f_audio['_resolution'] and f_audio['_audio'])
        ], key=lambda f: (f['_audio'], f['_index']))
        if fs_audio:
            f_audio = fs_audio[-1]
        else:
            # No companion: switch to the best format that has audio, or
            # keep the silent pick when none has.
            try:
                print_('trying to get f_video with audio')
                f_video = filter_f(
                    reversed(
                        sorted([f for f in fs if f['_audio']],
                               key=lambda f: (f['_resolution'], f['_index']))))
            except Exception as e:
                print_('failed to get f_video with audio: {}'.format(e))
            f_audio = None
    print_('video: {}'.format(format_(f_video)))
    print_('audio: {}'.format(format_(f_audio)))
    video = Video(f_video, f_audio, info, session, url, cw=cw)
    return video