def read_playlist(url, n, cw=None):
    """Fetch a flat listing of a playlist/channel and attach watch URLs.

    URLs containing ``/channel/``, ``/user/`` or ``/c/`` are rewritten to the
    matching ``/<header>/<name>/videos`` page before extraction.

    Args:
        url: Playlist or channel URL.
        n: Forwarded to the extractor as ``playlistend``.
        cw: Passed to ``get_print`` to obtain the logging callable.

    Returns:
        The extractor's info dict, extended with ``urls`` (one watch URL per
        entry, built from the entry ``id``) and, if it was absent,
        ``uploader`` (derived from ``title``).
    """
    print_ = get_print(cw)
    for header in ['channel', 'user', 'c']:
        if '/{}/'.format(header) in url.lower():
            username = re.find(r'/{}/([^/\?]+)'.format(header), url,
                               re.IGNORECASE)
            url = urljoin(url, '/{}/{}/videos'.format(header, username))
            # Stop at the first match: otherwise the rewritten URL can be
            # matched again by a later header (e.g. ".../user/c/videos"
            # contains "/c/" and would be mangled to "/c/videos/videos").
            break

    options = {
        'extract_flat': True,
        'playlistend': n,
    }
    ydl = ytdl.YoutubeDL(options)
    info = ydl.extract_info(url)

    info['urls'] = [
        'https://www.youtube.com/watch?v={}'.format(e['id'])
        for e in info['entries']
    ]

    if 'uploader' not in info:
        # Fall back to the page title, minus a trailing " - videos" suffix.
        title = info['title']
        if title.lower().endswith(' - videos'):
            title = title[:-len(' - videos')]
        info['uploader'] = title
        print_('⚠️ Fix uploader: None -> {}'.format(title))

    return info
def get_video(url, cw=None):
    """Pick the highest-quality mp4 format of ``url`` and wrap it in ``Video``.

    Args:
        url: Page URL handed to the extractor (``noplaylist`` is set).
        cw: Forwarded to ``ytdl.YoutubeDL`` and ``Video``.

    Returns:
        ``Video(f, info, subs, cw)`` where ``f`` is the chosen format dict and
        ``subs`` maps a language code to a ``vtt`` subtitle URL.

    Raises:
        Exception: ``'No videos'`` when no format has ``ext == 'mp4'``.
    """
    options = {
        'noplaylist': True,
    }

    ydl = ytdl.YoutubeDL(options, cw=cw)
    info = ydl.extract_info(url)

    fs = []
    for f in info['formats']:
        if f['ext'] != 'mp4':
            continue
        f['quality'] = f.get('vbr') or re.find('([0-9]+)p', f['format'],
                                               re.IGNORECASE)
        print(f['format'], f['quality'])
        fs.append(f)

    if not fs:
        raise Exception('No videos')

    def rank(f):
        # ``quality`` may be a number (vbr), the text captured from e.g.
        # "720p", or empty.  Compare numerically so that "1080" outranks
        # "720" and mixed types do not make the sort raise TypeError.
        try:
            return float(f['quality'])
        except (TypeError, ValueError):
            return 0.0

    f = sorted(fs, key=rank)[-1]

    subs = {}
    # ``subtitles`` is treated as optional; a missing/None value means none.
    for sub, items in (info.get('subtitles') or {}).items():
        sub = sub.split('_')[0]  # keep only the part before the first "_"
        for item in items:
            if item['ext'] != 'vtt':
                continue
            subs[sub] = item['url']
    video = Video(f, info, subs, cw)

    return video
    def get(self, _):
        """Resolve ``self._url`` to a direct media URL, with retries.

        The result is memoised in ``self._url_cache``.  Up to ``self.try_n``
        attempts are made; between failed attempts the method sleeps 10
        seconds.  ``.m3u8`` targets are wrapped in ``M3u8_stream``.

        Raises:
            The exception of the last failed attempt once all attempts are
            used up.
        """
        if self._url_cache:
            return self._url_cache
        log = get_print(self.cw)
        for attempt in range(self.try_n):
            try:
                info = ytdl.YoutubeDL().extract_info(self._url)

                media = info['url']
                suffix = get_ext(media)
                self.ext = suffix
                log('get_video: {} {}'.format(media, suffix))
                if suffix.lower() == '.m3u8':
                    media = M3u8_stream(media,
                                        n_thread=self.n_thread,
                                        post_processing=True)
                self._url_cache = media
                return media
            except Exception as error:
                last_error = error
                detail = print_error(error)[-1]
                log('\nTwitter video Error:\n{}'.format(detail))
                # No point in waiting after the final attempt.
                if attempt < self.try_n - 1:
                    sleep(10, self.cw)
        # The loop never breaks, so reaching this point means every attempt
        # failed.
        raise last_error
Beispiel #4
0
    def get(self, url):
        """Resolve ``url`` to its download target and cache it in ``self._url``.

        Uses the last entry of the extractor's ``formats`` list, sets
        ``self.title`` / ``self.filename`` / ``self.url_thumb`` and downloads
        the thumbnail into ``self.thumb``.  ``.m3u8`` targets are wrapped in
        ``M3u8_stream`` and saved with a ``.mp4`` extension.

        If extraction fails, the extractor's ``_download_info`` result (when
        the extractor has one) is logged and the original error is re-raised.
        """
        print_ = get_print(self.cw)
        if self._url:
            return self._url
        ydl = ytdl.YoutubeDL(cw=self.cw)
        try:
            info = ydl.extract_info(url)
        except Exception:
            # Diagnostics only: log what the extractor can tell us, then let
            # the original failure propagate.
            extractor = type(ytdl.get_extractor(url))(ydl)
            download_info = getattr(extractor, '_download_info', None)
            if download_info is not None:
                print_(download_info(extractor._match_id(url)))
            raise

        stream = info['formats'][-1]['url']
        suffix = get_ext(stream)
        self.title = info['title']
        display_id = info['display_id']

        if suffix.lower() == '.m3u8':
            stream = M3u8_stream(stream, n_thread=4, alter=alter)
            suffix = '.mp4'
        self.filename = format_filename(self.title, display_id, suffix)

        self.url_thumb = info['thumbnail']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)

        self._url = stream
        return self._url
Beispiel #5
0
    def get(self, url):
        """Resolve ``url`` to its download target and cache it in ``self._url``.

        Picks the widest format reported by the extractor, downloads the
        first thumbnail into ``self.thumb`` and sets ``self.title`` /
        ``self.filename`` (always ``.mp4``).  Formats whose ``protocol``
        contains ``m3u8`` are wrapped in ``M3u8_stream`` with ``url`` as
        referer.
        """
        if self._url:
            return self._url

        ydl = ytdl.YoutubeDL()
        info = ydl.extract_info(url)

        # get best video
        # A format without a usable ``width`` (missing or None) ranks as 0
        # instead of aborting the whole lookup with KeyError/TypeError.
        fs = sorted(info['formats'],
                    key=lambda x: int(x.get('width') or 0),
                    reverse=True)
        f = fs[0]
        url_video = f['url']

        # thumb
        self.thumb_url = info['thumbnails'][0]['url']
        self.thumb = BytesIO()
        downloader.download(self.thumb_url, buffer=self.thumb)

        # m3u8
        print(f['protocol'])
        if 'm3u8' in f['protocol']:
            url_video = M3u8_stream(url_video, referer=url)

        # title & filename
        self.title = info['title']
        self.filename = format_filename(self.title, info['id'], '.mp4')

        # Cached last, so a failure above never leaves a half-filled object.
        self._url = url_video

        return self._url
Beispiel #6
0
def get_cid(force=False):
    """Return the module-level ``CLIENT_ID``, refreshing it when needed.

    The id is refreshed through ``SoundcloudIE._update_client_id`` when it
    has not been set yet (``None``) or when ``force`` is true.
    """
    global CLIENT_ID
    if CLIENT_ID is not None and not force:
        return CLIENT_ID

    print('update cid...')
    ydl = ytdl.YoutubeDL()
    extractor = ytdl.extractor.soundcloud.SoundcloudIE(ydl)
    extractor._update_client_id()
    CLIENT_ID = extractor._CLIENT_ID
    return CLIENT_ID
Beispiel #7
0
    def get(self, url):
        """Resolve ``url`` to its download target and cache it in ``self._url``.

        Formats that report a ``height`` are ranked by ``(height, tbr)``,
        best first; the first one whose height does not exceed
        ``get_resolution()`` is chosen, falling back to the lowest-ranked
        format.  Also sets ``self.title`` / ``self.filename`` /
        ``self.url_thumb`` and downloads the thumbnail into ``self.thumb``.

        Raises:
            errors.LoginRequired: when extraction fails and the error text
                contains ``'HTTPError 403'``.
            Exception: any other extraction error is re-raised unchanged.
        """
        print_ = get_print(self.cw)
        if self._url:
            return self._url
        ydl = ytdl.YoutubeDL(cw=self.cw)
        try:
            info = ydl.extract_info(url)
        except Exception as e:
            # Diagnostics: log the extractor's ``_download_info`` result, if
            # the extractor provides one, before propagating the failure.
            ex = type(ytdl.get_extractor(url))(ydl)
            _download_info = getattr(ex, '_download_info', None)
            if _download_info is not None:
                vod_id = ex._match_id(url)
                info = _download_info(vod_id)
                print_(info)
            if 'HTTPError 403' in str(e):
                raise errors.LoginRequired()
            raise

        def print_video(video):
            print_('[{}] [{}] [{}] {}'.format(video['format_id'],
                                              video.get('height'),
                                              video.get('tbr'), video['url']))

        videos = [video for video in info['formats'] if video.get('height')]

        # ``tbr`` can be present with a None value; ``.get('tbr', 0)`` would
        # hand that None to the tuple comparison and raise TypeError whenever
        # two formats share a height.
        videos = sorted(videos,
                        key=lambda video:
                        (video.get('height', 0), video.get('tbr') or 0),
                        reverse=True)

        for video in videos:
            print_video(video)

        for video in videos:
            if video.get('height', 0) <= get_resolution():  #3723
                video_best = video
                break
        else:
            # Nothing fits the resolution cap: take the lowest-ranked format.
            video_best = videos[-1]
        # Log the actual selection rather than the leftover loop variable.
        print_video(video_best)

        video = video_best['url']

        ext = get_ext(video)
        self.title = info['title']
        display_id = info['display_id']

        if ext.lower() == '.m3u8':
            video = M3u8_stream(video, n_thread=4, alter=alter)
            ext = '.mp4'
        self.filename = format_filename(self.title, display_id, ext)
        self.url_thumb = info['thumbnail']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)
        self._url = video
        return self._url
    def __init__(self, url):
        """Extract ``url`` and populate url, thumbnail, title and filename.

        The last entry of the extractor's ``formats`` list is used as the
        download target, exposed through a ``LazyUrl``; the first thumbnail is
        downloaded into ``self.thumb``.
        """
        info = ytdl.YoutubeDL().extract_info(url)

        direct_url = info['formats'][-1]['url']
        self.url = LazyUrl(url, lambda _: direct_url, self)

        self.url_thumb = info['thumbnails'][0]['url']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)

        self.title = info['title']
        suffix = get_ext(direct_url)
        self.filename = format_filename(self.title, info['id'], suffix)
Beispiel #9
0
    def get(self, url):
        """Resolve ``url`` to a direct media URL and cache it in ``self._url``.

        The id captured by ``PATTERN_VID`` (group ``id``) doubles as the
        title; the filename always uses a ``.mp4`` extension.
        """
        if self._url:
            return self._url

        video_id = re.search(PATTERN_VID, url).group('id')
        self.title = video_id  #
        self.filename = format_filename(self.title, video_id, '.mp4')

        info = ytdl.YoutubeDL().extract_info(url)
        self._url = info['url']
        return self._url
Beispiel #10
0
def extract_info(url, cw=None):
    """Run the extractor on ``url`` and return its info dict.

    Args:
        url: Page URL to extract.
        cw: Forwarded to ``ytdl.YoutubeDL`` and ``get_print``.

    Raises:
        errors.LoginRequired: when extraction fails and the error text
            contains ``'HTTPError 403'``.
        Exception: any other extraction error is re-raised unchanged.
    """
    # Bind the logger locally; the error path below uses it, and without this
    # binding it would fail with NameError and hide the real exception.
    print_ = get_print(cw)
    ydl = ytdl.YoutubeDL(cw=cw)
    try:
        info = ydl.extract_info(url)
    except Exception as e:
        # Diagnostics: log the extractor's ``_download_info`` result, if the
        # extractor provides one, before propagating the failure.
        ex = type(ytdl.get_extractor(url))(ydl)
        _download_info = getattr(ex, '_download_info', None)
        if _download_info is not None:
            vod_id = ex._match_id(url)
            info = _download_info(vod_id)
            print_(info)
        if 'HTTPError 403' in str(e):
            raise errors.LoginRequired()
        raise
    return info
Beispiel #11
0
    def get(self, url):
        """Resolve ``url`` to its tallest mp4 format and cache it in ``self._url``.

        Also downloads the first thumbnail into ``self.thumb`` and sets
        ``self.title`` / ``self.filename``.
        """
        if self._url:
            return self._url

        ydl = ytdl.YoutubeDL()
        info = ydl.extract_info(url)
        fs = [f for f in info['formats'] if f['ext'] == 'mp4']
        # A missing/None ``height`` ranks as 0 instead of breaking the sort.
        f = sorted(fs, key=lambda f: f.get('height') or 0)[-1]
        url_video = f['url']

        self.thumb_url = info['thumbnails'][0]['url']
        self.thumb = IO()
        downloader.download(self.thumb_url, buffer=self.thumb)
        self.title = info['title']
        ext = get_ext(url_video)
        self.filename = format_filename(self.title, info['id'], ext)

        # Cache only after everything succeeded: setting ``self._url`` first
        # would make a retry after a thumbnail failure return early with
        # title/filename/thumb never filled in.
        self._url = url_video
        return self._url
Beispiel #12
0
    def get(self, _=None):
        """Resolve ``self.referer`` to a working media URL, with retries.

        The result is memoised in ``self._url_cache``.  Formats are tried from
        tallest to shortest (non-``http`` protocols rank slightly below an
        ``http`` one of the same height) and the first URL accepted by
        ``downloader.ok_url`` wins.  ``.m3u8`` targets are wrapped in
        ``ffmpeg.Stream``.  Up to ``self.try_n`` attempts are made, sleeping
        10 seconds between failed attempts.

        Raises:
            The exception of the last failed attempt once all attempts are
            used up.
        """
        if self._url_cache:
            return self._url_cache
        print_ = get_print(self.cw)
        for try_ in range(self.try_n):
            try:
                d = ytdl.YoutubeDL(cw=self.cw)
                info = d.extract_info(self.referer)

                fs = info['formats']
                for f in fs:
                    print_('{} {} - {}'.format(f.get('height'), f['protocol'],
                                               f['url']))

                def key(f):
                    # ``height`` can be present but None; ``.get('height', 0)``
                    # would then return None and break both the subtraction
                    # below and the sort.
                    h = f.get('height') or 0
                    if not f['protocol'].startswith('http'):
                        h -= .1
                    return h

                for f in sorted(fs, key=key, reverse=True):
                    if downloader.ok_url(f['url'], self.referer):  #4185
                        break
                    else:
                        print_('invalid video: {}'.format(f['url']))
                else:
                    raise Exception('no valid videos')
                url = f['url']
                ext = get_ext(url)
                self.ext = ext
                print_('get_video: {} {}'.format(url, ext))
                if ext.lower() == '.m3u8':
                    url = ffmpeg.Stream(url)
                    url._live = False
                self._url_cache = url
                return url
            except Exception as e:
                e_ = e
                msg = print_error(e)[0]
                print_('\nTwitter video Error:\n{}'.format(msg))
                # No point in waiting after the final attempt.
                if try_ < self.try_n - 1:
                    sleep(10, self.cw)
        else:
            # The loop never breaks, so this runs once every attempt failed.
            raise e_
    def get(self, url):
        """Resolve ``url`` to its widest http(s) format and cache it in ``self._url``.

        Also downloads the first thumbnail into ``self.thumb`` and sets
        ``self.title`` / ``self.filename``.

        Raises:
            Exception: ``'No MP4 videos'`` when no format uses the ``http`` or
                ``https`` protocol.
        """
        if self._url:
            return self._url

        ydl = ytdl.YoutubeDL()
        info = ydl.extract_info(url)
        fs = [f for f in info['formats'] if f['protocol'] in ['http', 'https']]
        # ``width`` may be present with a None value; ``int(None)`` raises, so
        # treat anything falsy as 0.
        fs = sorted(fs, key=lambda f: int(f.get('width') or 0), reverse=True)
        if not fs:
            raise Exception('No MP4 videos')
        f = fs[0]
        url_video = f['url']

        self.thumb_url = info['thumbnails'][0]['url']
        self.thumb = IO()
        downloader.download(self.thumb_url, buffer=self.thumb)
        self.title = info['title']
        ext = get_ext(url_video)
        self.filename = format_filename(self.title, info['id'], ext)

        # Cache only after everything succeeded: setting ``self._url`` first
        # would make a retry after a thumbnail failure return early with
        # title/filename/thumb never filled in.
        self._url = url_video
        return self._url
Beispiel #14
0
def get_audios(url, cw, album_art):
    """Extract ``url`` and attach one ``Audio`` per track as ``info['audios']``.

    A URL with exactly three slashes (after stripping a trailing one) gets
    ``/tracks`` appended.  For multi-entry results the title is rewritten to
    ``[<Kind>] <title>``, where the kind is taken from a ``(<Kind>)`` marker
    in the title or defaults to ``Playlist``.  Entries whose ``webpage_url``
    contains ``/sets/`` are skipped.

    Returns:
        The extractor's info dict with the extra ``audios`` list.
    """
    print_ = get_print(cw)
    url = url.rstrip('/')
    if url.count('/') == 3:
        url += '/tracks'

    # NOTE: no extractor options (such as 'extract_flat') are passed here.
    info = ytdl.YoutubeDL(cw=cw).extract_info(url)

    if 'entries' not in info:
        entries = [info]
    else:
        entries = info['entries']
        title = info['title']
        labels = ('All', 'Tracks', 'Albums', 'Sets', 'Reposts', 'Likes',
                  'Spotlight')
        # First label whose "(Label)" marker appears in the title, if any.
        found = next(
            (label for label in labels if '({})'.format(label) in title),
            None)
        if found is None:
            kind = 'Playlist'
        else:
            kind = found
            title = title.replace('({})'.format(found), '')
        print_(u'kind: {}'.format(kind))
        info['title'] = u'[{}] {}'.format(kind.capitalize(), title)

    info['audios'] = [
        Audio(entry, album_art, cw=cw)
        for entry in entries
        if '/sets/' not in entry['webpage_url']
    ]

    return info
    def get(self, url):
        """Resolve ``url`` to its download target and cache it in ``self._url``.

        Uses the last entry of the extractor's ``formats`` list, sets
        ``self.title`` / ``self.filename`` / ``self.url_thumb`` and downloads
        the thumbnail into ``self.thumb``.  ``.m3u8`` targets are wrapped in
        ``M3u8_stream`` and saved with a ``.mp4`` extension.
        """
        if self._url:
            return self._url

        info = ytdl.YoutubeDL().extract_info(url)
        stream = info['formats'][-1]['url']

        suffix = get_ext(stream)
        self.title = info['title']
        display_id = info['display_id']

        is_playlist = suffix.lower() == '.m3u8'
        if is_playlist:
            stream = M3u8_stream(stream, n_thread=4)
            suffix = '.mp4'
        self.filename = format_filename(self.title, display_id, suffix)

        self.url_thumb = info['thumbnail']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)

        self._url = stream
        return self._url
Beispiel #16
0
def get_videos(url, cw=None):
    """List the videos behind ``url`` as ``Video`` objects, newest id first.

    Extraction is flat and capped at ``get_max_range(cw)`` entries.  When the
    info dict has no ``name``, it is filled from the ``creator`` field of the
    first entry's own extraction.

    Returns:
        The extractor's info dict with ``videos`` sorted by descending
        integer ``id``.

    Raises:
        Exception: ``'no videos'`` when there are no entries.
    """
    log = get_print(cw)
    log(f'get_videos: {url}')
    ydl_options = {
        'extract_flat': True,
        'playlistend': get_max_range(cw),
    }
    collected = []
    ydl = ytdl.YoutubeDL(ydl_options, cw=cw)
    info = ydl.extract_info(url)
    for entry in info['entries']:
        item = Video(entry['url'], cw)
        item.id = int(entry['id'])
        collected.append(item)
        # Only true on the first pass; later entries find 'name' already set.
        if 'name' not in info:
            info['name'] = ydl.extract_info(entry['url'])['creator']
    if not collected:
        raise Exception('no videos')
    collected.sort(key=lambda item: item.id, reverse=True)
    info['videos'] = collected
    return info
def get_video(url):
    """Pick the highest-quality mp4 format of ``url`` and wrap it in ``Video``.

    Args:
        url: Page URL handed to the extractor (``noplaylist`` is set).

    Returns:
        ``Video(f, info)`` where ``f`` is the chosen format dict.

    Raises:
        Exception: ``'No videos'`` when no format has ``ext == 'mp4'``.
    """
    options = {
        'noplaylist': True,
    }

    ydl = ytdl.YoutubeDL(options)
    info = ydl.extract_info(url)

    fs = []
    for f in info['formats']:
        if f['ext'] != 'mp4':
            continue
        f['quality'] = f.get('vbr') or re.find('([0-9]+)p', f['format'],
                                               re.IGNORECASE)
        print(f['format'], f['quality'])
        fs.append(f)

    if not fs:
        raise Exception('No videos')

    def rank(f):
        # ``quality`` may be a number (vbr), the text captured from e.g.
        # "720p", or empty.  Compare numerically so that "1080" outranks
        # "720" and mixed types do not make the sort raise TypeError.
        try:
            return float(f['quality'])
        except (TypeError, ValueError):
            return 0.0

    f = sorted(fs, key=rank)[-1]
    video = Video(f, info)

    return video
Beispiel #18
0
    def get(self, url):
        '''
        Resolve ``url`` to a downloadable file and cache it in ``self._url``.

        The page is first requested on the non-premium domain; if that fails
        or shows a locked player, the premium domain is used instead.  GIF
        pages yield the mp4 referenced by the page markup; video pages go
        through the extractor, choosing the best format whose quality does
        not exceed ``get_resolution()`` (or the lowest one if none fits).

        Sets ``self.title``, ``self.filename`` and ``self.thumb`` from the
        resulting ``File`` and returns its url.

        Raises:
            Exception: ``'removed'``, ``'gif mp4 url not found'``,
                ``'no id'`` or ``'No videos'`` for the respective conditions.
        '''
        cw = self.cw
        session = self.session
        print_ = get_print(cw)
        if self._url:
            return self._url

        id_ = re.find(r'viewkey=(\w+)', url, re.IGNORECASE) or \
              re.find(r'/embed/(\w+)', url, re.IGNORECASE, err='no id')
        print_('id: {}'.format(id_))
        if 'viewkey=' not in url.lower() and '/gif/' not in url.lower():
            url = urljoin(url, '/view_video.php?viewkey={}'.format(id_))

        url_test = url.replace('pornhubpremium.com', 'pornhub.com')
        try:
            html = downloader.read_html(url_test, session=session)
            soup = Soup(html)
            if soup.find('div', id='lockedPlayer'):
                print_('Locked player')
                # Deliberately routed into the fallback below.
                raise Exception('Locked player')
            url = url_test
        except Exception: #3511
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit; only ordinary errors should trigger the
            # premium-domain fallback.
            url = url.replace('pornhub.com', 'pornhubpremium.com')
            html = downloader.read_html(url, session=session)

        soup = Soup(html)
        soup = fix_soup(soup, url, session, cw)
        html = str(soup)

        # removed
        if soup.find('div', class_='removed'):
            raise Exception('removed')

        gif = soup.find('div', {'id': 'gifImageSection'})
        if gif:
            print_('GIF')
            id_ = url.split('/gif/')[1]
            id_ = re.findall('[0-9a-zA-Z]+', id_)[0]

            # Find the child element that carries the mp4 location.
            jss = list(gif.children)
            for js in jss:
                if 'data-mp4' in getattr(js, 'attrs', {}):
                    break
            else:
                raise Exception('gif mp4 url not found')

            title = js['data-gif-title']
            url = js['data-mp4']
            url_thumb = re.find(r'https?://.+?.phncdn.com/pics/gifs/.+?\.jpg', html, err='no thumb')
            file = File('gif_{}'.format(id_), title, url, url_thumb)
        else:
            if id_ is None:
                raise Exception('no id')

            print_('Video')

            # 1968
            #title = j['video_title']
            title = soup.find('h1', class_='title').text.strip()

            ydl = ytdl.YoutubeDL(cw=self.cw)
            info = ydl.extract_info(url)
            url_thumb = info['thumbnail']
            videos = []
            for f in info['formats']:
                video = {}
                video['height'] = f['height']
                video['quality'] = f['height'] or 0
                video['protocol'] = f['protocol']
                video['videoUrl'] = f['url']
                # Rank an m3u8 format just below a same-height alternative.
                if f['protocol'] == 'm3u8':
                    video['quality'] -= 1
                print_('[{}p] {} {}'.format(video['height'], video['protocol'], video['videoUrl']))
                videos.append(video)

            if not videos:
                raise Exception('No videos')

            # Ascending by quality: best candidates end up last.
            videos = sorted(videos, key=lambda video: video['quality'])

            res = get_resolution()

            videos_good = [video for video in videos if video['quality'] <= res]
            if videos_good:
                video = videos_good[-1]
            else:
                # Everything exceeds the cap: take the lowest quality.
                video = videos[0]
            print_('\n[{}p] {} {}'.format(video['height'], video['protocol'], video['videoUrl']))

            file = File(id_, title, video['videoUrl'].strip(), url_thumb)

        self._url = file.url
        self.title = file.title
        self.filename = file.filename
        self.thumb = file.thumb
        return self._url
Beispiel #19
0
def get_video(url, session, cw, ie_key=None):
    """Extract ``url`` and build a ``Video`` from the best video/audio formats.

    Args:
        url: Page URL handed to the extractor.
        session: Object whose ``headers`` are updated with the extractor's
            ``http_headers``; also forwarded to ``Video``.
        cw: Passed to ``get_print`` and ``Video``.
        ie_key: Extractor name to record in ``info['ie_key']``; looked up via
            ``ytdl.get_extractor_name(url)`` when falsy.

    Returns:
        ``Video(f, f_audio, info, session, url, cw=cw)`` where ``f_audio`` is
        ``None`` unless a separate audio-only format was selected.

    Raises:
        Exception: ``'No videos'`` when there is no format and no direct url.
    """
    print_ = get_print(cw)
    options = {
        'noplaylist': True,
        #'extract_flat': True,
        'playlistend': 1,
    }

    ydl = ytdl.YoutubeDL(options)
    info = ydl.extract_info(url)
    if not ie_key:
        ie_key = ytdl.get_extractor_name(url)
    info['ie_key'] = ie_key
    url_new = info.get('url')
    print('url: {} -> {}'.format(url, url_new))
    formats = info.get('formats', [])
    print(info.keys())

    # No formats, and the result has entries or lacks a title: follow the
    # first entry (or the reported url) by recursing, provided it differs
    # from the url we were given.
    if not formats and (info.get('entries') or 'title' not in info):
        if 'entries' in info:
            entry = info['entries'][0]
            url_new = entry.get('url') or entry['webpage_url']
        if url_new != url:
            return get_video(url_new, session, cw, ie_key=get_ie_key(info))

    session.headers.update(info.get('http_headers', {}))
    #session.cookies.update(ydl.cookiejar)

    # Still no formats: synthesise a single one from the direct url, if any.
    if not formats:
        print('no formats')
        if url_new:
            f = {'url': url_new, 'format': ''}
            formats.append(f)

    fs = []
    for i, f in enumerate(formats):
        f['_index'] = i
        # First truthy of: vbr, the number in an "<N>p" format label, height,
        # width; otherwise 1/0 depending on whether a video codec is present.
        f['_resolution'] = f.get('vbr') or int_or_none(
            re.find(
                '([0-9]+)p', f['format'],
                re.IGNORECASE)) or f.get('height') or f.get('width') or int(
                    f.get('vcodec', 'none') != 'none')
        # First truthy of: abr, asr; otherwise 1/0 depending on whether an
        # audio codec is present.
        f['_audio'] = f.get('abr') or f.get('asr') or int(
            f.get('acodec', 'none') != 'none')
        print_(format_(f))
        fs.append(f)

    if not fs:
        raise Exception('No videos')

    # Highest resolution wins; ties go to the later format in the list.
    f = sorted(fs, key=lambda f: (f['_resolution'], f['_index']))[-1]
    if f['_audio']:
        # The chosen format already carries audio.
        f_audio = None
    else:
        # Look for an audio-only companion (no resolution, some audio).
        fs_audio = sorted([
            f_audio for f_audio in fs
            if (not f_audio['_resolution'] and f_audio['_audio'])
        ],
                          key=lambda f: (f['_audio'], f['_index']))
        if fs_audio:
            f_audio = fs_audio[-1]
        else:
            # No audio-only format: prefer the best format that has audio,
            # keeping the silent pick when there is none (IndexError).
            try:
                f = sorted([f for f in fs if f['_audio']],
                           key=lambda f: (f['_resolution'], f['_index']))[-1]
            except IndexError:
                pass
            f_audio = None
    print_('video: {}'.format(format_(f)))
    print_('audio: {}'.format(format_(f_audio)))
    video = Video(f, f_audio, info, session, url, cw=cw)

    return video
Beispiel #20
0
def _get_video(url, session, cw, ie_key=None, allow_m3u8=True):
    """Extract ``url`` and build a ``Video`` from the best video/audio formats.

    Args:
        url: Page URL handed to the extractor.
        session: Object whose ``headers`` are updated with the extractor's
            ``http_headers``; also forwarded to ``get_ext_`` and ``Video``.
        cw: Passed to ``get_print``, ``ytdl.YoutubeDL`` and ``Video``.
        ie_key: Extractor name to record in ``info['ie_key']``; looked up via
            ``ytdl.get_extractor_name(url)`` when falsy.
        allow_m3u8: When false, formats whose extension resolves to ``.m3u8``
            are skipped as long as a non-m3u8 alternative exists.

    Returns:
        ``Video(f_video, f_audio, info, session, url, cw=cw)`` where
        ``f_audio`` is ``None`` unless a separate audio-only format was
        selected.

    Raises:
        Exception: ``'No videos'`` when there is no format and no direct url.
    """
    print_ = get_print(cw)
    print_('get_video: {}, {}'.format(allow_m3u8, url))
    options = {
        'noplaylist': True,
        #'extract_flat': True,
        'playlistend': 1,
    }
    ydl = ytdl.YoutubeDL(options, cw=cw)
    info = ydl.extract_info(url)
    if not ie_key:
        ie_key = ytdl.get_extractor_name(url)
    info['ie_key'] = ie_key
    url_new = info.get('url')
    print('url: {} -> {}'.format(url, url_new))
    formats = info.get('formats', [])
    print(info.keys())

    # No formats, and the result has entries or lacks a title: follow the
    # first entry (or the reported url), provided it differs from ``url``.
    # NOTE(review): this delegates to ``get_video`` (defined elsewhere) and
    # does not forward ``allow_m3u8`` - confirm that is intended.
    if not formats and (info.get('entries') or 'title' not in info):
        if 'entries' in info:
            entry = info['entries'][0]
            url_new = entry.get('url') or entry['webpage_url']
        if url_new != url:
            return get_video(url_new, session, cw, ie_key=get_ie_key(info))

    session.headers.update(info.get('http_headers', {}))
    #session.cookies.update(ydl.cookiejar)

    # Still no formats: synthesise a single one from the direct url, if any.
    if not formats:
        print('no formats')
        if url_new:
            f = {'url': url_new, 'format': ''}
            formats.append(f)

    fs = []
    for i, f in enumerate(formats):
        f['_index'] = i
        # First truthy of: vbr, the number in an "<N>p" format label, height,
        # width; otherwise 1/0 depending on whether a video codec is present.
        f['_resolution'] = f.get('vbr') or int_or_none(
            re.find(
                '([0-9]+)p', f['format'],
                re.IGNORECASE)) or f.get('height') or f.get('width') or int(
                    f.get('vcodec', 'none') != 'none')
        # First truthy of: abr, asr; otherwise 1/0 depending on whether an
        # audio codec is present.
        f['_audio'] = f.get('abr') or f.get('asr') or int(
            f.get('acodec', 'none') != 'none')
        print_(format_(f))
        fs.append(f)

    if not fs:
        raise Exception('No videos')

    def filter_f(fs):
        # Return the first acceptable candidate, best first.
        # Materialise up front: callers pass a one-shot ``reversed``
        # iterator, which the loop would exhaust, leaving the fallback below
        # with nothing to pick from.
        fs = list(fs)
        for f in fs:
            if allow_m3u8:
                return f
            ext = get_ext_(f['url'], session, url)
            if ext.lower() != '.m3u8':
                return f
            print_('invalid url: {}'.format(f['url']))
        # Every candidate was rejected: fall back to the best one anyway.
        # An empty candidate list still raises IndexError here.
        return fs[0]  #

    f_video = filter_f(
        reversed(sorted(fs, key=lambda f: (f['_resolution'], f['_index']))))
    print_('video0: {}'.format(format_(f_video)))

    if f_video['_audio']:
        # The chosen format already carries audio.
        f_audio = None
    else:
        # Look for an audio-only companion (no resolution, some audio).
        fs_audio = sorted([
            f_audio for f_audio in fs
            if (not f_audio['_resolution'] and f_audio['_audio'])
        ],
                          key=lambda f: (f['_audio'], f['_index']))
        if fs_audio:
            f_audio = fs_audio[-1]
        else:
            # No audio-only format: prefer the best format that has audio,
            # keeping the silent pick when that lookup fails.
            try:
                print_('trying to get f_video with audio')
                f_video = filter_f(
                    reversed(
                        sorted([f for f in fs if f['_audio']],
                               key=lambda f: (f['_resolution'], f['_index']))))
            except Exception as e:
                print_('failed to get f_video with audio: {}'.format(e))
            f_audio = None
    print_('video: {}'.format(format_(f_video)))
    print_('audio: {}'.format(format_(f_audio)))
    video = Video(f_video, f_audio, info, session, url, cw=cw)

    return video