def get_files(url, session, multi_post=False, cw=None):
    """Collect the downloadable files referenced by a single post page.

    The page at ``url`` is fetched and classified from its markup as one of
    three post types: ``'youtube'`` (a ``div.embedded-video`` is present),
    ``'video'`` (a ``<video>`` element is present) or ``'image'`` (fallback).

    Args:
        url: Address of the post page. Also used as the base for resolving
            relative links and as the referer for follow-up requests.
        session: Session object forwarded to ``read_html``.
        multi_post: Forwarded unchanged to ``File`` for image posts.
        cw: Handle passed to ``get_print`` to obtain the log function.

    Returns:
        A list of ``File`` objects: one per unique linked image for image
        posts, or a single entry for youtube/video posts.

    Raises:
        Exception: If the resolved video URL is rejected by
            ``downloader.ok_url``.
    """
    print_ = get_print(cw)
    html = read_html(url, session=session)
    soup = Soup(html)
    h = soup.find('h1', class_='title')
    # The post container sits three levels above the title heading.
    content = h.parent.parent.parent
    title = h.text.strip()
    youtube = content.find('div', class_='embedded-video')
    video = content.find('video')
    if youtube:
        type_ = 'youtube'
    elif video:
        type_ = 'video'
    else:
        type_ = 'image'
    print_(('type: {}').format(type_))
    files = []
    if type_ == 'image':
        urls = set()
        for img in content.findAll('img'):
            # Only images wrapped in a link are candidates. An <img> whose
            # parent has no href used to abort the whole post with KeyError;
            # such images could never match the '/files/' filter anyway.
            href = img.parent.attrs.get('href')
            if href is None:
                continue
            img_url = urljoin(url, href)
            if '/files/' not in img_url:
                continue
            if img_url in urls:
                print('duplicate')
                continue
            urls.add(img_url)
            # len(files) is the running index of this image within the post.
            file = File(type_, img_url, title, url, len(files),
                        multi_post=multi_post)
            files.append(file)

    elif type_ == 'youtube':
        src = urljoin(url, youtube.find('iframe').attrs['src'])
        file = File(type_, src, title, url)
        files.append(file)
    elif type_ == 'video':
        url_thumb = urljoin(url, video.attrs['poster'])
        print('url_thumb:', url_thumb)
        # NOTE(review): ``re`` here is expected to be the project's regex
        # helper (stdlib ``re`` has no ``find``); presumably it raises with
        # the given ``err`` text when nothing matches - confirm.
        video_id = re.find(PATTERN_ID, url, err='no video id')
        url_data = urljoin(url, '/api/video/{}'.format(video_id))
        s_json = read_html(url_data, url, session=session)
        data = json.loads(s_json)
        # The first entry of the API response is the one that gets downloaded.
        video_info = data[0]
        url_video = urljoin(url, video_info['uri'])
        if not downloader.ok_url(url_video, url): #4287
            print_('invalid video')
            raise Exception('Invalid video')
        file = File(type_, url_video, title, url)
        file.url_thumb = url_thumb
        file.thumb = BytesIO()
        downloader.download(url_thumb, buffer=file.thumb, referer=url)
        files.append(file)
    else:
        # Defensive: unreachable while ``type_`` is set by the chain above.
        raise NotImplementedError(type_)
    return files
Exemple #2
0
    def get(self, _=None):
        """Resolve, cache and return the media URL for ``self.referer``.

        Formats reported by the extractor are tried from the tallest to the
        shortest; at equal height a format whose protocol starts with
        ``'http'`` is preferred. The first one accepted by
        ``downloader.ok_url`` wins. The whole lookup is attempted up to
        ``self.try_n`` times, calling ``sleep(10, self.cw)`` between attempts.

        Args:
            _: Ignored.

        Returns:
            The chosen URL, or an ``ffmpeg.Stream`` wrapping it when its
            extension is ``.m3u8``. The value is stored in
            ``self._url_cache`` and returned directly on later calls.

        Raises:
            Exception: The error from the last attempt once every attempt
                has failed (including ``'no valid videos'`` when no format
                passes ``downloader.ok_url``).
        """
        if self._url_cache:
            return self._url_cache
        print_ = get_print(self.cw)
        for try_ in range(self.try_n):
            try:
                d = ytdl.YoutubeDL(cw=self.cw)
                info = d.extract_info(self.referer)

                fs = info['formats']
                for f in fs:
                    print_('{} {} - {}'.format(f.get('height'), f['protocol'],
                                               f['url']))

                def key(f):
                    # ``height`` can be present with a None value; ``or 0``
                    # keeps the sort key numeric so neither the subtraction
                    # nor the comparison raises TypeError.
                    h = f.get('height') or 0
                    if not f['protocol'].startswith('http'):
                        # Rank non-http protocols just below http at the
                        # same height.
                        h -= .1
                    return h

                for f in sorted(fs, key=key, reverse=True):
                    if downloader.ok_url(f['url'], self.referer):  #4185
                        break
                    else:
                        print_('invalid video: {}'.format(f['url']))
                else:
                    # The loop finished without ``break``: nothing was usable.
                    raise Exception('no valid videos')
                url = f['url']
                ext = get_ext(url)
                self.ext = ext
                print_('get_video: {} {}'.format(url, ext))
                if ext.lower() == '.m3u8':
                    url = ffmpeg.Stream(url)
                    url._live = False
                self._url_cache = url
                return url
            except Exception as e:
                # ``e`` is unbound after the except block, so keep a copy
                # for the final re-raise.
                e_ = e
                msg = print_error(e)[0]
                print_('\nTwitter video Error:\n{}'.format(msg))
                if try_ < self.try_n - 1:
                    sleep(10, self.cw)
        else:
            raise e_
    def get(self, url, force=False):
        """Pick the best stream for ``url`` and return its download URL.

        The result is cached in ``self._url``; later calls return it
        immediately. Otherwise the method:

        1. Builds ``ytdl.YouTube(url)`` (up to 8 attempts).
        2. Filters the streams by ``self.type`` (``'video'`` or ``'audio'``)
           and the ``only_mp4`` / ``audio_included`` / ``max_res`` /
           ``max_abr`` settings read from ``self``.
        3. Repeatedly picks the best remaining stream until one passes
           ``downloader.ok_url``.
        4. For video streams without audio, selects a separate audio stream.
        5. Downloads a thumbnail and fills in the metadata attributes
           (``yt``, ``id``, ``stream``, ``username``, ``stream_audio``,
           ``audio``, ``thumb``, ``thumb_url``, ``subtitles``, ``utime``,
           ``title``, ``filename``).

        Args:
            url: Address handed to ``ytdl.YouTube`` and used as referer for
                the stream validity check.
            force: When true the audio bitrate floor is dropped to 0. Used
                internally for one retry when no audio stream qualified.

        Returns:
            The resolved stream URL (also stored in ``self._url``).

        Raises:
            Exception: The last construction error after 8 failed attempts;
                ``'type "..." is not supported'`` for an unknown type;
                ``'No videos'`` when no stream is usable; ``'No audio'``
                when a video-only stream has no matching audio stream.
        """
        if self._url:
            return self._url

        type_ = self.type
        only_mp4 = self.only_mp4
        audio_included = self.audio_included
        max_res = self.max_res
        max_abr = self.max_abr
        cw = self.cw
        print_ = get_print(cw)

        if force:
            # Forced retry: with a floor of 0 every audio stream qualifies.
            max_abr = 0

        print('max_res: {}'.format(max_res))
        for try_ in range(8):
            try:
                yt = ytdl.YouTube(url)
                break
            except Exception as e:
                # ``e`` is unbound after the except block; keep a copy.
                e_ = e
                s = print_error(e)[-1]
                print_('### youtube retry...\n{}'.format(s))
                sleep(try_ / 2, cw)
        else:
            raise e_

        # NOTE: ``streams`` is filtered in place (slice assignment) below;
        # the list object returned here is never rebound in this section.
        streams = yt.streams.all()
        print_streams(streams, cw)

        #3528
        uploaded = datetime.strptime(yt.info['upload_date'], '%Y%m%d')
        self.utime = (uploaded - datetime(1970, 1, 1)).total_seconds()
        print_('utime: {}'.format(self.utime))

        if type_ == 'video':
            streams[:] = [
                stream for stream in streams if stream.video_codec is not None
            ]
            # Only mp4
            if only_mp4:
                streams[:] = [
                    stream for stream in streams if stream.subtype == 'mp4'
                ]

            # Audio included; Non-HD
            if audio_included:
                streams[:] = [
                    stream for stream in streams
                    if stream.audio_codec is not None
                ]

            # Maximum resolution
            kept = []
            for candidate in streams:
                if candidate.resolution is None:
                    continue
                res = int(candidate.resolution.replace('p', ''))
                if max_res is None or res <= max_res:
                    kept.append(candidate)
            streams[:] = kept
            print_('')
        elif type_ == 'audio':
            streams[:] = [stream for stream in streams if stream.abr]
            # Maximum abr: clamp the requested floor to what is actually
            # available. Skip the clamp when there is no limit (None) or no
            # audio stream at all; the unguarded min()/max() would raise
            # TypeError / ValueError in those cases.
            abrs = [stream.abr for stream in streams]
            if abrs and max_abr is not None:
                max_abr = min(max(abrs), max_abr)
            streams[:] = [
                stream for stream in streams
                if max_abr is None or stream.abr >= max_abr
            ]
        else:
            raise Exception(u'type "{}" is not supported'.format(type_))

        # Pick the best
        while streams:
            if type_ == 'video':
                ress = [
                    int_(stream.resolution.replace('p', ''))
                    for stream in streams
                ]
                m = max(ress)
                prefer_format = 'mp4'
            elif type_ == 'audio':
                # Lowest bitrate that still satisfies the floor applied above.
                ress = [stream.abr for stream in streams]
                m = min(ress)
                prefer_format = 'webm'
            print('Resolutions:', ress)
            stream_final = None
            for candidate, res in zip(streams, ress):
                if res == m:
                    if type_ == 'video':
                        # True when the candidate would add an audio track
                        # that the current pick lacks.
                        gains_audio = (stream_final is not None) and (
                            stream_final.audio_codec is None) and bool(
                                candidate.audio_codec)
                    elif type_ == 'audio':
                        gains_audio = False
                    # Replace the current pick only if fps does not drop AND
                    # the candidate adds audio, switches to the preferred
                    # container, or has strictly higher fps.
                    if stream_final is None or (
                            stream_final.fps <= candidate.fps and
                        (gains_audio or
                         (stream_final.subtype.lower() != prefer_format
                          and candidate.subtype.lower() == prefer_format)
                         or stream_final.fps < candidate.fps)):
                        print_(u'# stream_final {} {} {} {} {} {}fps'.format(
                            candidate, candidate.format, candidate.resolution,
                            candidate.subtype, candidate.audio_codec,
                            candidate.fps))
                        stream_final = candidate

            # Non-string URLs (e.g. callables, see below) cannot be probed
            # and are accepted as-is.
            ok = downloader.ok_url(stream_final.url,
                                   referer=url) if isinstance(
                                       stream_final.url, str) else True
            if ok:
                break
            else:
                print_('stream is not valid')
                streams.remove(stream_final)
        else:
            # Every candidate was rejected (or none survived filtering).
            if type_ == 'audio' and not force:
                return self.get(url, force=True)  # 1776
            raise Exception('No videos')

        stream = stream_final

        self.yt = yt
        self.id = yt.video_id
        self.stream = stream
        self.username = yt.info['uploader']
        self.stream_audio = None
        self.audio = None
        self.thumb = None
        self.thumb_url = None
        self.subtitles = yt.subtitles

        if type_ == 'audio' and 'DASH' in self.stream.format:
            self.stream.setDashType('audio')

        # Audio
        if type_ == 'video' and stream.audio_codec is None:
            print('audio required')
            audio_streams = [s for s in yt.streams.all() if s.abr]
            print_streams(audio_streams, cw)

            # only mp4; https://github.com/KurtBestor/Hitomi-Downloader-issues/issues/480
            def isGood(stream):
                return stream.audio_codec.lower().startswith('mp4')

            streams_good = [s for s in audio_streams if isGood(s)]
            if streams_good:
                audio_streams = streams_good
                print_streams(audio_streams, cw)
            # only audio?
            if any(s.resolution is None for s in audio_streams):
                audio_streams = [
                    s for s in audio_streams if s.resolution is None
                ]
                print_streams(audio_streams, cw)
            best_audio = None
            best_abr = 0
            # A dedicated loop variable is used so ``stream`` keeps pointing
            # at the selected video stream for the summary log at the end.
            for audio_candidate in audio_streams:
                abr = audio_candidate.abr
                if abr > best_abr:
                    best_abr = abr
                    best_audio = audio_candidate
            if best_audio is None:
                raise Exception('No audio')
            print(best_audio)
            self.stream_audio = best_audio
            if 'DASH' in self.stream_audio.format:
                self.stream_audio.setDashType('audio')
            self.audio = best_audio.url
            if callable(self.audio):
                self.audio = self.audio()

        # Thumbnail: try the qualities in order and keep the first usable one.
        # NOTE(review): ``f.read()`` right after the download assumes
        # ``downloader.download`` leaves the buffer positioned at its start -
        # confirm. If every quality fails, the last buffer is still stored.
        for quality in ['sddefault', 'hqdefault', 'mqdefault', 'default']:
            print('####', yt.thumbnail_url)
            self.thumb_url = yt.thumbnail_url.replace('default', quality)
            f = BytesIO()
            try:
                downloader.download(self.thumb_url, buffer=f)
                data = f.read()
                if len(data) == 0:
                    raise AssertionError('Zero thumbnail')
                if data == empty_thumbnail:
                    raise AssertionError('Empty thumbnail')
                f.seek(0)
                break
            except Exception as e:
                print(print_error(e)[-1])
        self.thumb = f

        # The stream URL may be a callable that produces the URL on demand.
        _url = self.stream.url
        if callable(_url):
            _url = _url()
        self._url = _url
        title = yt.title
        self.title = title
        ext = u'.' + self.stream.subtype
        self.filename = format_filename(title, self.id, ext)

        print_(u'Resolution: {}'.format(stream.resolution))
        print_(u'Codec: {} / {}'.format(stream.video_codec,
                                        stream.audio_codec))
        print_(u'Abr: {}'.format(stream.abr))
        print_(u'Subtype: {}'.format(stream.subtype))
        print_(u'FPS: {}\n'.format(stream.fps))

        return self._url