Ejemplo n.º 1
0
    def _get_storyboards_from_spec(self, video_id, sb_spec):
        """Build the storyboard image sets described by a storyboard spec.

        The spec is split on ``|``: the first part is a URL template
        containing ``$L``, ``$N`` and ``$M`` placeholders, and each further
        part describes one storyboard level as exactly eight ``#``-separated
        fields. The fields read here are, by index: 0 frame width,
        1 frame height, 2 total frame count, 3 columns, 4 rows,
        6 file name and 7 signature (index 5 is not used).

        Levels that do not have eight fields, or whose numeric fields are
        missing or zero, are skipped with a warning.

        :param video_id: not used by this method.
        :param sb_spec: the raw spec string.
        :return: dict mapping ``'L<level index>'`` to a list of
            ``ThumbFramesImage`` objects, one per image of that level.
        """
        storyboards = {}

        s_parts = sb_spec.split('|')
        base_url = s_parts[0]
        for i, params in enumerate(s_parts[1:]):
            storyboard_attrib = params.split('#')
            if len(storyboard_attrib) != 8:
                logger.warning(
                    'Unable to extract thumbframe from spec {}'.format(params))
                continue

            frame_width = int_or_none(storyboard_attrib[0])
            frame_height = int_or_none(storyboard_attrib[1])
            total_frames = int_or_none(storyboard_attrib[2])
            cols = int_or_none(storyboard_attrib[3])
            rows = int_or_none(storyboard_attrib[4])
            filename = storyboard_attrib[6]
            sigh = storyboard_attrib[7]

            if not (frame_width and frame_height and cols and rows
                    and total_frames):
                logger.warning(
                    'Unable to extract thumbframe from spec {}'.format(params))
                continue

            frames_per_image = cols * rows
            n_images = int(math.ceil(total_frames / float(frames_per_image)))
            # Number of frames on the last image when it is only partly
            # filled; 0 means the last image is a full grid.
            remaining_frames = total_frames % frames_per_image

            storyboards_url = base_url.replace('$L', str(i)) + '&'
            storyboard_set = []
            for j in range(n_images):
                url = storyboards_url.replace('$N', filename).replace(
                    '$M', str(j)) + 'sigh=' + sigh

                # Per-image geometry is kept in separate locals so that the
                # level-wide cols/rows are never overwritten.
                img_cols, img_rows, img_frames = cols, rows, frames_per_image
                if j == n_images - 1 and remaining_frames != 0:
                    img_frames = remaining_frames
                    # Frames fill the grid row by row, so the number of rows
                    # in use depends on the column count (not the row count).
                    img_rows = int(math.ceil(float(remaining_frames) / cols))
                    if img_rows == 1:
                        # A single, partly filled row: the image is only as
                        # wide as the frames it holds.
                        img_cols = remaining_frames

                storyboard_set.append(
                    ThumbFramesImage(url=url,
                                     width=img_cols * frame_width,
                                     height=img_rows * frame_height,
                                     cols=img_cols,
                                     rows=img_rows,
                                     n_frames=img_frames))
            storyboards['L{}'.format(i)] = storyboard_set

        return storyboards
Ejemplo n.º 2
0
    def _real_extract(self, url):
        """Extract a single MP4 format and the title from a recording page.

        If the page contains a form with id ``password_form``, the password
        is verified through ``_verify_video_password`` and the page is
        downloaded a second time before the metadata is scraped.

        :param url: page URL; the part before ``zoom.us`` is reused to build
            the ``Referer`` header.
        :return: info dict with ``id``, ``title`` and ``formats``.
        """
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        password_form = self._search_regex(
            r'<form[^>]+?id="(password_form)"', page, 'password field',
            fatal=False, default=None)
        if password_form is not None:
            self._verify_video_password(url, display_id, page)
            # Fetch the page again now that the password has been submitted.
            page = self._download_webpage(url, display_id)

        mp4_url = self._search_regex(
            r"viewMp4Url: \'(.*)\'", page, 'video url')
        title = self._html_search_regex(
            [r"topic: \"(.*)\",", r"<title>(.*) - Zoom</title>"],
            page, 'title')
        raw_width = self._search_regex(
            r"viewResolvtionsWidth: (\d*)", page, 'res width', fatal=False)
        raw_height = self._search_regex(
            r"viewResolvtionsHeight: (\d*)", page, 'res height', fatal=False)
        raw_size = self._search_regex(
            r"fileSize: \'(.+)\'", page, 'fileSize', fatal=False)
        approx_size = parse_filesize(raw_size)

        # Scheme and subdomain of the requested URL, ending in "zoom.us/".
        referer = url.split("zoom.us")[0] + "zoom.us/"

        formats = [{
            'url': url_or_none(mp4_url),
            'width': int_or_none(raw_width),
            'height': int_or_none(raw_height),
            'http_headers': {
                'Accept':
                'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
                'Referer': referer,
            },
            'ext': "mp4",
            'filesize_approx': int_or_none(approx_size),
        }]
        self._sort_formats(formats)

        return {
            'id': display_id,
            'title': title,
            'formats': formats,
        }
Ejemplo n.º 3
0
    def _real_extract(self, url):
        """Extract title, formats and duration for a video page.

        The page's ``data-video`` attribute identifies an asset whose JSON
        description is downloaded from the mediazone API. Each entry of its
        ``targetUrls`` list becomes one format (or several, for HLS).
        Entries lacking a ``type`` or ``url`` key, and entries of an
        unrecognised type, are ignored.

        :param url: page URL.
        :return: info dict with ``id``, ``title``, ``formats`` and
            ``duration`` (the API value divided by 1000, or ``None`` when
            the API does not provide a usable duration).
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(
            r'<h1 class="video__body__header__title">(.+?)</h1>', webpage,
            'title')
        data_video = self._html_search_regex(
            r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'data-video', group='id')
        json_url = 'https://mediazone.vrt.be/api/v1/canvas/assets/' + data_video
        data = self._download_json(json_url, video_id)

        # Target types that map directly onto a single format entry, with
        # the value stored under the format's 'protocol' key.
        direct_protocols = {
            'PROGRESSIVE_DOWNLOAD': 'http',
            'HDS': 'HDS',
            'RTMP': 'rtmp',
            'RTSP': 'rtsp',
        }

        formats = []
        for target in data['targetUrls']:
            # Both keys are required; the previous condition
            # ("'type' and 'url' in target") only tested for 'url'.
            if 'type' not in target or 'url' not in target:
                continue
            target_type = target['type']
            target_url = target['url']

            if target_type == 'HLS':
                formats.extend(self._extract_m3u8_formats(
                    target_url, video_id, entry_protocol='m3u8_native',
                    ext='mp4',
                    preference=0,
                    fatal=False,
                    m3u8_id='hls'))
            elif target_type in direct_protocols:
                formats.append({
                    'format_id': utils.determine_ext(target_url),
                    'url': target_url,
                    'protocol': direct_protocols[target_type],
                })

        self._sort_formats(formats)

        # Avoid "None / 1000" when the duration is absent or not numeric.
        duration = utils.int_or_none(data.get('duration'))
        if duration is not None:
            duration = duration / 1000

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
        }
Ejemplo n.º 4
0
    def extract_formats(self, loader_data):
        """Turn the ``videoResolutionLevels`` entries into a sorted format list.

        Every level yields a format whose id is ``<verticalResolution>p``.
        When the level's URL contains a ``<W>x<H>-<fps>p-<rate>kbit`` token,
        width, height, fps and total bitrate are taken from it (the height
        from the URL replaces ``verticalResolution``).

        :param loader_data: mapping with a ``videoResolutionLevels`` list of
            dicts, each providing ``verticalResolution`` and ``url``.
        :return: list of format dicts after ``self._sort_formats``.
        """
        quality_pattern = r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit'
        quality_keys = ('width', 'height', 'fps', 'tbr')

        stream_formats = []
        for level in loader_data["videoResolutionLevels"]:
            vertical = level['verticalResolution']
            level_url = level['url']
            entry = {
                'format_id': str(vertical) + "p",
                'height': vertical,
                'url': level_url,
            }

            found = re.search(quality_pattern, level_url)
            if found:
                # The four capture groups line up with quality_keys.
                for key, raw in zip(quality_keys, found.groups()):
                    entry[key] = int_or_none(raw)

            stream_formats.append(entry)

        self._sort_formats(stream_formats)
        return stream_formats
Ejemplo n.º 5
0
    def _real_extract(self, url):
        """Extract metadata and the video URL from the mobile video page.

        The page embeds a JSON document in a ``<script id="__NEXT_DATA__">``
        tag; everything is read from its ``props.pageProps`` object.

        :param url: video URL.
        :return: info dict for the video.
        :raises ExtractorError: if ``pageProps.statusCode`` is not 0, or if
            no video URL is present in the page data.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'https://m.tiktok.com/v/%s.html' % video_id, video_id)

        # The JSON in the __NEXT_DATA__ script tag holds all the metadata.
        json_string = self._html_search_regex(
            [r'<script\s+id="__NEXT_DATA__"[^>]*>(.*?)</script>'], webpage,
            'next_data')
        json_data = self._parse_json(json_string, video_id)
        video_data = try_get(json_data,
                             lambda x: x['props']['pageProps'],
                             expected_type=dict) or {}

        # Check availability before touching the data it guards, so that an
        # unavailable video is reported as such instead of as a KeyError or
        # TypeError further down.
        if video_data.get('statusCode') != 0:
            raise ExtractorError('Video not available', video_id=video_id)

        # try_get returns None on a missing/ill-typed branch; fall back to
        # empty dicts so the optional lookups below cannot fail.
        video_info = try_get(
            video_data, lambda x: x['videoData']['itemInfos'], dict) or {}
        author_info = try_get(
            video_data, lambda x: x['videoData']['authorInfos'], dict) or {}
        share_info = try_get(video_data, lambda x: x['shareMeta'], dict) or {}

        # Only the first (watermarked) URL is used.
        video_url = try_get(video_info, lambda x: x['video']['urls'][0])
        if not video_url:
            raise ExtractorError(
                'Unable to extract video URL', video_id=video_id)

        unique_id = str_or_none(author_info.get('uniqueId'))
        timestamp = try_get(video_info, lambda x: int(x['createTime']), int)
        height = try_get(video_info,
                         lambda x: x['video']['videoMeta']['height'], int)
        width = try_get(video_info, lambda x: x['video']['videoMeta']['width'],
                        int)

        thumbnails = [{
            'url': (video_info.get('thumbnail')
                    or self._og_search_thumbnail(webpage)),
            'width': width,
            'height': height,
        }]

        formats = [{
            'url': video_url,
            'ext': 'mp4',
            'height': height,
            'width': width,
        }]

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': (str_or_none(video_info.get('text'))
                            or str_or_none(share_info.get('desc'))),
            'comment_count': int_or_none(video_info.get('commentCount')),
            'duration': try_get(
                video_info, lambda x: x['video']['videoMeta']['duration'],
                int),
            'height': height,
            'like_count': int_or_none(video_info.get('diggCount')),
            'repost_count': int_or_none(video_info.get('shareCount')),
            'thumbnail': try_get(video_info, lambda x: x['covers'][0], str),
            'timestamp': timestamp,
            'width': width,
            'creator': str_or_none(author_info.get('nickName')),
            'uploader': unique_id,
            'uploader_id': str_or_none(author_info.get('userId')),
            # Only build the profile URL when the handle is known.
            'uploader_url': ('https://www.tiktok.com/@' + unique_id
                             if unique_id else None),
            'thumbnails': thumbnails,
            'webpage_url': self._og_search_url(webpage),
            'ext': 'mp4',
            'formats': formats,
            'http_headers': {
                'User-Agent': 'okhttp',
            }
        }
Ejemplo n.º 6
0
# Names of all remote tags that look like a four-part numeric version at or
# above minimum_version.
results = set()

# 88.0.0.0 is the oldest version that is kept.
minimum_version = (88, 0, 0, 0)

# The tags are listed straight from the remote, without cloning; see
# https://stackoverflow.com/questions/10649814/get-last-git-tag-from-a-remote-repo-without-cloning
with subprocess.Popen(
        [
            'git', '-c', 'versionsort.suffix=-', 'ls-remote', '--tags',
            '--sort=v:refname',
            'https://chromium.googlesource.com/chromium/src',
        ],
        stdout=subprocess.PIPE) as proc:
    for line in proc.stdout:
        # Each output line is "<commit hash>\t<ref>".
        commit_hash, tag_ref = line.strip().decode().split('\t')
        tag_name = tag_ref[len('refs/tags/'):]
        # Only purely numeric components count, so a tag with any
        # non-numeric component ends up with fewer entries.
        version_tuple = tuple(
            int_or_none(part)
            for part in tag_name.split('.')
            if part.isdigit())
        if len(version_tuple) < 4 or version_tuple < minimum_version:
            continue
        results.add(tag_name)

pycode = '''# coding: utf-8
# AUTOMATICALLY GENERATED FILE. DO NOT EDIT.
# Generated by ./devscripts/make_chrome_version_list.py
# This list is created from git tags in https://chromium.googlesource.com/chromium/src
from __future__ import unicode_literals

versions = [
    "%s"
]
Ejemplo n.º 7
0
    def _parse_mediapackage(self, video):
        """Convert a media package dict into an info dict.

        Each entry of ``video['media']['track']`` is turned into formats:
        DASH/HLS/HDS/Smooth/SMIL tracks (recognised by their ``transport``
        value or URL extension) are expanded through the matching manifest
        extractor, anything else becomes a single format built from the
        track's ``audio`` and ``video`` sub-dicts. RTMP tracks whose URL
        does not match ``rtmp://<host>/<app>/<playpath>`` are dropped.

        :param video: media package dict.
        :return: dict with ``formats`` plus whichever of ``id``, ``title``,
            ``series``, ``season_id``, ``creator``, ``timestamp`` and
            ``thumbnail`` could be determined.
        """
        tracks = video.get('media', {}).get('track', [])
        video_id = video.get('id')

        formats = []
        for track in tracks:
            href = track['url']
            ext = determine_ext(href, None)
            transport = track.get('transport')

            # Manifest-based tracks are delegated to the manifest extractors.
            if transport == 'DASH' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    href, video_id, mpd_id='dash', fatal=False))
                continue
            if transport == 'HLS' or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    href, video_id, m3u8_id='hls',
                    entry_protocol='m3u8_native', fatal=False))
                continue
            if transport == 'HDS' or ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    href, video_id, f4m_id='hds', fatal=False))
                continue
            if transport == 'SMOOTH':
                formats.extend(self._extract_ism_formats(
                    href, video_id, ism_id='smooth', fatal=False))
                continue
            if ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    href, video_id, fatal=False))
                continue

            # Everything else is described by one hand-built format dict.
            fmt = {'url': href}

            if transport is not None:
                fmt['format_note'] = transport
                if transport == 'RTMP':
                    rtmp_match = re.search(
                        r'^(?:rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', href)
                    if not rtmp_match:
                        continue
                    play_path = rtmp_match.group('playpath')
                    fmt['app'] = rtmp_match.group('app')
                    fmt['play_path'] = play_path
                    fmt['rtmp_live'] = True
                    fmt['preference'] = -2
                    # A "<prefix>:<rest>" play path supplies the extension
                    # through its prefix.
                    path_parts = play_path.split(':')
                    if len(path_parts) > 1:
                        fmt['ext'] = path_parts[0]

            audio_info = track.get('audio')
            if audio_info is not None:
                if 'bitrate' in audio_info:
                    fmt['abr'] = int_or_none(audio_info['bitrate'], 1000)
                if 'samplingrate' in audio_info:
                    fmt['asr'] = int_or_none(audio_info['samplingrate'])
                audio_encoder = audio_info.get('encoder', {})
                if 'type' in audio_encoder:
                    fmt['acodec'] = audio_encoder['type']

            video_info = track.get('video')
            if video_info is not None:
                if 'resolution' in video_info:
                    resolution = video_info['resolution']
                    fmt['resolution'] = resolution
                    fmt.update(parse_resolution(resolution))
                if 'framerate' in video_info:
                    fmt['fps'] = int_or_none(video_info['framerate'])
                if 'bitrate' in video_info:
                    fmt['vbr'] = int_or_none(video_info['bitrate'], 1000)
                video_encoder = video_info.get('encoder', {})
                if 'type' in video_encoder:
                    fmt['vcodec'] = video_encoder['type']

            formats.append(fmt)

        self._sort_formats(formats)

        result_obj = {'formats': formats}

        # Optional fields are only emitted when a value is available.
        optional_fields = [
            ('id', video_id),
            ('title', video.get('title')),
            ('series', video.get('seriestitle')),
            ('season_id', video.get('series')),
            ('creator', video.get('creators', {}).get('creator')),
            ('timestamp', parse_iso8601(video.get('start'))),
        ]
        for key, value in optional_fields:
            if value is not None:
                result_obj[key] = value

        # The first attachment, if any, is used as the thumbnail.
        attachments = video.get('attachments', {}).get('attachment', [])
        if len(attachments) > 0:
            result_obj['thumbnail'] = attachments[0].get('url')

        return result_obj