Example #1
    def _verify_video_password(self, webpage, url, video_id):
        password = self._downloader.params.get('videopassword')
        if password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option', expected=True
            )
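        # The password form carries a hidden "requesttoken" input that must be
        # posted back along with the password.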
        requesttoken = self._search_regex(r'<input[^>]+?name="requesttoken" value="([^\"]+)"', webpage, 'requesttoken')
        data = urlencode_postdata({'requesttoken': requesttoken, 'password': password})

        validation_response, urlh = self._download_webpage_handle(
            url, video_id, note='Validating Password...', errnote='Wrong password?', data=data
        )

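        # If the response still contains the password <label>, the password was
        # rejected; report the page's warning message if one is present.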
        password_protected = self._search_regex(
            r'<label[^>]+?for="(password)"', validation_response, 'password field', fatal=False, default=None
        )
        warning = self._search_regex(
            r'<div[^>]+?class="warning">([^<]*)</div>',
            validation_response,
            'warning',
            fatal=False,
            default="The password is wrong. Try again.",
        )
        if password_protected is not None:
            raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, warning), expected=True)
        return validation_response, urlh
Example #2
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host')
        video_id = mobj.group('id')

        api_json = self._call_api(host,
                                  video_id,
                                  '',
                                  note='Downloading video JSON')

        search_results = api_json.get('search-results', {})
        if 'result' not in search_results:
            raise ExtractorError('Playlist was not found')

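        # 'result' is a dict for a single entry and a list for several;
        # normalize to a list so the loop below handles both cases.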
        result_list = search_results.get('result', {})
        if isinstance(result_list, dict):
            result_list = [result_list]

        entries = []
        for episode in result_list:
            video = episode.get('mediapackage', {})
            entries.append(self._parse_mediapackage(video))

        if len(entries) == 0:
            raise ExtractorError('Playlist has no entries')

        playlist_title = entries[0].get('series')

        result_obj = self.playlist_result(entries,
                                          playlist_id=video_id,
                                          playlist_title=playlist_title)
        return result_obj
Example #3
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        scheme = mobj.group('scheme')
        host = mobj.group('host')
        path = mobj.group('path')
        video_id = mobj.group('id')

        # Extract launch URL
        view_webpage = self._download_webpage(
            url, video_id, 'Downloading kalvidres video view webpage')
        mobj = re.search(
            r'<iframe[^>]+class="kaltura-player-iframe"[^>]+src=(["\'])(?P<url>[^"\']+)\1',
            view_webpage)
        if not mobj:
            raise ExtractorError('Unable to extract kalvidres launch url')

        launch_url = html.unescape(mobj.group('url'))

        # Get launch parameters
        launch_webpage = self._download_webpage(
            launch_url, video_id, 'Downloading kalvidres launch webpage')
        launch_inputs = self._form_hidden_inputs(self._LAUNCH_FORM,
                                                 launch_webpage)
        launch_form_str = self._search_regex(
            r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LAUNCH_FORM,
            launch_webpage,
            'login form',
            group='form')

        action_url = extract_attributes(launch_form_str).get('action')

        # Launch kalvidres video app
        submit_page, start_urlh = self._download_webpage_handle(
            action_url,
            video_id,
            'Launch kalvidres app',
            data=urlencode_postdata(launch_inputs))

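        # The launch response redirects via JavaScript; pull the target URL out
        # of the window.location.href assignment.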
        mobj = re.search(r'window.location.href = \'(?P<url>[^\']+)\'',
                         submit_page)
        if not mobj:
            raise ExtractorError('Unable to extract kalvidres redirect url')

        # Follow kalvidres video app redirect
        redirect_page, redirect_urlh = self._download_webpage_handle(
            html.unescape(mobj.group('url')), video_id,
            'Follow kalvidres redirect')

        kaltura_url = KalturaIE._extract_url(redirect_page)
        if not kaltura_url:
            raise ExtractorError('Unable to extract kaltura url')

        return {
            '_type': 'url',
            'url': kaltura_url,
            'ie_key': 'Kaltura',
        }
Example #4
    def _extract_playlist(self,
                          playlist,
                          playlist_id=None,
                          require_title=True):
        if len(playlist["results"]) == 0:
            raise ExtractorError("Cannot find playlist!")

        title = (playlist["results"][0]["nombre"] if require_title else
                 playlist.get("results")[0].get("nombre"))
        thumbnail = None
        entries = try_get(playlist, lambda x: x["results"][0]["publicacion"])

        # Entries in a user playlist need their slug_url filled in from the video API
        for entry in entries:
            video_id = entry.get("id")
            json_url = API_BASE_URL + "publicacion/?format=json&id=%s" % video_id
            video = self._download_json(json_url, video_id,
                                        "Downloading video JSON")
            info = self._extract_video(video, video_id)
            entry["slug_url"] = info.get("slug_url")

        return {
            "id": try_get(playlist, lambda x: x["results"][0]["id"],
                          compat_str) or playlist_id,
            "title": title,
            "thumbnail": thumbnail,
            "entries": entries,
        }
Example #5
    def _entries(self, playlist_id):
        json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id
        headers = self._set_auth_basic()
        playlist = {}
        try:
            playlist = self._download_json(json_url,
                                           playlist_id,
                                           "Downloading playlist JSON",
                                           headers=headers)
            assert playlist.get("count", 0) >= 1
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403,):
                raise self.raise_login_required(
                    msg="This playlist is only available for registered users. "
                        "Check your username and password")
        except AssertionError:
            raise ExtractorError("Playlist does not exist!")

        info_playlist = self._extract_playlist(playlist, playlist_id)
        playlist_entries = info_playlist.get("entries")

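        # Build the media URL for each entry and defer extraction to PictaIE,
        # carrying the playlist id in the query string.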
        for video in playlist_entries:
            video_id = video.get("id")
            video_url = (ROOT_BASE_URL + "medias/" + video.get("slug_url") +
                         "?" + "playlist=" + playlist_id)
            yield self.url_result(video_url, PictaIE.ie_key(), video_id)
Example #6
    def _verify_video_password(self, url, video_id, webpage):
        password = self._downloader.params.get('videopassword')
        if password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option', expected=True
            )
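        # The recording page embeds the meeting id, which is posted together
        # with the password to Zoom's validation endpoint.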
        meetId = self._search_regex(r'<input[^>]+?id="meetId" value="([^\"]+)"', webpage, 'meetId')
        data = urlencode_postdata({'id': meetId, 'passwd': password, 'action': "viewdetailedpage", 'recaptcha': ""})
        validation_url = url.split("zoom.us")[0] + "zoom.us/rec/validate_meet_passwd"
        validation_response = self._download_json(
            validation_url, video_id, note='Validating Password...', errnote='Wrong password?', data=data
        )

        if validation_response['errorCode'] != 0:
            raise ExtractorError(
                'Login failed, %s said: %r' % (self.IE_NAME, validation_response['errorMessage']), expected=True
            )
Example #7
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host')
        video_id = mobj.group('id')

        api_json = self._call_api(host,
                                  video_id,
                                  '',
                                  note='Downloading video JSON')

        search_results = api_json.get('search-results', {})
        if 'result' not in search_results:
            raise ExtractorError('Video was not found')

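        # A single video is expected, so the result must be a dict rather than
        # a list of results.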
        result_dict = search_results.get('result', {})
        if not isinstance(result_dict, dict):
            raise ExtractorError(
                'More than one video was unexpectedly returned.')

        video = result_dict.get('mediapackage', {})

        result_obj = self._parse_mediapackage(video)
        return result_obj
Example #8
 def _real_extract(self, url):
     video_id = self._match_id(url)
     formats = [{
         'format_id': 'default',
         'url': 'url:',
     }]
     if video_id == '0':
         raise ExtractorError('foo')
     if video_id == '2':
         formats.append({
             'format_id': 'extra',
             'url': TEST_URL,
         })
     return {
         'id': video_id,
         'title': 'Video %s' % video_id,
         'formats': formats,
     }
Example #9
    def _extract_video(video, video_id=None, require_title=True):
        if len(video["results"]) == 0:
            raise ExtractorError("Cannot find video!")

        title = (video["results"][0]["nombre"]
                 if require_title else video.get("results")[0].get("nombre"))
        description = try_get(video, lambda x: x["results"][0]["descripcion"],
                              compat_str)
        slug_url = try_get(video, lambda x: x["results"][0]["slug_url"],
                           compat_str)
        uploader = try_get(video,
                           lambda x: x["results"][0]["usuario"]["username"],
                           compat_str)
        add_date = try_get(video, lambda x: x["results"][0]["fecha_creacion"])
        timestamp = int_or_none(unified_timestamp(add_date))
        thumbnail = try_get(video, lambda x: x["results"][0]["url_imagen"])
        manifest_url = try_get(video,
                               lambda x: x["results"][0]["url_manifiesto"])
        category = try_get(
            video,
            lambda x: x["results"][0]["categoria"]["tipologia"]["nombre"],
            compat_str,
        )
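        # Keep the first channel playlist, if any, so callers can decide
        # whether to expand it into a playlist download.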
        playlist_channel = (
            video["results"][0]["lista_reproduccion_canal"][0] if
            len(video["results"][0]["lista_reproduccion_canal"]) > 0 else None)
        subtitle_url = try_get(video,
                               lambda x: x["results"][0]["url_subtitulo"])

        return {
            "id": try_get(video, lambda x: x["results"][0]["id"], compat_str)
            or video_id,
            "title": title,
            "slug_url": slug_url,
            "description": description,
            "thumbnail": thumbnail,
            "uploader": uploader,
            "timestamp": timestamp,
            "category": [category] if category else None,
            "manifest_url": manifest_url,
            "playlist_channel": playlist_channel,
            "subtitle_url": subtitle_url,
        }
Example #10
    def _extract_playlist(self,
                          playlist,
                          playlist_id=None,
                          require_title=True):
        if len(playlist.get("results", [])) == 0:
            raise ExtractorError("Cannot find playlist!")

        title = (playlist["results"][0]["nombre"]
                 if require_title else playlist["results"][0].get("nombre"))
        thumbnail = try_get(playlist,
                            lambda x: x["results"][0].get("url_imagen"))
        entries = try_get(playlist, lambda x: x["results"][0]["publicaciones"])

        return {
            "id": try_get(playlist, lambda x: x["results"][0]["id"],
                          compat_str) or playlist_id,
            "title": title,
            "thumbnail": thumbnail,
            "entries": entries,
        }
Example #11
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        scheme = mobj.group('scheme')
        host = mobj.group('host')
        path = mobj.group('path')
        video_id = mobj.group('id')

        launch_url = scheme + host + path + '/mod/lti/launch.php?id=' + video_id

        # webpage = self._download_webpage(url, video_id)
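        # The LTI launch page only contains a hidden form; collect its inputs
        # and re-submit them to the form's action URL.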
        launch_webpage = self._download_webpage(
            launch_url, video_id, 'Downloading opencast lti launch webpage')
        launch_inputs = self._form_hidden_inputs(self._LAUNCH_FORM,
                                                 launch_webpage)
        launch_form_str = self._search_regex(
            r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LAUNCH_FORM,
            launch_webpage,
            'login form',
            group='form')

        action_url = extract_attributes(launch_form_str).get('action')

        submit_page, start_urlh = self._download_webpage_handle(
            action_url,
            video_id,
            'Launch opencast app',
            data=urlencode_postdata(launch_inputs))

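        # The LTI hand-off has to succeed; anything other than HTTP 200 is
        # treated as a failed launch.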
        if start_urlh.status != 200:
            raise ExtractorError('Unable to launch opencast app',
                                 expected=True)

        return {
            '_type': 'url',
            'url': start_urlh.geturl(),
        }
Example #12
 def report_warning(self, message):
     # Don't accept warnings during tests
     raise ExtractorError(message)
Example #13
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        scheme = mobj.group('scheme')
        host = mobj.group('host')
        path = mobj.group('path')
        video_id = mobj.group('id')

        launch_url = scheme + host + path + '/mod/helixmedia/launch.php?type=1&id=' + video_id

        # webpage = self._download_webpage(url, video_id)
        launch_webpage = self._download_webpage(
            launch_url, video_id, 'Downloading helixmedia launch webpage')
        launch_inputs = self._form_hidden_inputs(self._LAUNCH_FORM,
                                                 launch_webpage)
        launch_form_str = self._search_regex(
            r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LAUNCH_FORM,
            launch_webpage,
            'login form',
            group='form')

        action_url = extract_attributes(launch_form_str).get('action')

        submit_page, start_urlh = self._download_webpage_handle(
            action_url,
            video_id,
            'Launch helixmedia app',
            data=urlencode_postdata(launch_inputs))

        if 'UploadSessionId' not in start_urlh.geturl():
            raise ExtractorError('Unable to launch helixmedia video',
                                 expected=True)

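        # Rewrite the media server URL: append the expected player dimensions
        # to the query string and request the "Split" view of the page.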
        parsed_mediaserver_url = list(
            compat_urllib_parse_urlparse(start_urlh.geturl()))
        parsed_mediaserver_url[4] += '&mobile=N&fullWidth=940&fullHeight=906'
        parsed_mediaserver_url[2] += 'Split'
        mediaserver_url = compat_urllib_parse.urlunparse(
            parsed_mediaserver_url)

        video_webpage = self._download_webpage(mediaserver_url, video_id,
                                               'Downloading video details')

        video_model = json.loads(
            js_to_json(
                self._search_regex(r'var model = ([^;]+);', video_webpage,
                                   'video model')))

        video_title = video_model.get('VideoTitle', None)

        video_description = video_model.get('VideoDescription', '')
        video_id = str(video_model.get('VideoId', video_id))
        download_url = video_model.get('DownloadUrl', None)
        video_json = json.loads(
            video_model.get('PlayScreenVm',
                            {}).get('VodPlayerModel',
                                    {}).get('PlayerJson', '{}'))
        if video_json == {}:
            video_json = json.loads(
                video_model.get('VodPlayerModel', {}).get('PlayerJson', '{}'))
        if video_title is None:
            video_title = video_json.get('abouttext', 'Unknown title')

        thumbnail_list = video_json.get('tracks', [])
        thumbnail = None
        if len(thumbnail_list) >= 1:
            thumbnail = thumbnail_list[0].get('file', None)
            if thumbnail is not None:
                thumbnail = thumbnail.replace('vtt', 'jpg')

        sources_list = video_json.get('sources', [])

        formats = []
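        # Each source may be a DASH, HLS, HDS or SMIL manifest or a direct
        # file; dispatch on the URL's extension.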
        for track in sources_list:
            href = track['file']
            ext = determine_ext(href, None)

            if ext == 'mpd':
                # DASH
                formats.extend(
                    self._extract_mpd_formats(href,
                                              video_id,
                                              mpd_id='dash',
                                              fatal=False))
            elif ext == 'm3u8':
                # HLS
                formats.extend(
                    self._extract_m3u8_formats(href,
                                               video_id,
                                               m3u8_id='hls',
                                               entry_protocol='m3u8_native',
                                               fatal=False))
            elif ext == 'f4m':
                # HDS
                formats.extend(
                    self._extract_f4m_formats(href,
                                              video_id,
                                              f4m_id='hds',
                                              fatal=False))
            elif ext == 'smil':
                formats.extend(
                    self._extract_smil_formats(href, video_id, fatal=False))
            else:
                track_obj = {
                    'url': href,
                    'ext': ext,
                }
                formats.append(track_obj)

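        # If a direct download URL is exposed, probe it with a HEAD request to
        # determine the actual file extension.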
        if download_url is not None:
            ext_req = HEADRequest(download_url)
            ext_handle = self._request_webpage(ext_req,
                                               video_id,
                                               note='Determining extension')
            ext = self.urlhandle_detect_ext(ext_handle)

            track_obj_direct = {
                'url': download_url,
                'ext': ext,
            }
            formats.append(track_obj_direct)

        self._sort_formats(formats)

        result_obj = {'formats': formats}

        if video_id is not None:
            result_obj.update({'id': video_id})

        if video_title is not None:
            result_obj.update({'title': video_title})

        if video_description is not None:
            result_obj.update({'creator': video_description})

        if thumbnail is not None:
            result_obj.update({'thumbnail': thumbnail})

        return result_obj
Example #14
    def _real_extract(self, url):
        video_id = self._match_id(url)

        # First try the new method
        video_info = compat_parse_qs(
            self._download_webpage('https://drive.google.com/get_video_info', video_id, query={'docid': video_id})
        )

        def get_value(key):
            return try_get(video_info, lambda x: x[key][0])

        reason = get_value('reason')
        title = get_value('title')

        use_old_webpage = False
        if not title and reason:
            use_old_webpage = True
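            # get_video_info did not return a title; fall back to scraping the
            # legacy docs.google.com file page instead.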
            webpage = self._download_webpage('http://docs.google.com/file/d/%s' % video_id, video_id)

            title = self._search_regex(
                r'"title"\s*,\s*"([^"]+)', webpage, 'title', default=None
            ) or self._og_search_title(webpage)
            duration = int_or_none(
                self._search_regex(r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None)
            )

            formats = []
            fmt_stream_map = self._search_regex(
                r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map', default=''
            ).split(',')
            fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list', default='').split(',')

            hl = self._search_regex(r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
            subtitles_id = None
            ttsurl = self._search_regex(r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)

            thumbnail_url = self._og_search_thumbnail(webpage, default=None)
        else:
            formats = []
            fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
            fmt_list = (get_value('fmt_list') or '').split(',')

            duration = int_or_none(get_value('length_seconds'))

            hl = get_value('hl')
            subtitles_id = None
            ttsurl = get_value('ttsurl')
            thumbnail_url = 'https://drive.google.com/thumbnail?id=' + video_id

        if ttsurl:
            # the video Id for subtitles will be the last value in the ttsurl
            # query string
            subtitles_id = ttsurl.encode('utf-8').decode('unicode_escape').split('=')[-1]

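        # fmt_list maps format ids to "WxH" resolutions, while fmt_stream_map
        # maps format ids to stream URLs; merge the two into format dicts.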
        if fmt_stream_map and fmt_list:
            resolutions = {}
            for fmt in fmt_list:
                mobj = re.search(r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
                if mobj:
                    resolutions[mobj.group('format_id')] = (int(mobj.group('width')), int(mobj.group('height')))

            for fmt_stream in fmt_stream_map:
                fmt_stream_split = fmt_stream.split('|')
                if len(fmt_stream_split) < 2:
                    continue
                format_id, format_url = fmt_stream_split[:2]
                f = {
                    'url': lowercase_escape(format_url),
                    'format_id': format_id,
                    'ext': self._FORMATS_EXT[format_id],
                }
                resolution = resolutions.get(format_id)
                if resolution:
                    f.update(
                        {
                            'width': resolution[0],
                            'height': resolution[1],
                        }
                    )
                formats.append(f)

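        # Besides the streaming formats, the original file can often be fetched
        # through the uc?export=download endpoint.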
        source_url = update_url_query(
            'https://drive.google.com/uc',
            {
                'id': video_id,
                'export': 'download',
            },
        )

        def request_source_file(source_url, kind):
            return self._request_webpage(
                source_url,
                video_id,
                note='Requesting %s file' % kind,
                errnote='Unable to request %s file' % kind,
                fatal=False,
            )

        urlh = request_source_file(source_url, 'source')
        if urlh:

            def add_source_format(urlh):
                formats.append(
                    {
                        # Use redirect URLs as download URLs in order to calculate
                        # correct cookies in _calc_cookies.
                        # Using original URLs may result in redirect loop due to
                        # google.com's cookies mistakenly used for googleusercontent.com
                        # redirect URLs (see #23919).
                        'url': urlh.geturl(),
                        'ext': determine_ext(title, 'mp4').lower(),
                        'format_id': 'source',
                        'quality': 1,
                    }
                )

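            # A Content-Disposition header means the file is served directly;
            # otherwise Google returns a confirmation page whose "confirm"
            # token has to be appended to the download URL.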
            if urlh.headers.get('Content-Disposition'):
                add_source_format(urlh)
            else:
                confirmation_webpage = self._webpage_read_content(
                    urlh,
                    url,
                    video_id,
                    note='Downloading confirmation page',
                    errnote='Unable to confirm download',
                    fatal=False,
                )
                if confirmation_webpage:
                    confirm = self._search_regex(
                        r'confirm=([^&"\']+)', confirmation_webpage, 'confirmation code', fatal=False
                    )
                    if confirm:
                        confirmed_source_url = update_url_query(
                            source_url,
                            {
                                'confirm': confirm,
                            },
                        )
                        urlh = request_source_file(confirmed_source_url, 'confirmed source')
                        if urlh and urlh.headers.get('Content-Disposition'):
                            add_source_format(urlh)

        if not use_old_webpage and not formats and reason:
            raise ExtractorError(reason, expected=True)
        elif use_old_webpage and not formats:
            reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
            if reason:
                raise ExtractorError(reason, expected=True)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail_url,
            'duration': duration,
            'formats': formats,
            'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
            'automatic_captions': self.extract_automatic_captions(video_id, subtitles_id, hl),
        }
Example #15
    def _real_extract(self, url):
        playlist_id = None
        video_id = self._match_id(url)
        json_url = API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id
        video = self._download_json(json_url, video_id,
                                    "Downloading video JSON")
        info = self._extract_video(video, video_id)
        if (info["playlist_channel"] and self.playlist_id is None
                and self._match_playlist_id(url) is None):
            playlist_id = info["playlist_channel"].get("id")
            self.playlist_id = playlist_id
        # Prefer downloading the whole playlist (--yes-playlist) first
        if (self.playlist_id is None and self._match_playlist_id(url)
                and not self._downloader.params.get("noplaylist")):
            playlist_id = compat_str(self._match_playlist_id(url))
            self.playlist_id = playlist_id
            self.to_screen(
                "Downloading playlist %s - add --no-playlist to just download video"
                % playlist_id)
            return self.url_result(
                ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" +
                playlist_id,
                PictaUserPlaylistIE.ie_key(),
                playlist_id,
            )
        elif playlist_id and not self._downloader.params.get("noplaylist"):
            playlist_id = compat_str(playlist_id)
            self.to_screen(
                "Downloading playlist %s - add --no-playlist to just download video"
                % playlist_id)
            return self.url_result(
                ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" +
                playlist_id,
                PictaChannelPlaylistIE.ie_key(),
                playlist_id,
            )
        elif self._downloader.params.get("noplaylist"):
            self.to_screen(
                "Downloading just video %s because of --no-playlist" %
                video_id)

        formats = []
        # M3U8|MPD manifest
        manifest_url = info.get("manifest_url")
        src_ext = determine_ext(manifest_url)

        if src_ext.startswith("m3u"):
            formats.extend(
                self._extract_m3u8_formats(manifest_url,
                                           video_id,
                                           "mp4",
                                           m3u8_id="hls"))
        elif src_ext == "mpd":
            formats.extend(
                self._extract_mpd_formats(manifest_url,
                                          video_id,
                                          mpd_id="dash"))

        if not formats:
            raise ExtractorError("Cannot find video formats")

        self._sort_formats(formats)
        info["formats"] = formats

        # subtitles
        video_subtitles = self.extract_subtitles(info)
        info["subtitles"] = video_subtitles
        return info