コード例 #1
0
ファイル: balticlivecam.py プロジェクト: thawtes/ipk
    def _get_streams(self):
        """Look for an HLS stream via the site's data/API flow; fall back to an iframe."""
        res = http.get(self.url)

        data_m = self._data_re.search(res.text)
        if data_m:
            self.logger.debug("Found _data_re")
            post_data = self.js_to_json_regex(data_m.group(1))
            # res is rebound on purpose: when the data pattern matched, the
            # later iframe search runs against the API response instead.
            res = http.post(self.api_url, data=post_data)
            hls_m = self._hls_re.search(res.text)
            if hls_m:
                self.logger.debug("Found _hls_re")
                hls_url = update_scheme("http://", hls_m.group("url"))
                self.logger.debug("URL={0}".format(hls_url))
                variants = HLSStream.parse_variant_playlist(self.session, hls_url)
                if variants:
                    return variants
                return {"live": HLSStream(self.session, hls_url)}

        iframe_m = self._iframe_re.search(res.text)
        if iframe_m:
            self.logger.debug("Found _iframe_re")
            iframe_url = update_scheme("http://", iframe_m.group("url"))
            self.logger.debug("URL={0}".format(iframe_url))
            return self.session.streams(iframe_url)
コード例 #2
0
ファイル: test_utils.py プロジェクト: longsack/livecli
 def test_update_scheme(self):
     """Scheme-relative and bare URLs inherit the current scheme; explicit schemes win."""
     cases = [
         # (expected, current_url, target_url)
         ("https://example.com/foo", "https://other.com/bar", "//example.com/foo"),  # becomes https
         ("http://example.com/foo", "http://other.com/bar", "//example.com/foo"),  # becomes http
         ("http://example.com/foo", "https://other.com/bar", "http://example.com/foo"),  # remains unchanged
         ("https://example.com/foo", "https://other.com/bar", "example.com/foo"),  # becomes https
     ]
     for expected, current, target in cases:
         self.assertEqual(expected, update_scheme(current, target))
コード例 #3
0
ファイル: akamaihd.py プロジェクト: persianpros/livecli
    def _get_streams(self):
        """Build a live Akamai HD stream from the protocol-stripped URL."""
        stream_url, params = parse_url_params(self.url)
        bare_url = self._url_re.match(stream_url).group(1)
        bare_url = update_scheme("http://", bare_url)

        self.logger.debug("URL={0}; params={1}", bare_url, params)
        return {"live": AkamaiHDStream(self.session, bare_url, **params)}
コード例 #4
0
ファイル: bloomberg.py プロジェクト: persianpros/livecli
    def _get_live_streams(self):
        """Resolve the Bloomberg live player page and collect unique stream URLs."""
        # Channel id comes from our own URL
        channel = self._url_re.match(self.url).group('channel')

        # Locate the live player URL on the player page
        player_page = http.get(self.PLAYER_URL)
        player_match = self._live_player_re.search(player_page.text)
        if player_match is None:
            return []
        live_player_url = update_scheme(self.url,
                                        player_match.group('live_player_url'))

        # Scrape stream JSON blobs out of the live player page
        player_res = http.get(live_player_url)
        pattern = r'{0}(?:_MINI)?:({{.+?}}]}}]}})'.format(self.CHANNEL_MAP[channel])
        streams = []
        for blob in re.findall(pattern, player_res.text):
            for stream_url in self._live_streams_schema.validate(blob):
                # keep first occurrence only, preserving order
                if stream_url not in streams:
                    streams.append(stream_url)

        return streams
コード例 #5
0
ファイル: webtv.py プロジェクト: persianpros/livecli
    def _get_streams(self):
        """
        Find the streams for web.tv

        Yields:
            (quality, HLSStream) pairs for the first playlist source found.
        """
        headers = {}
        res = http.get(self.url, headers=headers)
        headers["Referer"] = self.url

        sources = self._sources_re.findall(res.text)
        # idiomatic emptiness check + early return instead of len(...) and nesting
        if not sources:
            return

        sdata = parse_json(sources[0], schema=self._sources_schema)
        for source in sdata:
            self.logger.debug("Found stream of type: {}", source[u'type'])
            # only HLS playlist sources are playable here
            if source[u'type'] != u"application/vnd.apple.mpegurl":
                continue
            url = update_scheme(self.url, source[u"src"])

            try:
                # try to parse the stream as a variant playlist
                variant = HLSStream.parse_variant_playlist(
                    self.session, url, headers=headers)
                if variant:
                    for q, s in variant.items():
                        yield q, s
                else:
                    # and if that fails, try it as a plain HLS stream
                    yield 'live', HLSStream(self.session,
                                            url,
                                            headers=headers)
            except IOError:
                self.logger.warning(
                    "Could not open the stream, perhaps the channel is offline"
                )
コード例 #6
0
    def _get_streams(self):
        """Parse the matched URL as an HDS manifest and return its streams."""
        url, params = parse_url_params(self.url)

        manifest_url = update_scheme("http://", self._url_re.match(url).group(2))

        return HDSStream.parse_manifest(self.session, manifest_url, **params)
コード例 #7
0
    def _get_streams(self):
        """Tries to find streams.

        Returns:
            Playable video from self._resolve_res
                or
            New self.url for livecli
        Raises:
            NoPluginError: if no video was found.
        """
        self.url = self.url.replace("resolve://", "")
        self._cache_self_url()
        self.url = update_scheme("http://", self.url)

        # GET website content
        o_res = self._res_text(self.url)

        # HLS or HDS stream
        x = self._resolve_res(o_res)
        if x:
            return x

        # iframe url
        x = self._iframe_src(o_res)

        if not x:
            # script window.location.href
            x = self._window_location(o_res)

        if x:
            return self.session.streams(self.url)

        raise NoPluginError
コード例 #8
0
ファイル: streamable.py プロジェクト: persianpros/livecli
    def _get_streams(self):
        """Yield one HTTP stream per file entry, named after its smaller dimension."""
        data = http.get(self.url, schema=self.config_schema)

        for info in data["files"].values():
            stream_url = update_scheme(self.url, info["url"])
            # pick the smaller of the two dimensions, for landscape v. portrait videos
            quality = min(info["width"], info["height"])
            yield "{0}p".format(quality), HTTPStream(self.session, stream_url)
コード例 #9
0
ファイル: huya.py プロジェクト: persianpros/livecli
    def _get_streams(self):
        """Fetch the Huya channel's HLS playlist URL and yield it as a live stream."""
        channel = _url_re.match(self.url).group("channel")

        http.headers.update({"User-Agent": useragents.IPAD})
        # Some problem with SSL on huya.com now, do not use https

        hls_url = http.get(HUYA_URL % channel, schema=_hls_schema)
        hls_url = update_scheme("http://", hls_url)
        yield "live", HLSStream(self.session, hls_url)
コード例 #10
0
    def _get_streams(self):
        """Locate the embedded ThePlatform URL and delegate stream extraction to it."""
        res = http.get(self.url)
        match = self.embed_url_re.search(res.text)
        platform_url = match.group("url") if match else None

        if not platform_url:
            return

        url = update_scheme(self.url, platform_url)
        # hand off to ThePlatform plugin
        plugin = ThePlatform(url)
        plugin.bind(self.session, "plugin.nbcsports")
        return plugin.streams()
コード例 #11
0
ファイル: abweb.py プロジェクト: longsack/livecli
    def get_iframe_url(self):
        """Return the iframe URL found on the page, with a scheme guaranteed.

        Raises:
            PluginError: if the page contains no iframe.
        """
        self.logger.debug('search for an iframe')
        match = self._iframe_re.search(http.get(self.url).text)
        if match is None:
            raise PluginError('No iframe found.')

        iframe_url = update_scheme('http://', match.group('url'))
        self.logger.debug('IFRAME URL={0}'.format(iframe_url))
        return iframe_url
コード例 #12
0
ファイル: hls.py プロジェクト: persianpros/livecli
    def _get_streams(self):
        """Parse the URL as an HLS variant playlist, falling back to a single live stream."""
        url, params = parse_url_params(self.url)
        hls_url = update_scheme("http://", self._url_re.match(url).group(2))

        self.logger.debug("URL={0}; params={1}", hls_url, params)
        variants = HLSStream.parse_variant_playlist(self.session, hls_url,
                                                    **params)
        if variants:
            return variants
        return {"live": HLSStream(self.session, hls_url, **params)}
コード例 #13
0
    def _get_streams(self):
        """Scrape m.huya.com for the channel's m3u8 URL and return a live HLS stream."""
        headers = {'User-Agent': useragents.IPAD}
        channel = self._url_re.match(self.url).group('channel')

        page = http.get('https://m.huya.com/{0}'.format(channel), headers=headers)
        match = self._hls_re.search(page.text)
        if not match:
            self.logger.debug('No m3u8 url found.')
            return

        hls_url = update_scheme('https://', match.group('url'))
        self.logger.debug('URL={0}'.format(hls_url))
        return {'live': HLSStream(self.session, hls_url, headers=headers)}
コード例 #14
0
    def _get_streams(self):
        """Find the embedded iframe and parse its validated stream URL as HLS."""
        http.headers = {"User-Agent": useragents.CHROME}
        res = http.get(self.url)
        iframe_url = self.find_iframe(res)

        if not iframe_url:
            return

        self.logger.debug("Found iframe: {0}", iframe_url)
        iframe_res = http.get(iframe_url, headers={"Referer": self.url})
        stream_url = update_scheme(self.url,
                                   self.stream_schema.validate(iframe_res.text))
        return HLSStream.parse_variant_playlist(
            self.session,
            stream_url,
            headers={"User-Agent": useragents.CHROME})
コード例 #15
0
    def _get_streams(self):
        """Extract the mobile HLS URL plus its access token and parse the playlist.

        Returns:
            HLS variant streams, or None when no mobile URL or token is found.
        """
        res = http.get(self.url)
        mobile_url_m = self.mobile_url_re.search(res.text)

        # Bail out early: without a mobile URL the concatenation below would
        # raise TypeError (None + str) instead of cleanly reporting no streams.
        if not mobile_url_m:
            return

        mobile_url = update_scheme(self.url, mobile_url_m.group("url"))

        token = mobile_url_m.group("token")
        if not token:
            # if no token is in the url, try to find it else where in the page
            token_m = self.token_re.search(res.text)
            token = token_m and token_m.group("token")
        if not token:
            # No token anywhere: the stream URL would be incomplete (str + None).
            return

        return HLSStream.parse_variant_playlist(self.session,
                                                mobile_url + token,
                                                headers={"Referer": self.url})
コード例 #16
0
    def merge_path_list(self, static, user):
        """merge the static list, with an user list

        Args:
           static (list): static list from this plugin
           user (list): list from an user command

        Returns:
            A new valid list
        """
        for raw_url in user:
            # bare hostnames get a default scheme so urlparse sees a netloc
            if not raw_url.startswith(("http", "//")):
                raw_url = update_scheme("http://", raw_url)
            parsed = urlparse(raw_url)
            # only keep entries that have both a host and a path
            if parsed.netloc and parsed.path:
                static += [(parsed.netloc, parsed.path)]
        return static
コード例 #17
0
ファイル: abweb.py プロジェクト: longsack/livecli
    def _get_streams(self):
        """Authenticate against ABweb (cached cookies or fresh login) and yield HLS streams."""
        http.headers.update({'User-Agent': useragents.CHROME,
                             'Referer': 'http://www.abweb.com/BIS-TV-Online/bistvo-tele-universal.aspx'})

        login_username = self.get_option('username')
        login_password = self.get_option('password')

        # --purge-credentials: drop both session cookies and force a fresh login.
        if self.options.get('purge_credentials'):
            self._session_attributes.set('ASP.NET_SessionId', None, expires=0)
            self._session_attributes.set('.abportail1', None, expires=0)
            self._authed = False
            self.logger.info('All credentials were successfully removed.')

        # Without cached auth, a username/password pair is mandatory.
        if not self._authed and not (login_username and login_password):
            self.logger.error('A login for ABweb is required, use --abweb-username USERNAME --abweb-password PASSWORD')
            return

        if self._authed:
            if self._expires < time.time():
                self.logger.debug('get new cached cookies')
                # login after 24h
                self.set_expires_time_cache()
                self._authed = False
            else:
                self.logger.info('Attempting to authenticate using cached cookies')
                http.cookies.set('ASP.NET_SessionId', self._session_attributes.get('ASP.NET_SessionId'))
                http.cookies.set('.abportail1', self._session_attributes.get('.abportail1'))

        # Fresh login when no valid cached session remains; abort on failure.
        if not self._authed and not self._login(login_username, login_password):
            return

        iframe_url = self.get_iframe_url()
        # The HLS request expects the iframe page as its Referer.
        http.headers.update({'Referer': iframe_url})

        hls_url = self.get_hls_url(iframe_url)
        hls_url = update_scheme(self.url, hls_url)

        self.logger.debug('URL={0}'.format(hls_url))
        variant = HLSStream.parse_variant_playlist(self.session, hls_url)
        if variant:
            for q, s in variant.items():
                yield q, s
        else:
            # Not a variant playlist: treat the URL as a single live stream.
            yield 'live', HLSStream(self.session, hls_url)
コード例 #18
0
    def resolve_url(self, url, follow_redirect=True):
        """Attempts to find a plugin that can use this URL.

        The default protocol (http) will be prefixed to the URL if
        not specified.

        Raises :exc:`NoPluginError` on failure.

        :param url: a URL to match against loaded plugins
        :param follow_redirect: follow redirects

        """
        url = update_scheme("http://", url)

        # Collect every plugin that claims this URL, best priority first.
        candidates = [p for p in self.plugins.values() if p.can_handle_url(url)]
        candidates.sort(key=lambda p: p.priority(url), reverse=True)
        if candidates:
            return candidates[0](url)

        if follow_redirect:
            # Attempt to handle a redirect URL
            try:
                res = self.http.head(url,
                                     allow_redirects=True,
                                     acceptable_status=[501])

                # Fall back to GET request if server doesn't handle HEAD.
                if res.status_code == 501:
                    res = self.http.get(url, stream=True)

                if res.url != url:
                    return self.resolve_url(res.url,
                                            follow_redirect=follow_redirect)
            except PluginError:
                pass

        raise NoPluginError
コード例 #19
0
ファイル: earthcam.py プロジェクト: persianpros/livecli
    def _get_streams(self):
        """Yield EarthCam streams: an RTMP stream (live or archive) plus, when live, HLS variants."""
        res = http.get(self.url)
        m = self.cam_name_re.search(res.text)
        cam_name = m and m.group("name")
        # NOTE(review): cam_name is None when the regex fails; the dict lookup
        # below would then raise KeyError — presumably the page always embeds
        # a cam name, but confirm.
        json_base = self.cam_data_schema.validate(res.text)

        cam_data = json_base["cam"][cam_name]

        self.logger.debug("Found cam for {0} - {1}", cam_data["group"], cam_data["title"])

        # "live" mode requires both flags; otherwise the archive (vod) is used.
        is_live = (cam_data["liveon"] == "true" and cam_data["defaulttab"] == "live")

        # HLS data
        hls_domain = cam_data["html5_streamingdomain"]
        hls_playpath = cam_data["html5_streampath"]

        # RTMP data
        rtmp_playpath = ""
        if is_live:
            n = "live"
            rtmp_domain = cam_data["streamingdomain"]
            rtmp_path = cam_data["livestreamingpath"]
            rtmp_live = cam_data["liveon"]

            if rtmp_path:
                # NOTE(review): if playpath_re does not match, match is None and
                # .group() raises AttributeError — verify the path format.
                match = self.playpath_re.search(rtmp_path)
                rtmp_playpath = match.group("file")
                rtmp_url = rtmp_domain + match.group("folder")
        else:
            n = "vod"
            rtmp_domain = cam_data["archivedomain"]
            rtmp_path = cam_data["archivepath"]
            rtmp_live = cam_data["archiveon"]

            if rtmp_path:
                rtmp_playpath = rtmp_path
                rtmp_url = rtmp_domain

        # RTMP stream (rtmp_url is only bound when rtmp_playpath was set above)
        if rtmp_playpath:
            self.logger.debug("RTMP URL: {0}{1}", rtmp_url, rtmp_playpath)

            params = {
                "rtmp": rtmp_url,
                "playpath": rtmp_playpath,
                "pageUrl": self.url,
                "swfUrl": self.swf_url,
                "live": rtmp_live
            }

            yield n, RTMPStream(self.session, params)

        # HLS stream
        if hls_playpath and is_live:
            hls_url = hls_domain + hls_playpath
            hls_url = update_scheme(self.url, hls_url)

            self.logger.debug("HLS URL: {0}", hls_url)

            for s in HLSStream.parse_variant_playlist(self.session, hls_url).items():
                yield s

        if not (rtmp_playpath or hls_playpath):
            self.logger.error("This cam stream appears to be in offline or "
                              "snapshot mode and not live stream can be played.")
            return
コード例 #20
0
class IDF1(Plugin):
    """Plugin for IDF1 (idf1.fr) live and replay videos, served through Dacast."""

    DACAST_API_URL = 'https://json.dacast.com/b/{}/{}/{}'
    DACAST_TOKEN_URL = 'https://services.dacast.com/token/i/b/{}/{}/{}'

    _url_re = re.compile(
        r'http://www\.idf1\.fr/(videos/[^/]+/[^/]+\.html|live\b)')
    # replay pages embed a dacast('<broadcaster>_<type>_<id>', ...) call
    _video_id_re = re.compile(
        r"dacast\('(?P<broadcaster_id>\d+)_(?P<video_type>[a-z]+)_(?P<video_id>\d+)', 'replay_content', data\);"
    )
    # alternative embed style via the player.dacast.com script tag
    _video_id_alt_re = re.compile(
        r'<script src="//player.dacast.com/js/player.js" id="(?P<broadcaster_id>\d+)_(?P<video_type>[cf])_(?P<video_id>\d+)"'
    )
    _player_url = 'http://ssl.p.jwpcdn.com/player/v/7.12.6/jwplayer.flash.swf'

    # Flattens the Dacast API answer into a list of candidate video URLs:
    # [hls (rescheme'd against the API URL), hds] + any html5 sources.
    _api_schema = validate.Schema(
        validate.transform(parse_json), {
            validate.optional('html5'): validate.all([
                {
                    'src': validate.url()
                },
            ], ),
            'hls': validate.url(),
            'hds': validate.url()
        },
        validate.transform(
            lambda x: [update_scheme(IDF1.DACAST_API_URL, x['hls']), x['hds']
                       ] + [y['src'] for y in x.get('html5', [])]))

    # Extracts the access-token string from the token service answer.
    _token_schema = validate.Schema(validate.transform(parse_json),
                                    {'token': validate.text},
                                    validate.get('token'))

    _user_agent = useragents.IE_11

    @classmethod
    def can_handle_url(cls, url):
        # Truthy match object when the URL belongs to idf1.fr.
        return IDF1._url_re.match(url)

    def _get_streams(self):
        """Resolve the Dacast broadcaster/type/id triple and yield HLS streams."""
        res = http.get(self.url)
        match = self._video_id_re.search(
            res.text) or self._video_id_alt_re.search(res.text)
        if match is None:
            return
        broadcaster_id = match.group('broadcaster_id')
        video_type = match.group('video_type')
        video_id = match.group('video_id')

        videos = http.get(self.DACAST_API_URL.format(broadcaster_id,
                                                     video_type, video_id),
                          schema=self._api_schema)
        token = http.get(self.DACAST_TOKEN_URL.format(broadcaster_id,
                                                      video_type, video_id),
                         schema=self._token_schema)
        parsed = []

        for video_url in videos:
            # the access token is appended as-is to every candidate URL
            video_url += token

            # Ignore duplicate video URLs
            if video_url in parsed:
                continue
            parsed.append(video_url)

            # Ignore HDS streams (broken)
            if '.m3u8' in video_url:
                for s in HLSStream.parse_variant_playlist(
                        self.session, video_url).items():
                    yield s
コード例 #21
0
    def _get_streams(self):
        """Resolve a France Televisions video and yield playable streams.

        Walks every video variant returned by the API, skipping offline,
        geo-locked, DRM-protected, expired and DASH entries; yields HDS,
        HLS and MP4 streams (muxed with subtitles when requested) and logs
        a best-guess reason when nothing was playable.
        """
        # Retrieve geolocation data
        res = http.get(self.GEO_URL)
        geo = http.json(res, schema=self._geo_schema)
        country_code = geo['reponse']['geo_info']['country_code']

        # Retrieve URL page and search for video ID
        res = http.get(self.url)
        # Initialize match so an unrecognized domain cleanly yields no
        # streams instead of raising NameError at the check below.
        match = None
        if 'france.tv' in self.url:
            match = self._pluzz_video_id_re.search(res.text)
        elif 'ludo.fr' in self.url or 'zouzous.fr' in self.url:
            match = self._jeunesse_video_id_re.search(res.text)
        elif 'france3-regions.francetvinfo.fr' in self.url:
            match = self._f3_regions_video_id_re.search(res.text)
        elif 'sport.francetvinfo.fr' in self.url:
            match = self._sport_video_id_re.search(res.text)
        if match is None:
            return
        video_id = match.group('video_id')

        # Retrieve SWF player URL
        swf_url = None
        res = http.get(self.PLAYER_GENERATOR_URL)
        player_url = update_scheme(
            self.url,
            http.json(res, schema=self._player_schema)['result'])
        res = http.get(player_url)
        match = self._swf_re.search(res.text)
        if match is not None:
            swf_url = update_scheme(self.url, match.group(0))

        res = http.get(self.API_URL.format(video_id))
        videos = http.json(res, schema=self._api_schema)
        now = time.time()

        # Flags recording why variants were skipped, for error reporting.
        offline = False
        geolocked = False
        drm = False
        expired = False

        streams = []
        for video in videos['videos']:
            video_url = video['url']

            # Check whether video format is available
            if video['statut'] != 'ONLINE':
                offline = offline or True
                continue

            # Check whether video format is geo-locked
            if video['geoblocage'] is not None and country_code not in video[
                    'geoblocage']:
                geolocked = geolocked or True
                continue

            # Check whether video is DRM-protected
            if video['drm']:
                drm = drm or True
                continue

            # Check whether video format is expired
            available = False
            for interval in video['plages_ouverture']:
                available = (interval['debut'] or 0) <= now <= (interval['fin']
                                                                or sys.maxsize)
                if available:
                    break
            if not available:
                expired = expired or True
                continue

            # TODO: add DASH streams once supported
            if '.mpd' in video_url:
                continue

            # Token service returns the real (tokenized) stream URL.
            if '.f4m' in video_url or 'france.tv' in self.url:
                res = http.get(self.TOKEN_URL.format(video_url))
                video_url = res.text

            if '.f4m' in video_url and swf_url is not None:
                for bitrate, stream in HDSStream.parse_manifest(
                        self.session, video_url, is_akamai=True,
                        pvswf=swf_url).items():
                    # HDS videos with data in their manifest fragment token
                    # doesn't seem to be supported by HDSStream. Ignore such
                    # stream (but HDS stream having only the hdntl parameter in
                    # their manifest token will be provided)
                    pvtoken = stream.request_params['params'].get(
                        'pvtoken', '')
                    match = self._hds_pv_data_re.search(pvtoken)
                    if match is None:
                        streams.append((bitrate, stream))
            elif '.m3u8' in video_url:
                for stream in HLSStream.parse_variant_playlist(
                        self.session, video_url).items():
                    streams.append(stream)
            # HBB TV streams are not provided anymore by France Televisions
            elif '.mp4' in video_url and '/hbbtv/' not in video_url:
                match = self._mp4_bitrate_re.match(video_url)
                if match is not None:
                    bitrate = match.group('bitrate')
                else:
                    # Fallback bitrate (seems all France Televisions MP4 videos
                    # seem have such bitrate)
                    bitrate = '1500k'
                streams.append((bitrate, HTTPStream(self.session, video_url)))

        if self.get_option("mux_subtitles") and videos['subtitles'] != []:
            substreams = {}
            for subtitle in videos['subtitles']:
                # TTML subtitles are available but not supported by FFmpeg
                if subtitle['format'] == 'ttml':
                    continue
                substreams[subtitle['type']] = HTTPStream(
                    self.session, subtitle['url'])

            for quality, stream in streams:
                yield quality, MuxedStream(self.session,
                                           stream,
                                           subtitles=substreams)
        else:
            for stream in streams:
                yield stream

        if offline:
            self.logger.error(
                'Failed to access stream, may be due to offline content')
        if geolocked:
            self.logger.error(
                'Failed to access stream, may be due to geo-restricted content'
            )
        if drm:
            self.logger.error(
                'Failed to access stream, may be due to DRM-protected content')
        if expired:
            self.logger.error(
                'Failed to access stream, may be due to expired content')
コード例 #22
0
    def _get_streams(self):
        """Try to find streams on every website.

        Returns:
            Playable video
                or
            New session url
        Raises:
            NoPluginError: if no video was found.
        """
        new_session_url = False

        self.url = update_scheme("http://", self.url)
        self.logger.debug("resolve.py - {0}".format(self.url))

        # GET website content
        o_res = self._res_text(self.url)

        # rtmp search, will only print the url.
        m_rtmp = _rtmp_re.search(o_res)
        if m_rtmp:
            self.logger.info("Found RTMP: {0}".format(m_rtmp.group("url")))

        # Playlist URL
        playlist_all = _playlist_re.findall(o_res)
        if playlist_all:
            # m_base is used for .f4m files that doesn't have a base_url
            m_base = self._stream_base_re.search(o_res)
            if m_base:
                stream_base = m_base.group("base")
            else:
                stream_base = ""

            playlist_list = self._make_url_list(playlist_all, self.url, url_type="playlist", stream_base=stream_base)
            if playlist_list:
                self.logger.debug("Found URL: {0}".format(", ".join(playlist_list)))
                return self._resolve_playlist(playlist_list)

        # iFrame URL
        # Merge results from the raw regex scan and the unescaped-HTML scan.
        iframe_list = []
        for _iframe_list in (_iframe_re.findall(o_res),
                             self._iframe_unescape(o_res)):
            if not _iframe_list:
                continue
            iframe_list += _iframe_list

        if iframe_list:
            # repair and filter iframe url list
            new_iframe_list = self._make_url_list(iframe_list, self.url, url_type="iframe")
            if new_iframe_list:
                self.logger.info("Found iframes: {0}".format(", ".join(new_iframe_list)))
                # the first iframe becomes the new session URL
                new_session_url = new_iframe_list[0]

        if not new_session_url:
            # search for window.location.href
            new_session_url = self._window_location(o_res)

        if new_session_url:
            return self.session.streams(new_session_url)

        raise NoPluginError
コード例 #23
0
    def set_option(self, key, value):
        """Sets general options used by plugins and streams originating
        from this session object.

        :param key: key of the option
        :param value: value to set the option to


        **Available options**:

        ======================== =========================================
        hds-live-edge            ( float) Specify the time live HDS
                                 streams will start from the edge of
                                 stream, default: ``10.0``

        hds-segment-attempts     (int) How many attempts should be done
                                 to download each HDS segment, default: ``3``

        hds-segment-threads      (int) The size of the thread pool used
                                 to download segments, default: ``1``

        hds-segment-timeout      (float) HDS segment connect and read
                                 timeout, default: ``10.0``

        hds-timeout              (float) Timeout for reading data from
                                 HDS streams, default: ``60.0``

        hls-live-edge            (int) How many segments from the end
                                 to start live streams on, default: ``3``

        hls-segment-attempts     (int) How many attempts should be done
                                 to download each HLS segment, default: ``3``

        hls-segment-threads      (int) The size of the thread pool used
                                 to download segments, default: ``1``

        hls-segment-timeout      (float) HLS segment connect and read
                                 timeout, default: ``10.0``

        hls-timeout              (float) Timeout for reading data from
                                 HLS streams, default: ``60.0``

        http-proxy               (str) Specify a HTTP proxy to use for
                                 all HTTP requests

        https-proxy              (str) Specify a HTTPS proxy to use for
                                 all HTTPS requests

        http-cookies             (dict or str) A dict or a semi-colon (;)
                                 delimited str of cookies to add to each
                                 HTTP request, e.g. ``foo=bar;baz=qux``

        http-headers             (dict or str) A dict or semi-colon (;)
                                 delimited str of headers to add to each
                                 HTTP request, e.g. ``foo=bar;baz=qux``

        http-query-params        (dict or str) A dict or a ampersand (&)
                                 delimited string of query parameters to
                                 add to each HTTP request,
                                 e.g. ``foo=bar&baz=qux``

        http-trust-env           (bool) Trust HTTP settings set in the
                                 environment, such as environment
                                 variables (HTTP_PROXY, etc) and
                                 ~/.netrc authentication

        http-ssl-verify          (bool) Verify SSL certificates,
                                 default: ``True``

        http-ssl-cert            (str or tuple) SSL certificate to use,
                                 can be either a .pem file (str) or a
                                 .crt/.key pair (tuple)

        http-timeout             (float) General timeout used by all HTTP
                                 requests except the ones covered by
                                 other options, default: ``20.0``

        http-stream-timeout      (float) Timeout for reading data from
                                 HTTP streams, default: ``60.0``

        subprocess-errorlog      (bool) Log errors from subprocesses to
                                 a file located in the temp directory

        subprocess-errorlog-path (str) Log errors from subprocesses to
                                 a specific file

        ringbuffer-size          (int) The size of the internal ring
                                 buffer used by most stream types,
                                 default: ``16777216`` (16MB)

        rtmp-proxy               (str) Specify a proxy (SOCKS) that RTMP
                                 streams will use

        rtmp-rtmpdump            (str) Specify the location of the
                                 rtmpdump executable used by RTMP streams,
                                 e.g. ``/usr/local/bin/rtmpdump``

        rtmp-timeout             (float) Timeout for reading data from
                                 RTMP streams, default: ``60.0``

        ffmpeg-ffmpeg            (str) Specify the location of the
                                 ffmpeg executable use by Muxing streams
                                 e.g. ``/usr/local/bin/ffmpeg``

        ffmpeg-verbose           (bool) Log stderr from ffmpeg to the
                                 console

        ffmpeg-verbose-path      (str) Specify the location of the
                                 ffmpeg stderr log file

        ffmpeg-video-transcode   (str) The codec to use if transcoding
                                 video when muxing with ffmpeg
                                 e.g. ``h264``

        ffmpeg-audio-transcode   (str) The codec to use if transcoding
                                 audio when muxing with ffmpeg
                                 e.g. ``aac``

        stream-segment-attempts  (int) How many attempts should be done
                                 to download each segment, default: ``3``.
                                 General option used by streams not
                                 covered by other options.

        stream-segment-threads   (int) The size of the thread pool used
                                 to download segments, default: ``1``.
                                 General option used by streams not
                                 covered by other options.

        stream-segment-timeout   (float) Segment connect and read
                                 timeout, default: ``10.0``.
                                 General option used by streams not
                                 covered by other options.

        stream-timeout           (float) Timeout for reading data from
                                 stream, default: ``60.0``.
                                 General option used by streams not
                                 covered by other options.

        locale                   (str) Locale setting, in the RFC 1766 format
                                 eg. en_US or es_ES
                                 default: ``system locale``.
        ======================== =========================================

        """

        # Backwards compatibility
        if key == "rtmpdump":
            key = "rtmp-rtmpdump"
        elif key == "rtmpdump-proxy":
            key = "rtmp-proxy"
        elif key == "errorlog":
            key = "subprocess-errorlog"
        elif key == "errorlog-path":
            key = "subprocess-errorlog-path"

        if key == "http-proxy":
            self.http.proxies["http"] = update_scheme("http://", value)
        elif key == "https-proxy":
            self.http.proxies["https"] = update_scheme("https://", value)
        elif key == "http-cookies":
            if isinstance(value, dict):
                self.http.cookies.update(value)
            else:
                self.http.parse_cookies(value)
        elif key == "http-headers":
            if isinstance(value, dict):
                self.http.headers.update(value)
            else:
                self.http.parse_headers(value)
        elif key == "http-query-params":
            if isinstance(value, dict):
                self.http.params.update(value)
            else:
                self.http.parse_query_params(value)
        elif key == "http-trust-env":
            self.http.trust_env = value
        elif key == "http-ssl-verify":
            self.http.verify = value
        elif key == "http-disable-dh":
            if value:
                requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ':!DH'
                try:
                    requests.packages.urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST = \
                        requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS.encode("ascii")
                except AttributeError:
                    # no ssl to disable the cipher on
                    pass
        elif key == "http-ssl-cert":
            self.http.cert = value
        elif key == "http-timeout":
            self.http.timeout = value
        else:
            self.options.set(key, value)
Code example #24
0
    def _get_streams(self):
        """Locate the player javascript on the page, extract the JSON
        metadata embedded in it and yield the streams it describes
        (HDS, HLS or progressive HTTP, with a dflt/alt fallback pair).
        """
        page = http.get(self.url)
        js_match = self._player_js.search(page.text)
        if not js_match:
            self.logger.info(
                "Didn't find player js. Probably this page doesn't contain a video"
            )
            return
        player_js = js_match.group(0)
        self.logger.info("Found player js {0}", player_js)

        player_page = http.get(player_js)
        data_match = self._data_re.search(player_page.text)
        if data_match is None:
            self.logger.info(
                "Couldn't extract json metadata from player.js: {0}",
                player_js)
            return

        metadata = json.loads(data_match.group("data"))
        media_type = metadata["mediaType"]
        is_video = media_type in ("live", "vod")
        is_audio = media_type == "aod"

        # e.g. "1.2.0-beta" -> (1, 2, 0); the format layout changed in 1.2.0
        media_version = tuple(
            int(part)
            for part in metadata["mediaVersion"].split("-")[0].split(".")
        )

        if not (is_video or is_audio):
            return

        resource = metadata["mediaResource"]
        url_key = "videoURL" if is_video else "audioURL"
        primary_url = resource["dflt"][url_key]
        fallback_url = resource["alt"][url_key]
        stream_name = "audio" if is_audio else "vod"

        if media_version >= (1, 2, 0):
            primary_fmt = resource["dflt"]["mediaFormat"]
            fallback_fmt = resource["alt"]["mediaFormat"]
        else:
            # old metadata layout: single global format, guess the alt
            # format from the url's extension
            primary_fmt = metadata["mediaFormat"]
            fallback_fmt = fallback_url[-4:]

        candidates = (
            {"url": primary_url, "format": primary_fmt, "name": stream_name},
            {"url": fallback_url, "format": fallback_fmt, "name": stream_name},
        )

        for candidate in candidates:
            stream_url = update_scheme("http://", candidate["url"])
            fmt = candidate["format"]
            try:
                if fmt in ("hds", ".f4m"):
                    for item in HDSStream.parse_manifest(
                            self.session, stream_url, is_akamai=True).items():
                        yield item
                elif fmt in ("hls", "m3u8"):
                    variants = HLSStream.parse_variant_playlist(
                        self.session, stream_url).items()
                    if not variants:
                        # non-variant playlist: expose it as a single stream
                        yield "live", HLSStream(self.session, stream_url)
                    for item in variants:
                        yield item
                elif fmt in ("mp3", "mp4", ".mp3", ".mp4"):
                    yield candidate["name"], HTTPStream(self.session, stream_url)
            except IOError as err:
                self.logger.error("Failed to extract {0} streams: {1}",
                                  fmt, err)
Code example #25
0
    def _make_url_list(self, old_list, base_url, stream_base=""):
        """Creates a list of validate urls from a list of broken urls
           and removes every blacklisted url

        Args:
            old_list: List of broken urls
            base_url: url that will get used for scheme and netloc
            stream_base: basically same as base_url, but used for .f4m files.

        Returns:
            List of validate urls
        """
        blacklist_netloc_user = self.get_option("blacklist_netloc")
        blacklist_netloc = (
            "about:blank",
            "adfox.ru",
            "googletagmanager.com",
            "javascript:false",
        )

        blacklist_path = [
            ("facebook.com", "/plugins"),
            ("vesti.ru", "/native_widget.html"),
        ]
        # Add --resolve-blacklist-path to blacklist_path
        blacklist_path_user = self.get_option("blacklist_path")
        if blacklist_path_user is not None:
            for _path_url in blacklist_path_user:
                if not _path_url.startswith(("http", "//")):
                    _path_url = update_scheme("http://", _path_url)
                _parsed_path_url = urlparse(_path_url)
                if _parsed_path_url.netloc and _parsed_path_url.path:
                    blacklist_path += [(_parsed_path_url.netloc,
                                        _parsed_path_url.path)]

        new_list = []
        for url in old_list:
            # Don't add the same url as self.url to the list.
            if url == self.url:
                continue
            # Repair the scheme
            new_url = url.replace("\\", "")
            if new_url.startswith("http&#58;//"):
                new_url = "http:" + new_url[9:]
            elif new_url.startswith("https&#58;//"):
                new_url = "https:" + new_url[10:]
            # Repair the domain
            if stream_base and new_url[1] is not "/":
                if new_url[0] is "/":
                    new_url = new_url[1:]
                new_url = urljoin(stream_base, new_url)
            else:
                new_url = urljoin(base_url, new_url)
            # Parse the url and remove not wanted urls
            parse_new_url = urlparse(new_url)
            REMOVE = False
            # Removes blacklisted domains
            if REMOVE is False and parse_new_url.netloc.endswith(
                    blacklist_netloc):
                REMOVE = True
            # Removes blacklisted domains from --resolve-blacklist-netloc
            if REMOVE is False and blacklist_netloc_user is not None and parse_new_url.netloc.endswith(
                    tuple(blacklist_netloc_user)):
                REMOVE = True
            # Removes blacklisted paths from a domain
            if REMOVE is False:
                for netloc, path in blacklist_path:
                    if parse_new_url.netloc.endswith(
                            netloc) and parse_new_url.path.startswith(path):
                        REMOVE = True
                        continue
            # Removes images and chatrooms
            if REMOVE is False and parse_new_url.path.endswith(
                (".jpg", ".png", ".svg", "/chat")):
                REMOVE = True
            # Remove obviously ad urls
            if REMOVE is False and self._ads_path.match(parse_new_url.path):
                REMOVE = True
            if REMOVE is True:
                self.logger.debug("Removed url: {0}".format(new_url))
                continue
            # Add url to the list
            new_list += [new_url]
        # Remove duplicates
        new_list = list(set(new_list))
        return new_list