Exemple #1
0
    def _get_content_hls_url(self, content_id):
        d = self.session.http.get(
            urljoin(self.url, self.content_api.format(id=content_id)))
        d = self.session.http.json(d, schema=self.content_api_schema)

        return urljoin((d["serviceUrl"] or d["defaultServiceUrl"]),
                       d["securePath"])
    def _get_streams(self):
        """Resolve the page's stream data into HLS variant streams.

        The page is validated with ``_data_content_schema``. A result that
        starts with ``/`` or ``http`` is treated as an API URL and fetched
        (validated with ``_api_schema``); a result that starts with ``[{``
        is validated with ``_api_schema`` directly. Anything else is
        rejected, logging an error unless the result is ``None``.

        The resulting stream URL is then exchanged through the token API
        (``_token_api_path``) before the variant playlist is parsed.

        :return: result of ``HLSStream.parse_variant_playlist``, or ``None``
                 when the page yields no usable data
        """
        page_data = self.session.http.get(self.url,
                                          schema=self._data_content_schema)

        if page_data and page_data.startswith(("/", "http")):
            stream_url = self.session.http.get(
                urljoin(self.url, page_data),
                schema=self._api_schema,
                headers={"Referer": self.url})
        elif page_data and page_data.startswith("[{"):
            stream_url = self._api_schema.validate(page_data)
        else:
            if page_data is not None:
                message = "_data_content_schema returns invalid data: {0}"
                log.error(message.format(page_data))
            return

        # the token API needs both the full stream URL and its origin
        parts = urlparse(stream_url)
        origin = "{0}://{1}".format(parts.scheme, parts.netloc)
        token_path = self._token_api_path.format(url=stream_url,
                                                 netloc=origin,
                                                 time=int(time()))
        playlist_url = self.session.http.get(urljoin(self.url, token_path),
                                             schema=self._stream_schema,
                                             headers={"Referer": self.url})
        return HLSStream.parse_variant_playlist(self.session, playlist_url)
Exemple #3
0
    def get_live(self, id):
        """Yield the live (or hosted) streams of the channel ``id``.

        Sets ``self.category``, ``self.author`` and ``self.title`` from the
        live API response. When the channel is not streaming but hosts
        another channel, the hosted channel's streams are used. Nothing is
        yielded when the user is offline or no mirror is available.

        :param id: value formatted into ``self.live_api_url``
        :return: generator of ``(name, stream)`` tuples
        """
        live_res = self.session.http.get(self.live_api_url.format(id))
        user_data = self.session.http.json(live_res, schema=self.live_schema)
        channel = user_data['channel']

        if channel['is_streaming']:
            self.category = 'Live'
            stream_id = channel['channel_id']
        elif 'hostee' in channel:
            hostee = channel['hostee']
            self.category = 'Hosted by {}'.format(hostee["nickname"])
            stream_id = hostee['channel_id']
        else:
            log.info('User is offline')
            return

        self.author = user_data['user']['nickname']
        self.title = channel['name']

        streams_res = self.session.http.get(
            self.streams_api_url.format(stream_id))
        streams_schema = validate.Schema({
            "default_mirror": validate.text,
            "mirror_list": [{
                "name": validate.text,
                "url_domain": validate.url(),
            }],
            "source_stream_url_path": validate.text,
            "stream_addr_list": [{
                "resolution": validate.text,
                "url_path": validate.text,
            }],
        })
        streams = self.session.http.json(streams_res, schema=streams_schema)

        # prefer the default mirror, otherwise fall back to the first one
        mirror = next(
            (candidate for candidate in streams["mirror_list"]
             if candidate["name"] == streams["default_mirror"]),
            None)
        if not mirror:
            mirror = next(iter(streams["mirror_list"]), None)
        if not mirror:
            return

        auto = next(
            (entry for entry in streams["stream_addr_list"]
             if entry["resolution"] == "Auto"),
            None)
        if auto:
            variant_url = urljoin(mirror["url_domain"], auto["url_path"])
            variants = HLSStream.parse_variant_playlist(self.session,
                                                        variant_url)
            for named_stream in variants.items():
                yield named_stream

        if streams["source_stream_url_path"]:
            source_url = urljoin(mirror["url_domain"],
                                 streams["source_stream_url_path"])
            yield "source", HLSStream(self.session, source_url)
Exemple #4
0
    def _get_streams(self):
        """
        Get the config object from the page source and call the
        API to get the list of streams.

        A login is attempted first. The page config is collected from the
        ``config_re`` matches and validated with ``config_schema``. The API
        URL is built from the selected video hash ID or, failing that, from
        the live stream URL.

        :return: generator of ``(name, stream)`` tuples; nothing is yielded
                 when the config is invalid, when neither a video hash ID
                 nor a live stream URL is configured, or when the API
                 reports an error
        """
        # attempt a login
        self.login()

        res = self.session.http.get(self.url)
        # decode the config for the page
        matches = self.config_re.finditer(res.text)
        try:
            config = self.config_schema.validate(
                dict(m.group("key", "value") for m in matches))
        except PluginError:
            return

        if config["selectedVideoHID"]:
            log.debug("Found video hash ID: {0}".format(
                config["selectedVideoHID"]))
            api_url = urljoin(
                self.url,
                urljoin(config["videosURL"], config["selectedVideoHID"]))
        elif config["livestreamURL"]:
            log.debug("Found live stream URL: {0}".format(
                config["livestreamURL"]))
            api_url = urljoin(self.url, config["livestreamURL"])
        else:
            return

        ares = self.session.http.get(api_url)
        data = self.session.http.json(ares, schema=self.api_schema)
        viewing_urls = data["viewing_urls"]

        if "error" in viewing_urls:
            log.error("Failed to load streams: {0}".format(
                viewing_urls["error"]))
            return

        for url in viewing_urls["urls"]:
            # Prefer the resolution, then the label, then "live". The keys
            # are checked one by one because
            # ``url.get("res", url["label"])`` evaluates ``url["label"]``
            # eagerly and so fell back to "live" whenever "label" was
            # missing, even if "res" was present.
            if "res" in url:
                label = "{0}p".format(url["res"])
            elif "label" in url:
                label = "{0}p".format(url["label"])
            else:
                label = "live"

            if url["type"] == "rtmp/mp4" and RTMPStream.is_usable(
                    self.session):
                params = {
                    "rtmp": url["src"],
                    "pageUrl": self.url,
                    "live": True,
                }
                yield label, RTMPStream(self.session, params)

            elif url["type"] == "application/x-mpegURL":
                for s in HLSStream.parse_variant_playlist(
                        self.session, url["src"]).items():
                    yield s
 def _get_vod_stream(self, vod_id):
     """Yield the streams of the recording ``vod_id``.

     The recordings API response is validated with ``_vod_schema``.
     Entries with format ``hls`` become HLS streams, entries with format
     ``raw`` become HTTP streams; other formats are skipped.

     :param vod_id: id formatted into ``self.api_url``
     :return: generator of ``("<height>p", stream)`` tuples
     """
     api_res = http.get(self.api_url.format(type="recordings", id=vod_id))
     recordings = http.json(api_res, schema=self._vod_schema)
     for recording in recordings:
         fmt = recording["format"]
         if fmt == "hls":
             manifest_url = urljoin(recording["url"], "manifest.m3u8")
             quality = "{0}p".format(recording["height"])
             yield quality, HLSStream(self.session, manifest_url)
         elif fmt == "raw":
             source_url = urljoin(recording["url"], "source.mp4")
             quality = "{0}p".format(recording["height"])
             yield quality, HTTPStream(self.session, source_url)
Exemple #6
0
    def _get_api_info(self, page):
        """Extract the token and stream API URLs from the player ``div``.

        Only the first ``div`` whose ``class`` attribute equals ``vrtvideo``
        is used.

        :param page: response object whose ``text`` holds the page HTML
        :return: dict with ``token_url`` and ``stream_url``, or ``None``
                 when no matching ``div`` exists
        """
        for div in itertags(page.text, 'div'):
            if div.attributes.get("class") != "vrtvideo":
                continue

            attrs = div.attributes
            api_base = attrs.get("data-mediaapiurl") + "/"
            info = {"token_url": urljoin(api_base, "tokens")}

            if attrs.get("data-videotype") == "live":
                resource = attrs.get("data-livestream")
            else:
                # publication id and video id separated by an encoded "$"
                resource = "{0}%24{1}".format(attrs.get("data-publicationid"),
                                              attrs.get("data-videoid"))
            info["stream_url"] = urljoin(urljoin(api_base, "videos/"),
                                         resource)
            return info
Exemple #7
0
    def _get_api_info(self, page):
        """Return the API URLs advertised by the page's ``vrtvideo`` div.

        The ``div`` tags of ``page.text`` are scanned in order and the first
        one with ``class="vrtvideo"`` decides the result.

        :param page: response object whose ``text`` holds the page HTML
        :return: ``{"token_url": ..., "stream_url": ...}``, or ``None`` if
                 the page has no such ``div``
        """
        for tag in itertags(page.text, 'div'):
            if tag.attributes.get("class") == "vrtvideo":
                base = tag.attributes.get("data-mediaapiurl") + "/"
                result = {"token_url": urljoin(base, "tokens")}

                is_live = tag.attributes.get("data-videotype") == "live"
                if is_live:
                    target = tag.attributes.get("data-livestream")
                else:
                    publication_id = tag.attributes.get("data-publicationid")
                    video_id = tag.attributes.get("data-videoid")
                    target = "{0}%24{1}".format(publication_id, video_id)

                videos_base = urljoin(base, "videos/")
                result["stream_url"] = urljoin(videos_base, target)
                return result
Exemple #8
0
    def _get_streams(self):
        """
        Get the config object from the page source and call the
        API to get the list of streams.

        A login is attempted first. The page config is collected from the
        ``config_re`` matches and validated with ``config_schema``. The API
        URL is built from the selected video hash ID or, failing that, from
        the live stream URL.

        :return: generator of ``(name, stream)`` tuples; nothing is yielded
                 when the config is invalid, when neither a video hash ID
                 nor a live stream URL is configured, or when the API
                 reports an error
        """
        # attempt a login
        self.login()

        res = http.get(self.url)
        # decode the config for the page
        matches = self.config_re.finditer(res.text)
        try:
            config = self.config_schema.validate(
                dict(m.group("key", "value") for m in matches))
        except PluginError:
            return

        if config["selectedVideoHID"]:
            self.logger.debug("Found video hash ID: {0}", config["selectedVideoHID"])
            api_url = urljoin(self.url, urljoin(config["videosURL"], config["selectedVideoHID"]))
        elif config["livestreamURL"]:
            self.logger.debug("Found live stream URL: {0}", config["livestreamURL"])
            api_url = urljoin(self.url, config["livestreamURL"])
        else:
            return

        ares = http.get(api_url)
        data = http.json(ares, schema=self.api_schema)
        viewing_urls = data["viewing_urls"]

        if "error" in viewing_urls:
            self.logger.error("Failed to load streams: {0}", viewing_urls["error"])
            return

        for url in viewing_urls["urls"]:
            # Prefer the resolution, then the label, then "live". The keys
            # are checked one by one because
            # ``url.get("res", url["label"])`` evaluates ``url["label"]``
            # eagerly and so fell back to "live" whenever "label" was
            # missing, even if "res" was present.
            if "res" in url:
                label = "{0}p".format(url["res"])
            elif "label" in url:
                label = "{0}p".format(url["label"])
            else:
                label = "live"

            if url["type"] == "rtmp/mp4" and RTMPStream.is_usable(self.session):
                params = {
                    "rtmp": url["src"],
                    "pageUrl": self.url,
                    "live": True,
                }
                yield label, RTMPStream(self.session, params)

            elif url["type"] == "application/x-mpegURL":
                for s in HLSStream.parse_variant_playlist(self.session, url["src"]).items():
                    yield s
Exemple #9
0
    def _get_new_content_hls_url(self, content_id, api_url):
        """Resolve the HLS URL of ``content_id`` through the new content API.

        When the API reports ``https://www.kanald.com.tr`` as its default
        service URL, ``self.url`` is replaced by that value and the lookup
        is delegated to ``_get_content_hls_url``.

        :param content_id: value substituted for ``{id}`` in ``api_url``
        :param api_url: API path template, resolved relative to ``self.url``
        :return: ``SecurePath`` itself when it starts with ``http``,
                 otherwise ``SecurePath`` joined onto ``ServiceUrl`` (or
                 ``DefaultServiceUrl`` when ``ServiceUrl`` is falsy)
        """
        log.debug("Using new content API url")
        endpoint = urljoin(self.url, api_url.format(id=content_id))
        response = self.session.http.get(endpoint)
        link = self.session.http.json(response,
                                      schema=self.new_content_api_schema)

        if link["DefaultServiceUrl"] == "https://www.kanald.com.tr":
            self.url = link["DefaultServiceUrl"]
            return self._get_content_hls_url(content_id)

        secure_path = link["SecurePath"]
        if secure_path.startswith("http"):
            return secure_path

        service_base = link["ServiceUrl"] or link["DefaultServiceUrl"]
        return urljoin(service_base, secure_path)
Exemple #10
0
    def _get_hls_url(self, content_id):
        """Resolve the HLS URL of ``content_id`` through the content API.

        URLs containing ``cnnturk`` or ``teve2.com.tr`` use
        ``self.new_content_api``; all others use ``self.content_api``.

        :param content_id: value substituted for ``{id}`` in the API path
        :return: ``SecurePath`` joined onto ``ServiceUrl``, or onto
                 ``DefaultServiceUrl`` when ``ServiceUrl`` is falsy
        """
        uses_new_api = "cnnturk" in self.url or "teve2.com.tr" in self.url
        if uses_new_api:
            self.logger.debug("Using new content API url")
            api_path = self.new_content_api.format(id=content_id)
        else:
            api_path = self.content_api.format(id=content_id)

        # make the api url relative to the current domain
        api_res = http.get(urljoin(self.url, api_path))
        content = http.json(api_res, schema=self.content_api_schema)

        link = content["Media"]["Link"]
        service_base = link["ServiceUrl"] or link["DefaultServiceUrl"]
        return urljoin(service_base, link["SecurePath"])
Exemple #11
0
    def _get_hls_url(self, content_id):
        # make the api url relative to the current domain
        if "cnnturk" in self.url or "teve2.com.tr" in self.url:
            self.logger.debug("Using new content API url")
            api_url = urljoin(self.url, self.new_content_api.format(id=content_id))
        else:
            api_url = urljoin(self.url, self.content_api.format(id=content_id))

        apires = self.session.http.get(api_url)

        stream_data = self.session.http.json(apires, schema=self.content_api_schema)
        d = stream_data["Media"]["Link"]
        return urljoin((d["ServiceUrl"] or d["DefaultServiceUrl"]), d["SecurePath"])
    def _get_streams(self):
        """
            Find all the streams for the ITV url

            The playlist endpoint is taken from the ``data-html5-playlist``
            attribute of the video info, falling back to ``data-video-id``.
            Device info is posted to it together with the ``hmac`` header.

            :return: generator of ``(name, stream)`` tuples
        """
        http.headers.update({"User-Agent": useragents.FIREFOX})
        video_info = self.video_info()
        playlist_url = (video_info.get("data-html5-playlist")
                        or video_info.get("data-video-id"))

        hmac_headers = {"hmac": video_info.get("data-video-hmac")}
        res = http.post(playlist_url,
                        data=json.dumps(self.device_info),
                        headers=hmac_headers)
        data = http.json(res, schema=self._video_info_schema)

        log.debug("Video ID info response: {0}".format(data))

        playlist = data['Playlist']
        # catch-up playlists get explicit "<pixels>_<bitrate>" stream names
        if playlist['VideoType'] == "CATCHUP":
            name_fmt = "{pixels}_{bitrate}"
        else:
            name_fmt = None

        for media in playlist['Video']['MediaFiles']:
            media_url = urljoin(playlist['Video']['Base'], media['Href'])
            variants = HLSStream.parse_variant_playlist(
                self.session, media_url, name_fmt=name_fmt)
            for named_stream in variants.items():
                yield named_stream
Exemple #13
0
    def _get_streams(self):
        """Resolve the stream advertised by the page's JSON endpoint.

        The endpoint is ``_base_link`` joined with the ``param`` group that
        ``_param_re`` finds in the page. The ``parse_hls`` option decides
        whether the stream is parsed as an HLS variant playlist or returned
        as a single HTTP stream named ``live``.

        :raises NoStreamsError: when the advertised URL ends with ``.mp4``
        :return: mapping of stream name to stream
        """
        headers = {'User-Agent': CHROME}

        page = self.session.http.get(self.url, headers=headers)
        param = self._param_re.search(page.text).group('param')

        json_url = urljoin(self._base_link, param)
        payload = self.session.http.get(json_url, headers=headers).json()
        stream = payload.get('url')

        if stream.endswith('.mp4'):
            raise NoStreamsError('Stream is probably geo-locked to Greece')

        headers.update({"Referer": self.url})

        if bool(strtobool(self.get_option('parse_hls'))):
            return HLSStream.parse_variant_playlist(self.session,
                                                    stream,
                                                    headers=headers)
        return dict(live=HTTPStream(self.session, stream, headers=headers))
Exemple #14
0
 def uri(self, uri):
     """Return ``uri`` as an absolute URI where possible.

     A ``uri`` that already has a scheme is returned unchanged. A
     non-empty ``uri`` without a scheme is joined onto ``self.base_uri``
     when that is set. In every other case ``uri`` is returned as given.
     """
     has_scheme = bool(uri) and bool(urlparse(uri).scheme)
     if has_scheme or not (self.base_uri and uri):
         return uri
     return urljoin(self.base_uri, uri)
Exemple #15
0
    def get_data(self):
        """Collect the stream type, credentials and cipher data of the page.

        The page at ``self.url`` is searched for the live-channel flag and
        the URL of the main chunk JS file; that file is then searched for
        the credentials and the cipher data.

        :return: dict with ``type`` (``channel`` or ``episode``),
                 ``credentials`` and ``cipher_data`` (the named groups of
                 the respective matches), or ``None`` as soon as one of the
                 patterns does not match
        """
        page = self.session.http.get(self.url)

        live_match = self.is_live_channel_re.search(page.text)
        if not live_match:
            return
        page_type = 'channel' if live_match.group(1) == "true" else 'episode'

        chunk_match = self.main_chunk_js_url_re.search(page.text)
        if not chunk_match:
            log.error('Failed to get main chunk JS URL')
            return

        script = self.session.http.get(urljoin(self.url,
                                               chunk_match.group(1)))

        credentials_match = self.js_credentials_re.search(script.text)
        if not credentials_match:
            log.error('Failed to get credentials')
            return

        cipher_match = self.js_cipher_data_re.search(script.text)
        if not cipher_match:
            log.error('Failed to get cipher data')
            return

        return {
            'type': page_type,
            'credentials': credentials_match.groupdict(),
            'cipher_data': cipher_match.groupdict(),
        }
    def _get_streams(self):
        """Resolve the stream advertised by the page's JSON endpoint.

        The endpoint is ``_base_link`` joined with the ``param`` group that
        ``_param_re`` finds in the page; for URLs that do not end with
        ``/live`` a ``cid`` query built from the ``id`` group is appended.
        HLS parsing is the default when reading the ``parse_hls`` option
        raises ``AttributeError``.

        :return: mapping of stream name to stream
        """
        headers = {'User-Agent': CHROME}
        is_live = self.url.endswith('/live')

        page = self.session.http.get(self.url, headers=headers)
        match = self._param_re.search(page.text)

        param = match.group('param')
        if not is_live:
            cid_query = 'cid={0}'.format(match.group('id'))
            param = '?'.join((param, cid_query))

        json_url = urljoin(self._base_link, param)
        payload = self.session.http.get(json_url, headers=headers).json()
        stream = payload.get('stream').strip()

        headers.update({"Referer": self.url})

        try:
            parse_hls = bool(strtobool(self.get_option('parse_hls')))
        except AttributeError:
            parse_hls = True

        if not parse_hls:
            return dict(stream=HTTPStream(self.session, stream, headers=headers))
        return HLSStream.parse_variant_playlist(self.session, stream, headers=headers)
Exemple #17
0
    def process_module_info(self):
        """Refresh the chunk filename format from the latest module info.

        Does nothing when already closed, when fetching the module info
        raises ``PluginError`` (logged as an error), when no provider is
        named like ``self.stream.provider``, or when the stream index is
        missing from that provider. Closes when the stream is reported as
        offline.

        On success ``self.filename_format`` is set and
        ``update_chunk_info`` is called with the selected stream.
        """
        if self.closed:
            return

        try:
            result = self.fetch_module_info()
        except PluginError as err:
            self.logger.error("{0}", err)
            return

        providers = result.get("stream")
        if not providers or providers == "offline":
            self.logger.debug("Stream went offline")
            self.close()
            return

        wanted = [candidate for candidate in providers
                  if candidate.get("name") == self.stream.provider][:1]
        if not wanted:
            return
        provider = wanted[0]

        try:
            stream = provider["streams"][self.stream.stream_index]
        except IndexError:
            self.logger.error("Stream index not in result")
            return

        # every "%" in the stream name becomes a "%s" placeholder
        chunk_name = stream["streamName"].replace("%", "%s")
        self.filename_format = urljoin(provider["url"], chunk_name)
        self.update_chunk_info(stream)
    def _get_streams(self):
        """Yield the streams listed in the page's media info.

        The player URL is extracted from the page with
        ``_player_url_schema`` and resolved relative to the response URL.
        Its content is parsed as JSON and validated with
        ``_mediainfo_schema``.

        Streams containing ``.m3u8`` are parsed as HLS variant playlists.
        Streams containing ``.mp4`` (and not ``.f4m``) become HTTP streams
        named through ``_QUALITY_MAP``. ``.f4m`` streams are skipped
        silently; any other type is logged as an error.

        :return: generator of ``(name, stream)`` tuples
        """
        res = self.session.http.get(self.url)
        data_url = self._player_url_schema.validate(res.text)
        if not data_url:
            log.error("Could not find video at this url.")
            return

        data_url = urljoin(res.url, data_url)
        # Format eagerly, like the other log calls in this method, instead
        # of relying on the logger to apply str.format-style arguments.
        log.debug("Player URL: '{0}'".format(data_url))
        res = self.session.http.get(data_url)
        mediainfo = parse_json(res.text,
                               name="MEDIAINFO",
                               schema=self._mediainfo_schema)
        log.trace("Mediainfo: {0!r}".format(mediainfo))

        for media in mediainfo["_mediaArray"]:
            for stream in media["_mediaStreamArray"]:
                stream_ = stream["_stream"]
                # a list of alternatives: use the first, skip empty lists
                if isinstance(stream_, list):
                    if not stream_:
                        continue
                    stream_ = stream_[0]

                if ".m3u8" in stream_:
                    for s in HLSStream.parse_variant_playlist(
                            self.session, stream_).items():
                        yield s
                elif ".mp4" in stream_ and ".f4m" not in stream_:
                    quality = "{0}".format(
                        self._QUALITY_MAP[stream["_quality"]])
                    yield quality, HTTPStream(self.session, stream_)
                elif ".f4m" not in stream_:
                    log.error(
                        "Unexpected stream type: '{0}'".format(stream_))
 def uri(self, uri):
     """Resolve ``uri`` against ``self.base_uri`` when it is relative.

     URIs that already carry a scheme are returned as they are. Empty or
     ``None`` values, and relative URIs when no base URI is set, are also
     returned unchanged.
     """
     if uri and urlparse(uri).scheme:
         return uri

     can_join = self.base_uri and uri
     if not can_join:
         return uri
     return urljoin(self.base_uri, uri)
Exemple #20
0
    def _get_vod_stream(self, vod_id):
        """Yield the HLS streams of the recording ``vod_id``.

        The recordings API response is validated with ``_vod_schema``;
        entries whose format is not ``hls`` are skipped.

        :param vod_id: recording id passed to ``_get_api_res``
        :return: generator of ``("<height>p", HLSStream)`` tuples
        """
        api_res = self._get_api_res("recordings", vod_id)
        recordings = self.session.http.json(api_res, schema=self._vod_schema)

        for recording in recordings:
            if recording["format"] != "hls":
                continue
            manifest_url = urljoin(recording["url"], "manifest.m3u8")
            quality = "{0}p".format(recording["height"])
            yield quality, HLSStream(self.session, manifest_url)
    def process_module_info(self):
        """Update ``self.filename_format`` from freshly fetched module info.

        Returns early when closed, when ``fetch_module_info`` raises
        ``PluginError`` (logged as an error), when the stream went offline
        (which also closes), when no provider matches
        ``self.stream.provider``, or when ``self.stream.stream_index`` is
        out of range for the matching provider.

        Otherwise the stream name, with each ``%`` replaced by ``%s``, is
        joined onto the provider URL and ``update_chunk_info`` is called.
        """
        if self.closed:
            return

        try:
            module_info = self.fetch_module_info()
        except PluginError as err:
            self.logger.error("{0}", err)
            return

        providers = module_info.get("stream")
        went_offline = not providers or providers == "offline"
        if went_offline:
            self.logger.debug("Stream went offline")
            self.close()
            return

        provider = None
        found = False
        for provider in providers:
            if provider.get("name") == self.stream.provider:
                found = True
                break
        if not found:
            return

        try:
            stream = provider["streams"][self.stream.stream_index]
        except IndexError:
            self.logger.error("Stream index not in result")
            return

        name_template = stream["streamName"].replace("%", "%s")
        self.filename_format = urljoin(provider["url"], name_template)
        self.update_chunk_info(stream)
Exemple #22
0
    def _get_vod(self, user_id, video_name):
        """Build the VOD playlist URL and return its HLS variant streams.

        The ``getVideoByFileName`` API is queried and validated with
        ``vod_schema``. The playlist URL is assembled from the server
        address, media root, media file, bitrates, streaming type,
        protocol and token of the response.

        :param user_id: sent as the ``userId`` API parameter
        :param video_name: sent as ``videoName``; must equal the returned
                           ``ShowTitle`` for any stream to be produced
        :raises PluginError: when ``ServerAddress`` or ``MediaFile`` cannot
                             be split into their components
        :return: result of ``HLSStream.parse_variant_playlist``, or ``None``
                 when the returned ``ShowTitle`` differs from ``video_name``
        """
        res = self.session.http.get(urljoin(self.api_url,
                                            "getVideoByFileName"),
                                    params=dict(userId=user_id,
                                                videoName=video_name,
                                                serverType="web",
                                                callback="x"))

        vod_data = self.session.http.json(res, schema=self.vod_schema)

        if video_name != vod_data['ShowTitle']:
            return None

        # A failed search returns None; check for it so that the intended
        # PluginError is raised rather than an AttributeError on .groups().
        server_match = self.server_addr_re.search(vod_data['ServerAddress'])
        host, base_path = (server_match.groups()
                           if server_match else (None, None))
        if not host or not base_path:
            raise PluginError("Could not split 'ServerAddress' components")

        media_match = self.media_file_re.search(vod_data['MediaFile'])
        base_file, file_ext = (media_match.groups()
                               if media_match else (None, None))
        if not base_file or not file_ext:
            raise PluginError("Could not split 'MediaFile' components")

        media_path = "{0}{1}{2}{3}{4}{5}".format(
            base_path, vod_data['MediaRoot'], base_file,
            vod_data['Bitrates'], file_ext, vod_data['StreamingType'])
        log.debug("Media path={0}".format(media_path))

        # the token is placed in the query component of the URL
        vod_url = urlunparse((vod_data['ProtocolType'], host, media_path,
                              '', vod_data['Token'], ''))
        log.debug("URL={0}".format(vod_url))

        return HLSStream.parse_variant_playlist(self.session, vod_url)
Exemple #23
0
    def _parse_smil(self, url, swf_url):
        """Yield an Akamai HD stream for every video in a SMIL config.

        :param url: location of the SMIL document
        :param swf_url: player SWF URL handed to each stream
        :return: generator of ``(bitrate, AkamaiHDStream)`` tuples
        """
        smil_res = http.get(url)
        smil = http.xml(smil_res, "SMIL config", schema=_smil_schema)

        for video_src, bitrate in smil["videos"]:
            video_url = urljoin(smil["http_base"], video_src)
            yield bitrate, AkamaiHDStream(self.session, video_url,
                                          swf=swf_url)
Exemple #24
0
    def _get_streams(self):
        """Dispatch to the live or VOD stream lookup for ``self.url``.

        The URL type is the first group of ``url_re``. The user id is read
        from the main JS file referenced by the page; for non-live URLs the
        video name is read from the page as well.

        :raises PluginError: when the video name (non-live URLs only), the
                             main JS path or the user id cannot be found
        :return: result of ``_get_live`` or ``_get_vod``
        """
        url_match = self.url_re.match(self.url)
        url_type = url_match.group(1) if url_match else url_match
        log.debug("URL type={0}".format(url_type))
        is_live = url_type == "live"

        page = self.session.http.get(self.url)

        if not is_live:
            name_match = self.video_name_re.search(page.text)
            video_name = name_match.group(1) if name_match else name_match
            if not video_name:
                raise PluginError('Could not determine video_name')
            log.debug("Video name={0}".format(video_name))

        js_match = self.main_js_url_re.search(page.text)
        main_js_path = js_match.group(1) if js_match else js_match
        if not main_js_path:
            raise PluginError('Could not determine main_js_path')
        log.debug("Main JS path={0}".format(main_js_path))

        script = self.session.http.get(urljoin(self.url, main_js_path))

        user_match = self.user_id_re.search(script.text)
        user_id = user_match.group(1) if user_match else user_match
        if not user_id:
            raise PluginError('Could not determine user_id')
        log.debug("User ID={0}".format(user_id))

        if is_live:
            return self._get_live(user_id)
        return self._get_vod(user_id, video_name)
Exemple #25
0
    def _get_streams(self):
        """Find the page's m3u8 source and return its HLS streams.

        When the page embeds a player iframe, the iframe page is searched
        instead. A playlist whose body is at most 10 characters long is
        treated as offline.

        :return: the parsed variant streams, a single ``live`` stream when
                 the playlist has no variants, or ``None`` when no m3u8
                 source is found or the stream is offline
        """
        page = self.session.http.get(self.url)

        # some pages have embedded players
        iframe_match = self.iframe_re.search(page.text)
        if iframe_match:
            iframe_url = urljoin(self.url, iframe_match.group("url"))
            page = self.session.http.get(iframe_url)

        src_match = self.src_re.search(page.text)
        stream_src = src_match and src_match.group("url")
        if not (stream_src and stream_src.endswith("m3u8")):
            return

        # do not open empty m3u8 files
        playlist_text = self.session.http.get(stream_src).text
        if len(playlist_text) <= 10:
            log.error("This stream is currently offline")
            return

        log.debug("URL={0}".format(stream_src))
        variants = HLSStream.parse_variant_playlist(self.session, stream_src)
        if variants:
            return variants
        return {"live": HLSStream(self.session, stream_src)}
Exemple #26
0
    def _get_vod_stream(self, vod_id):
        """Yield the HLS streams of the recording ``vod_id``.

        The recordings API response is validated with ``_vod_schema``;
        only entries with format ``hls`` produce a stream.

        :param vod_id: recording id passed to ``_get_api_res``
        :return: generator of ``("<height>p", HLSStream)`` tuples
        """
        api_res = self._get_api_res("recordings", vod_id)

        for entry in http.json(api_res, schema=self._vod_schema):
            if entry["format"] != "hls":
                continue
            playlist_url = urljoin(entry["url"], "manifest.m3u8")
            name = "{0}p".format(entry["height"])
            yield name, HLSStream(self.session, playlist_url)
    def _parse_smil(self, url, swf_url):
        """Turn the videos of a SMIL config into Akamai HD streams.

        :param url: location of the SMIL document
        :param swf_url: player SWF URL passed to every stream
        :return: generator of ``(bitrate, AkamaiHDStream)`` tuples
        """
        config = http.xml(http.get(url), "SMIL config", schema=_smil_schema)
        http_base = config["http_base"]

        for src, bitrate in config["videos"]:
            stream = AkamaiHDStream(self.session, urljoin(http_base, src),
                                    swf=swf_url)
            yield bitrate, stream
Exemple #28
0
    def _get_streams(self):
        """Yield the streams listed in the page's livestream XML.

        The player data URL is extracted from the page with
        ``_player_url_schema``; nothing is yielded when it is missing. The
        XML behind it is validated with ``_livestream_schema``.

        ``.m3u8`` URLs are parsed as HLS variant playlists, ``.f4m`` URLs as
        HDS manifests, and ``.mp4`` URLs become HTTP streams named after
        their bitrate. An ``IOError`` raised while handling one entry is
        logged as a warning and the remaining entries are still processed.

        :return: generator of ``(name, stream)`` tuples
        """
        data_url = self.session.http.get(self.url,
                                         schema=self._player_url_schema)
        if not data_url:
            return

        xml_res = self.session.http.get(urljoin(self.url, data_url))
        stream_info = self.session.http.xml(xml_res,
                                            schema=self._livestream_schema)

        for entry in stream_info:
            stream_url = entry["url"]
            try:
                if ".m3u8" in stream_url:
                    variants = HLSStream.parse_variant_playlist(
                        self.session, stream_url, name_key="bitrate")
                    for named_stream in variants.items():
                        yield named_stream
                elif ".f4m" in stream_url:
                    manifest = HDSStream.parse_manifest(
                        self.session,
                        stream_url,
                        pvswf=self.swf_url,
                        is_akamai=True)
                    for named_stream in manifest.items():
                        yield named_stream
                elif ".mp4" in stream_url:
                    name = "{0}k".format(entry["bitrate"])
                    yield name, HTTPStream(self.session, stream_url)
            except IOError as err:
                self.logger.warning("Error parsing stream: {0}", err)
Exemple #29
0
 def repair_url(self, url, base_url, stream_base=''):
     """Turn a possibly mangled or relative ``url`` into an absolute URL.

     Backslashes are removed and an HTML-entity encoded scheme separator
     (``http&#58;//`` / ``https&#58;//``) is restored. Protocol-relative
     URLs (``//host/path``) are always resolved against ``base_url``.
     Everything else is resolved against ``stream_base`` when it is
     given, with a single leading ``/`` dropped so that the path is
     appended below ``stream_base``; otherwise against ``base_url``.

     Empty and one-character URLs are handled without raising. A relative
     path whose second character is ``/`` (e.g. ``a/b.m3u8``) is no longer
     mistaken for a protocol-relative URL.

     :param url: URL as found in the page source
     :param base_url: URL of the page, used as the default join base
     :param stream_base: optional base for path-only stream URLs
     :return: the repaired URL
     """
     # remove \
     new_url = url.replace('\\', '')

     # repairs broken scheme
     for scheme in ('http', 'https'):
         encoded_prefix = scheme + '&#58;'
         if new_url.startswith(encoded_prefix + '//'):
             new_url = scheme + ':' + new_url[len(encoded_prefix):]
             break

     # creates a valid url from path only urls
     # and adds missing scheme for // urls
     if stream_base and not new_url.startswith('//'):
         if new_url.startswith('/'):
             new_url = new_url[1:]
         return urljoin(stream_base, new_url)
     return urljoin(base_url, new_url)
Exemple #30
0
    def _window_location(self):
        """Return the URL matched by ``_window_location_re`` in the page.

        :return: the matched ``url`` group resolved against ``self.url``,
                 or ``False`` when ``self.html_text`` has no match
        """
        match = self._window_location_re.search(self.html_text)
        if not match:
            log.trace('No window_location')
            return False

        location = urljoin(self.url, match.group('url'))
        log.debug('Found window_location: {0}'.format(location))
        return location
Exemple #31
0
    def _get_source_streams(self):
        """Return the HLS streams of the first embedded video page source.

        Every ``a`` tag whose class contains ``video-play__link`` is
        followed in page order; the first ``source`` tag found on one of
        the linked pages provides the playlist URL.

        :return: result of ``HLSStream.parse_variant_playlist``, or ``None``
                 when no ``source`` tag is found
        """
        page = self.session.http.get(self.url)

        for link in itertags(page.text, 'a'):
            if "video-play__link" not in link.attributes.get("class", ""):
                continue

            video_page_url = urljoin(self.url, link.attributes.get("href"))
            log.debug("Loading embedded video page")
            query = dict(ajax="true", npo_cc_skip_wall="true")
            video_page = self.session.http.get(video_page_url, params=query)

            # only the first <source> tag is used
            for source in itertags(video_page.text, 'source'):
                playlist_url = source.attributes.get("src")
                return HLSStream.parse_variant_playlist(self.session,
                                                        playlist_url)
    def create_hls_url(self, suffix):
        """
        creates a valid hls_url

        The domain is looked up in ``_channel_domains`` by the channel
        found in ``self.url``; the path comes from the ``path`` group that
        ``_suffix_re`` finds in ``suffix``.

        :param suffix: url session params
        :return: hls_url, or None when ``suffix`` does not match
        """
        suffix_match = self._suffix_re.search(suffix)
        if not suffix_match:
            return
        self.logger.debug("create hls_url from suffix")

        channel = self._url_re.match(self.url).group("channel")
        playlist_dir = urljoin(self._channel_domains[channel],
                               suffix_match.group("path"))
        # the whole suffix becomes the query string of the master playlist
        playlist_name = "master.m3u8?{suffix}".format(suffix=suffix)
        return urljoin(playlist_dir, playlist_name)
Exemple #33
0
    def create_hls_url(self, suffix):
        """
        creates a valid hls_url

        Combines the channel's domain (``_channel_domains``), the ``path``
        group that ``_suffix_re`` finds in ``suffix`` and a ``master.m3u8``
        playlist name carrying ``suffix`` as its query string.

        :param suffix: url session params
        :return: hls_url, or None when ``suffix`` does not match
        """
        found = self._suffix_re.search(suffix)
        if not found:
            return None
        self.logger.debug("create hls_url from suffix")

        channel_match = self._url_re.match(self.url)
        domain = self._channel_domains[channel_match.group("channel")]

        hls_url = urljoin(domain, found.group("path"))
        return urljoin(hls_url, "master.m3u8?{suffix}".format(suffix=suffix))
    def handle_module_info(self, args):
        """Extract the CDN URL and segmented FLV streams from module info.

        :param args: iterable of module-info dicts
        :raises PluginError: when stream formats are present but
                             ``flv/segmented`` is not among them
        :raises ModuleInfoNoStreams: when an entry without stream formats
                                     reports ``contentAvailable`` as False
        :return: dict that may contain ``cdn_url`` and ``streams``
        """
        res = {}
        for arg in args:
            if "cdnConfig" in arg:
                cdn_config = arg["cdnConfig"]
                scheme = cdn_config["protocol"]
                site = cdn_config["data"][0]["data"][0]["sites"][0]
                # Example:
                # LIVE: http://uhs-akamai.ustream.tv/
                # VOD:  http://vod-cdn.ustream.tv/
                # (params, query and fragment stay empty)
                res["cdn_url"] = urlunparse(
                    [scheme, site["host"], site["path"], "", "", ""])

            if "stream" not in arg:
                continue

            if arg["stream"].get("streamFormats"):
                data = arg["stream"]
                if data["streamFormats"].get("flv/segmented"):
                    flv_segmented = data["streamFormats"]["flv/segmented"]
                    access = flv_segmented["contentAccess"]["accessList"][0]
                    path = access["data"]["path"]

                    res["streams"] = [
                        dict(
                            stream_name="{0}p".format(
                                stream["videoCodec"]["height"]),
                            path=urljoin(
                                path,
                                stream["segmentUrl"].replace("%", "%s")),
                            hashes=flv_segmented["hashes"],
                            first_chunk=flv_segmented["chunkId"],
                            chunk_time=flv_segmented["chunkTime"],
                        )
                        for stream in flv_segmented["streams"]
                    ]
                elif data["streamFormats"]:
                    # supported formats:
                    # - flv/segmented
                    # unsupported formats:
                    # - flv
                    # - mp4
                    # - mp4/segmented
                    unsupported = ", ".join(data["streamFormats"].keys())
                    raise PluginError(
                        "Stream format is not supported: {0}".format(
                            unsupported))
            elif arg["stream"]["contentAvailable"] is False:
                log.error("This stream is currently offline")
                raise ModuleInfoNoStreams

        return res
Exemple #35
0
    def _get_streams(self):
        """
        Yield a "live" HLS stream for every player URL found in the site's
        JavaScript file.

        The page at ``self.url`` is searched with ``self.js_re``; the matched
        text is resolved against ``self.url`` and downloaded, and every
        ``self.player_re`` hit in that file that does not contain "adblock"
        is yielded as an ``HLSStream``. Nothing is yielded when the page has
        no ``js_re`` match.
        """
        page = http.get(self.url, headers={"User-Agent": useragents.CHROME})
        js_match = self.js_re.search(page.text)
        if not js_match:
            return

        self.logger.debug("Found js key: {0}", js_match.group(1))
        # the whole match (group 0) is used as the script location
        script = http.get(urljoin(self.url, js_match.group(0)))

        candidates = (found for found in self.player_re.findall(script.text)
                      if "adblock" not in found)
        for stream_url in candidates:
            yield "live", HLSStream(self.session, stream_url)
 def join(url, other):
     """
     Join ``other`` onto ``url``, treating ``url`` as a directory.

     ``other`` is returned unchanged when it already carries a scheme or when
     ``url`` is empty. Otherwise the path of ``url`` is given a trailing slash
     (if it lacks one) before the two are combined with ``urljoin``.
     """
     if urlparse(other).scheme or not url:
         return other

     scheme, netloc, path, query, fragment = urlsplit(url)
     # a trailing slash stops urljoin from replacing the last path segment
     if not path.endswith("/"):
         path += "/"
     directory = urlunsplit((scheme, netloc, path, query, fragment))
     return urljoin(directory, other)
Exemple #37
0
 def vod_data(self, vid=None):
     """
     Download the VOD data document referenced by the page at ``self.url``.

     :param vid: not used by this implementation
     :return: the decoded JSON of the VOD data URL, or None when the page
              has no ``_vod_re`` match
     """
     response = self.session.http.get(self.url)
     found = self._vod_re.search(response.text)
     if not found:
         return None

     # the whole match is resolved relative to the page URL
     data_url = urljoin(self.url, found.group(0))
     if not data_url:
         return None

     self.logger.debug("Found VOD data url: {0}", data_url)
     return self.session.http.json(self.session.http.get(data_url))
Exemple #38
0
 def vod_data(self, vid=None):
     """
     Download the VOD data document referenced by the page at ``self.url``.

     :param vid: not used by this implementation
     :return: the decoded JSON of the VOD data URL, or None when the page
              has no ``_vod_re`` match
     """
     http_client = self.session.http
     found = self._vod_re.search(http_client.get(self.url).text)
     # the whole match is resolved relative to the page URL
     data_url = urljoin(self.url, found.group(0)) if found else None
     if not data_url:
         return None

     log.debug("Found VOD data url: {0}".format(data_url))
     return http_client.json(http_client.get(data_url))
Exemple #39
0
    def find_videopage(self):
        """
        Locate the actual video page linked from ``self.url``.

        :return: the "path" group of ``_videopage_re`` resolved against
                 ``self.url``
        :raises NoStreamsError: when the page has no ``_videopage_re`` match
        """
        self.logger.debug("Not a videopage")
        page = http.get(self.url)

        found = self._videopage_re.search(page.text)
        if found:
            new_path = found.group("path")
            self.logger.debug("Found new path: {0}".format(new_path))
            return urljoin(self.url, new_path)

        self.logger.debug("No stream path, stream might be offline or invalid url.")
        raise NoStreamsError(self.url)
Exemple #40
0
    def _get_streams(self):
        """
        Yield the streams described by the page's player configuration.

        The page at ``self.url`` is reduced to a player data URL, which is
        resolved against ``self._URL_DATA_BASE`` and fetched as JSON. The
        JSON provides ``self.title`` and a quality -> stream URL mapping.
        If that mapping has an "auto" entry, it is parsed as an HLS variant
        playlist; otherwise every entry is yielded as an ``HTTPStream``.
        Nothing is yielded when extracting the player URL raises
        ``PluginError``.
        """
        try:
            # Reduce the page to the "url" value of the JSON stored in the
            # data-ctrl-player attribute of the first element carrying it.
            data_url = self.session.http.get(
                self.url,
                schema=validate.Schema(
                    validate.parse_html(),
                    validate.xml_find(".//*[@data-ctrl-player]"),
                    validate.get("data-ctrl-player"),
                    # the attribute is single-quoted; swap quotes before JSON parsing
                    validate.transform(lambda s: s.replace("'", "\"")),
                    validate.parse_json(), {"url": validate.text},
                    validate.get("url")))
        except PluginError:
            # NOTE(review): presumably raised when the page has no player
            # element or the schema does not match; confirm against validate
            return

        data_url = urljoin(self._URL_DATA_BASE, data_url)
        log.debug("Player URL: '{0}'", data_url)

        # The schema result is unpacked into the optional title and the first
        # entry of "_mediaArray".
        self.title, media = self.session.http.get(
            data_url,
            schema=validate.Schema(
                validate.parse_json(name="MEDIAINFO"), {
                    "mc": {
                        validate.optional("_title"):
                        validate.text,
                        "_mediaArray": [
                            validate.all(
                                {
                                    "_mediaStreamArray": [
                                        validate.all(
                                            {
                                                "_quality":
                                                validate.any(
                                                    validate.text, int),
                                                "_stream": [validate.url()],
                                            },
                                            # appears to produce (quality, first
                                            # stream URL) pairs; TODO confirm
                                            validate.union_get(
                                                "_quality", ("_stream", 0)))
                                    ]
                                }, validate.get("_mediaStreamArray"),
                                # the pairs become a quality -> URL dict
                                validate.transform(dict))
                        ]
                    }
                }, validate.get("mc"),
                validate.union_get("_title", ("_mediaArray", 0))))

        if media.get("auto"):
            # "auto" is treated as an HLS variant playlist
            for s in HLSStream.parse_variant_playlist(
                    self.session, media.get("auto")).items():
                yield s
        else:
            # qualities without a _QUALITY_MAP entry keep their raw name
            for quality, stream in media.items():
                yield self._QUALITY_MAP.get(quality, quality), HTTPStream(
                    self.session, stream)
Exemple #41
0
 def join(url, other):
     """
     Append ``other`` to ``url`` as if ``url`` named a directory.

     An ``other`` that has its own scheme is absolute and is returned as is;
     so is any ``other`` when ``url`` is empty. In every remaining case the
     path component of ``url`` is terminated with "/" and ``urljoin`` does
     the rest.
     """
     other_is_absolute = bool(urlparse(other).scheme)
     if other_is_absolute or not url:
         return other

     pieces = list(urlsplit(url))
     # index 2 is the path component of the split result
     pieces[2] = pieces[2] if pieces[2].endswith("/") else pieces[2] + "/"
     return urljoin(urlunsplit(pieces), other)
Exemple #42
0
def url_concat(base, *parts, **kwargs):
    """
    Append path segments to a URL one after another.

    Leading and trailing slashes are stripped from every segment, so a
    segment is always appended below the current URL instead of replacing
    its path.

    :param base: the URL to extend
    :param parts: the path segments to append, in order
    :param allow_fragments: passed on to ``urljoin`` (default True)
    :return: the extended URL; ``base`` unchanged when no parts are given
    """
    allow_fragments = kwargs.get("allow_fragments", True)
    joined = base
    for segment in parts:
        # force a directory-style base so the last component is kept
        directory = joined.rstrip("/") + "/"
        joined = urljoin(directory, segment.strip("/"), allow_fragments)
    return joined
Exemple #43
0
    def _get_streams(self):
        """
        Return the HLS variant streams referenced by the page, if any.

        When the page embeds a player through an iframe (``iframe_re``), the
        iframe page is searched instead. Returns None unless ``src_re``
        finds a URL ending in "m3u8".
        """
        page = http.get(self.url)

        # some pages have embedded players
        embedded = self.iframe_re.search(page.text)
        if embedded:
            page = http.get(urljoin(self.url, embedded.group("url")))

        src_match = self.src_re.search(page.text)
        if not src_match:
            return None

        playlist_url = src_match.group("url")
        if not playlist_url or not playlist_url.endswith("m3u8"):
            return None
        return HLSStream.parse_variant_playlist(self.session, playlist_url)
Exemple #44
0
    def handle_module_info(self, args):
        """
        Collect the CDN URL and stream descriptions from moduleInfo arguments.

        :param args: iterable of argument dicts from a moduleInfo command
        :return: dict that may contain "cdn_url" (built from a "cdnConfig"
                 argument) and "streams" (one dict per "flv/segmented"
                 stream with stream_name, path, hashes, first_chunk and
                 chunk_time)
        :raises PluginError: when stream formats are present but
                             "flv/segmented" is not among them
        :raises ModuleInfoNoStreams: when a stream argument has no formats
                                     and its contentAvailable is False
        """
        info = {}
        for entry in args:
            if "cdnConfig" in entry:
                cdn = entry["cdnConfig"]
                scheme = cdn["protocol"]
                site = cdn["data"][0]["data"][0]["sites"][0]
                # Example:
                # LIVE: http://uhs-akamai.ustream.tv/
                # VOD:  http://vod-cdn.ustream.tv/
                # (scheme, netloc, path, params, query, fragment)
                info["cdn_url"] = urlunparse(
                    [scheme, site["host"], site["path"], "", "", ""])

            if "stream" not in entry:
                continue

            stream_data = entry["stream"]
            if stream_data.get("streamFormats"):
                formats = stream_data["streamFormats"]
                segmented = formats.get("flv/segmented")
                if segmented:
                    base_path = segmented["contentAccess"]["accessList"][0]["data"]["path"]
                    info["streams"] = [
                        dict(
                            stream_name=variant["preset"],
                            # every "%" placeholder becomes a "%s" template field
                            path=urljoin(base_path,
                                         variant["segmentUrl"].replace("%", "%s")),
                            hashes=segmented["hashes"],
                            first_chunk=segmented["chunkId"],
                            chunk_time=segmented["chunkTime"],
                        )
                        for variant in segmented["streams"]
                    ]
                else:
                    # formats is known to be non-empty here, so only
                    # unsupported ones are present
                    # supported formats:
                    # - flv/segmented
                    # unsupported formats:
                    # - flv
                    # - mp4
                    # - mp4/segmented
                    raise PluginError("Stream format is not supported: {0}".format(
                        ", ".join(formats.keys())))
            elif stream_data["contentAvailable"] is False:
                log.error("This stream is currently offline")
                raise ModuleInfoNoStreams

        return info
Exemple #45
0
    def _get_streams(self):
        """
        Yield a single "live" HLS stream found on the page or in its iframe.

        If ``iframe_re`` matches the page, the iframe URL (group 1) is
        loaded and becomes the base for resolving the stream URL. Nothing
        is yielded when ``src_re`` finds no stream URL.
        """
        referer = self.url
        page = http.get(self.url)

        # Prefer the embedded page when the original one wraps it in an iframe
        embedded = self.iframe_re.search(page.text)
        if embedded:
            referer = embedded.group(1)
            page = http.get(embedded.group(1))

        # Search the page (original or embedded) for the stream URL
        src_match = self.src_re.search(page.text)
        if not src_match:
            return

        # There is no variant playlist, only a plain HLS Stream
        yield "live", HLSStream(self.session, urljoin(referer, src_match.group(1)))
Exemple #46
0
    def _get_streams(self):
        """
        Yield the HLS variant streams of the video described by the metadata.

        The metadata's "mediaUri" is resolved against ``self.api_url`` and
        fetched; the "manifestUrl" of its "playbackItem" is then parsed as
        a variant playlist.

        :raises PluginError: when the media request fails with a
                             "401 Client Error" (treated as geo-blocking)
        """
        meta = self.get_metadata()

        try:
            media_url = urljoin(self.api_url, meta["mediaUri"])
            response = self.session.http.get(media_url)
        except Exception as err:
            # anything other than a 401 is passed on untouched
            if "401 Client Error" not in str(err):
                raise err
            raise PluginError("This Video is not available in your country")

        log.debug("Found stream data")
        manifest = self.session.http.json(response)["playbackItem"]["manifestUrl"]
        log.debug("URL={0}".format(manifest))

        variants = HLSStream.parse_variant_playlist(self.session, manifest)
        for item in variants.items():
            yield item
Exemple #47
0
    def _get_streams(self):
        """
        Return the HLS variant streams for the program in ``self.url``.

        The page yields the API base URL; the program's "mediaelement"
        document below that base provides the "mediaUrl" playlist.
        """
        # The stream type (tv/radio) from the URL selects the settings cookie.
        kind = _url_re.match(self.url).group(1).upper()
        settings_cookie = {"NRK_PLAYER_SETTINGS_{0}".format(kind): COOKIE_PARAMS}

        # Construct API URL for this program.
        api_base = self.session.http.get(self.url, cookies=settings_cookie,
                                         schema=_schema)
        program = _id_re.search(self.url).group(1)
        element_url = urljoin(api_base, "mediaelement/{0}".format(program))

        # Extract media URL.
        response = self.session.http.get(element_url, cookies=settings_cookie)
        element = self.session.http.json(response, schema=_mediaelement_schema)

        return HLSStream.parse_variant_playlist(self.session, element["mediaUrl"])
Exemple #48
0
    def _get_streams(self):
        """
        Yield every stream listed in the page's livestream XML document.

        Each entry's URL decides the stream type: ".m3u8" is parsed as an
        HLS variant playlist, ".f4m" as an HDS manifest and ".mp4" is
        yielded as a plain HTTP stream named after its bitrate. An IOError
        while handling one entry is logged and the entry is skipped.
        Nothing is yielded when the page has no player data URL.
        """
        player_url = self.session.http.get(self.url, schema=self._player_url_schema)
        if not player_url:
            return

        response = self.session.http.get(urljoin(self.url, player_url))
        entries = self.session.http.xml(response, schema=self._livestream_schema)

        for entry in entries:
            stream_url = entry["url"]
            try:
                if ".m3u8" in stream_url:
                    variants = HLSStream.parse_variant_playlist(
                        self.session, stream_url, name_key="bitrate")
                    for item in variants.items():
                        yield item
                elif ".f4m" in stream_url:
                    manifest = HDSStream.parse_manifest(
                        self.session, stream_url, pvswf=self.swf_url, is_akamai=True)
                    for item in manifest.items():
                        yield item
                elif ".mp4" in stream_url:
                    quality = "{0}k".format(entry["bitrate"])
                    yield quality, HTTPStream(self.session, stream_url)
            except IOError as err:
                self.logger.warning("Error parsing stream: {0}", err)
Exemple #49
0
    def _get_streams(self):
        """
        Connect to the UStream API and build the available UHS streams.

        Up to five messages are read from the API. "moduleInfo" messages
        are merged through ``handle_module_info`` and "reject" messages are
        passed to ``handle_reject``. As soon as both the stream list and
        the CDN URL are known, a mapping of stream name to ``UHSStream`` is
        returned.

        :return: dict of streams, or None when there is no media id, the
                 API reports no streams, or five messages were not enough
        """
        media_id, application = self._get_media_app()
        if not media_id:
            return None

        api = UHSClient(media_id, application, referrer=self.url, cluster="live",
                        password=self.get_option("password"))
        log.debug("Connecting to UStream API: media_id={0}, application={1}, referrer={2}, cluster={3}",
                  media_id, application, self.url, "live")
        api.connect()

        collected = {}
        # do not use to many tries, it might take longer for a timeout
        # when streamFormats is {} and contentAvailable is True
        for _ in range(5):
            message = api.recv()
            try:
                command = message["cmd"]
                if command == "moduleInfo":
                    info = self.handle_module_info(message["args"])
                    if info:
                        collected.update(info)
                elif command == "reject":
                    self.handle_reject(api, message["args"])
                else:
                    log.debug("Unexpected `{0}` command".format(command))
                    log.trace("{0!r}".format(message))
            except ModuleInfoNoStreams:
                return None

            stream_list = collected.get("streams")
            cdn_url = collected.get("cdn_url")
            if stream_list and cdn_url:
                return {
                    entry["stream_name"]: UHSStream(
                        session=self.session,
                        api=api,
                        first_chunk_data=ChunkData(
                            entry["first_chunk"],
                            entry["chunk_time"],
                            entry["hashes"],
                            datetime.datetime.now(tz=utc)),
                        template_url=urljoin(cdn_url, entry["path"]),
                    )
                    for entry in stream_list
                }
Exemple #50
0
    def _get_streams(self):
        """
            Find all the streams for the ITV url
            :return: generator of (name, stream) pairs from every media
                     file's HLS variant playlist
        """
        self.session.http.headers.update({"User-Agent": useragents.FIREFOX})
        page_info = self.video_info()
        playlist_endpoint = page_info.get("data-html5-playlist") or page_info.get("data-video-id")

        device_payload = json.dumps(self.device_info)
        hmac_header = {"hmac": page_info.get("data-video-hmac")}
        response = self.session.http.post(playlist_endpoint,
                                          data=device_payload,
                                          headers=hmac_header)
        data = self.session.http.json(response, schema=self._video_info_schema)

        log.debug("Video ID info response: {0}".format(data))

        playlist = data['Playlist']
        # catch-up playlists get explicit "<pixels>_<bitrate>" stream names
        name_fmt = "{pixels}_{bitrate}" if playlist['VideoType'] == "CATCHUP" else None
        video = playlist['Video']

        for media in video['MediaFiles']:
            media_url = urljoin(video['Base'], media['Href'])
            variants = HLSStream.parse_variant_playlist(self.session, media_url,
                                                        name_fmt=name_fmt)
            for item in variants.items():
                yield item
Exemple #51
0
    def _get_streams(self):
        """
        Return the HLS streams referenced by the page or its embedded player.

        :return: the variant playlist streams; a single "live" stream when
                 the playlist has no variants; None when no "m3u8" source
                 is found or the playlist body is 10 characters or shorter
        """
        page = self.session.http.get(self.url)

        # some pages have embedded players
        embedded = self.iframe_re.search(page.text)
        if embedded:
            page = self.session.http.get(urljoin(self.url, embedded.group("url")))

        src_match = self.src_re.search(page.text)
        playlist_url = src_match.group("url") if src_match else None
        if not playlist_url or not playlist_url.endswith("m3u8"):
            return None

        # do not open empty m3u8 files
        if len(self.session.http.get(playlist_url).text) <= 10:
            log.error("This stream is currently offline")
            return None

        log.debug("URL={0}".format(playlist_url))
        variants = HLSStream.parse_variant_playlist(self.session, playlist_url)
        # fall back to the playlist itself when it lists no variants
        return variants or {"live": HLSStream(self.session, playlist_url)}
Exemple #52
0
 def host(self):
     """
     Return the API endpoint URL, i.e. the host with the path "/1/ustream".

     An explicitly set ``self._host`` wins; otherwise the host is built from
     ``API_URL`` with a random number, the media id, the application and
     the "lp-"-prefixed cluster name.
     """
     base = self._host
     if not base:
         base = self.API_URL.format(randint(0, 0xffffff),
                                    self.media_id,
                                    self.application,
                                    "lp-" + self._cluster)
     return urljoin(base, "/1/ustream")
Exemple #53
0
    def _get_streams(self):
        """
        Yield the HLS variant streams of the page, optionally muxed with
        subtitles.

        The page supplies "clientlibs" (used to obtain a public key) and
        "flashvars". An encrypted init request is sent to the URL in
        ``flashvars["init"]``; its decrypted response provides "play_url"
        and, optionally, "caption_url". When the "mux_subtitles" option is
        set and FFMPEGMuxer is usable, every stream is wrapped in a
        ``MuxedStream`` carrying the subtitle tracks.

        :raises PluginError: when no public key could be obtained
        """
        page = http.get(self.url, schema=_schema)
        if not page:
            return

        pubkey_pem = get_public_key(self.cache, urljoin(self.url, page["clientlibs"]))
        if not pubkey_pem:
            raise PluginError("Unable to get public key")

        flashvars = page["flashvars"]

        # millisecond timestamp sent as "cashPath"
        # NOTE(review): presumably a cache-busting parameter; confirm
        params = {
            "cashPath": int(time.time() * 1000)
        }
        res = http.get(urljoin(self.url, flashvars["country"]), params=params)
        if not res:
            return
        language = http.xml(res, schema=_language_schema)

        # only the non-empty flashvars of this fixed key set are forwarded
        api_params = {}
        for key in ("ss_id", "mv_id", "device_cd", "ss1_prm", "ss2_prm", "ss3_prm"):
            if flashvars.get(key, ""):
                api_params[key] = flashvars[key]

        # 256 random bits converted to a 32-byte string; used below as the
        # AES key for both the request payload and the response
        aeskey = number.long_to_bytes(random.getrandbits(8 * 32), 32)

        # "d" carries the AES-encrypted api_params, "a" the RSA-encrypted AES key
        params = {
            "s": flashvars["s"],
            "c": language,
            "e": self.url,
            "d": aes_encrypt(aeskey, json.dumps(api_params)),
            "a": rsa_encrypt(pubkey_pem, aeskey)
        }
        res = http.get(urljoin(self.url, flashvars["init"]), params=params)
        if not res:
            return
        rtn = http.json(res, schema=_init_schema)
        if not rtn:
            return

        init_data = parse_json(aes_decrypt(aeskey, rtn))

        # only accept https URLs of the form /i/.../master.m3u8
        parsed = urlparse(init_data["play_url"])
        if parsed.scheme != "https" or not parsed.path.startswith("/i/") or not parsed.path.endswith("/master.m3u8"):
            return
        hlsstream_url = init_data["play_url"]

        streams = HLSStream.parse_variant_playlist(self.session, hlsstream_url)

        if "caption_url" in init_data:
            if self.get_option("mux_subtitles") and FFMPEGMuxer.is_usable(self.session):
                res = http.get(init_data["caption_url"])
                srt = http.xml(res, ignore_ns=True, schema=_xml_to_srt_schema)
                subfiles = []
                metadata = {}
                # The loop variable `srt` shadows the parsed list. This is
                # safe because enumerate(srt) is evaluated when the generator
                # expression is created, before the name is rebound.
                for i, lang, srt in ((i, s[0], s[1]) for i, s in enumerate(srt)):
                    # each subtitle track goes into its own temporary file,
                    # rewound so it can be read from the start
                    subfile = tempfile.TemporaryFile()
                    subfile.write(srt.encode("utf8"))
                    subfile.seek(0)
                    subfiles.append(FileStream(self.session, fileobj=subfile))
                    metadata["s:s:{0}".format(i)] = ["language={0}".format(lang)]

                for n, s in streams.items():
                    # maps has one index per subtitle track plus one more
                    yield n, MuxedStream(self.session, s, *subfiles,
                                         maps=list(range(0, len(metadata) + 1)),
                                         metadata=metadata)
                return
            else:
                # subtitles exist but are not muxed; only report their URL
                self.logger.info("Subtitles: {0}".format(init_data["caption_url"]))

        for s in streams.items():
            yield s
Exemple #54
0
def absolute_url(baseurl, url):
    """
    Return ``url`` as an absolute URL.

    URLs that already start with ``http://`` or ``https://`` are returned
    unchanged; everything else is resolved against ``baseurl``.

    The scheme separator is part of the check on purpose: testing only for
    the prefix "http" would misclassify relative paths such as
    "httpstream/index.m3u8" as absolute and return them unresolved.

    :param baseurl: the URL that relative references are resolved against
    :param url: an absolute or relative URL
    :return: the absolute URL
    """
    if url.startswith(("http://", "https://")):
        return url
    return urljoin(baseurl, url)
Exemple #55
0
    def _get_streams(self):
        """
        Yield the HLS variant streams of the room named in ``self.url``.

        The room page is requested first to obtain cookies and to detect a
        redirect to a regional subdomain. The room data is then requested
        from the AMF gateway, and the HLS playlist URL is built from the
        returned video server URL and performer name.

        :raises NoStreamsError: when the page redirects to a profile page
                                (stream offline) or the response is not ok
        :raises PluginError: when no cookies were received or the gateway
                             answers with a status code other than 200
        """
        match = url_re.match(self.url)

        stream_page_scheme = 'https'
        stream_page_domain = match.group(4)
        stream_page_path = match.group(5)
        country_code = CONST_DEFAULT_COUNTRY_CODE

        # create http session and set headers
        http_session = http
        http_session.headers.update(CONST_HEADERS)

        # get cookies
        r = http_session.get(urlunparse((stream_page_scheme, stream_page_domain, stream_page_path, '', '', '')))

        # redirect to profile page means stream is offline
        if '/profile/' in r.url:
            raise NoStreamsError(self.url)
        if not r.ok:
            self.logger.debug("Status code for {0}: {1}", r.url, r.status_code)
            raise NoStreamsError(self.url)
        if len(http_session.cookies) == 0:
            raise PluginError("Can't get a cookies")

        final_netloc = urlparse(r.url).netloc
        if final_netloc != stream_page_domain:
            # then redirected to regional subdomain
            country_code = final_netloc.split('.')[0].lower()

        # time to set variables
        baseurl = urlunparse((stream_page_scheme, final_netloc, '', '', '', ''))
        amf_gateway_url = urljoin(baseurl, CONST_AMF_GATEWAY_LOCATION)
        stream_page_url = urljoin(baseurl, stream_page_path)

        headers = {
            'User-Agent': useragents.CHROME,
            'Referer': stream_page_url,
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'X-Requested-With': 'XMLHttpRequest'
        }

        data = 'method=getRoomData&args%5B%5D={0}&args%5B%5D=false'.format(stream_page_path)
        self.logger.debug('DATA: {0}'.format(data))
        # send request and close http-session
        r = http_session.post(url=amf_gateway_url,
                              headers=headers,
                              params={CONST_AMF_GATEWAY_PARAM: country_code},
                              data=data)
        http_session.close()

        if r.status_code != 200:
            # the message must be formatted here: an exception does not
            # interpolate extra positional arguments into its first one
            raise PluginError("unexpected status code for {0}: {1}".format(r.url, r.status_code))

        stream_source_info = amf_msg_schema.validate(json.loads(r.text))
        self.logger.debug("source stream info:\n{0}", stream_source_info)

        if not stream_source_info:
            return

        urlnoproto = stream_source_info['localData']['videoServerUrl']
        urlnoproto = update_scheme('https://', urlnoproto)
        performer = stream_source_info['performerData']['username']

        # the formatted URL is never empty, so no emptiness check is needed
        hls_url = '{0}/hls/stream_{1}/playlist.m3u8'.format(urlnoproto, performer)

        self.logger.debug('HLS URL: {0}'.format(hls_url))
        try:
            for s in HLSStream.parse_variant_playlist(self.session, hls_url, headers=headers).items():
                yield s
        except Exception as e:
            # a 404 on the playlist is reported as an offline/private room
            if '404' in str(e):
                self.logger.error('Stream is currently offline or private')
            else:
                self.logger.error(str(e))
            return