def test_xml_findall(self):
    """Check that ``xml_findall("child")`` yields the ten appended children."""
    expected = [Element("child") for _ in range(10)]
    parent = Element("parent")
    parent.extend(expected)

    found = validate(xml_findall("child"), parent)
    assert found == expected
class ard_live(Plugin):
    """Plugin for pages on daserste.de.

    The page is searched for a ``dataURL`` entry; the XML document it points
    to lists the assets from which HLS, HDS or plain HTTP streams are built.
    """

    swf_url = "http://live.daserste.de/lib/br-player/swf/main.swf"
    # Dots are escaped so that only the literal host names match.
    _url_re = re.compile(r"https?://(www\.)?daserste\.de/", re.I)
    # Captures the quoted value that follows ``dataURL :``.
    _player_re = re.compile(r'''dataURL\s*:\s*(?P<q>['"])(?P<url>.*?)(?P=q)''')
    # Yields the captured URL, or None when the page has no dataURL entry.
    _player_url_schema = validate.Schema(
        validate.transform(_player_re.search),
        validate.any(
            None,
            validate.all(validate.get("url"), validate.text)
        )
    )
    # Takes the first <assets> group that is not of type "subtitles" and
    # extracts the file name and video bitrate of each <asset> inside it.
    _livestream_schema = validate.Schema(
        validate.xml_findall(".//assets"),
        validate.filter(lambda x: x.attrib.get("type") != "subtitles"),
        validate.get(0),
        validate.xml_findall(".//asset"),
        [
            validate.union({
                "url": validate.xml_findtext("./fileName"),
                "bitrate": validate.xml_findtext("./bitrateVideo")
            })
        ]
    )

    @classmethod
    def can_handle_url(cls, url):
        """Return True when *url* matches ``_url_re``."""
        return cls._url_re.match(url) is not None

    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for every usable asset of the page.

        Nothing is yielded when the page carries no ``dataURL`` entry.
        An ``IOError`` raised while parsing one asset is logged as a warning
        and the remaining assets are still processed.
        """
        data_url = http.get(self.url, schema=self._player_url_schema)
        if not data_url:
            return

        res = http.get(urljoin(self.url, data_url))
        stream_info = http.xml(res, schema=self._livestream_schema)

        for stream in stream_info:
            url = stream["url"]
            if not url:
                # An asset without file name text cannot be turned into a
                # stream; the substring checks below would raise TypeError.
                continue

            try:
                if ".m3u8" in url:
                    variants = HLSStream.parse_variant_playlist(
                        self.session, url, name_key="bitrate")
                    for s in variants.items():
                        yield s
                elif ".f4m" in url:
                    manifest = HDSStream.parse_manifest(
                        self.session, url,
                        pvswf=self.swf_url, is_akamai=True)
                    for s in manifest.items():
                        yield s
                elif ".mp4" in url:
                    name = "{0}k".format(stream["bitrate"])
                    yield name, HTTPStream(self.session, url)
            except IOError as err:
                self.logger.warning("Error parsing stream: {0}", err)
class Rtve(Plugin):
    """Plugin for rtve.es pages.

    The numeric content id is scraped from the page (``data-id="..."``) and
    used to query the CDN list through ``ZTNRClient`` as well as the video
    and subtitle JSON APIs.
    """

    secret_key = base64.b64decode("eWVMJmRhRDM=")
    content_id_re = re.compile(r'data-id\s*=\s*"(\d+)"')
    url_re = re.compile(
        r"""
        https?://(?:www\.)?rtve\.es/(?:directo|noticias|television|deportes|alacarta|drmn)/.*?/?
        """, re.VERBOSE)
    # One entry per <preset>: its "type" attribute and the text of its <url>s.
    cdn_schema = validate.Schema(
        validate.transform(partial(parse_xml, invalid_char_entities=True)),
        validate.xml_findall(".//preset"),
        [
            validate.union({
                "quality": validate.all(validate.getattr("attrib"),
                                        validate.get("type")),
                "urls": validate.all(
                    validate.xml_findall(".//url"),
                    [validate.getattr("text")]
                )
            })
        ]
    )
    subtitles_api = "http://www.rtve.es/api/videos/{id}/subtitulos.json"
    # Reduces the response to the list found under page -> items.
    subtitles_schema = validate.Schema(
        {
            "page": {
                "items": [{
                    "src": validate.url(),
                    "lang": validate.text
                }]
            }
        },
        validate.get("page"),
        validate.get("items")
    )
    video_api = "http://www.rtve.es/api/videos/{id}.json"
    # Reduces the response to the first element of page -> items.
    video_schema = validate.Schema(
        {
            "page": {
                "items": [{
                    "qualities": [{
                        "preset": validate.text,
                        "height": int
                    }]
                }]
            }
        },
        validate.get("page"),
        validate.get("items"),
        validate.get(0)
    )
    options = PluginOptions({"mux_subtitles": False})

    @classmethod
    def can_handle_url(cls, url):
        """Return True when *url* matches ``url_re``."""
        return cls.url_re.match(url) is not None

    def __init__(self, url):
        Plugin.__init__(self, url)
        self.zclient = ZTNRClient(self.secret_key)
        # Update rather than replace: ``http`` is shared, and assigning a new
        # dict would drop every other header already set on it.
        http.headers.update({"User-Agent": useragents.SAFARI_8})

    def _get_content_id(self):
        """Return the page's ``data-id`` as an int, or None if not present."""
        res = http.get(self.url)
        m = self.content_id_re.search(res.text)
        return m and int(m.group(1))

    def _get_subtitles(self, content_id):
        """Return the subtitle items (``src``/``lang`` dicts) for *content_id*."""
        res = http.get(self.subtitles_api.format(id=content_id))
        return http.json(res, schema=self.subtitles_schema)

    def _get_quality_map(self, content_id):
        """Return a dict mapping quality names to ``"<height>p"`` labels.

        The presets ``MED``, ``HIGH`` and ``ORIGINAL`` are keyed as
        ``Media``, ``Alta`` and ``Original``; any other preset is keyed by
        its own name.
        """
        res = http.get(self.video_api.format(id=content_id))
        data = http.json(res, schema=self.video_schema)

        preset_names = {
            "MED": "Media",
            "HIGH": "Alta",
            "ORIGINAL": "Original"
        }
        qmap = {}
        for item in data["qualities"]:
            qname = preset_names.get(item["preset"], item["preset"])
            qmap[qname] = u"{0}p".format(item["height"])
        return qmap

    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for the content found on the page.

        Nothing is yielded when no content id is found. When the
        ``mux_subtitles`` option is set and subtitles exist, every stream is
        wrapped in a ``MuxedStream`` carrying one subtitle stream per
        language.
        """
        content_id = self._get_content_id()
        if not content_id:
            return

        self.logger.debug("Found content with id: {0}", content_id)
        stream_data = self.zclient.get_cdn_list(content_id,
                                                schema=self.cdn_schema)
        streams = []
        quality_map = None

        for stream in stream_data:
            for url in stream["urls"]:
                if url.endswith("m3u8"):
                    try:
                        streams.extend(
                            HLSStream.parse_variant_playlist(
                                self.session, url).items())
                    except (IOError, OSError):
                        self.logger.debug("Failed to load m3u8 url: {0}", url)
                elif (url.endswith(("mp4", "mov", "avi")) and
                        http.head(url, raise_for_status=False).status_code == 200):
                    if quality_map is None:
                        # only make the request when it is necessary
                        quality_map = self._get_quality_map(content_id)
                    # rename the HTTP sources to match the HLS sources
                    quality = quality_map.get(stream["quality"],
                                              stream["quality"])
                    streams.append((quality, HTTPStream(self.session, url)))

        subtitles = None
        if self.get_option("mux_subtitles"):
            subtitles = self._get_subtitles(content_id)

        if subtitles:
            substreams = {}
            for subtitle in subtitles:
                substreams[subtitle["lang"]] = HTTPStream(self.session,
                                                          subtitle["src"])
            for q, s in streams:
                yield q, MuxedStream(self.session, s, subtitles=substreams)
        else:
            for s in streams:
                yield s
class AdultSwim(Plugin):
    """Plugin for adultswim.com/videos pages.

    URLs under ``/videos/streams`` are treated as live streams, everything
    else as VOD. Both paths start from the ``AS_INITIAL_DATA`` JSON blob
    embedded in the page.
    """

    API_URL = "http://www.adultswim.com/videos/api/v2/videos/{id}?fields=stream"
    vod_api = "http://www.adultswim.com/videos/api/v0/assets"

    # Groups: "streams" marker (live), show name, episode name.
    url_re = re.compile(
        r"""https?://(?:www\.)?adultswim\.com/videos
            (?:/(streams))?
            (?:/([^/]+))?
            (?:/([^/]+))?
            """, re.VERBOSE)
    _stream_data_re = re.compile(r"(?:__)?AS_INITIAL_DATA(?:__)? = (\{.*?});", re.M | re.DOTALL)

    live_schema = validate.Schema({
        u"streams": {
            validate.text: {
                u"stream": validate.text,
                validate.optional(u"isLive"): bool,
                u"archiveEpisodes": [{
                    u"id": validate.text,
                    u"slug": validate.text,
                }]
            }
        }
    })
    # Reduces the blob to show -> sluggedVideo -> id.
    vod_id_schema = validate.Schema(
        {
            u"show": {
                u"sluggedVideo": {
                    u"id": validate.text
                }
            }
        },
        validate.transform(lambda x: x["show"]["sluggedVideo"]["id"])
    )
    _api_schema = validate.Schema({
        u'status': u'ok',
        u'data': {
            u'stream': {
                u'assets': [{
                    u'url': validate.url()
                }]
            }
        }
    })
    # One {"bitrate", "url"} dict per <file> element found under <files>.
    _vod_api_schema = validate.Schema(
        validate.all(
            validate.xml_findall(".//files/file"),
            [
                validate.xml_element,
                validate.transform(lambda v: {
                    "bitrate": v.attrib.get("bitrate"),
                    "url": v.text
                })
            ]
        )
    )

    @classmethod
    def can_handle_url(cls, url):
        """Return True when *url* matches ``url_re``."""
        return cls.url_re.match(url) is not None

    def _make_hls_hds_stream(self, func, stream, *args, **kwargs):
        """Call *func* with the session and the ``url`` entry of *stream*."""
        return func(self.session, stream["url"], *args, **kwargs)

    def _get_show_streams(self, stream_data, show, episode, platform="desktop"):
        """Yield ``(name, stream)`` pairs for a VOD episode.

        :param stream_data: regex match whose group 1 is the page's JSON blob
        :param show: show name (not used by the lookup itself)
        :param episode: episode name (not used by the lookup itself)
        :param platform: value sent as the ``platform`` query parameter
        """
        video_id = parse_json(stream_data.group(1), schema=self.vod_id_schema)
        res = http.get(self.vod_api, params={
            "platform": platform,
            "id": video_id
        })

        # create a unique list of the stream manifest URLs, keeping the
        # order in which they first appear
        streams = []
        seen_urls = set()
        for stream in parse_xml(res.text, schema=self._vod_api_schema):
            if stream["url"] not in seen_urls:
                seen_urls.add(stream["url"])
                streams.append(stream)

        mapper = StreamMapper(lambda fmt, strm: strm["url"].endswith(fmt))
        mapper.map(".m3u8", self._make_hls_hds_stream,
                   HLSStream.parse_variant_playlist)
        mapper.map(".f4m", self._make_hls_hds_stream,
                   HDSStream.parse_manifest, is_akamai=True)
        mapper.map(".mp4", lambda s: (s["bitrate"] + "k",
                                      HTTPStream(self.session, s["url"])))

        for q, s in mapper(streams):
            yield q, s

    def _get_live_stream(self, stream_data, show, episode=None):
        """Yield ``(name, stream)`` pairs for a live show or an archived episode.

        :param stream_data: regex match whose group 1 is the page's JSON blob
        :param show: key of the show inside the blob's ``streams`` mapping
        :param episode: slug of an archived episode to replay, if any

        An error is logged and nothing is yielded when no stream id can be
        determined.
        """
        # parse the stream info as json
        stream_info = parse_json(stream_data.group(1), schema=self.live_schema)

        show_info = stream_info[u"streams"].get(show)
        if show_info is None:
            # Unknown show: report it the same way as a missing stream id
            # instead of failing with a KeyError.
            self.logger.error(
                "Couldn't find the stream ID for this stream: {0}".format(
                    show))
            return

        # get the stream ID
        stream_id = None
        archive = show_info[u"archiveEpisodes"]
        if episode:
            self.logger.debug("Loading replay of episode: {0}/{1}",
                              show, episode)
            for epi in archive:
                if epi[u"slug"] == episode:
                    stream_id = epi[u"id"]
        elif show_info.get("isLive") or not archive:
            self.logger.debug("Loading LIVE streams for: {0}", show)
            stream_id = show_info[u"stream"]
        else:
            # off-air: the branch above already covers an empty archive, so
            # at least one archived episode exists here
            epi = archive[0]
            self.logger.debug("Loading replay of episode: {0}/{1}",
                              show, epi[u"slug"])
            stream_id = epi[u"id"]

        if not stream_id:
            self.logger.error(
                "Couldn't find the stream ID for this stream: {0}".format(
                    show))
            return

        api_url = self.API_URL.format(id=stream_id)
        res = http.get(api_url, headers={"User-Agent": useragents.SAFARI_8})
        stream_data = http.json(res, schema=self._api_schema)

        mapper = StreamMapper(lambda fmt, surl: surl.endswith(fmt))
        mapper.map(".m3u8", HLSStream.parse_variant_playlist, self.session)
        mapper.map(".f4m", HDSStream.parse_manifest, self.session)

        stream_urls = [
            asset[u"url"]
            for asset in stream_data[u'data'][u'stream'][u'assets']
        ]
        for q, s in mapper(stream_urls):
            yield q, s

    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for the page, skipping repeated URLs.

        An error is logged and nothing is yielded when the page does not
        contain the ``AS_INITIAL_DATA`` blob.
        """
        # get the page
        url_match = self.url_re.match(self.url)
        live_stream, show_name, episode_name = url_match.groups()
        if live_stream:
            show_name = show_name or "live-stream"

        res = http.get(self.url, headers={"User-Agent": useragents.SAFARI_8})

        # find the big blob of stream info in the page
        stream_data = self._stream_data_re.search(res.text)
        if not stream_data:
            self.logger.error(
                "Couldn't find the stream data for this stream: {0}".format(
                    show_name))
            return

        if live_stream:
            streams = self._get_live_stream(stream_data, show_name,
                                            episode_name)
        else:
            self.logger.debug("Loading VOD streams for: {0}/{1}",
                              show_name, episode_name)
            streams = self._get_show_streams(stream_data, show_name,
                                             episode_name)

        # De-dup the streams, some of the mobile streams overlap the
        # desktop streams
        dups = set()
        for q, s in streams:
            if hasattr(s, "args") and "url" in s.args:
                if s.args["url"] not in dups:
                    yield q, s
                    dups.add(s.args["url"])
            else:
                yield q, s
} ASSET_URL = "http://prima.tv4play.se/api/web/asset/{0}/play" SWF_URL = "http://www.tv4play.se/flash/tv4video.swf" _url_re = re.compile( r""" http(s)?://(www\.)? (?: tv4play.se/program/[^\?/]+| fotbollskanalen.se/video ) .+(video_id|videoid)=(?P<video_id>\d+) """, re.VERBOSE) _asset_schema = validate.Schema(validate.xml_findall("items/item"), [ validate.all( validate.xml_findall("*"), validate.map(lambda e: (e.tag, e.text)), validate.transform(dict), { "base": validate.text, "bitrate": validate.all(validate.text, validate.transform(int)), "url": validate.text }) ]) class TV4Play(Plugin): @classmethod def can_handle_url(cls, url): return _url_re.match(url)
class RTE(Plugin):
    """Plugin for the rte.ie player, covering both VOD shows and live channels."""

    VOD_API_URL = 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id={0}'
    LIVE_API_URL = 'http://feeds.rasset.ie/livelistings/playlist'

    # Exactly one of the named groups is set: video_id (VOD) or channel_id (live).
    _url_re = re.compile(r'http://www\.rte\.ie/player/[a-z0-9]+/(?:show/[a-z-]+-[0-9]+/(?P<video_id>[0-9]+)|live/(?P<channel_id>[0-9]+))')

    # 'shows' must hold a single show and its 'media:group' a single entry,
    # which is turned into the list [HLS URL, HDS URL].
    _vod_api_schema = validate.Schema({
        'current_date': validate.text,
        'shows': validate.Schema(
            list,
            validate.length(1),
            validate.get(0),
            validate.Schema({
                'valid_start': validate.text,
                'valid_end': validate.text,
                'media:group': validate.Schema(
                    list,
                    validate.length(1),
                    validate.get(0),
                    validate.Schema(
                        {
                            'hls_server': validate.url(),
                            'hls_url': validate.text,
                            'hds_server': validate.url(),
                            'hds_url': validate.text,
                            # The API also returns RTMP streams under 'url',
                            # but they don't seem to work, so they are
                            # neither validated nor used.
                        },
                        validate.transform(lambda group: [
                            group['hls_server'] + group['hls_url'],
                            group['hds_server'] + group['hds_url'],
                        ])
                    ),
                ),
            }),
        )
    })

    # List of the 'url' attributes of every MRSS <content> element.
    _live_api_schema = validate.Schema(
        validate.xml_findall('.//{http://search.yahoo.com/mrss/}content'),
        [
            validate.all(
                validate.xml_element(attrib={'url': validate.url()}),
                validate.get('url')
            )
        ]
    )

    # Single-item list reduced to its 'fullUrl', which is a URL or 'none'.
    _live_api_iphone_schema = validate.Schema(
        list,
        validate.length(1),
        validate.get(0),
        validate.Schema(
            {'fullUrl': validate.any(validate.url(), 'none')},
            validate.get('fullUrl')
        )
    )

    @classmethod
    def can_handle_url(cls, url):
        """Return the regex match for *url* (None when it does not match)."""
        matched = RTE._url_re.match(url)
        return matched

    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for the VOD show or live channel.

        For VOD, an error is logged and nothing is yielded when the API's
        current date lies outside the show's validity window.
        """
        url_parts = self._url_re.match(self.url)
        vod_id = url_parts.group('video_id')

        if vod_id is None:
            # Live
            channel = url_parts.group('channel_id')

            # Desktop streams
            desktop_res = http.get(self.LIVE_API_URL,
                                   params={'channelid': channel})
            streams = http.xml(desktop_res, schema=self._live_api_schema)

            # HLS stream advertised for the iphone platform
            iphone_res = http.get(self.LIVE_API_URL,
                                  params={'channelid': channel,
                                          'platform': 'iphone'})
            iphone_url = http.json(iphone_res,
                                   schema=self._live_api_iphone_schema)
            if iphone_url != 'none':
                streams.append(iphone_url)
        else:
            # VOD
            api_res = http.get(self.VOD_API_URL.format(vod_id))
            info = http.json(api_res, schema=self._vod_api_schema)

            # Make sure the content is inside its validity window
            now = datetime.strptime(info['current_date'],
                                    '%Y-%m-%dT%H:%M:%S.%f')
            window_start = datetime.strptime(info['shows']['valid_start'],
                                             '%Y-%m-%dT%H:%M:%S')
            window_end = datetime.strptime(info['shows']['valid_end'],
                                           '%Y-%m-%dT%H:%M:%S')
            if not (window_start <= now <= window_end):
                self.logger.error('Failed to access stream, may be due to expired content')
                return

            streams = info['shows']['media:group']

        for stream_url in streams:
            if '.f4m' in stream_url:
                hds = HDSStream.parse_manifest(self.session, stream_url)
                for pair in hds.items():
                    yield pair
            if '.m3u8' in stream_url:
                hls = HLSStream.parse_variant_playlist(self.session,
                                                       stream_url)
                for pair in hls.items():
                    yield pair
validate.optional("lsPlayerSwfUrl"): validate.text, validate.optional("hdPlayerSwfUrl"): validate.text }) _smil_schema = validate.Schema(validate.union({ "http_base": validate.all( validate.xml_find("{http://www.w3.org/2001/SMIL20/Language}head/" "{http://www.w3.org/2001/SMIL20/Language}meta" "[@name='httpBase']"), validate.xml_element(attrib={ "content": validate.text }), validate.get("content") ), "videos": validate.all( validate.xml_findall("{http://www.w3.org/2001/SMIL20/Language}body/" "{http://www.w3.org/2001/SMIL20/Language}switch/" "{http://www.w3.org/2001/SMIL20/Language}video"), [ validate.all( validate.xml_element(attrib={ "src": validate.text, "system-bitrate": validate.all( validate.text, validate.transform(int) ) }), validate.transform( lambda e: (e.attrib["src"], e.attrib["system-bitrate"]) ) ) ],
"_stream": validate.any(validate.text, [validate.text]), "_quality": validate.any(int, validate.text) }] }] }) _smil_schema = validate.Schema( validate.union({ "base": validate.all(validate.xml_find("head/meta"), validate.get("base"), validate.url(scheme="http")), "cdn": validate.all(validate.xml_find("head/meta"), validate.get("cdn")), "videos": validate.all(validate.xml_findall("body/seq/video"), [validate.get("src")]) })) class ard_mediathek(Plugin): @classmethod def can_handle_url(cls, url): return _url_re.match(url) def _get_http_streams(self, info): name = QUALITY_MAP.get(info["_quality"], "vod") urls = info["_stream"] if not isinstance(info["_stream"], list): urls = [urls]
} ), "clientlibs": validate.all( validate.transform(_clientlibs_re.search), validate.get(2), validate.text ) }) ) _language_schema = validate.Schema( validate.xml_findtext("./country_code") ) _xml_to_srt_schema = validate.Schema( validate.xml_findall(".//body/div"), [ validate.union([validate.all( validate.getattr("attrib"), validate.get("{http://www.w3.org/XML/1998/namespace}lang") ), validate.all( validate.xml_findall("./p"), validate.transform(lambda x: list(enumerate(x, 1))), [ validate.all( validate.union({ "i": validate.get(0), "begin": validate.all( validate.get(1), validate.getattr("attrib"),
class WWENetwork(Plugin):
    """Plugin for network.wwe.com.

    Requires the ``email`` and ``password`` options unless cached session
    attributes (``ipid`` and ``fprt``) are still available.
    """

    # Dots are escaped so that only the literal host name matches.
    url_re = re.compile(r"https?://network\.wwe\.com")
    content_id_re = re.compile(r'''"content_id" : "(\d+)"''')
    playback_scenario = "HTTP_CLOUD_WIRED"
    login_url = "https://secure.net.wwe.com/workflow.do"
    login_page_url = "https://secure.net.wwe.com/enterworkflow.do?flowId=account.login&forwardUrl=http%3A%2F%2Fnetwork.wwe.com"
    api_url = "https://ws.media.net.wwe.com/ws/media/mf/op-findUserVerifiedEvent/v-2.3"

    # Extracts status code/message, stream URLs, the optional updated
    # fingerprint and session key, and the session attributes.
    _info_schema = validate.Schema(
        validate.union({
            "status": validate.union({
                "code": validate.all(validate.xml_findtext(".//status-code"),
                                     validate.transform(int)),
                "message": validate.xml_findtext(".//status-message"),
            }),
            "urls": validate.all(
                validate.xml_findall(".//url"),
                [validate.getattr("text")]
            ),
            validate.optional("fingerprint"): validate.xml_findtext(".//updated-fingerprint"),
            validate.optional("session_key"): validate.xml_findtext(".//session-key"),
            "session_attributes": validate.all(
                validate.xml_findall(".//session-attribute"),
                [
                    validate.getattr("attrib"),
                    validate.union({
                        "name": validate.get("name"),
                        "value": validate.get("value")
                    })
                ]
            )
        })
    )

    options = PluginOptions({
        "email": None,
        "password": None,
    })

    def __init__(self, url):
        super(WWENetwork, self).__init__(url)
        http.headers.update({"User-Agent": useragents.CHROME})
        self._session_attributes = Cache(filename="plugin-cache.json",
                                         key_prefix="wwenetwork:attributes")
        self._session_key = self.cache.get("session_key")
        # Truthy only when both cached attributes are present.
        self._authed = (self._session_attributes.get("ipid") and
                        self._session_attributes.get("fprt"))

    @classmethod
    def can_handle_url(cls, url):
        """Return True when *url* matches ``url_re``."""
        return cls.url_re.match(url) is not None

    def login(self, email, password):
        """Log in and cache the ``ipid``/``fprt`` cookies on success.

        :param email: account e-mail address
        :param password: account password
        :return: True unless the response text contains
                 "Authentication Error"
        """
        self.logger.debug("Attempting login as {0}", email)
        # sets some required cookies to login
        http.get(self.login_page_url)
        # login
        res = http.post(self.login_url,
                        data=dict(registrationAction='identify',
                                  emailAddress=email,
                                  password=password,
                                  submitButton=""),
                        headers={"Referer": self.login_page_url},
                        allow_redirects=False)

        self._authed = "Authentication Error" not in res.text
        if self._authed:
            self._session_attributes.set("ipid", res.cookies.get("ipid"),
                                         expires=3600 * 1.5)
            self._session_attributes.set("fprt", res.cookies.get("fprt"),
                                         expires=3600 * 1.5)

        return self._authed

    def _update_session_attribute(self, key, value):
        """Cache *value* under *key* and set it as a cookie; no-op if falsy."""
        if value:
            self._session_attributes.set(key, value, expires=3600 * 1.5)  # 1h30m expiry
            http.cookies.set(key, value)

    @property
    def session_key(self):
        return self._session_key

    @session_key.setter
    def session_key(self, value):
        # Persist in the plugin cache as well as on the instance.
        self.cache.set("session_key", value)
        self._session_key = value

    def _get_media_info(self, content_id):
        """
        Get the info about the content, based on the ID

        :param content_id: content id found on the page
        :return: the API response validated against ``_info_schema``
        """
        params = {"identityPointId": self._session_attributes.get("ipid"),
                  "fingerprint": self._session_attributes.get("fprt"),
                  "contentId": content_id,
                  "playbackScenario": self.playback_scenario,
                  "platform": "WEB_MEDIAPLAYER_5",
                  "subject": "LIVE_EVENT_COVERAGE",
                  "frameworkURL": "https://ws.media.net.wwe.com",
                  "_": int(time.time())}
        if self.session_key:
            params["sessionKey"] = self.session_key
        # ``api_url`` as defined on this class has no ``{id}`` placeholder,
        # so this leaves it unchanged; the id travels in ``params``.
        url = self.api_url.format(id=content_id)
        res = http.get(url, params=params)
        return http.xml(res, ignore_ns=True, schema=self._info_schema)

    def _get_content_id(self):
        """Return the ``content_id`` found on the page, or None."""
        # check the page to find the contentId
        res = http.get(self.url)
        m = self.content_id_re.search(res.text)
        if m:
            return m.group(1)

    def _get_streams(self):
        """Yield the HLS variant streams of the page's content.

        Logs an error and yields nothing when credentials are missing or
        the login fails.

        :raises PluginError: when the media API reports a status code
                             other than 1
        """
        email = self.get_option("email")
        password = self.get_option("password")

        # Both values are needed to log in; supplying only one of them must
        # not trigger a login attempt with the other one set to None.
        if not self._authed and not (email and password):
            self.logger.error("A login for WWE Network is required, use --wwenetwork-email/"
                              "--wwenetwork-password to set them")
            return

        if not self._authed:
            if not self.login(email, password):
                self.logger.error("Failed to login, check your username/password")
                return

        content_id = self._get_content_id()
        if content_id:
            self.logger.debug("Found content ID: {0}", content_id)
            info = self._get_media_info(content_id)
            if info["status"]["code"] == 1:
                # update the session attributes
                self._update_session_attribute("fprt", info.get("fingerprint"))
                for attr in info["session_attributes"]:
                    self._update_session_attribute(attr["name"], attr["value"])

                if info.get("session_key"):
                    self.session_key = info.get("session_key")
                for url in info["urls"]:
                    variants = HLSStream.parse_variant_playlist(
                        self.session, url, name_fmt="{pixels}_{bitrate}")
                    for s in variants.items():
                        yield s
            else:
                raise PluginError("Could not load streams: {message} ({code})".format(**info["status"]))