def update_scheme(current, target, force=True):
    # type: (str, str, bool) -> str
    """
    Apply the scheme of the current URL to the target URL.

    :param current: URL which provides the scheme
    :param target: URL which receives the scheme
    :param force: also replace a scheme which the target URL already has
    :return: target URL with a scheme
    """
    parsed = urlparse(target)

    # "host:port" targets like "foo.bar:1234" have an implicit scheme and netloc.
    # The original notes say that urlparse() handles such input differently in py<3.9 and py>=3.9,
    # which is why the explicit scheme is detected with a regex instead of the parse result.
    implicit_scheme_and_netloc = not (
        _re_uri_implicit_scheme.search(target) or target.startswith("//")
    )
    # targets like "foo.bar/foo" have neither a scheme nor a netloc
    missing_scheme_and_netloc = not (parsed.scheme or parsed.netloc)

    if implicit_scheme_and_netloc or missing_scheme_and_netloc:
        return "{0}://{1}".format(urlparse(current).scheme, urlunparse(parsed))

    # protocol-relative targets like "//foo.bar/foo" only need the scheme
    if parsed.netloc and not parsed.scheme:
        return "{0}:{1}".format(urlparse(current).scheme, urlunparse(parsed))

    # the target has its own scheme: keep it, unless it has to be overridden
    if not force:
        return target

    return urlunparse(parsed._replace(scheme=urlparse(current).scheme))
def url_equal(first, second, ignore_scheme=False, ignore_netloc=False, ignore_path=False,
              ignore_params=False, ignore_query=False, ignore_fragment=False):
    """
    Check whether two URLs are equal, optionally leaving out some of their components.

    URL layout: <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    :param first: URL
    :param second: URL
    :param ignore_scheme: don't compare the scheme
    :param ignore_netloc: don't compare the netloc
    :param ignore_path: don't compare the path
    :param ignore_params: don't compare the params
    :param ignore_query: don't compare the query string
    :param ignore_fragment: don't compare the fragment
    :return: True if all compared components are equal
    """
    left = urlparse(first)
    right = urlparse(second)
    components = (
        ("scheme", ignore_scheme),
        ("netloc", ignore_netloc),
        ("path", ignore_path),
        ("params", ignore_params),
        ("query", ignore_query),
        ("fragment", ignore_fragment),
    )

    for component, ignored in components:
        if ignored:
            continue
        if getattr(left, component) != getattr(right, component):
            return False

    return True
def _find_video_id(self, url):
    """
    Find the ID of the video which the given URL refers to.

    The ID is read from the URL itself if it contains one. Otherwise the page is requested
    (submitting the hidden consent form fields if the request ended up on consent.youtube.com)
    and the embedded page data is searched for a video.

    :param url: URL of the page
    :return: video ID
    :raises PluginError: if no video could be found on the page
    """
    m = _url_re.match(url)
    video_id = m.group("video_id") or m.group("video_id_short")
    if video_id:
        log.debug("Video ID from URL")
        return video_id

    res = self.session.http.get(url)
    if urlparse(res.url).netloc == "consent.youtube.com":
        c_data = {}
        for _i in itertags(res.text, "input"):
            if _i.attributes.get("type") == "hidden":
                c_data[_i.attributes.get("name")] = _i.attributes.get("value")
        log.debug("c_data_keys: {}".format(', '.join(c_data.keys())))
        res = self.session.http.post("https://consent.youtube.com/s", data=c_data)
        consent = self.session.http.cookies.get('CONSENT', domain='.youtube.com')
        # the cookie lookup yields None if the consent request didn't set the cookie
        if consent and 'YES' in consent:
            self.cache.set("consent_ck", consent)

    datam = _ytdata_re.search(res.text)
    if datam:
        data = parse_json(datam.group(1))
        # prefer the video which the page itself points to
        for vid_ep in search_dict(data, 'currentVideoEndpoint'):
            video_id = vid_ep.get("watchEndpoint", {}).get("videoId")
            if video_id:
                log.debug("Video ID from currentVideoEndpoint")
                return video_id
        # find the videoRenderer object, where there is a LIVE NOW badge
        for x in search_dict(data, 'videoRenderer'):
            if x.get("viewCountText", {}).get("runs"):
                if x.get("videoId"):
                    log.debug("Video ID from videoRenderer (live)")
                    return x["videoId"]
            for bstyle in search_dict(x.get("badges", {}), "style"):
                if bstyle == "BADGE_STYLE_TYPE_LIVE_NOW":
                    if x.get("videoId"):
                        log.debug("Video ID from videoRenderer (live)")
                        return x["videoId"]

    if urlparse(url).path.endswith(("/embed/live_stream", "/live")):
        for link in itertags(res.text, "link"):
            if link.attributes.get("rel") == "canonical":
                canon_link = link.attributes.get("href")
                # a canonical link without href can't be followed
                if canon_link and canon_link != url:
                    if canon_link.endswith("v=live_stream"):
                        log.debug("The video is not available")
                        break
                    else:
                        log.debug("Re-directing to canonical URL: {0}".format(canon_link))
                        return self._find_video_id(canon_link)

    raise PluginError("Could not find a video on this page")
def test_compare_url_path(self):
    """Check compare_url_path() with one URL that is on the path blacklist and one that is not."""
    blacklist_path = [
        ('example.com', '/_livetvpreview/'),
        ('foo.bar', '/plugins'),
    ]
    listed_url = urlparse('https://www.foo.bar/plugins/123.html')
    unlisted_url = urlparse('https://example.com/123.html')

    self.assertTrue(self.res_plugin.compare_url_path(listed_url, blacklist_path))
    self.assertFalse(self.res_plugin.compare_url_path(unlisted_url, blacklist_path))
def update_scheme(current, target):
    """
    Apply the scheme of the current URL to a target URL which has no scheme.

    Targets which already have a scheme are returned unchanged.

    :param current: URL which provides the scheme
    :param target: URL which may be protocol-relative ("//host/path") or lack a scheme entirely
    :return: target URL with a scheme
    """
    parsed = urlparse(target)
    if parsed.scheme:
        return target

    # protocol-relative targets already start with "//"
    template = "{0}:{1}" if parsed.netloc else "{0}://{1}"
    return template.format(urlparse(current).scheme, urlunparse(parsed))
def __init__(self, node, root=None, parent=None, url=None, *args, **kwargs):
    """
    Parse the top level MPD node: first its attributes, then its child elements.

    :param node: node which is handed to the parent class constructor
    :param root: not read here, this object is always passed on as the root
    :param parent: not read here
    :param url: URL of the manifest, replaced by the text of a Location child if there is one
    """
    # top level has no parent
    super(MPD, self).__init__(node, root=self, *args, **kwargs)
    # parser attributes
    self.url = url
    # unknown timelines default to -1
    self.timelines = defaultdict(lambda: -1)
    self.timelines.update(kwargs.pop("timelines", {}))
    self.id = self.attr(u"id")
    self.profiles = self.attr(u"profiles", required=True)
    # the type has to be parsed before the attributes below, which are only required if it is "dynamic"
    self.type = self.attr(u"type", default=u"static", parser=MPDParsers.type)
    self.minimumUpdatePeriod = self.attr(u"minimumUpdatePeriod", parser=MPDParsers.duration, default=Duration())
    self.minBufferTime = self.attr(u"minBufferTime", parser=MPDParsers.duration, required=True)
    self.timeShiftBufferDepth = self.attr(u"timeShiftBufferDepth", parser=MPDParsers.duration)
    self.availabilityStartTime = self.attr(u"availabilityStartTime", parser=MPDParsers.datetime,
                                           default=datetime.datetime.fromtimestamp(0, utc),  # earliest date
                                           required=self.type == "dynamic")
    self.publishTime = self.attr(u"publishTime", parser=MPDParsers.datetime, required=self.type == "dynamic")
    self.availabilityEndTime = self.attr(u"availabilityEndTime", parser=MPDParsers.datetime)
    self.mediaPresentationDuration = self.attr(u"mediaPresentationDuration", parser=MPDParsers.duration)
    self.suggestedPresentationDelay = self.attr(u"suggestedPresentationDelay", parser=MPDParsers.duration)

    # parse children
    location = self.children(Location)
    # only the first Location child is used
    self.location = location[0] if location else None
    if self.location:
        self.url = self.location.text
        urlp = list(urlparse(self.url))
        # index 2 is the path component: cut off everything after its last slash
        if urlp[2]:
            urlp[2], _ = urlp[2].rsplit("/", 1)
        self._base_url = urlunparse(urlp)

    self.baseURLs = self.children(BaseURL)
    self.periods = self.children(Period, minimum=1)
    self.programInformation = self.children(ProgramInformation)
def __init__(self, session, baseurl, url, bootstrap, metadata=None, timeout=60, **request_params):
    """
    Set up the stream and move the query string of the URL into the request parameters.

    :param session: session which is handed to the Stream constructor
    :param baseurl: stored as-is
    :param url: stream URL, stored without its params, query string and fragment
    :param bootstrap: stored as-is
    :param metadata: stored as-is
    :param timeout: stored as-is
    :param request_params: request parameters, the "params" entry receives the URL's query string
    """
    Stream.__init__(self, session)

    self.baseurl = baseurl
    self.bootstrap = bootstrap
    self.metadata = metadata
    self.timeout = timeout

    # Work on a deep copy, so that the caller's objects are never modified
    self.request_params = deepcopy(request_params)

    url_parts = urlparse(url)
    query_params = parse_qsl(url_parts.query) if url_parts.query else []
    if query_params:
        if not self.request_params.get("params"):
            self.request_params["params"] = {}
        self.request_params["params"].update(query_params)

    self.url = urlunparse((url_parts.scheme, url_parts.netloc, url_parts.path, None, None, None))
def _get_streams(self):
    """
    Find the embedded player iframe on the page and yield the HLS streams of its sources.

    :return: generator of (name, stream) tuples
    """
    self.session.http.headers.update({"Referer": self.url})

    iframe_url = None
    res = self.session.http.get(self.url)
    for iframe in itertags(res.text, "iframe"):
        # iframes without a src attribute have to be skipped, None doesn't support the "in" operator
        src = iframe.attributes.get("src")
        if src and "embed.lsm.lv" in src:
            iframe_url = src
            break

    if not iframe_url:
        log.error("Could not find player iframe")
        return

    log.debug("Found iframe: {0}".format(iframe_url))
    res = self.session.http.get(iframe_url)
    for source in itertags(res.text, "source"):
        stream_url = source.attributes.get("src")
        if not stream_url:
            continue

        url_path = urlparse(stream_url).path
        if url_path.endswith(".m3u8"):
            for s in HLSStream.parse_variant_playlist(self.session, stream_url).items():
                yield s
        else:
            log.debug("Not used URL path: {0}".format(url_path))
def _create_stream(self, stream, is_live):
    """
    Yield the streams which are described by one stream entry.

    :param stream: dict with the keys "height", "mediaType" and "url" ("streamer" for RTMP as well)
    :param is_live: whether the RTMP stream is a live stream
    :return: generator of (name, stream) tuples
    """
    name = "{0}p".format(stream["height"])
    media_type = stream["mediaType"]
    url = stream["url"]

    if media_type == "rtmp":
        # only non-live play paths get the "mp4:" prefix
        playpath = url if is_live else "mp4:{0}".format(url)
        params = {
            "rtmp": stream["streamer"],
            "playpath": playpath,
            "swfVfy": SWF_URL,
            "pageUrl": self.url,
        }
        if is_live:
            params["live"] = True
        yield name, RTMPStream(self.session, params)
        return

    if media_type not in ("hls", "mp4"):
        return

    if not urlparse(url).path.endswith("m3u8"):
        yield name, HTTPStream(self.session, url)
        return

    try:
        variants = HLSStream.parse_variant_playlist(self.session, url)
        # TODO: Replace with "yield from" when dropping Python 2.
        for variant in variants.items():
            yield variant
    except IOError as err:
        self.logger.error("Failed to extract HLS streams: {0}", err)
def find_iframe(self, res):
    """
    Return the first iframe URL which is found in the response text.

    :param res: response object with a text attribute
    :return: iframe URL, with the page's scheme if it is protocol-relative, or None without a match
    """
    found = self.iframe_re.findall(res.text)
    if not found:
        return None

    iframe_url = found[0]
    if not iframe_url.startswith("//"):
        return iframe_url

    return "{0}:{1}".format(urlparse(self.url).scheme, iframe_url)
def _find_iframe(self, res): iframe = self.iframe_re.search(res.text) url = iframe and iframe.group(1) if url and url.startswith("//"): p = urlparse(self.url) url = "{0}:{1}".format(p.scheme, url) return url
def prepend_www(url):
    """Changes google.com to www.google.com

    All other components of the URL (path, params, query string and fragment) are kept.

    :param url: URL with a scheme and a netloc
    :return: URL whose netloc starts with "www", the input itself if it already did
    """
    parsed = urlparse(url)
    if parsed.netloc.split(".")[0] != "www":
        # rebuild the URL from the parse result, so that the query string and fragment aren't lost
        return parsed._replace(netloc="www." + parsed.netloc).geturl()
    else:
        return url
def _get_streams(self):
    """
    Resolve the "play-live" link of the page to an HLS variant playlist.

    :return: result of HLSStream.parse_variant_playlist()
    :raises PluginError: if no link was found or if the iframe URL has an unknown layout
    """
    self.session.http.headers.update({'User-Agent': useragents.FIREFOX})

    page = self.session.http.get(self.url)
    play_links = (tag for tag in itertags(page.text, 'a') if tag.attributes.get('class') == 'play-live')
    play_link = next(play_links, None)

    iframe_url = None
    if play_link is not None:
        iframe_url = update_scheme(self.url, play_link.attributes['data-url'])
    if not iframe_url:
        raise PluginError('Could not find iframe.')

    iframe = urlparse(iframe_url)
    segments = iframe.path.split('/')
    # only one iframe URL layout is known,
    # the video ID may be somewhere else in the path if the layout changes
    if len(segments) != 6:
        raise PluginError('unsupported iframe URL: {0}'.format(iframe_url))

    api_url = self.API_URL.format(netloc=iframe.netloc, id=segments[4])
    data = self.session.http.json(self.session.http.get(api_url), schema=self._api_schema)
    log.trace('{0!r}'.format(data))

    playlist_url = self.PLAYLIST_URL.format(
        app=data['streamProperties']['application'],
        name=data['playStreamName'],
        netloc=data['cdnHost'],
    )
    return HLSStream.parse_variant_playlist(self.session, playlist_url)
def _get_streams(self):
    """
    Request the assets of the video and build HDS or RTMP streams from them.

    :return: dict which maps stream names to streams
    """
    video_id = _url_re.match(self.url).group("video_id")
    assets = http.xml(http.get(ASSET_URL.format(video_id)), schema=_asset_schema)

    found = {}
    for asset in assets:
        base, asset_url = asset["base"], asset["url"]

        # manifests take precedence over the RTMP base
        if urlparse(asset_url).path.endswith(".f4m"):
            found.update(HDSStream.parse_manifest(self.session, asset_url, pvswf=SWF_URL))
            continue

        if not base.startswith("rtmp"):
            continue

        name = "{0}k".format(asset["bitrate"])
        found[name] = RTMPStream(self.session, {
            "rtmp": base,
            "playpath": asset_url,
            "live": True
        })

    return found
def _get_streams(self):
    """
    Look up the channel of the (resolved) URL and return its stream.

    The channel ID is taken from the URL fragment, from the last path segment of "/v/" URLs
    or from the "channel" group of the URL pattern, in this order.

    :return: dict with a single "live" or "vod" stream, or None
    """
    self.url = http.resolve_url(self.url)
    url_parts = urlparse(self.url)

    if url_parts.fragment:
        channel_id = url_parts.fragment
    elif url_parts.path.startswith('/v/'):
        channel_id = url_parts.path.rsplit('/', 1)[-1]
    else:
        channel_id = _url_re.match(self.url).group("channel")

    if not channel_id:
        return

    channel_id = channel_id.lower().replace("/", "_")
    info = http.json(http.get(API_URL.format(channel_id)), schema=_schema)
    if not info["success"]:
        return

    name = "live" if info.get("isLive") else "vod"

    # The HTTP stream is wrapped in a FLVPlaylist, so that its FLV tags get verified
    playlist = FLVPlaylist(self.session, [HTTPStream(self.session, info["payload"])])
    return {name: playlist}
def _get_streams(self):
    """
    Yield the streams of a live channel or of a VOD, depending on the "channel" group of the URL.

    :return: generator of (name, stream) tuples
    """
    channel = self._url_re.match(self.url).group("channel")
    self.session.http.headers.update({"User-Agent": useragents.CHROME})

    if channel:
        video_urls = self._get_live_streams(channel) or []
    else:
        video_urls = self._get_vod_streams() or []

    for video_url in video_urls:
        log.debug("Found stream: {0}".format(video_url))
        path = urlparse(video_url).path

        if path.endswith(".f4m"):
            for item in HDSStream.parse_manifest(self.session, video_url).items():
                yield item
        elif path.endswith(".m3u8"):
            for item in HLSStream.parse_variant_playlist(self.session, video_url).items():
                yield item
        elif path.endswith(".mp4"):
            bitrate_match = self._re_mp4_bitrate.match(video_url)
            if bitrate_match is None:
                name = "vod"
            else:
                name = "{0}k".format(bitrate_match.group('bitrate'))
            yield name, HTTPStream(self.session, video_url)
def _get_stream_info(self, url):
    """
    Find the video of the URL and request its stream info.

    :param url: user, live channel or video URL
    :return: parsed "config" of the video info response, or None if no video ID was found
    """
    match = _url_re.match(url)
    user = match.group("user")
    live_channel = match.group("liveChannel")

    if user:
        video_id = self._find_channel_video()
    elif live_channel:
        return self._find_canonical_stream_info()
    else:
        video_id = match.group("video_id")
        if video_id == "live_stream":
            # the channel of "live_stream" URLs is part of the query string
            channel = dict(parse_qsl(urlparse(url).query)).get("channel")
            if channel is not None:
                video_id = self._get_channel_video(channel)

    if not video_id:
        return

    # CUSTOM: "el" is "detailpage" here instead of "player_embedded", see
    # https://github.com/Tyrrrz/YoutubeExplode/issues/66#issuecomment-348685419
    params = {"video_id": video_id, "el": "detailpage"}
    res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
    return parse_query(res.text, name="config", schema=_config_schema)
def login(self, ptrt_url):
    """
    Create a session using a BBC ID. See https://www.bbc.co.uk/usingthebbc/account/

    :param ptrt_url: The snapback URL to redirect to after successful authentication
    :type ptrt_url: string
    :return: Whether authentication was successful
    :rtype: bool
    """
    def auth_check(res):
        # authenticated requests end up on (or pass through) the snapback URL
        visited = [step.url for step in res.history]
        visited.append(res.url)
        return ptrt_url in visited

    # the session request is needed for getting the correct cookies
    session_res = self.session.http.get(self.session_url, params={"ptrt": ptrt_url})
    if auth_check(session_res):
        log.debug("Already authenticated, skipping authentication")
        return True

    credentials = {
        "jsEnabled": True,
        "username": self.get_option("username"),
        "password": self.get_option('password'),
        "attempts": 0,
    }
    auth_res = self.session.http.post(
        self.auth_url,
        params=urlparse(session_res.url).query,
        data=credentials,
        headers={"Referer": self.url},
    )
    return auth_check(auth_res)
def _get_streams(self):
    """
    Read the stream info of the page and yield its MP4, HLS and RTMP streams.

    :return: generator of (name, stream) tuples
    """
    page = http.get(self.url)
    found = _info_re.search(page.text)
    if not found:
        return

    info = parse_json(found.group(1), schema=_schema)
    stream_name = info["mode"]
    mp4_url, ios_url, swf_url = [info.get(key) for key in ("mp4_url", "ios_url", "swf_url")]

    if mp4_url:
        yield stream_name, HTTPStream(self.session, mp4_url)

    if ios_url and urlparse(ios_url).path.endswith(".m3u8"):
        variants = HLSStream.parse_variant_playlist(self.session, ios_url)
        # TODO: Replace with "yield from" when dropping Python 2.
        for variant in variants.items():
            yield variant

    if swf_url:
        rtmp_stream = self._get_rtmp_stream(swf_url)
        if rtmp_stream:
            yield stream_name, rtmp_stream
def check_url(value):
    """
    Validate that the value is a URL whose attributes match the expected schemas.

    The schemas are read from the ``attributes`` mapping of the enclosing scope.

    :param value: value to validate
    :return: True
    :raises ValidationError: if the value is not a string, has no netloc or if an attribute is invalid
    """
    validate(str, value)

    parts = urlparse(value)
    if not parts.netloc:
        raise ValidationError(
            "{value} is not a valid URL",
            value=repr(value),
            schema="url",
        )

    for name, schema in attributes.items():
        if not hasattr(parts, name):
            raise ValidationError(
                "Invalid URL attribute {name}",
                name=repr(name),
                schema="url",
            )

        component = getattr(parts, name)
        try:
            validate(schema, component)
        except ValidationError as err:
            raise ValidationError(
                "Unable to validate URL attribute {name}",
                name=repr(name),
                schema="url",
                context=err,
            )

    return True
def _get_streams(self):
    """
    Yield the MP4, HLS and RTMP streams which are listed in the stream info of the page.

    :return: generator of (name, stream) tuples
    """
    response = http.get(self.url)
    info_match = _info_re.search(response.text)
    if not info_match:
        return

    info = parse_json(info_match.group(1), schema=_schema)
    name = info["mode"]
    mp4_url = info.get("mp4_url")
    ios_url = info.get("ios_url")
    swf_url = info.get("swf_url")

    if mp4_url:
        yield name, HTTPStream(self.session, mp4_url)

    is_hls = bool(ios_url) and urlparse(ios_url).path.endswith(".m3u8")
    if is_hls:
        # TODO: Replace with "yield from" when dropping Python 2.
        for entry in HLSStream.parse_variant_playlist(self.session, ios_url).items():
            yield entry

    if not swf_url:
        return

    rtmp = self._get_rtmp_stream(swf_url)
    if rtmp:
        yield name, rtmp
def _get_streams(self):
    """
    Request the player API for the link of the URL and return its HLS streams.

    The link is read from the "link" parameter of the URL fragment, with _get_tv_link() as fallback.

    :return: result of HLSStream.parse_variant_playlist(), or None if the stream can't be played
    """
    http.headers.update({"User-Agent": useragents.CHROME,
                         "Referer": self.referer})

    fragment = dict(parse_qsl(urlparse(self.url).fragment))
    link = fragment.get("link")
    if not link:
        link = self._get_tv_link()

    if not link:
        self.logger.error("Missing link fragment: stream unavailable")
        return

    player_url = self._api_url.format(link)
    self.logger.debug("Requesting player API: {0} (referer={1})", player_url, self.referer)
    # the "_" parameter is the current time in milliseconds
    res = http.get(player_url,
                   params={"_": int(time.time() * 1000)},
                   headers={"X-Requested-With": "XMLHttpRequest"})

    try:
        data = http.json(res, schema=self.api_schema)
    except PluginError as e:
        # report the details via the logger: printing would write to stdout
        self.logger.debug("Player API response could not be validated: {0}", e)
        self.logger.error("Cannot play this stream type")
    else:
        if data["status"]:
            if data["file"].startswith("<"):
                self.logger.error("Cannot play embedded streams")
            else:
                return HLSStream.parse_variant_playlist(self.session, data["file"])
        else:
            self.logger.error(data["text"])
def create_decryptor(self, key, sequence):
    """
    Create the AES-CBC decryptor for a segment, requesting the key data if the key URI changed.

    :param key: key object with the attributes method, uri and iv
    :param sequence: sequence number, used for the IV if the key doesn't have one
    :return: AES cipher object
    :raises StreamError: if the cipher is not AES-128 or if there is no key URI
    """
    if key.method != "AES-128":
        raise StreamError("Unable to decrypt cipher {0}", key.method)

    if not (self.key_uri_override or key.uri):
        raise StreamError("Missing URI to decryption key")

    key_uri = key.uri
    if self.key_uri_override:
        # the override is a template which may refer to the components of the original key URI
        original = urlparse(key.uri)
        key_uri = LazyFormatter.format(
            self.key_uri_override,
            url=key.uri,
            scheme=original.scheme,
            netloc=original.netloc,
            path=original.path,
            query=original.query,
        )

    # the key data is only requested again once the key URI changes
    if key_uri != self.key_uri:
        response = self.session.http.get(
            key_uri,
            exception=StreamError,
            retries=self.retries,
            **self.reader.request_params
        )
        response.encoding = "binary/octet-stream"
        self.key_data = response.content
        self.key_uri = key_uri

    iv = key.iv or num_to_iv(sequence)
    # left-pad the IV with zero bytes to 16 bytes
    padding = b"\x00" * (16 - len(iv))

    return AES.new(self.key_data, AES.MODE_CBC, padding + iv)
def fetch(self, segment, retries=None):
    """
    Request a segment, waiting until it is available and retrying on stream errors.

    :param segment: segment with the attributes url, available_at and range
    :param retries: number of remaining attempts, nothing is requested if this is None or 0
    :return: response of the request, or None
    """
    if self.closed or not retries:
        return

    try:
        request_headers = {}
        current_time = datetime.datetime.now(tz=utc)
        if segment.available_at > current_time:
            delay = (segment.available_at - current_time).total_seconds()
            segment_name = os.path.basename(urlparse(segment.url).path)
            log.debug("Waiting for segment: {fname} ({wait:.01f}s)".format(fname=segment_name, wait=delay))
            sleep_until(segment.available_at)

        if segment.range:
            start, length = segment.range
            # a range without length is open-ended
            end = start + length - 1 if length else ""
            request_headers["Range"] = "bytes={0}-{1}".format(start, end)

        return self.session.http.get(
            segment.url,
            timeout=self.timeout,
            exception=StreamError,
            headers=request_headers,
        )
    except StreamError as err:
        log.error("Failed to open segment {0}: {1}", segment.url, err)
        return self.fetch(segment, retries - 1)
def _get_streams(self):
    """
    Request the player API for the link of the URL and return its HLS streams.

    The link is read from the "link" parameter of the URL fragment, with _get_tv_link() as fallback.

    :return: result of HLSStream.parse_variant_playlist(), or None if the stream can't be played
    """
    self.session.http.headers.update({"User-Agent": useragents.CHROME,
                                      "Referer": self.referer})

    fragment = dict(parse_qsl(urlparse(self.url).fragment))
    link = fragment.get("link")
    if not link:
        link = self._get_tv_link()

    if not link:
        self.logger.error("Missing link fragment: stream unavailable")
        return

    player_url = self._api_url.format(link)
    self.logger.debug("Requesting player API: {0} (referer={1})", player_url, self.referer)
    # the "_" parameter is the current time in milliseconds
    res = self.session.http.get(player_url,
                                params={"_": int(time.time() * 1000)},
                                headers={"X-Requested-With": "XMLHttpRequest"})

    try:
        data = self.session.http.json(res, schema=self.api_schema)
    except PluginError as e:
        # report the details via the logger: printing would write to stdout
        self.logger.debug("Player API response could not be validated: {0}", e)
        self.logger.error("Cannot play this stream type")
    else:
        if data["status"]:
            if data["file"].startswith("<"):
                self.logger.error("Cannot play embedded streams")
            else:
                return HLSStream.parse_variant_playlist(self.session, data["file"])
        else:
            self.logger.error(data["text"])
def uri(self, uri):
    """
    Resolve a URI against the base URI.

    :param uri: absolute or relative URI, may be empty
    :return: the URI itself if it is empty, has a scheme or if there is no base URI,
             otherwise the URI joined with the base URI
    """
    if not uri:
        return uri

    if urlparse(uri).scheme or not self.base_uri:
        return uri

    return urljoin(self.base_uri, uri)
def _get_streams(self):
    """
    Yield the audio and HLS streams of a station, skipping URLs which only differ in their scheme.

    :return: generator of (name, stream) tuples
    """
    station = self.session.http.get(self.url, schema=self._stream_schema)
    if station is None or station['type'] != 'STATION':
        return

    seen_urls = set()
    for stream in station['streams']:
        log.trace('{0!r}'.format(stream))
        url = stream['url']

        # the same URL may be listed once per scheme
        schemeless_url = urlunparse(urlparse(url)._replace(scheme=''))
        if schemeless_url in seen_urls:
            continue
        seen_urls.add(schemeless_url)

        content_format = stream['contentFormat']
        if content_format in ('audio/mpeg', 'audio/aac'):
            yield 'live', HTTPStream(self.session, url, allow_redirects=True)
        elif content_format == 'video/MP2T':
            variants = HLSStream.parse_variant_playlist(self.session, stream["url"])
            if variants:
                for variant in variants.items():
                    yield variant
            else:
                # no variants: treat the URL as a media playlist
                yield stream["quality"], HLSStream(self.session, stream["url"])
def __init__(self, url):
    """
    Read the channel, video ID or clip name from the URL and set up the API clients.

    :param url: URL which matches the plugin's URL pattern
    """
    super(Twitch, self).__init__(url)

    groups = self._re_url.match(url).groupdict()
    self.params = parse_query(urlparse(url).query)
    self.subdomain = groups.get("subdomain")

    self.video_id = None
    self._channel_id = None
    self._channel = None
    self.clip_name = None
    self.title = None
    self.author = None
    self.category = None

    if self.subdomain == "player":
        # pop-out player: everything is part of the query string
        video = self.params.get("video")
        if video:
            self.video_id = video
        self._channel = self.params.get("channel")
    elif self.subdomain == "clips":
        # clip share URL: the "channel" group holds the clip name
        self.clip_name = groups.get("channel")
    else:
        channel = groups.get("channel")
        self._channel = channel and channel.lower()
        self.video_id = groups.get("video_id") or groups.get("videos_id")
        self.clip_name = groups.get("clip_name")

    self.api = TwitchAPI(session=self.session)
    self.usher = UsherService(session=self.session)
def __init__(self, url):
    """
    Read the channel, video type/ID or clip name from the URL and set up the API clients.

    :param url: URL which matches the plugin's URL pattern
    """
    Plugin.__init__(self, url)
    self._hosted_chain = []

    groups = _url_re.match(url).groupdict()
    self.params = parse_query(urlparse(url).query)
    self.subdomain = groups.get("subdomain")

    self.video_id = None
    self.video_type = None
    self._channel_id = None
    self._channel = None
    self.clip_name = None

    if self.subdomain == "player":
        # pop-out player: the first character of the "video" parameter is the video type
        video = self.params.get("video")
        if video:
            try:
                self.video_type = video[0]
                self.video_id = video[1:]
            except IndexError:
                self.logger.debug("Invalid video param: {0}", video)
        self._channel = self.params.get("channel")
    elif self.subdomain == "clips":
        # clip share URL: the "channel" group holds the clip name
        self.clip_name = groups.get("channel")
    else:
        channel = groups.get("channel")
        videos_id = groups.get("videos_id")
        self._channel = channel and channel.lower()
        # a "videos_id" always implies the video type "v"
        self.video_type = "v" if videos_id else groups.get("video_type")
        self.video_id = groups.get("video_id") or videos_id
        self.clip_name = groups.get("clip_name")

    self.api = TwitchAPI(beta=self.subdomain == "beta", version=5)
    self.usher = UsherService()
def fetch(self, segment, retries=None):
    """
    Request a segment once it is available, trying again after a stream error.

    :param segment: segment with the attributes url, available_at and range
    :param retries: number of remaining attempts, nothing is requested if this is None or 0
    :return: response of the request, or None
    """
    if self.closed or not retries:
        return

    try:
        headers = {}

        now = datetime.datetime.now(tz=utc)
        if segment.available_at > now:
            wait = (segment.available_at - now).total_seconds()
            filename = os.path.basename(urlparse(segment.url).path)
            message = "Waiting for segment: {fname} ({wait:.01f}s)".format(fname=filename, wait=wait)
            log.debug(message)
            sleep_until(segment.available_at)

        if segment.range:
            first_byte, length = segment.range
            if length:
                last_byte = first_byte + length - 1
            else:
                # without a length, the range is open-ended
                last_byte = ""
            headers["Range"] = "bytes={0}-{1}".format(first_byte, last_byte)

        return self.session.http.get(segment.url,
                                     timeout=self.timeout,
                                     exception=StreamError,
                                     headers=headers)
    except StreamError as err:
        log.error("Failed to open segment {0}: {1}", segment.url, err)
        return self.fetch(segment, retries - 1)
def _get_streams_delfi(self, src):
    """
    Yield the HLS streams which are embedded in the player iframe.

    The stream data is a JSON document in the "stream" parameter of the fragment of the URL
    which the iframe's script sets as the "src" attribute of the embedded player.

    :param src: URL of the iframe
    :return: generator of (name, stream) tuples
    """
    try:
        data = self.session.http.get(src, schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(".//script[contains(text(),'embedJs.setAttribute(')][1]/text()"),
            validate.any(None, validate.all(
                validate.text,
                validate.transform(re.compile(r"embedJs\.setAttribute\('src',\s*'(.+?)'").search),
                validate.any(None, validate.all(
                    validate.get(1),
                    validate.transform(lambda url: parse_qsd(urlparse(url).fragment)),
                    {"stream": validate.text},
                    validate.get("stream"),
                    validate.parse_json(),
                    {"versions": [{
                        "hls": validate.text
                    }]},
                    validate.get("versions")
                ))
            ))
        ))
    except PluginError:
        log.error("Failed to get streams from iframe")
        return

    # the schema allows None as result, if the script or the src attribute is missing
    if not data:
        return

    for stream in data:
        src = update_scheme("https://", stream["hls"], force=False)
        for s in HLSStream.parse_variant_playlist(self.session, src).items():
            yield s
def _create_stream(self, stream, is_live):
    """
    Turn one stream description into HLS, HTTP or RTMP streams.

    :param stream: dict with the keys "height", "mediaType" and "url" ("streamer" for RTMP as well)
    :param is_live: whether the RTMP stream is a live stream
    :return: generator of (name, stream) tuples
    """
    stream_name = "{0}p".format(stream["height"])
    stream_type = stream["mediaType"]
    stream_url = stream["url"]

    is_http = stream_type in ("hls", "mp4")

    if is_http and urlparse(stream_url).path.endswith("m3u8"):
        try:
            playlist = HLSStream.parse_variant_playlist(self.session, stream_url)
            # TODO: Replace with "yield from" when dropping Python 2.
            for entry in playlist.items():
                yield entry
        except IOError as err:
            self.logger.error("Failed to extract HLS streams: {0}", err)
    elif is_http:
        yield stream_name, HTTPStream(self.session, stream_url)
    elif stream_type == "rtmp":
        rtmp_params = {
            "rtmp": stream["streamer"],
            "playpath": stream["url"],
            "swfVfy": SWF_URL,
            "pageUrl": self.url,
        }
        if is_live:
            rtmp_params["live"] = True
        else:
            # play paths of recordings need the "mp4:" prefix
            rtmp_params["playpath"] = "mp4:{0}".format(rtmp_params["playpath"])

        yield stream_name, RTMPStream(self.session, rtmp_params)
def _get_stream_info(self, url):
    """
    Find the video of the URL and request its stream info.

    :param url: user, live channel or video URL
    :return: parsed "config" of the video info response, or None if no video ID was found
    """
    match = _url_re.match(url)
    user = match.group("user")
    live_channel = match.group("liveChannel")

    if user:
        video_id = self._find_channel_video()
    elif live_channel:
        return self._find_canonical_stream_info()
    else:
        video_id = match.group("video_id")
        if video_id == "live_stream":
            # the channel of "live_stream" URLs is part of the query string
            query = dict(parse_qsl(urlparse(url).query))
            channel = query.get("channel")
            if channel is not None:
                video_id = self._get_channel_video(channel)

    if not video_id:
        return

    res = http.get(
        API_VIDEO_INFO,
        params={"video_id": video_id, "el": "player_embedded"},
        headers=HLS_HEADERS,
    )
    return parse_query(res.text, name="config", schema=_config_schema)
def _get_vod(self, root):
    """
    Read the content URL of the VideoObject in the page's JSON-LD data and build streams from it.

    :param root: parsed document which is validated with the VOD schema
    :return: HLS variant streams, a dict with a single "vod" HTTP stream, or None if validation fails
    """
    def strip_linebreaks(jsonlike):
        return re.sub(r"[\r\n]+", "", jsonlike)

    def is_video_object(obj):
        return obj["@type"] == "VideoObject"

    def with_https(content_url):
        return update_scheme("https://", content_url)

    # the VideoObject is either an item of the "@graph" list or the JSON object itself
    video_from_graph = validate.all(
        {"@graph": [dict]},
        validate.get("@graph"),
        validate.filter(is_video_object),
        validate.get(0)
    )
    schema_vod = validate.Schema(
        validate.xml_xpath_string(".//script[@type='application/ld+json'][contains(text(),'VideoObject')][1]/text()"),
        validate.text,
        validate.transform(strip_linebreaks),
        validate.parse_json(),
        validate.any(video_from_graph, dict),
        {"contentUrl": validate.url()},
        validate.get("contentUrl"),
        validate.transform(with_https)
    )

    try:
        vod_url = schema_vod.validate(root)
    except PluginError:
        return

    if not urlparse(vod_url).path.endswith(".m3u8"):
        return {"vod": HTTPStream(self.session, vod_url)}

    return HLSStream.parse_variant_playlist(self.session, vod_url)
def _get_streams(self):
    """
    Find the embedded player iframe on the page and yield the HLS streams of its sources.

    :return: generator of (name, stream) tuples
    """
    self.session.http.headers.update({
        "Referer": self.url,
        "User-Agent": useragents.FIREFOX
    })

    iframe_url = None
    res = self.session.http.get(self.url)
    for iframe in itertags(res.text, "iframe"):
        # iframes without a src attribute have to be skipped, None doesn't support the "in" operator
        src = iframe.attributes.get("src")
        if src and "embed.lsm.lv" in src:
            iframe_url = src
            break

    if not iframe_url:
        log.error("Could not find player iframe")
        return

    log.debug("Found iframe: {0}".format(iframe_url))
    res = self.session.http.get(iframe_url)
    for source in itertags(res.text, "source"):
        stream_url = source.attributes.get("src")
        if not stream_url:
            continue

        url_path = urlparse(stream_url).path
        if url_path.endswith(".m3u8"):
            for s in HLSStream.parse_variant_playlist(self.session, stream_url).items():
                yield s
        else:
            log.debug("Not used URL path: {0}".format(url_path))
def _get_streams(self):
    """
    Resolve the page's content data to a tokenized stream URL and return its HLS streams.

    The content data is either the URL of an API which returns the stream URL,
    or the API data itself (a string which starts with "[{").

    :return: result of HLSStream.parse_variant_playlist(), or None if the content data is unusable
    """
    content = self.session.http.get(self.url, schema=self._data_content_schema)

    if content and content.startswith(("/", "http")):
        stream_url = self.session.http.get(
            urljoin(self.url, content),
            schema=self._api_schema,
            headers={"Referer": self.url},
        )
    elif content and content.startswith("[{"):
        stream_url = self._api_schema.validate(content)
    else:
        # None means that there is no content data at all, which is not reported
        if content is not None:
            log.error("_data_content_schema returns invalid data: {0}".format(content))
        return

    stream_parts = urlparse(stream_url)
    token_path = self._token_api_path.format(
        url=stream_url,
        netloc="{0}://{1}".format(stream_parts.scheme, stream_parts.netloc),
        time=int(time()),
    )
    tokenized_url = self.session.http.get(
        urljoin(self.url, token_path),
        schema=self._stream_schema,
        headers={"Referer": self.url},
    )
    return HLSStream.parse_variant_playlist(self.session, tokenized_url)
def _get_streams(self):
    """
    Log in and yield the HLS streams of the video, honoring the "startPoint" URL parameter.

    :return: generator of (name, stream) tuples
    :raises PluginError: if the login failed
    """
    if not self.login(self.get_option("email"), self.get_option("password")):
        raise PluginError("Login failed")

    try:
        query = dict(parse_qsl(urlparse(self.url).query))
        start_point = int(float(query.get("startPoint", 0.0)))
        if start_point > 0:
            log.info("Stream will start at {0}".format(seconds_to_hhmmss(start_point)))
    except ValueError:
        # an invalid start point is ignored
        start_point = 0

    content_id = self._get_video_id()
    if not content_id:
        return

    log.debug("Found content ID: {0}".format(content_id))
    info = self._get_media_info(content_id)
    hls_url = info.get("hlsUrl")
    if not hls_url:
        log.error("Could not find the HLS URL")
        return

    variants = HLSStream.parse_variant_playlist(self.session, hls_url, start_offset=start_point)
    for variant in variants.items():
        yield variant
def prepend_www(url):
    """
    Prefix the netloc of the URL with "www.", unless it already starts with it.

    :param url: URL to update
    :return: URL as it is rebuilt from its parse result
    """
    parts = urlparse(url)
    netloc = parts.netloc
    if netloc.startswith("www."):
        return parts.geturl()

    # noinspection PyProtectedMember
    return parts._replace(netloc="www.{0}".format(netloc)).geturl()
def update_qsd(url, qsd=None, remove=None):
    """
    Update or remove keys from a query string in a URL

    :param url: URL to update
    :param qsd: dict of keys to update, a None value leaves it unchanged
    :param remove: list of keys to remove, or "*" to remove all
                   note: updated keys are never removed, even if unchanged
                   note: keys which are not part of the query string are ignored
    :return: updated URL
    """
    qsd = qsd or {}
    remove = remove or []

    # parse current query string
    parsed = urlparse(url)
    current_qsd = OrderedDict(parse_qsl(parsed.query))

    # * removes all possible keys
    if remove == "*":
        remove = list(current_qsd.keys())

    # remove keys before updating, but leave updated keys untouched
    for key in remove:
        if key not in qsd:
            # the key may not exist in the query string
            current_qsd.pop(key, None)

    # and update the query string
    for key, value in qsd.items():
        # only None leaves a key unchanged, other falsy values like 0 are valid values
        if value is not None:
            current_qsd[key] = value

    return parsed._replace(query=urlencode(current_qsd)).geturl()
def _parse_vod_streams(self, vod): for name, stream in vod["streams"].items(): scheme = urlparse(stream["url"]).scheme if scheme == "http": yield name, HLSStream(self.session, stream["url"]) elif scheme == "rtmp": yield name, self._create_rtmp_stream(stream, live=False)
def find_iframe(self, res):
    """
    Return the first iframe URL in the response text which doesn't contain "googletagmanager".

    :param res: response object with a text attribute
    :return: iframe URL, with the page's scheme if it is protocol-relative, or None without a match
    """
    page_scheme = urlparse(self.url).scheme

    candidates = (
        candidate for candidate in self.iframe_re.findall(res.text)
        if "googletagmanager" not in candidate
    )
    iframe_url = next(candidates, None)
    if iframe_url is None:
        return None

    if iframe_url.startswith("//"):
        return "{0}:{1}".format(page_scheme, iframe_url)

    return iframe_url
def _get_streams(self):
    """
    Return the HLS stream of the page, if its live data describes one.

    :return: dict with a single "hls" stream, or None
    """
    live = http.get(self.url, schema=_live_schema)
    if not live:
        return

    is_hls = live["type"] == "hls" and urlparse(live["url"]).path.endswith("m3u8")
    if not is_hls:
        return

    return {"hls": HLSStream(self.session, live["url"])}
def from_url(cls, session, url):
    """
    Get the streams of a player URL.

    The first two segments of the URL path are the account ID and the player ID,
    the video ID is the "videoId" parameter of the query string.

    :param session: session which is handed to the class constructor
    :param url: player URL, its path needs at least three segments
    :return: result of get_streams() for the video ID
    """
    purl = urlparse(url)
    querys = dict(parse_qsl(purl.query))

    # split off the first two segments only: the rest of the path may contain more slashes
    account_id, player_id, _ = purl.path.lstrip("/").split("/", 2)
    video_id = querys.get("videoId")

    bp = cls(session, account_id=account_id, player_id=player_id)
    return bp.get_streams(video_id)
def _get_streams(self):
    """
    Return the HLS streams of a channel or the HTTP stream of a video.

    :return: HLS variant streams, a dict with a single "video" stream, or None
    """
    info = http.get(self.url, schema=_schema)
    if not info:
        return

    kind = info["type"]
    if kind == "channel" and urlparse(info["url"]).path.endswith("m3u8"):
        return HLSStream.parse_variant_playlist(self.session, info["url"])

    if kind == "video":
        return {"video": HTTPStream(self.session, info["url"])}

    return None
def _get_video_streams(self, player):
    """
    Map the bitrates of the player's clip to streams, based on the file extension of their URLs.

    :param player: dict with a "clip" dict, which has the keys "baseUrl" and "bitrates"
    :return: result of the stream mapper
    """
    clip = player["clip"]
    base_url = clip["baseUrl"] or VOD_BASE_URL

    def has_extension(ext, bitrate):
        return urlparse(bitrate["url"]).path.endswith(ext)

    mapper = StreamMapper(cmp=has_extension)
    for extension, stream_class in ((".m3u8", HLSStream), (".mp4", HTTPStream), (".flv", HTTPStream)):
        mapper.map(extension, self._create_video_stream, stream_class, base_url)

    return mapper(player["clip"]["bitrates"])
def auth_url(self, url):
    """
    Add the parameters of an auth token to the query string of the URL.

    The token is requested for all resources in the directory of the URL's path.

    :param url: URL to authorize, its path needs at least one slash
    :return: URL with the merged query string
    """
    url_parts = urlparse(url)
    directory, _filename = url_parts.path.rsplit("/", 1)

    token_res = self.session.http.get(self.token_url, params={"acl": directory + "/*"})
    token = self.session.http.json(token_res, schema=self.token_schema)

    # token parameters override the parameters which the URL already has
    query = dict(parse_qsl(url_parts.query))
    query.update(dict(parse_qsl(token)))

    return urlunparse(url_parts._replace(query=urlencode(query)))
def get_stream_url(self, event_id):
    """
    Request the stream URL of an event from the API.

    :param event_id: ID of the event
    :return: tuple of the stream URL without its query string and the query string as a dict
    """
    site_match = self.url_re.match(self.url)
    site = site_match.group(1) or site_match.group(2)

    api_url = self.api_url.format(id=event_id, site=site.upper())
    self.logger.debug("Calling API: {0}", api_url)

    # the response is the stream URL, which may be wrapped in quotes
    quoted_url = http.get(api_url).text
    url_parts = urlparse(quoted_url.strip("\"'"))

    query = dict(parse_qsl(url_parts.query))
    bare_url = urlunparse(url_parts._replace(query=""))
    return bare_url, query
def _britecove_params(self, url):
    """
    Collect the video ID, account ID and policy key for a player URL.

    :param url: player URL with a "videoId" query parameter
    :return: dict with the keys "video_id", "account_id" and "policy_key",
             values which could not be found are None
    """
    def first_group(found):
        # no match: hand the falsy search result through
        return found and found.group(1)

    headers = {"User-Agent": useragents.FIREFOX,
               "Referer": self.url}
    page = http.get(url, headers=headers).text

    account = self.account_id_re.search(page)
    policy_key = self.policy_key_re.search(page)
    video_id = dict(parse_qsl(urlparse(url).query)).get("videoId")

    return {
        "video_id": video_id,
        "account_id": first_group(account),
        "policy_key": first_group(policy_key),
    }
def _create_rtmp_stream(self, cdn, stream_name):
    """
    Create a live RTMP stream for a CDN URL.

    :param cdn: RTMP URL of the CDN, its path without the first character is used as the app
    :param stream_name: play path of the stream
    :return: RTMP stream
    """
    app = urlparse(cdn).path[1:]
    return RTMPStream(self.session, dict(
        rtmp=cdn,
        app=app,
        playpath=stream_name,
        pageUrl=self.url,
        swfUrl=SWF_URL,
        live=True
    ))
def join(url, other):
    """
    Join a URL with another one, treating the path of the first URL as a directory.

    :param url: base URL, may be empty
    :param other: URL to append, returned as-is if it is absolute or if there is no base URL
    :return: joined URL
    """
    # absolute URLs and a missing base URL leave nothing to join
    if urlparse(other).scheme or not url:
        return other

    base = urlsplit(url)
    if not base.path.endswith("/"):
        # without the trailing slash, urljoin() would replace the last path segment
        base = base._replace(path=base.path + "/")

    return urljoin(urlunsplit(base), other)
def _extract_nonce(cls, http_result):
    """
    Given an HTTP response from the session endpoint, extract the nonce, so we can "sign" requests with it.
    We don't really sign the requests in the traditional sense of a nonce, we just include them in the auth requests.

    :param http_result: HTTP response from the bbc session endpoint.
    :type http_result: requests.Response
    :return: nonce to "sign" url requests with
    :rtype: string
    """
    def query_of(url):
        return dict(parse_qsl(urlparse(url).query))

    # The last redirect of the session request carries the "goto" URL in its query string
    last_redirect = http_result.history[-1].request.url
    goto_url = query_of(last_redirect)['goto']

    # The "state" parameter of the "goto" URL is a JSON document which contains the nonce
    state = parse_json(query_of(goto_url)['state'])

    return state['nonce']
def _get_streams(self):
    """
    Return the VOD stream if the URL has a "vod" query parameter, otherwise the channel's live stream.

    :return: result of _get_vod_stream() or _get_live_stream()
    """
    vod_id = dict(parse_qsl(urlparse(self.url).query)).get("vod")
    channel = _url_re.match(self.url).group("channel")

    if not vod_id:
        self.logger.debug("Looking for channel: {0}", channel)
        return self._get_live_stream(channel)

    self.logger.debug("Looking for VOD {0} from channel: {1}", vod_id, channel)
    return self._get_vod_stream(vod_id)