def test_compare_url_path(self):
    test_url = "https://www.facebook.com/plugins/123.html"
    parse_new_url = urlparse(test_url)
    self.assertTrue(self.res_plugin.compare_url_path(parse_new_url, self.res_plugin.blacklist_path))

    test_url = "https://example.com/123.html"
    parse_new_url = urlparse(test_url)
    self.assertFalse(self.res_plugin.compare_url_path(parse_new_url, self.res_plugin.blacklist_path))

def find_iframe(self, res):
    for url in self.iframe_re.findall(res.text):
        if url.startswith("//"):
            p = urlparse(self.url)
            return "{0}:{1}".format(p.scheme, url)
        else:
            return url

def _get_streams(self):
    http.headers.update({
        "User-Agent": useragents.CHROME,
        "Referer": self.referer
    })
    fragment = dict(parse_qsl(urlparse(self.url).fragment))
    link = fragment.get("link")
    if not link:
        link = self._get_tv_link()

    if not link:
        self.logger.error("Missing link fragment: stream unavailable")
        return

    player_url = self._api_url.format(link)
    self.logger.debug("Requesting player API: {0} (referer={1})", player_url, self.referer)
    res = http.get(player_url,
                   params={"_": int(time.time() * 1000)},
                   headers={"X-Requested-With": "XMLHttpRequest"})

    try:
        data = http.json(res, schema=self.api_schema)
    except PluginError as e:
        self.logger.debug(str(e))
        self.logger.error("Cannot play this stream type")
    else:
        if data["status"]:
            if data["file"].startswith("<"):
                self.logger.error("Cannot play embedded streams")
            else:
                return HLSStream.parse_variant_playlist(self.session, data["file"])
        else:
            self.logger.error(data["text"])

def update_scheme(current, target):
    """
    Take the scheme from the current URL and apply it to the target URL
    if the target URL starts with // or is missing a scheme.

    :param current: current URL
    :param target: target URL
    :return: target URL with the current URL's scheme
    """
    target_p = urlparse(target)
    if not target_p.scheme and target_p.netloc:
        return "{0}:{1}".format(urlparse(current).scheme,
                                urlunparse(target_p))
    elif not target_p.scheme and not target_p.netloc:
        return "{0}://{1}".format(urlparse(current).scheme,
                                  urlunparse(target_p))
    else:
        return target

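# A minimal usage sketch for update_scheme (the URLs below are illustrative,
# not taken from any plugin): scheme-relative ("//...") and scheme-less targets
# inherit the scheme of the current URL, fully-qualified targets pass through.
#
#     >>> update_scheme("https://example.com/page", "//cdn.example.com/hls.m3u8")
#     'https://cdn.example.com/hls.m3u8'
#     >>> update_scheme("https://example.com/page", "cdn.example.com/hls.m3u8")
#     'https://cdn.example.com/hls.m3u8'
#     >>> update_scheme("https://example.com/page", "http://cdn.example.com/hls.m3u8")
#     'http://cdn.example.com/hls.m3u8'
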
def _find_iframe(self, res):
    iframe = self.iframe_re.search(res.text)
    url = iframe and iframe.group(1)
    if url and url.startswith("//"):
        p = urlparse(self.url)
        url = "{0}:{1}".format(p.scheme, url)
    return url

def filter_urlquery(url, keys=[], keys_status=False):
    """Removes unwanted URL query parameters

    :param url: an URL
    :param keys: list of query parameter names
    :param keys_status: False = removes queries that are in keys
                        True = allows only queries that are in keys
    :return: URL with filtered query
    """
    parts = urlparse(url)
    query_dict = dict(parse_qsl(parts.query))
    new_query_dict = {}

    for key in keys:
        try:
            if keys_status is True:
                new_query_dict[key] = query_dict[key]
            else:
                del query_dict[key]
        except KeyError:
            continue

    new_parts = list(parts)
    if keys_status is True:
        new_parts[4] = unquote(urlencode(new_query_dict))
    else:
        new_parts[4] = unquote(urlencode(query_dict))

    url = urlunparse(new_parts)
    return url

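# Illustrative calls for filter_urlquery (hypothetical URL and keys): with the
# default keys_status=False the listed keys are stripped from the query, with
# keys_status=True only the listed keys are kept. The resulting parameter order
# follows the dict ordering of the parsed query.
#
#     >>> filter_urlquery("http://example.com/p?a=1&b=2&c=3", keys=["b"])
#     'http://example.com/p?a=1&c=3'
#     >>> filter_urlquery("http://example.com/p?a=1&b=2&c=3", keys=["b"], keys_status=True)
#     'http://example.com/p?b=2'
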
def uri(self, uri):
    if uri and urlparse(uri).scheme:
        return uri
    elif self.base_uri and uri:
        return urljoin(self.base_uri, uri)
    else:
        return uri

def prepend_www(url):
    """Changes google.com to www.google.com"""
    parsed = urlparse(url)
    if parsed.netloc.split(".")[0] != "www":
        return parsed.scheme + "://www." + parsed.netloc + parsed.path
    else:
        return url

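# Example behaviour of prepend_www (hypothetical URLs): a "www." label is added
# only when the first host label is not already "www". Note that only scheme,
# netloc and path are rebuilt, so any query string or fragment is dropped.
#
#     >>> prepend_www("https://google.com/search")
#     'https://www.google.com/search'
#     >>> prepend_www("https://www.google.com/search")
#     'https://www.google.com/search'
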
def find_iframe(self, res):
    p = urlparse(self.url)
    for url in self.iframe_re.findall(res.text):
        if "googletagmanager" not in url:
            if url.startswith("//"):
                return "{0}:{1}".format(p.scheme, url)
            else:
                return url

def _get_video_streams(self, player):
    base_url = player["clip"]["baseUrl"] or VOD_BASE_URL
    mapper = StreamMapper(
        cmp=lambda ext, bitrate: urlparse(bitrate["url"]).path.endswith(ext)
    )
    mapper.map(".m3u8", self._create_video_stream, HLSStream, base_url)
    mapper.map(".mp4", self._create_video_stream, HTTPStream, base_url)
    mapper.map(".flv", self._create_video_stream, HTTPStream, base_url)

    return mapper(player["clip"]["bitrates"])

def test_compare_url_path(self):
    rr = Resolve("https://example.com")
    from livecli.compat import urlparse
    blacklist_path = [
        ("expressen.se", "/_livetvpreview/"),
        ("facebook.com", "/plugins"),
        ("vesti.ru", "/native_widget.html"),
    ]
    url_true = "https://www.facebook.com/plugins/123.html"
    url_false = "https://example.com/123.html"

    parse_new_url = urlparse(url_true)
    self.assertTrue(rr.compare_url_path(parse_new_url, blacklist_path))

    parse_new_url = urlparse(url_false)
    self.assertFalse(rr.compare_url_path(parse_new_url, blacklist_path))

def from_url(cls, session, url):
    purl = urlparse(url)
    querys = dict(parse_qsl(purl.query))

    account_id, player_id, _ = purl.path.lstrip("/").split("/", 3)
    video_id = querys.get("videoId")

    bp = cls(session, account_id=account_id, player_id=player_id)
    return bp.get_streams(video_id)

def auth_url(self, url):
    parsed = urlparse(url)
    path, _ = parsed.path.rsplit("/", 1)
    token_res = http.get(self.token_url, params=dict(acl=path + "/*"))
    authparams = http.json(token_res, schema=self.token_schema)

    existing = dict(parse_qsl(parsed.query))
    existing.update(dict(parse_qsl(authparams)))

    return urlunparse(parsed._replace(query=urlencode(existing)))

def _get_streams(self):
    is_live = False

    info = self._get_stream_info(self.url)
    if not info:
        return

    if info.get("livestream") == '1' or info.get("live_playback") == '1':
        self.logger.debug("This video is live.")
        is_live = True

    formats = info.get("fmt_list")
    streams = {}
    protected = False
    for stream_info in info.get("url_encoded_fmt_stream_map", []):
        if stream_info.get("s"):
            protected = True
            continue

        stream = HTTPStream(self.session, stream_info["url"])
        name = formats.get(stream_info["itag"]) or stream_info["quality"]

        if stream_info.get("stereo3d"):
            name += "_3d"

        streams[name] = stream

    if is_live is False:
        streams, protected = self._create_adaptive_streams(info, streams, protected)

    hls_playlist = info.get("hlsvp")
    if hls_playlist:
        parsed = urlparse(self.url)
        params = parse_query(parsed.query)
        time_offset = params.get("t")
        if time_offset:
            self.session.set_option("hls-start-offset", time_to_offset(params.get("t")))

        try:
            hls_streams = HLSStream.parse_variant_playlist(
                self.session, hls_playlist, headers=HLS_HEADERS, namekey="pixels"
            )
            streams.update(hls_streams)
        except IOError as err:
            self.logger.warning("Failed to extract HLS streams: {0}", err)

    if not streams and protected:
        raise PluginError("This plugin does not support protected videos, "
                          "try youtube-dl instead")

    return streams

def get_stream_url(self, event_id):
    url_m = self.url_re.match(self.url)
    site = url_m.group(1) or url_m.group(2)
    api_url = self.api_url.format(id=event_id, site=site.upper())
    self.logger.debug("Calling API: {0}", api_url)

    stream_url = http.get(api_url).text.strip("\"'")

    parsed = urlparse(stream_url)
    query = dict(parse_qsl(parsed.query))
    return urlunparse(parsed._replace(query="")), query

def _get_streams(self):
    res = http.get(self.url, schema=_schema)
    if not res:
        return

    if res["type"] == "channel" and urlparse(res["url"]).path.endswith("m3u8"):
        return HLSStream.parse_variant_playlist(self.session, res["url"])
    elif res["type"] == "video":
        stream = HTTPStream(self.session, res["url"])
        return dict(video=stream)

def _extract_nonce(cls, http_result):
    """
    Given an HTTP response from the session endpoint, extract the nonce, so we can
    "sign" requests with it. We don't really sign the requests in the traditional
    sense of a nonce, we just include it in the auth requests.

    :param http_result: HTTP response from the bbc session endpoint.
    :type http_result: requests.Response
    :return: nonce to "sign" url requests with
    :rtype: string
    """
    # Extract the redirect URL from the last call
    last_redirect_url = urlparse(http_result.history[-1].request.url)
    last_redirect_query = dict(parse_qsl(last_redirect_url.query))
    # Extract the nonce from the query string in the redirect URL
    final_url = urlparse(last_redirect_query['goto'])
    goto_url = dict(parse_qsl(final_url.query))
    goto_url_query = parse_json(goto_url['state'])

    # Return the nonce we can use for future queries
    return goto_url_query['nonce']

def _get_streams(self):
    res = http.get(self.url)
    match = _meta_xmlurl_id_re.search(res.text)
    if not match:
        return

    xml_info_url = STREAMS_INFO_URL.format(match.group(1))
    video_info_res = http.get(xml_info_url)
    parsed_info = http.xml(video_info_res)

    live_el = parsed_info.find("live")
    live = live_el is not None and live_el.text == "1"

    streams = {}

    hdsurl_el = parsed_info.find("hdsurl")
    if hdsurl_el is not None and hdsurl_el.text is not None:
        hdsurl = hdsurl_el.text
        streams.update(HDSStream.parse_manifest(self.session, hdsurl))

    if live:
        vurls_el = parsed_info.find("vurls")
        if vurls_el is not None:
            for i, vurl_el in enumerate(vurls_el):
                bitrate = vurl_el.get("bitrate")
                name = bitrate + "k" if bitrate is not None else "rtmp{0}".format(i)
                params = {
                    "rtmp": vurl_el.text,
                }
                streams[name] = RTMPStream(self.session, params)

    parsed_urls = set()
    mobileurls_el = parsed_info.find("mobileurls")
    if mobileurls_el is not None:
        for mobileurl_el in mobileurls_el:
            text = mobileurl_el.text
            if not text:
                continue
            if text in parsed_urls:
                continue

            parsed_urls.add(text)
            url = urlparse(text)
            if url[0] == "http" and url[2].endswith("m3u8"):
                streams.update(HLSStream.parse_variant_playlist(self.session, text))

    return streams

def follow_vk_redirect(cls, url):
    # If this is a 'videos' catalog URL with a video ID in the GET request, get that instead
    parsed_url = urlparse(url)
    if parsed_url.path.startswith('/videos-'):
        query = {v[0]: v[1] for v in [q.split('=') for q in parsed_url.query.split('&')] if v[0] == 'z'}
        try:
            true_path = unquote(query['z']).split('/')[0]
            return parsed_url.scheme + '://' + parsed_url.netloc + '/' + true_path
        except KeyError:
            # No redirect found in query string, so return the catalog url and fail later
            return url
    else:
        return url

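# A hedged example of the redirect resolution above (the IDs are made up and
# the call is shown on a hypothetical plugin class exposing this classmethod):
# a "videos-" catalog URL carrying a "z" query parameter is rewritten to the
# direct video path, anything else is returned unchanged.
#
#     >>> Plugin.follow_vk_redirect(
#     ...     "https://vk.com/videos-12345?z=video-12345_67890%2Fpl_-12345")
#     'https://vk.com/video-12345_67890'
#     >>> Plugin.follow_vk_redirect("https://vk.com/video-12345_67890")
#     'https://vk.com/video-12345_67890'
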
def _get_streams(self):
    params = dict(parse_qsl(urlparse(self.url).query))
    vod_id = params.get("vod")
    match = _url_re.match(self.url)
    channel = match.group("channel")

    if vod_id:
        self.logger.debug("Looking for VOD {0} from channel: {1}", vod_id, channel)
        return self._get_vod_stream(vod_id)
    else:
        self.logger.debug("Looking for channel: {0}", channel)
        return self._get_live_stream(channel)

def _britecove_params(self, url):
    res = http.get(url, headers={
        "User-Agent": useragents.FIREFOX,
        "Referer": self.url
    })
    acc = self.account_id_re.search(res.text)
    pk = self.policy_key_re.search(res.text)

    query = dict(parse_qsl(urlparse(url).query))
    return {
        "video_id": query.get("videoId"),
        "account_id": acc and acc.group(1),
        "policy_key": pk and pk.group(1),
    }

def _get_streams(self):
    # Get the query string
    encrypted_data = urlparse(self.url).query
    data = base64.b64decode(encrypted_data)
    # and decrypt it
    passphrase = self.passphrase()
    if passphrase:
        params = decrypt_openssl(data, passphrase)
        config = parse_qsd(params.decode("utf8"))
        return HLSStream.parse_variant_playlist(
            self.session,
            self.stream_url.format(time=self.time,
                                   deviceId=self.device_id,
                                   token=self.get_token(**config),
                                   **config))

def priority(cls, url):
    """
    Returns LOW priority if the URL is not prefixed with hds:// but ends
    with .f4m, and returns NORMAL priority if the URL is prefixed.

    :param url: the URL to find the plugin priority for
    :return: plugin priority for the given URL
    """
    m = cls._url_re.match(url)
    if m:
        prefix, url = m.groups()
        url_path = urlparse(url).path
        if prefix is None and url_path.endswith(".f4m"):
            return LOW_PRIORITY
        elif prefix is not None:
            return NORMAL_PRIORITY
    return NO_PRIORITY

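# Hedged illustration of the priority rules documented above, assuming that
# cls._url_re captures an optional "hds://" prefix plus the remaining URL
# (the exact regex is not shown in this excerpt):
#
#     priority("http://example.com/manifest.f4m")    # LOW_PRIORITY: no prefix, path ends in .f4m
#     priority("hds://http://example.com/manifest")  # NORMAL_PRIORITY: explicit prefix
#     priority("ftp://example.com/video")            # NO_PRIORITY when _url_re does not match
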
def _get_stream_info(self, url):
    match = _url_re.match(url)
    user = match.group("user")
    live_channel = match.group("liveChannel")

    if user:
        video_id = self._find_channel_video()
    elif live_channel:
        return self._find_canonical_stream_info()
    else:
        video_id = match.group("video_id") or match.group("video_id_2")
        if video_id == "live_stream":
            query_info = dict(parse_qsl(urlparse(url).query))
            if "channel" in query_info:
                video_id = self._get_channel_video(query_info["channel"])

    if not video_id:
        return

    # normal
    _params_1 = {"el": "detailpage"}
    # age restricted
    _params_2 = {"el": "embedded"}
    # embedded restricted
    _params_3 = {
        "eurl": "https://youtube.googleapis.com/v/{0}".format(video_id)
    }

    count = 0
    for _params in (_params_1, _params_2, _params_3):
        count += 1
        params = {"video_id": video_id}
        params.update(_params)

        res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
        info_parsed = parse_query(res.text, name="config", schema=_config_schema)
        if info_parsed.get("status") == "fail":
            self.logger.debug("get_video_info - {0}: {1}".format(
                count, info_parsed.get("reason")))
            continue
        self.stream_title = info_parsed.get("title")
        self.logger.debug("get_video_info - {0}: Found data".format(count))
        break

    return info_parsed

def merge_path_list(self, static, user):
    """Merge the static list with a user-provided list

    Args:
        static (list): static list from this plugin
        user (list): list from a user command

    Returns:
        A new valid list
    """
    for _path_url in user:
        if not _path_url.startswith(("http", "//")):
            _path_url = update_scheme("http://", _path_url)
        _parsed_path_url = urlparse(_path_url)
        if _parsed_path_url.netloc and _parsed_path_url.path:
            static += [(_parsed_path_url.netloc, _parsed_path_url.path)]
    return static

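# Worked example for merge_path_list (hypothetical user input, method shown
# without its self argument for brevity): entries without a scheme are
# normalised via update_scheme() before being split into (netloc, path)
# tuples and appended to the static list.
#
#     >>> static = [("facebook.com", "/plugins")]
#     >>> merge_path_list(static, ["mycdn.example/player/"])
#     [('facebook.com', '/plugins'), ('mycdn.example', '/player/')]
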
def _resolve_playlist(self, res, playlist_all):
    """Yield streams for _resolve_res

    Args:
        res: Content from self._res_text
        playlist_all: List of streams

    Returns:
        yields every stream
    """
    for url in playlist_all:
        parsed_url = urlparse(url)
        if parsed_url.path.endswith(".m3u8"):
            try:
                streams = HLSStream.parse_variant_playlist(
                    self.session, url, headers=self.headers).items()
                if not streams:
                    yield "live", HLSStream(self.session, url, headers=self.headers)
                for s in streams:
                    yield s
            except Exception as e:
                self.logger.error("Skipping hls_url - {0}".format(str(e)))
                self.help_info_e(e)
        elif parsed_url.path.endswith(".f4m"):
            try:
                for s in HDSStream.parse_manifest(
                        self.session, url, headers=self.headers).items():
                    yield s
            except Exception as e:
                self.logger.error("Skipping hds_url - {0}".format(str(e)))
                self.help_info_e(e)
        elif parsed_url.path.endswith((".mp3", ".mp4")):
            try:
                name = "live"
                m = self._httpstream_bitrate_re.search(url)
                if m:
                    name = "{0}k".format(m.group("bitrate"))
                yield name, HTTPStream(self.session, url, headers=self.headers)
            except Exception as e:
                self.logger.error("Skipping http_url - {0}".format(str(e)))
                self.help_info_e(e)
        elif parsed_url.path.endswith(".mpd"):
            try:
                self.logger.info("Found mpd: {0}".format(url))
            except Exception as e:
                self.logger.error("Skipping mpd_url - {0}".format(str(e)))
                self.help_info_e(e)

def _get_streams(self):
    res = http.get(self.url, schema=_schema)
    streams = {}
    for url in res["urls"]:
        parsed = urlparse(url)
        if parsed.scheme.startswith("rtmp"):
            params = {"rtmp": url, "pageUrl": self.url, "live": True}
            if res["swf"]:
                params["swfVfy"] = res["swf"]

            stream = RTMPStream(self.session, params)
            streams["live"] = stream
        elif parsed.scheme.startswith("http"):
            name = splitext(parsed.path)[1][1:]
            stream = HTTPStream(self.session, url)
            streams[name] = stream

    return streams

def _create_stream(self, stream, language):
    stream_name = "{0}p".format(stream["height"])
    stream_type = stream["mediaType"]
    stream_url = stream["url"]
    stream_language = stream["versionShortLibelle"]

    if language == "de":
        language = ["DE", "VOST-DE", "VA", "VOA", "Dt. Live", "OV", "OmU"]
    elif language == "en":
        language = ["ANG", "VOST-ANG"]
    elif language == "es":
        language = ["ESP", "VOST-ESP"]
    elif language == "fr":
        language = ["FR", "VOST-FR", "VF", "VOF", "Frz. Live", "VO", "ST mal"]
    elif language == "pl":
        language = ["POL", "VOST-POL"]

    if stream_language in language:
        if stream_type in ("hls", "mp4"):
            if urlparse(stream_url).path.endswith("m3u8"):
                try:
                    streams = HLSStream.parse_variant_playlist(self.session, stream_url)
                    for stream in streams.items():
                        yield stream
                except IOError as err:
                    self.logger.error("Failed to extract HLS streams: {0}", err)
            else:
                yield stream_name, HTTPStream(self.session, stream_url)
        elif stream_type == "f4m":
            try:
                streams = HDSStream.parse_manifest(self.session, stream_url)
                for stream in streams.items():
                    yield stream
            except IOError as err:
                self.logger.error("Failed to extract HDS streams: {0}", err)

def get_video_id(self):
    parsed = urlparse(self.url)
    qinfo = dict(parse_qsl(parsed.query or parsed.fragment.lstrip("?")))

    site, video_id = None, None
    url_m = self.url_re.match(self.url)

    # look for the video id in the URL, otherwise find it in the page
    if "tvLiveId" in qinfo:
        video_id = qinfo["tvLiveId"]
        site = url_m.group(1)
    elif url_m.group(2):
        site, video_id = url_m.group(1), url_m.group(2)
    else:
        video_id_m = http.get(self.url, schema=self.video_id_schema)
        if video_id_m:
            site, video_id = video_id_m.groups()

    return site, video_id

def _resolve_playlist(self, playlist_all):
    """Create streams

    Args:
        playlist_all: List of stream urls

    Returns:
        all streams
    """
    http.headers.update({"Referer": self.url})
    for url in playlist_all:
        parsed_url = urlparse(url)
        if parsed_url.path.endswith(".m3u8"):
            try:
                streams = HLSStream.parse_variant_playlist(self.session, url).items()
                if not streams:
                    yield "live", HLSStream(self.session, url)
                for s in streams:
                    yield s
            except Exception as e:
                self.logger.error("Skipping hls_url - {0}".format(str(e)))
        elif parsed_url.path.endswith(".f4m"):
            try:
                for s in HDSStream.parse_manifest(self.session, url).items():
                    yield s
            except Exception as e:
                self.logger.error("Skipping hds_url - {0}".format(str(e)))
        elif parsed_url.path.endswith((".mp3", ".mp4")):
            try:
                name = "live"
                m = self._httpstream_bitrate_re.search(url)
                if m:
                    name = "{0}k".format(m.group("bitrate"))
                yield name, HTTPStream(self.session, url)
            except Exception as e:
                self.logger.error("Skipping http_url - {0}".format(str(e)))
        elif parsed_url.path.endswith(".mpd"):
            try:
                self.logger.info("Found mpd: {0}".format(url))
            except Exception as e:
                self.logger.error("Skipping mpd_url - {0}".format(str(e)))
