Ejemplo n.º 1
0
    def test_compare_url_path(self):
        """Check ``compare_url_path`` against the plugin's own blacklist.

        A Facebook plugin URL is expected to be reported as a match, an
        unrelated example.com URL is expected not to be.
        """
        plugin = self.res_plugin

        blacklisted = urlparse("https://www.facebook.com/plugins/123.html")
        self.assertTrue(
            plugin.compare_url_path(blacklisted, plugin.blacklist_path))

        harmless = urlparse("https://example.com/123.html")
        self.assertFalse(
            plugin.compare_url_path(harmless, plugin.blacklist_path))
Ejemplo n.º 2
0
 def find_iframe(self, res):
     """Return the first URL matched by ``self.iframe_re`` in the response.

     :param res: response object whose ``text`` attribute is searched
     :return: the first match; a protocol-relative match (starting with
              ``//``) gets the scheme of ``self.url`` prepended.
              ``None`` when nothing matches.
     """
     found = self.iframe_re.findall(res.text)
     if not found:
         return None

     first = found[0]
     if not first.startswith("//"):
         return first
     return "{0}:{1}".format(urlparse(self.url).scheme, first)
Ejemplo n.º 3
0
    def _get_streams(self):
        """Query the player API for this page and return its HLS streams.

        The link is taken from the ``link`` key of the URL fragment; when the
        fragment has none, ``self._get_tv_link()`` is used instead.

        :return: the result of ``HLSStream.parse_variant_playlist`` for the
                 file reported by the API, or ``None`` when no playable
                 stream is available (an error is logged in that case)
        """
        http.headers.update({
            "User-Agent": useragents.CHROME,
            "Referer": self.referer
        })
        fragment = dict(parse_qsl(urlparse(self.url).fragment))
        link = fragment.get("link") or self._get_tv_link()

        if not link:
            self.logger.error("Missing link fragment: stream unavailable")
            return None

        player_url = self._api_url.format(link)
        self.logger.debug("Requesting player API: {0} (referer={1})",
                          player_url, self.referer)
        # The "_" parameter carries the current time in milliseconds.
        res = http.get(player_url,
                       params={"_": int(time.time() * 1000)},
                       headers={"X-Requested-With": "XMLHttpRequest"})

        try:
            data = http.json(res, schema=self.api_schema)
        except PluginError as e:
            # Report the details through the plugin logger rather than
            # print(), so nothing unexpected is written to stdout.
            self.logger.debug("Player API response rejected: {0}", e)
            self.logger.error("Cannot play this stream type")
            return None

        if not data["status"]:
            self.logger.error(data["text"])
            return None

        if data["file"].startswith("<"):
            self.logger.error("Cannot play embedded streams")
            return None

        return HLSStream.parse_variant_playlist(self.session, data["file"])
Ejemplo n.º 4
0
def update_scheme(current, target):
    """
    Apply the scheme of the current URL to the target URL when the target
    URL starts with // or has no scheme at all.

    :param current: current URL, the source of the scheme
    :param target: target URL
    :return: target URL carrying the scheme of the current URL, or the
             target URL unchanged when it already has a scheme
    """
    parsed_target = urlparse(target)
    if parsed_target.scheme:
        return target

    # "//host/path" only needs "scheme:" in front, "host/path" needs
    # "scheme://".
    template = "{0}:{1}" if parsed_target.netloc else "{0}://{1}"
    return template.format(urlparse(current).scheme,
                           urlunparse(parsed_target))
Ejemplo n.º 5
0
 def _find_iframe(self, res):
     """Return group 1 of the first ``self.iframe_re`` match in the response.

     :param res: response object whose ``text`` attribute is searched
     :return: the captured URL; a protocol-relative URL (starting with
              ``//``) gets the scheme of ``self.url`` prepended.
              ``None`` when the pattern does not match.
     """
     match = self.iframe_re.search(res.text)
     if match is None:
         return None

     found = match.group(1)
     if found and found.startswith("//"):
         return "{0}:{1}".format(urlparse(self.url).scheme, found)
     return found
Ejemplo n.º 6
0
def filter_urlquery(url, keys=(), keys_status=False):
    """Remove unwanted parameters from the query string of a URL.

    :param url: a URL
    :param keys: iterable of query parameter names
    :param keys_status: False = remove the parameters that are in keys
                        True = keep only the parameters that are in keys
    :return: URL with the filtered query
    """
    parts = urlparse(url)
    query_dict = dict(parse_qsl(parts.query))

    if keys_status is True:
        # Keep only the requested parameters, in the order given by keys;
        # names missing from the URL are skipped.
        filtered = {key: query_dict[key] for key in keys if key in query_dict}
    else:
        for key in keys:
            query_dict.pop(key, None)
        filtered = query_dict

    # unquote() reverts the percent-encoding applied by urlencode().
    return urlunparse(parts._replace(query=unquote(urlencode(filtered))))
Ejemplo n.º 7
0
 def uri(self, uri):
     """Resolve a URI against ``self.base_uri`` when it has no scheme.

     :param uri: URI to resolve; may be empty or ``None``
     :return: ``uri`` unchanged when it is empty, already has a scheme, or
              no base URI is set; otherwise ``uri`` joined onto
              ``self.base_uri``
     """
     if not uri:
         return uri
     if urlparse(uri).scheme or not self.base_uri:
         return uri
     return urljoin(self.base_uri, uri)
Ejemplo n.º 8
0
def prepend_www(url):
    """Changes google.com to www.google.com

    Only the network location is modified; path, params, query and fragment
    of the URL are preserved.

    :param url: URL to normalize
    :return: the URL with "www." prepended to its network location. The URL
             is returned unchanged when its first host label already is
             "www" or when it has no network location to prepend to.
    """
    parsed = urlparse(url)
    if not parsed.netloc or parsed.netloc.split(".")[0] == "www":
        return url
    # geturl() reassembles every component, so the query string and the
    # fragment are not lost.
    return parsed._replace(netloc="www." + parsed.netloc).geturl()
Ejemplo n.º 9
0
 def find_iframe(self, res):
     """Return the first iframe URL that is not a googletagmanager one.

     :param res: response object whose ``text`` attribute is searched with
                 ``self.iframe_re``
     :return: the first match not containing "googletagmanager"; a
              protocol-relative match (starting with ``//``) gets the
              scheme of ``self.url`` prepended. ``None`` when no match
              qualifies.
     """
     page = urlparse(self.url)
     candidates = (found for found in self.iframe_re.findall(res.text)
                   if "googletagmanager" not in found)
     iframe_url = next(candidates, None)

     if iframe_url is not None and iframe_url.startswith("//"):
         return "{0}:{1}".format(page.scheme, iframe_url)
     return iframe_url
Ejemplo n.º 10
0
    def _get_video_streams(self, player):
        """Map the bitrates of a clip to streams by URL file extension.

        :param player: mapping with a ``clip`` entry providing ``baseUrl``
                       and ``bitrates``
        :return: the result of calling the configured ``StreamMapper`` on
                 the clip's bitrates
        """
        clip = player["clip"]
        # Fall back to the default VOD base URL when the clip has none.
        base_url = clip["baseUrl"] or VOD_BASE_URL

        def has_extension(ext, bitrate):
            # A bitrate belongs to a mapping when its URL path ends with
            # the mapping's extension.
            return urlparse(bitrate["url"]).path.endswith(ext)

        mapper = StreamMapper(cmp=has_extension)
        stream_types = (
            (".m3u8", HLSStream),
            (".mp4", HTTPStream),
            (".flv", HTTPStream),
        )
        for ext, stream_cls in stream_types:
            mapper.map(ext, self._create_video_stream, stream_cls, base_url)

        return mapper(clip["bitrates"])
Ejemplo n.º 11
0
    def test_compare_url_path(self):
        """Check ``Resolve.compare_url_path`` against a small blacklist.

        A URL whose host and path are covered by the blacklist is expected
        to match, an unrelated URL is expected not to.
        """
        rr = Resolve("https://example.com")
        from livecli.compat import urlparse

        blacklist_path = [
            ("expressen.se", "/_livetvpreview/"),
            ("facebook.com", "/plugins"),
            ("vesti.ru", "/native_widget.html"),
        ]

        blacklisted = urlparse("https://www.facebook.com/plugins/123.html")
        self.assertTrue(rr.compare_url_path(blacklisted, blacklist_path))

        harmless = urlparse("https://example.com/123.html")
        self.assertFalse(rr.compare_url_path(harmless, blacklist_path))
Ejemplo n.º 12
0
    def from_url(cls, session, url):
        """Create a player from a player URL and return the video's streams.

        The first two path segments of the URL are used as the account ID
        and the player ID; the video ID is read from the ``videoId`` query
        parameter.

        :param session: session object passed on to the ``cls`` constructor
        :param url: player URL with at least three path segments
        :return: the result of ``get_streams`` for the video ID (``None`` is
                 passed when the URL has no ``videoId`` parameter)
        :raises ValueError: if the URL path has fewer than three segments
        """
        purl = urlparse(url)
        querys = dict(parse_qsl(purl.query))

        # maxsplit=2 yields at most three parts, so deeper paths still unpack
        # cleanly; maxsplit=3 could yield a fourth part and raise ValueError.
        account_id, player_id, _ = purl.path.lstrip("/").split("/", 2)
        video_id = querys.get("videoId")

        bp = cls(session, account_id=account_id, player_id=player_id)
        return bp.get_streams(video_id)
Ejemplo n.º 13
0
    def auth_url(self, url):
        """Return ``url`` with the token parameters merged into its query.

        A token is requested from ``self.token_url`` for the directory of
        the URL path (sent as ``acl``, suffixed with ``/*``). The response
        is parsed as a query string and merged over the existing query
        parameters of ``url``.

        :param url: URL to authorize
        :return: the URL with the merged, re-encoded query string
        """
        parsed = urlparse(url)
        directory, _ = parsed.path.rsplit("/", 1)

        token_res = http.get(self.token_url, params={"acl": directory + "/*"})
        authparams = http.json(token_res, schema=self.token_schema)

        # Token parameters win over parameters already present in the URL.
        query = dict(parse_qsl(parsed.query))
        query.update(parse_qsl(authparams))

        return urlunparse(parsed._replace(query=urlencode(query)))
Ejemplo n.º 14
0
    def _get_streams(self):
        """Collect the available streams for ``self.url``.

        Streams come from three sources: the entries of
        ``url_encoded_fmt_stream_map``, the adaptive streams (skipped for
        live videos) and the HLS playlist in ``hlsvp``.

        :return: dict of stream name to stream, or ``None`` when no stream
                 info could be retrieved
        :raises PluginError: if nothing was found and at least one stream
                             was skipped as protected
        """
        info = self._get_stream_info(self.url)
        if not info:
            return

        is_live = (info.get("livestream") == '1'
                   or info.get("live_playback") == '1')
        if is_live:
            self.logger.debug("This video is live.")

        formats = info.get("fmt_list")
        streams = {}
        protected = False
        for fmt in info.get("url_encoded_fmt_stream_map", []):
            # Entries carrying an "s" value are counted as protected and
            # skipped.
            if fmt.get("s"):
                protected = True
                continue

            http_stream = HTTPStream(self.session, fmt["url"])
            name = formats.get(fmt["itag"]) or fmt["quality"]
            if fmt.get("stereo3d"):
                name += "_3d"

            streams[name] = http_stream

        if not is_live:
            streams, protected = self._create_adaptive_streams(
                info, streams, protected)

        hls_playlist = info.get("hlsvp")
        if hls_playlist:
            # A "t" query parameter in the page URL sets the HLS start offset.
            query = parse_query(urlparse(self.url).query)
            time_offset = query.get("t")
            if time_offset:
                self.session.set_option("hls-start-offset",
                                        time_to_offset(time_offset))

            try:
                streams.update(HLSStream.parse_variant_playlist(
                    self.session,
                    hls_playlist,
                    headers=HLS_HEADERS,
                    namekey="pixels"))
            except IOError as err:
                self.logger.warning("Failed to extract HLS streams: {0}", err)

        if protected and not streams:
            raise PluginError("This plugin does not support protected videos, "
                              "try youtube-dl instead")

        return streams
Ejemplo n.º 15
0
    def get_stream_url(self, event_id):
        """Ask the API for the stream URL of an event.

        The site is taken from group 1 (or, failing that, group 2) of
        ``self.url_re`` matched against ``self.url`` and sent upper-cased.

        :param event_id: ID of the event, inserted into ``self.api_url``
        :return: tuple of (stream URL without its query string, dict of the
                 query parameters that were removed)
        """
        url_m = self.url_re.match(self.url)
        site = url_m.group(1) or url_m.group(2)

        api_url = self.api_url.format(id=event_id, site=site.upper())
        self.logger.debug("Calling API: {0}", api_url)

        # The response body is the URL, possibly wrapped in quotes.
        raw_url = http.get(api_url).text.strip("\"'")
        parsed = urlparse(raw_url)

        params = dict(parse_qsl(parsed.query))
        bare_url = urlunparse(parsed._replace(query=""))
        return bare_url, params
Ejemplo n.º 16
0
    def _get_streams(self):
        """Return the streams for the page, depending on its content type.

        :return: the parsed HLS variant playlist for a ``channel`` whose URL
                 path ends with "m3u8", a dict with a single ``video``
                 HTTP stream for a ``video``, otherwise ``None``
        """
        res = http.get(self.url, schema=_schema)
        if not res:
            return

        content_type = res["type"]
        if content_type == "channel":
            if urlparse(res["url"]).path.endswith("m3u8"):
                return HLSStream.parse_variant_playlist(
                    self.session, res["url"])
        elif content_type == "video":
            return dict(video=HTTPStream(self.session, res["url"]))
Ejemplo n.º 17
0
    def _extract_nonce(cls, http_result):
        """
        Given an HTTP response from the session endpoint, extract the nonce, so we can "sign" requests with it.
        We don't really sign the requests in the traditional sense of a nonce, we just include it in the auth requests.

        :param http_result: HTTP response from the bbc session endpoint.
        :type http_result: requests.Response
        :return: nonce to "sign" url requests with
        :rtype: string
        """

        # The URL requested by the last redirect carries a "goto" parameter
        redirect = urlparse(http_result.history[-1].request.url)
        redirect_query = dict(parse_qsl(redirect.query))

        # The "goto" URL has a JSON "state" parameter that holds the nonce
        goto = urlparse(redirect_query['goto'])
        goto_query = dict(parse_qsl(goto.query))
        state = parse_json(goto_query['state'])

        # Return the nonce we can use for future queries
        return state['nonce']
Ejemplo n.º 18
0
    def _get_streams(self):
        """Build the streams described by the XML stream info of the page.

        The info document may contribute an HDS manifest (``hdsurl``), RTMP
        streams (``vurls``, live only) and HLS playlists (``mobileurls``).

        :return: dict of stream name to stream, or ``None`` when the page
                 holds no stream info ID
        """
        page = http.get(self.url)

        id_match = _meta_xmlurl_id_re.search(page.text)
        if not id_match:
            return

        info_res = http.get(STREAMS_INFO_URL.format(id_match.group(1)))
        info = http.xml(info_res)

        live_el = info.find("live")
        is_live = live_el is not None and live_el.text == "1"

        streams = {}

        hds_el = info.find("hdsurl")
        if hds_el is not None and hds_el.text is not None:
            streams.update(HDSStream.parse_manifest(self.session, hds_el.text))

        if is_live:
            vurls_el = info.find("vurls")
            if vurls_el is not None:
                for index, vurl_el in enumerate(vurls_el):
                    # Name the stream after its bitrate, or after its
                    # position when no bitrate is given.
                    bitrate = vurl_el.get("bitrate")
                    if bitrate is not None:
                        name = bitrate + "k"
                    else:
                        name = "rtmp{0}".format(index)
                    streams[name] = RTMPStream(self.session,
                                               {"rtmp": vurl_el.text})

        seen_urls = set()
        mobile_el = info.find("mobileurls")
        if mobile_el is not None:
            for url_el in mobile_el:
                text = url_el.text
                # Skip empty entries and URLs that were handled already.
                if not text or text in seen_urls:
                    continue
                seen_urls.add(text)

                parsed = urlparse(text)
                if parsed.scheme == "http" and parsed.path.endswith("m3u8"):
                    streams.update(
                        HLSStream.parse_variant_playlist(self.session, text))

        return streams
Ejemplo n.º 19
0
 def follow_vk_redirect(cls, url):
     """Resolve a 'videos' catalog URL to the video it points at.

     :param url: URL to inspect
     :return: for a URL whose path starts with ``/videos-`` and whose query
              has a non-empty ``z`` parameter, a URL on the same host
              whose path is the first ``/``-separated part of the unquoted
              ``z`` value; otherwise ``url`` unchanged
     """
     # If this is a 'videos' catalog URL with a video ID in the GET request, get that instead
     parsed_url = urlparse(url)
     if not parsed_url.path.startswith('/videos-'):
         return url

     redirect = None
     for pair in parsed_url.query.split('&'):
         # partition() copes with a bare "z" (no "=") and keeps values that
         # themselves contain "=" intact.
         key, _, value = pair.partition('=')
         if key == 'z':
             redirect = value  # the last "z" parameter wins

     if not redirect:
         # No redirect found in query string, so return the catalog url and fail later
         return url

     true_path = unquote(redirect).split('/')[0]
     return parsed_url.scheme + '://' + parsed_url.netloc + '/' + true_path
Ejemplo n.º 20
0
    def _get_streams(self):
        """Return the VOD stream or the live stream addressed by the URL.

        A ``vod`` query parameter selects a VOD; without it the live stream
        of the channel named in the URL is looked up.

        :return: the result of ``_get_vod_stream`` or ``_get_live_stream``
        """
        query = dict(parse_qsl(urlparse(self.url).query))
        vod_id = query.get("vod")
        channel = _url_re.match(self.url).group("channel")

        if not vod_id:
            self.logger.debug("Looking for channel: {0}", channel)
            return self._get_live_stream(channel)

        self.logger.debug("Looking for VOD {0} from channel: {1}", vod_id,
                          channel)
        return self._get_vod_stream(vod_id)
Ejemplo n.º 21
0
    def _britecove_params(self, url):
        """Collect the player parameters found on a player page.

        The page at ``url`` is fetched with ``self.url`` as referer and
        searched with ``self.account_id_re`` and ``self.policy_key_re``.

        :param url: player URL; its ``videoId`` query parameter is used
        :return: dict with ``video_id``, ``account_id`` and ``policy_key``;
                 a value is ``None`` when it could not be found
        """
        request_headers = {
            "User-Agent": useragents.FIREFOX,
            "Referer": self.url
        }
        res = http.get(url, headers=request_headers)

        account_m = self.account_id_re.search(res.text)
        policy_m = self.policy_key_re.search(res.text)
        video_id = dict(parse_qsl(urlparse(url).query)).get("videoId")

        return {
            "video_id": video_id,
            "account_id": account_m and account_m.group(1),
            "policy_key": policy_m and policy_m.group(1),
        }
Ejemplo n.º 22
0
    def _get_streams(self):
        """Decrypt the stream config in the URL query and return HLS streams.

        The query string of ``self.url`` is base64-decoded and decrypted with
        the passphrase from ``self.passphrase()``. The decrypted parameters
        fill ``self.stream_url`` and are passed to ``self.get_token``.

        :return: the parsed HLS variant playlist, or ``None`` when no
                 passphrase is available
        """
        # The query string is the base64 encoded, encrypted config
        encrypted_data = urlparse(self.url).query
        data = base64.b64decode(encrypted_data)

        passphrase = self.passphrase()
        if not passphrase:
            return

        decrypted = decrypt_openssl(data, passphrase)
        config = parse_qsd(decrypted.decode("utf8"))

        playlist_url = self.stream_url.format(time=self.time,
                                              deviceId=self.device_id,
                                              token=self.get_token(**config),
                                              **config)
        return HLSStream.parse_variant_playlist(self.session, playlist_url)
Ejemplo n.º 23
0
 def priority(cls, url):
     """
     Return LOW priority if the URL is not prefixed with hds:// but its path
     ends with .f4m, and NORMAL priority if the URL is prefixed.
     :param url: the URL to find the plugin priority for
     :return: plugin priority for the given URL; NO priority when the URL
              does not match or has neither the prefix nor the extension
     """
     m = cls._url_re.match(url)
     if not m:
         return NO_PRIORITY

     prefix, url = m.groups()
     url_path = urlparse(url).path
     if prefix is not None:
         return NORMAL_PRIORITY
     if url_path.endswith(".f4m"):
         return LOW_PRIORITY
     return NO_PRIORITY
Ejemplo n.º 24
0
    def _get_stream_info(self, url):
        """Find the video ID for a URL and request its video info.

        The info API is queried with up to three parameter sets, stopping
        at the first response whose status is not "fail".

        :param url: URL matched against ``_url_re``
        :return: the parsed info of the last request made, ``None`` when
                 no video ID could be determined, or the result of
                 ``_find_canonical_stream_info`` for live channel URLs
        """
        match = _url_re.match(url)
        user = match.group("user")
        live_channel = match.group("liveChannel")

        if user:
            video_id = self._find_channel_video()
        elif live_channel:
            return self._find_canonical_stream_info()
        else:
            video_id = match.group("video_id") or match.group("video_id_2")
            if video_id == "live_stream":
                # The real video has to be looked up through the channel
                # named in the query string.
                query_info = dict(parse_qsl(urlparse(url).query))
                if "channel" in query_info:
                    video_id = self._get_channel_video(query_info["channel"])

        if not video_id:
            return

        attempts = (
            # normal
            {"el": "detailpage"},
            # age restricted
            {"el": "embedded"},
            # embedded restricted
            {"eurl": "https://youtube.googleapis.com/v/{0}".format(video_id)},
        )

        for count, extra_params in enumerate(attempts, 1):
            params = {"video_id": video_id}
            params.update(extra_params)

            res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
            info_parsed = parse_query(res.text,
                                      name="config",
                                      schema=_config_schema)
            if info_parsed.get("status") == "fail":
                self.logger.debug("get_video_info - {0}: {1}".format(
                    count, info_parsed.get("reason")))
                continue
            self.stream_title = info_parsed.get("title")
            self.logger.debug("get_video_info - {0}: Found data".format(count))
            break

        return info_parsed
Ejemplo n.º 25
0
    def merge_path_list(self, static, user):
        """Extend the static path list with the entries of a user list.

        Each user entry is parsed as a URL (a scheme is added first when the
        entry starts with neither "http" nor "//") and appended as a
        ``(netloc, path)`` tuple. Entries lacking a netloc or a path are
        ignored.

        Args:
           static (list): static list from this plugin, extended in place
           user (list): list from a user command

        Returns:
            The extended static list
        """
        for entry in user:
            if not entry.startswith(("http", "//")):
                entry = update_scheme("http://", entry)

            parsed = urlparse(entry)
            if not (parsed.netloc and parsed.path):
                continue
            static += [(parsed.netloc, parsed.path)]
        return static
Ejemplo n.º 26
0
    def _resolve_playlist(self, res, playlist_all):
        """Yield a stream for every supported URL in a list.

        The stream type is chosen by the extension of the URL path: .m3u8
        (HLS), .f4m (HDS), .mp3/.mp4 (HTTP). A .mpd URL is only logged.
        Errors are logged and the affected URL is skipped.

        Args:
            res: Content from self._res_text (not used by this method)
            playlist_all: List of stream URLs

        Yields:
            (name, stream) tuples
        """
        for stream_url in playlist_all:
            path = urlparse(stream_url).path
            if path.endswith(".m3u8"):
                try:
                    variants = HLSStream.parse_variant_playlist(
                        self.session, stream_url, headers=self.headers).items()
                    if not variants:
                        # No variants were parsed, so use the URL itself
                        # as a single stream.
                        yield "live", HLSStream(
                            self.session, stream_url, headers=self.headers)
                    for variant in variants:
                        yield variant
                except Exception as e:
                    self.logger.error("Skipping hls_url - {0}".format(str(e)))
                    self.help_info_e(e)
            elif path.endswith(".f4m"):
                try:
                    manifest = HDSStream.parse_manifest(
                        self.session, stream_url, headers=self.headers)
                    for variant in manifest.items():
                        yield variant
                except Exception as e:
                    self.logger.error("Skipping hds_url - {0}".format(str(e)))
                    self.help_info_e(e)
            elif path.endswith((".mp3", ".mp4")):
                try:
                    # Name the stream after the bitrate found in the URL,
                    # if there is one.
                    bitrate_m = self._httpstream_bitrate_re.search(stream_url)
                    if bitrate_m:
                        name = "{0}k".format(bitrate_m.group("bitrate"))
                    else:
                        name = "live"
                    yield name, HTTPStream(
                        self.session, stream_url, headers=self.headers)
                except Exception as e:
                    self.logger.error("Skipping http_url - {0}".format(str(e)))
                    self.help_info_e(e)
            elif path.endswith(".mpd"):
                try:
                    self.logger.info("Found mpd: {0}".format(stream_url))
                except Exception as e:
                    self.logger.error("Skipping mpd_url - {0}".format(str(e)))
                    self.help_info_e(e)
Ejemplo n.º 27
0
    def _get_streams(self):
        """Create a stream for every RTMP and HTTP URL found on the page.

        RTMP URLs are stored as the ``live`` stream; HTTP URLs are named
        after the file extension of their path.

        :return: dict of stream name to stream
        """
        res = http.get(self.url, schema=_schema)
        streams = {}
        for stream_url in res["urls"]:
            parsed = urlparse(stream_url)
            scheme = parsed.scheme
            if scheme.startswith("rtmp"):
                rtmp_params = {
                    "rtmp": stream_url,
                    "pageUrl": self.url,
                    "live": True,
                }
                if res["swf"]:
                    rtmp_params["swfVfy"] = res["swf"]
                streams["live"] = RTMPStream(self.session, rtmp_params)
            elif scheme.startswith("http"):
                # The extension without its leading dot is the stream name.
                extension = splitext(parsed.path)[1][1:]
                streams[extension] = HTTPStream(self.session, stream_url)

        return streams
Ejemplo n.º 28
0
    def _create_stream(self, stream, language):
        """Yield the streams of one stream entry if its language matches.

        :param stream: mapping with the keys ``height``, ``mediaType``,
                       ``url`` and ``versionShortLibelle``
        :param language: language code; "de", "en", "es", "fr" and "pl" are
                         expanded to their lists of version labels, any
                         other value is used as given
        :return: generator of (name, stream) tuples; nothing is yielded when
                 the language does not match or the media type is not
                 handled
        """
        stream_name = "{0}p".format(stream["height"])
        stream_type = stream["mediaType"]
        stream_url = stream["url"]
        stream_language = stream["versionShortLibelle"]

        if language == "de":
            language = ["DE", "VOST-DE", "VA", "VOA", "Dt. Live", "OV", "OmU"]
        elif language == "en":
            language = ["ANG", "VOST-ANG"]
        elif language == "es":
            language = ["ESP", "VOST-ESP"]
        elif language == "fr":
            language = [
                "FR", "VOST-FR", "VF", "VOF", "Frz. Live", "VO", "ST mal"
            ]
        elif language == "pl":
            language = ["POL", "VOST-POL"]

        if stream_language not in language:
            return

        if stream_type in ("hls", "mp4"):
            if not urlparse(stream_url).path.endswith("m3u8"):
                yield stream_name, HTTPStream(self.session, stream_url)
                return

            try:
                variants = HLSStream.parse_variant_playlist(
                    self.session, stream_url)
                for variant in variants.items():
                    yield variant
            except IOError as err:
                self.logger.error("Failed to extract HLS streams: {0}",
                                  err)
        elif stream_type == "f4m":
            try:
                variants = HDSStream.parse_manifest(self.session,
                                                    stream_url)
                for variant in variants.items():
                    yield variant
            except IOError as err:
                self.logger.error("Failed to extract HDS streams: {0}",
                                  err)
0
    def get_video_id(self):
        """Determine the site and the video ID for ``self.url``.

        The ID is looked up in this order: the ``tvLiveId`` parameter of the
        query string (or of the fragment when there is no query), group 2 of
        ``self.url_re``, and finally the page itself.

        :return: tuple of (site, video ID); both are ``None`` when nothing
                 was found
        """
        parsed = urlparse(self.url)
        qinfo = dict(parse_qsl(parsed.query or parsed.fragment.lstrip("?")))
        url_m = self.url_re.match(self.url)

        # look for the video id in the URL, otherwise find it in the page
        if "tvLiveId" in qinfo:
            return url_m.group(1), qinfo["tvLiveId"]

        if url_m.group(2):
            return url_m.group(1), url_m.group(2)

        video_id_m = http.get(self.url, schema=self.video_id_schema)
        if video_id_m:
            site, video_id = video_id_m.groups()
            return site, video_id

        return None, None
Ejemplo n.º 30
0
    def _resolve_playlist(self, playlist_all):
        """Yield a stream for every supported URL in a list.

        The stream type is chosen by the extension of the URL path: .m3u8
        (HLS), .f4m (HDS), .mp3/.mp4 (HTTP). A .mpd URL is only logged.
        Errors are logged and the affected URL is skipped.

        Args:
            playlist_all: List of stream URLs

        Yields:
            (name, stream) tuples
        """
        http.headers.update({"Referer": self.url})
        for stream_url in playlist_all:
            path = urlparse(stream_url).path
            if path.endswith(".m3u8"):
                try:
                    variants = HLSStream.parse_variant_playlist(
                        self.session, stream_url).items()
                    if not variants:
                        # No variants were parsed, so use the URL itself
                        # as a single stream.
                        yield "live", HLSStream(self.session, stream_url)
                    for variant in variants:
                        yield variant
                except Exception as e:
                    self.logger.error("Skipping hls_url - {0}".format(str(e)))
            elif path.endswith(".f4m"):
                try:
                    manifest = HDSStream.parse_manifest(self.session,
                                                        stream_url)
                    for variant in manifest.items():
                        yield variant
                except Exception as e:
                    self.logger.error("Skipping hds_url - {0}".format(str(e)))
            elif path.endswith((".mp3", ".mp4")):
                try:
                    # Name the stream after the bitrate found in the URL,
                    # if there is one.
                    bitrate_m = self._httpstream_bitrate_re.search(stream_url)
                    if bitrate_m:
                        name = "{0}k".format(bitrate_m.group("bitrate"))
                    else:
                        name = "live"
                    yield name, HTTPStream(self.session, stream_url)
                except Exception as e:
                    self.logger.error("Skipping http_url - {0}".format(str(e)))
            elif path.endswith(".mpd"):
                try:
                    self.logger.info("Found mpd: {0}".format(stream_url))
                except Exception as e:
                    self.logger.error("Skipping mpd_url - {0}".format(str(e)))