Example #1
0
    def _get_streams(self):
        """
        Get the config object from the page source and call the
        API to get the list of streams

        :return: generator of (label, Stream) tuples
        """
        # attempt a login
        self.login()

        res = http.get(self.url)
        # decode the config for the page: each regex match contributes one
        # key/value pair, and the schema validates the assembled dict
        matches = self.config_re.finditer(res.text)
        try:
            config = self.config_schema.validate(
                dict([m.group("key", "value") for m in matches]))
        except PluginError:
            # config missing or malformed -> no streams
            return

        if config["selectedVideoHID"]:
            # a video hash ID means a VOD; resolve the API url relative to
            # the page url and the videos endpoint
            self.logger.debug("Found video hash ID: {0}",
                              config["selectedVideoHID"])
            api_url = urljoin(
                self.url,
                urljoin(config["videosURL"], config["selectedVideoHID"]))
        elif config["livestreamURL"]:
            self.logger.debug("Found live stream URL: {0}",
                              config["livestreamURL"])
            api_url = urljoin(self.url, config["livestreamURL"])
        else:
            # neither a VOD nor a live stream is configured on this page
            return

        ares = http.get(api_url)
        data = http.json(ares, schema=self.api_schema)
        viewing_urls = data["viewing_urls"]

        if "error" in viewing_urls:
            self.logger.error("Failed to load streams: {0}",
                              viewing_urls["error"])
        else:
            for url in viewing_urls["urls"]:
                try:
                    # label by resolution: "res" preferred, "label" as
                    # fallback; entries with neither are labelled "live"
                    label = "{0}p".format(url.get("res", url["label"]))
                except KeyError:
                    label = "live"

                if url["type"] == "rtmp/mp4" and RTMPStream.is_usable(
                        self.session):
                    params = {
                        "rtmp": url["src"],
                        "pageUrl": self.url,
                        "live": True,
                    }
                    yield label, RTMPStream(self.session, params)

                elif url["type"] == "application/x-mpegURL":
                    for s in HLSStream.parse_variant_playlist(
                            self.session, url["src"]).items():
                        yield s
Example #2
0
File: dogan.py Project: thawtes/ipk
    def _get_hls_url(self, content_id):
        """Resolve the HLS stream url for *content_id* via the site's content API."""
        # newer sites use a different content API endpoint
        uses_new_api = "cnnturk" in self.url or "teve2.com.tr" in self.url
        if uses_new_api:
            self.logger.debug("Using new content API url")
            template = self.new_content_api
        else:
            template = self.content_api

        # make the api url relative to the current domain
        api_url = urljoin(self.url, template.format(id=content_id))
        response = http.get(api_url)
        stream_data = http.json(response, schema=self.content_api_schema)

        link = stream_data["Media"]["Link"]
        service_url = link["ServiceUrl"] or link["DefaultServiceUrl"]
        return urljoin(service_url, link["SecurePath"])
Example #3
0
    def _get_streams(self):
        """Yield HLS/HDS/HTTP streams described by the player's livestream config."""
        data_url = http.get(self.url, schema=self._player_url_schema)
        if not data_url:
            return

        res = http.get(urljoin(self.url, data_url))
        stream_info = http.xml(res, schema=self._livestream_schema)

        for stream in stream_info:
            stream_url = stream["url"]
            try:
                if ".m3u8" in stream_url:
                    variants = HLSStream.parse_variant_playlist(
                        self.session, stream_url, name_key="bitrate")
                    for item in variants.items():
                        yield item
                elif ".f4m" in stream_url:
                    manifest = HDSStream.parse_manifest(
                        self.session,
                        stream_url,
                        pvswf=self.swf_url,
                        is_akamai=True)
                    for item in manifest.items():
                        yield item
                elif ".mp4" in stream_url:
                    name = "{0}k".format(stream["bitrate"])
                    yield name, HTTPStream(self.session, stream_url)
            except IOError as err:
                self.logger.warning("Error parsing stream: {0}", err)
Example #4
0
 def uri(self, uri):
     """Resolve *uri* against the stored base URI when it is relative."""
     # already absolute (carries a scheme) -> return untouched
     if uri and urlparse(uri).scheme:
         return uri
     # relative and a base is known -> join them
     if uri and self.base_uri:
         return urljoin(self.base_uri, uri)
     # nothing to resolve against (or empty input)
     return uri
Example #5
0
    def _parse_smil(self, url, swf_url):
        """Parse a SMIL config and yield (bitrate, AkamaiHDStream) pairs."""
        smil = http.xml(http.get(url), "SMIL config", schema=_smil_schema)

        base = smil["http_base"]
        for video_src, bitrate in smil["videos"]:
            stream_url = urljoin(base, video_src)
            yield bitrate, AkamaiHDStream(self.session, stream_url, swf=swf_url)
Example #6
0
    def _get_vod_stream(self, vod_id):
        """Yield HLS streams for a recorded VOD, labelled by video height."""
        res = self._get_api_res("recordings", vod_id)

        for stream_data in http.json(res, schema=self._vod_schema):
            if stream_data["format"] != "hls":
                continue
            manifest_url = urljoin(stream_data["url"], "manifest.m3u8")
            name = "{0}p".format(stream_data["height"])
            yield name, HLSStream(self.session, manifest_url)
Example #7
0
 def repair_url(self, url, base_url, stream_base):
     """repair a broken url"""
     # remove \
     new_url = url.replace("\\", "")
     # repairs broken scheme
     if new_url.startswith("http://"):
         new_url = "http:" + new_url[9:]
     elif new_url.startswith("https://"):
         new_url = "https:" + new_url[10:]
     # creates a valid url from path only urls and adds missing scheme for // urls
     if stream_base and new_url[1] is not "/":
         if new_url[0] is "/":
             new_url = new_url[1:]
         new_url = urljoin(stream_base, new_url)
     else:
         new_url = urljoin(base_url, new_url)
     return new_url
Example #8
0
    def create_hls_url(self, suffix):
        """
        creates a valid hls_url
        :param suffix: url session params
        :return: hls_url, or None when no path can be found in the suffix
        """
        match = self._suffix_re.search(suffix)
        if not match:
            return
        self.logger.debug("create hls_url from suffix")

        channel = self._url_re.match(self.url).group("channel")
        base = urljoin(self._channel_domains[channel], match.group("path"))
        return urljoin(base, "master.m3u8?{suffix}".format(suffix=suffix))
Example #9
0
    def _get_streams(self):
        """Find stream urls inside the player js and yield them as HLS."""
        res = http.get(self.url, headers={"User-Agent": useragents.CHROME})
        match = self.js_re.search(res.text)
        if not match:
            return

        self.logger.debug("Found js key: {0}", match.group(1))
        js_res = http.get(urljoin(self.url, match.group(0)))

        for stream_url in self.player_re.findall(js_res.text):
            if "adblock" in stream_url:
                continue
            yield "live", HLSStream(self.session, stream_url)
Example #10
0
    def find_videopage(self):
        """Follow a non-video page to the url of its video page."""
        self.logger.debug("Not a videopage")
        page = http.get(self.url)

        match = self._videopage_re.search(page.text)
        if not match:
            self.logger.debug(
                "No stream path, stream might be offline or invalid url.")
            raise NoStreamsError(self.url)

        video_path = match.group("path")
        self.logger.debug("Found new path: {0}".format(video_path))
        return urljoin(self.url, video_path)
Example #11
0
    def _get_streams(self):
        """Return HLS streams from the page, following an embedded player iframe if present."""
        res = http.get(self.url)

        # some pages have embedded players
        embed = self.iframe_re.search(res.text)
        if embed:
            res = http.get(urljoin(self.url, embed.group("url")))

        source = self.src_re.search(res.text)
        if not source:
            return
        stream_src = source.group("url")
        if stream_src and stream_src.endswith("m3u8"):
            return HLSStream.parse_variant_playlist(self.session, stream_src)
Example #12
0
 def _get_streams(self):
     """Locate the player js and yield every non-ad url from it as HLS."""
     http.headers.update({"User-Agent": useragents.CHROME})
     res = http.get(self.url)

     self.logger.debug("search for js_re")
     match = self.js_re.search(res.text)
     if not match:
         return

     self.logger.debug("Found js key: {0}", match.group(1))
     http.headers.update({"Referer": self.url})
     js_res = http.get(urljoin(self.url, match.group(0)))

     self.logger.debug("search for player_re")
     for stream_url in self.player_re.findall(js_res.text):
         self.logger.debug("Found url: {0}".format(stream_url))
         if "adblock" not in stream_url:
             yield "live", HLSStream(self.session, stream_url)
Example #13
0
    def _get_streams(self):
        """Build the media element API url for this program and return its HLS variants."""
        # The player settings cookie is keyed by the stream type (tv/radio).
        stream_type = _url_re.match(self.url).group(1).upper()
        cookie_name = "NRK_PLAYER_SETTINGS_{0}".format(stream_type)
        cookies = {cookie_name: COOKIE_PARAMS}

        # The API base url comes from the page, the program id from the url.
        baseurl = http.get(self.url, cookies=cookies, schema=_schema)
        program_id = _id_re.search(self.url).group(1)

        # Fetch the media element and extract the stream url.
        json_url = urljoin(baseurl, "mediaelement/{0}".format(program_id))
        res = http.get(json_url, cookies=cookies)
        media_element = http.json(res, schema=_mediaelement_schema)
        media_url = media_element["mediaUrl"]

        return HLSStream.parse_variant_playlist(self.session, media_url)
Example #14
0
    def _get_streams(self):
        """Fetch the encrypted stream config and yield HLS streams,
        optionally muxing in subtitles when requested and ffmpeg is usable.
        """
        page = http.get(self.url, schema=_schema)
        if not page:
            return

        # RSA public key used to protect the per-request AES session key
        pubkey_pem = get_public_key(self.cache, urljoin(self.url, page["clientlibs"]))
        if not pubkey_pem:
            raise PluginError("Unable to get public key")

        flashvars = page["flashvars"]

        # cache-busting millisecond timestamp
        params = {
            "cashPath": int(time.time() * 1000)
        }
        res = http.get(urljoin(self.url, flashvars["country"]), params=params)
        if not res:
            return
        language = http.xml(res, schema=_language_schema)

        # forward only the known flashvars that are actually set
        api_params = {}
        for key in ("ss_id", "mv_id", "device_cd", "ss1_prm", "ss2_prm", "ss3_prm"):
            if flashvars.get(key, ""):
                api_params[key] = flashvars[key]

        # fresh 32-byte AES session key for this request
        aeskey = crypto_number.long_to_bytes(random.getrandbits(8 * 32), 32)

        # "d" carries the AES-encrypted api params, "a" the AES key itself
        # encrypted with the server's RSA public key
        params = {
            "s": flashvars["s"],
            "c": language,
            "e": self.url,
            "d": aes_encrypt(aeskey, json.dumps(api_params)),
            "a": rsa_encrypt(pubkey_pem, aeskey)
        }
        res = http.get(urljoin(self.url, flashvars["init"]), params=params)
        if not res:
            return
        rtn = http.json(res, schema=_init_schema)
        if not rtn:
            return

        # the response body is AES-encrypted with the same session key
        init_data = parse_json(aes_decrypt(aeskey, rtn))

        # accept only https master playlists of the /i/.../master.m3u8 form
        parsed = urlparse(init_data["play_url"])
        if parsed.scheme != "https" or not parsed.path.startswith("/i/") or not parsed.path.endswith("/master.m3u8"):
            return
        hlsstream_url = init_data["play_url"]

        streams = HLSStream.parse_variant_playlist(self.session, hlsstream_url)

        if "caption_url" in init_data:
            if self.get_option("mux_subtitles") and FFMPEGMuxer.is_usable(self.session):
                # convert the caption XML to SRT and mux it into each variant
                res = http.get(init_data["caption_url"])
                srt = http.xml(res, ignore_ns=True, schema=_xml_to_srt_schema)
                subfiles = []
                metadata = {}
                # NOTE: "srt" is rebound inside the loop to each entry's
                # subtitle text; the schema result is not reused afterwards
                for i, lang, srt in ((i, s[0], s[1]) for i, s in enumerate(srt)):
                    # each subtitle track is written to its own temp file
                    subfile = tempfile.TemporaryFile()
                    subfile.write(srt.encode("utf8"))
                    subfile.seek(0)
                    subfiles.append(FileStream(self.session, fileobj=subfile))
                    # tag each subtitle stream with its language code
                    metadata["s:s:{0}".format(i)] = ["language={0}".format(lang)]

                # mux video plus every subtitle file into each variant
                for n, s in streams.items():
                    yield n, MuxedStream(self.session, s, *subfiles,
                                         maps=list(range(0, len(metadata) + 1)),
                                         metadata=metadata)
                return
            else:
                self.logger.info("Subtitles: {0}".format(init_data["caption_url"]))

        for s in streams.items():
            yield s
Example #15
0
 def host(self):
     """Return the API host url for this stream, derived from the cluster."""
     api_host = self._host
     if not api_host:
         api_host = self.API_URL.format(
             randint(0, 0xffffff), self.media_id, self.application,
             "lp-" + self._cluster)
     return urljoin(api_host, "/1/ustream")
Example #16
0
    def _make_url_list(self, old_list, base_url, url_type="", stream_base=""):
        """Creates a list of validate urls from a list of broken urls
           and removes every blacklisted url

        Args:
            old_list: List of broken urls
            base_url: url that will get used for scheme and netloc
            url_type: can be iframe or playlist
                - iframe is used for
                    --resolve-whitelist-netloc
                - playlist is not used at the moment
            stream_base: basically same as base_url, but used for .f4m files.

        Returns:
            List of validate urls
        """
        blacklist_netloc_user = self.get_option("blacklist_netloc")
        blacklist_netloc = (
            "127.0.0.1",
            "about:blank",
            "abv.bg",
            "adfox.ru",
            "googletagmanager.com",
            "javascript:false",
        )
        whitelist_netloc_user = self.get_option("whitelist_netloc")

        blacklist_path = [
            ("expressen.se", "/_livetvpreview/"),
            ("facebook.com", "/plugins"),
            ("vesti.ru", "/native_widget.html"),
        ]
        # Add --resolve-blacklist-path to blacklist_path
        blacklist_path_user = self.get_option("blacklist_path")
        if blacklist_path_user is not None:
            blacklist_path = self.merge_path_list(blacklist_path, blacklist_path_user)

        whitelist_path = []
        whitelist_path_user = self.get_option("whitelist_path")
        if whitelist_path_user is not None:
            whitelist_path = self.merge_path_list(whitelist_path, whitelist_path_user)

        blacklist_endswith = (
            ".gif",
            ".jpg",
            ".png",
            ".svg",
            ".vtt",
            "/chat.html",
            "/chat",
        )

        new_list = []
        for url in old_list:
            # Don't add the same url as self.url to the list.
            if url == self.url:
                continue
            # Repair the scheme
            new_url = url.replace("\\", "")
            if new_url.startswith("http://"):
                new_url = "http:" + new_url[9:]
            elif new_url.startswith("https://"):
                new_url = "https:" + new_url[10:]
            # Repair the domain
            if stream_base and new_url[1] is not "/":
                if new_url[0] is "/":
                    new_url = new_url[1:]
                new_url = urljoin(stream_base, new_url)
            else:
                new_url = urljoin(base_url, new_url)
            # Parse the url and remove not wanted urls
            parse_new_url = urlparse(new_url)

            REMOVE = False

            # sorted after the way livecli will try to remove an url
            status_remove = [
                "WL-netloc",  # - Allow only whitelisted domains --resolve-whitelist-netloc
                "WL-path",    # - Allow only whitelisted paths from a domain --resolve-whitelist-path
                "BL-static",  # - Removes blacklisted domains
                "BL-netloc",  # - Removes blacklisted domains --resolve-blacklist-netloc
                "BL-path",    # - Removes blacklisted paths from a domain --resolve-blacklist-path
                "BL-ew",      # - Removes images and chatrooms
                "ADS",        # - Remove obviously ad urls
            ]

            if REMOVE is False:
                count = 0
                for url_status in ((url_type == "iframe" and
                                    whitelist_netloc_user is not None and
                                    parse_new_url.netloc.endswith(tuple(whitelist_netloc_user)) is False),
                                   (url_type == "iframe" and
                                    whitelist_path_user is not None and
                                    self.compare_url_path(parse_new_url, whitelist_path) is False),
                                   (parse_new_url.netloc.endswith(blacklist_netloc)),
                                   (blacklist_netloc_user is not None and
                                    parse_new_url.netloc.endswith(tuple(blacklist_netloc_user))),
                                   (self.compare_url_path(parse_new_url, blacklist_path) is True),
                                   (parse_new_url.path.endswith(blacklist_endswith)),
                                   (self._ads_path.match(parse_new_url.path))):

                    count += 1
                    if url_status:
                        REMOVE = True
                        break

            if REMOVE is True:
                self.logger.debug("{0} - Removed url: {1}".format(status_remove[count - 1], new_url))
                continue
            # Add url to the list
            new_list += [new_url]
        # Remove duplicates
        new_list = list(set(new_list))
        return new_list
Example #17
0
def absolute_url(baseurl, url):
    """Return *url* resolved against *baseurl* unless it is already absolute."""
    return url if url.startswith("http") else urljoin(baseurl, url)
Example #18
0
    def _make_url_list(self, old_list, base_url, stream_base=""):
        """Creates a list of validate urls from a list of broken urls
           and removes every blacklisted url

        Args:
            old_list: List of broken urls
            base_url: url that will get used for scheme and netloc
            stream_base: basically same as base_url, but used for .f4m files.

        Returns:
            List of validate urls
        """
        blacklist_netloc_user = self.get_option("blacklist_netloc")
        blacklist_netloc = (
            "about:blank",
            "adfox.ru",
            "googletagmanager.com",
            "javascript:false",
        )

        blacklist_path = [
            ("facebook.com", "/plugins"),
            ("vesti.ru", "/native_widget.html"),
        ]
        # Add --resolve-blacklist-path to blacklist_path
        blacklist_path_user = self.get_option("blacklist_path")
        if blacklist_path_user is not None:
            for _path_url in blacklist_path_user:
                if not _path_url.startswith(("http", "//")):
                    _path_url = update_scheme("http://", _path_url)
                _parsed_path_url = urlparse(_path_url)
                if _parsed_path_url.netloc and _parsed_path_url.path:
                    blacklist_path += [(_parsed_path_url.netloc,
                                        _parsed_path_url.path)]

        new_list = []
        for url in old_list:
            # Don't add the same url as self.url to the list.
            if url == self.url:
                continue
            # Repair the scheme
            new_url = url.replace("\\", "")
            if new_url.startswith("http://"):
                new_url = "http:" + new_url[9:]
            elif new_url.startswith("https://"):
                new_url = "https:" + new_url[10:]
            # Repair the domain
            if stream_base and new_url[1] is not "/":
                if new_url[0] is "/":
                    new_url = new_url[1:]
                new_url = urljoin(stream_base, new_url)
            else:
                new_url = urljoin(base_url, new_url)
            # Parse the url and remove not wanted urls
            parse_new_url = urlparse(new_url)
            REMOVE = False
            # Removes blacklisted domains
            if REMOVE is False and parse_new_url.netloc.endswith(
                    blacklist_netloc):
                REMOVE = True
            # Removes blacklisted domains from --resolve-blacklist-netloc
            if REMOVE is False and blacklist_netloc_user is not None and parse_new_url.netloc.endswith(
                    tuple(blacklist_netloc_user)):
                REMOVE = True
            # Removes blacklisted paths from a domain
            if REMOVE is False:
                for netloc, path in blacklist_path:
                    if parse_new_url.netloc.endswith(
                            netloc) and parse_new_url.path.startswith(path):
                        REMOVE = True
                        continue
            # Removes images and chatrooms
            if REMOVE is False and parse_new_url.path.endswith(
                (".jpg", ".png", ".svg", "/chat")):
                REMOVE = True
            # Remove obviously ad urls
            if REMOVE is False and self._ads_path.match(parse_new_url.path):
                REMOVE = True
            if REMOVE is True:
                self.logger.debug("Removed url: {0}".format(new_url))
                continue
            # Add url to the list
            new_list += [new_url]
        # Remove duplicates
        new_list = list(set(new_list))
        return new_list