Example #1
0
def update_scheme(current, target, force=True):
    # type: (str, str, bool) -> str
    """
    Take the scheme from the current URL and apply it to the target URL if it is missing
    :param current: current URL
    :param target: target URL
    :param force: always apply the current scheme to the target, even if a target scheme exists
    :return: target URL with the current URL's scheme
    """
    target_p = urlparse(target)

    if (
            # target URLs with implicit scheme and netloc including a port: ("http://", "foo.bar:1234") -> "http://foo.bar:1234"
            # urllib.parse.urlparse has incorrect behavior in py<3.9, so we'll have to use a regex here
            # py>=3.9: urlparse("127.0.0.1:1234") == ParseResult(scheme='127.0.0.1', netloc='', path='1234', ...)
            # py<3.9 : urlparse("127.0.0.1:1234") == ParseResult(scheme='', netloc='', path='127.0.0.1:1234', ...)
            not _re_uri_implicit_scheme.search(target)
            and not target.startswith("//")
            # target URLs without scheme and netloc: ("http://", "foo.bar/foo") -> "http://foo.bar/foo"
            or not target_p.scheme and not target_p.netloc):
        return "{0}://{1}".format(
            urlparse(current).scheme, urlunparse(target_p))

    # target URLs without scheme but with netloc: ("http://", "//foo.bar/foo") -> "http://foo.bar/foo"
    if not target_p.scheme and target_p.netloc:
        return "{0}:{1}".format(urlparse(current).scheme, urlunparse(target_p))

    # target URLs with scheme
    # override the target scheme
    if force:
        return urlunparse(target_p._replace(scheme=urlparse(current).scheme))

    # keep the target scheme
    return target
Example #2
0
def url_equal(first,
              second,
              ignore_scheme=False,
              ignore_netloc=False,
              ignore_path=False,
              ignore_params=False,
              ignore_query=False,
              ignore_fragment=False):
    """
    Compare two URLs and return True if they are equal, some parts of the URLs can be ignored
    :param first: URL
    :param second: URL
    :param ignore_scheme: ignore the scheme
    :param ignore_netloc: ignore the netloc
    :param ignore_path: ignore the path
    :param ignore_params: ignore the params
    :param ignore_query: ignore the query string
    :param ignore_fragment: ignore the fragment
    :return: result of comparison
    """
    # <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    firstp = urlparse(first)
    secondp = urlparse(second)

    return ((firstp.scheme == secondp.scheme or ignore_scheme)
            and (firstp.netloc == secondp.netloc or ignore_netloc)
            and (firstp.path == secondp.path or ignore_path)
            and (firstp.params == secondp.params or ignore_params)
            and (firstp.query == secondp.query or ignore_query)
            and (firstp.fragment == secondp.fragment or ignore_fragment))
Example #3
0
def url_equal(first, second, ignore_scheme=False, ignore_netloc=False, ignore_path=False, ignore_params=False,
              ignore_query=False, ignore_fragment=False):
    """
    Compare two URLs and return True if they are equal, some parts of the URLs can be ignored
    :param first: URL
    :param second: URL
    :param ignore_scheme: ignore the scheme
    :param ignore_netloc: ignore the netloc
    :param ignore_path: ignore the path
    :param ignore_params: ignore the params
    :param ignore_query: ignore the query string
    :param ignore_fragment: ignore the fragment
    :return: result of comparison
    """
    # <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    firstp = urlparse(first)
    secondp = urlparse(second)

    return ((firstp.scheme == secondp.scheme or ignore_scheme) and
            (firstp.netloc == secondp.netloc or ignore_netloc) and
            (firstp.path == secondp.path or ignore_path) and
            (firstp.params == secondp.params or ignore_params) and
            (firstp.query == secondp.query or ignore_query) and
            (firstp.fragment == secondp.fragment or ignore_fragment))
Example #4
0
    def _find_video_id(self, url):
        m = _url_re.match(url)
        video_id = m.group("video_id") or m.group("video_id_short")
        if video_id:
            log.debug("Video ID from URL")
            return video_id

        res = self.session.http.get(url)
        if urlparse(res.url).netloc == "consent.youtube.com":
            c_data = {}
            for _i in itertags(res.text, "input"):
                if _i.attributes.get("type") == "hidden":
                    c_data[_i.attributes.get("name")] = _i.attributes.get("value")
            log.debug("c_data_keys: {}".format(', '.join(c_data.keys())))
            res = self.session.http.post("https://consent.youtube.com/s", data=c_data)
            consent = self.session.http.cookies.get('CONSENT', domain='.youtube.com')
            if 'YES' in consent:
                self.cache.set("consent_ck", consent)

        datam = _ytdata_re.search(res.text)
        if datam:
            data = parse_json(datam.group(1))
            # find the videoRenderer object, where there is a LVE NOW badge
            for vid_ep in search_dict(data, 'currentVideoEndpoint'):
                video_id = vid_ep.get("watchEndpoint", {}).get("videoId")
                if video_id:
                    log.debug("Video ID from currentVideoEndpoint")
                    return video_id
            for x in search_dict(data, 'videoRenderer'):
                if x.get("viewCountText", {}).get("runs"):
                    if x.get("videoId"):
                        log.debug("Video ID from videoRenderer (live)")
                        return x["videoId"]
                for bstyle in search_dict(x.get("badges", {}), "style"):
                    if bstyle == "BADGE_STYLE_TYPE_LIVE_NOW":
                        if x.get("videoId"):
                            log.debug("Video ID from videoRenderer (live)")
                            return x["videoId"]

        if urlparse(url).path.endswith(("/embed/live_stream", "/live")):
            for link in itertags(res.text, "link"):
                if link.attributes.get("rel") == "canonical":
                    canon_link = link.attributes.get("href")
                    if canon_link != url:
                        if canon_link.endswith("v=live_stream"):
                            log.debug("The video is not available")
                            break
                        else:
                            log.debug("Re-directing to canonical URL: {0}".format(canon_link))
                            return self._find_video_id(canon_link)

        raise PluginError("Could not find a video on this page")
Example #5
0
    def test_compare_url_path(self):
        blacklist_path = [
            ('example.com', '/_livetvpreview/'),
            ('foo.bar', '/plugins'),
        ]

        self.assertTrue(
            self.res_plugin.compare_url_path(
                urlparse('https://www.foo.bar/plugins/123.html'),
                blacklist_path))

        self.assertFalse(
            self.res_plugin.compare_url_path(
                urlparse('https://example.com/123.html'), blacklist_path))
Example #6
0
def update_scheme(current, target):
    """
    Take the scheme from the current URL and applies it to the
    target URL if the target URL startswith // or is missing a scheme
    :param current: current URL
    :param target: target URL
    :return: target URL with the current URLs scheme
    """
    target_p = urlparse(target)
    if not target_p.scheme and target_p.netloc:
        return "{0}:{1}".format(urlparse(current).scheme, urlunparse(target_p))
    elif not target_p.scheme and not target_p.netloc:
        return "{0}://{1}".format(urlparse(current).scheme, urlunparse(target_p))
    else:
        return target
Example #7
0
    def __init__(self, node, root=None, parent=None, url=None, *args, **kwargs):
        # top level has no parent
        super(MPD, self).__init__(node, root=self, *args, **kwargs)
        # parser attributes
        self.url = url
        self.timelines = defaultdict(lambda: -1)
        self.timelines.update(kwargs.pop("timelines", {}))
        self.id = self.attr(u"id")
        self.profiles = self.attr(u"profiles", required=True)
        self.type = self.attr(u"type", default=u"static", parser=MPDParsers.type)
        self.minimumUpdatePeriod = self.attr(u"minimumUpdatePeriod", parser=MPDParsers.duration, default=Duration())
        self.minBufferTime = self.attr(u"minBufferTime", parser=MPDParsers.duration, required=True)
        self.timeShiftBufferDepth = self.attr(u"timeShiftBufferDepth", parser=MPDParsers.duration)
        self.availabilityStartTime = self.attr(u"availabilityStartTime", parser=MPDParsers.datetime,
                                               default=datetime.datetime.fromtimestamp(0, utc),  # earliest date
                                               required=self.type == "dynamic")
        self.publishTime = self.attr(u"publishTime", parser=MPDParsers.datetime, required=self.type == "dynamic")
        self.availabilityEndTime = self.attr(u"availabilityEndTime", parser=MPDParsers.datetime)
        self.mediaPresentationDuration = self.attr(u"mediaPresentationDuration", parser=MPDParsers.duration)
        self.suggestedPresentationDelay = self.attr(u"suggestedPresentationDelay", parser=MPDParsers.duration)

        # parse children
        location = self.children(Location)
        self.location = location[0] if location else None
        if self.location:
            self.url = self.location.text
            urlp = list(urlparse(self.url))
            if urlp[2]:
                urlp[2], _ = urlp[2].rsplit("/", 1)
            self._base_url = urlunparse(urlp)

        self.baseURLs = self.children(BaseURL)
        self.periods = self.children(Period, minimum=1)
        self.programInformation = self.children(ProgramInformation)
Example #8
0
    def __init__(self,
                 session,
                 baseurl,
                 url,
                 bootstrap,
                 metadata=None,
                 timeout=60,
                 **request_params):
        Stream.__init__(self, session)

        self.baseurl = baseurl
        self.url = url
        self.bootstrap = bootstrap
        self.metadata = metadata
        self.timeout = timeout

        # Deep copy request params to make it mutable
        self.request_params = deepcopy(request_params)

        parsed = urlparse(self.url)
        if parsed.query:
            params = parse_qsl(parsed.query)
            if params:
                if not self.request_params.get("params"):
                    self.request_params["params"] = {}

                self.request_params["params"].update(params)

        self.url = urlunparse(
            (parsed.scheme, parsed.netloc, parsed.path, None, None, None))
Example #9
0
    def _get_streams(self):
        self.session.http.headers.update({"Referer": self.url})

        iframe_url = None
        res = self.session.http.get(self.url)
        for iframe in itertags(res.text, "iframe"):
            if "embed.lsm.lv" in iframe.attributes.get("src"):
                iframe_url = iframe.attributes.get("src")
                break

        if not iframe_url:
            log.error("Could not find player iframe")
            return

        log.debug("Found iframe: {0}".format(iframe_url))
        res = self.session.http.get(iframe_url)
        for source in itertags(res.text, "source"):
            if source.attributes.get("src"):
                stream_url = source.attributes.get("src")
                url_path = urlparse(stream_url).path
                if url_path.endswith(".m3u8"):
                    for s in HLSStream.parse_variant_playlist(
                            self.session, stream_url).items():
                        yield s
                else:
                    log.debug("Not used URL path: {0}".format(url_path))
Example #10
0
    def _create_stream(self, stream, is_live):
        stream_name = "{0}p".format(stream["height"])
        stream_type = stream["mediaType"]
        stream_url = stream["url"]

        if stream_type in ("hls", "mp4"):
            if urlparse(stream_url).path.endswith("m3u8"):
                try:
                    streams = HLSStream.parse_variant_playlist(self.session, stream_url)

                    # TODO: Replace with "yield from" when dropping Python 2.
                    for stream in streams.items():
                        yield stream
                except IOError as err:
                    self.logger.error("Failed to extract HLS streams: {0}", err)
            else:
                yield stream_name, HTTPStream(self.session, stream_url)

        elif stream_type == "rtmp":
            params = {
                "rtmp": stream["streamer"],
                "playpath": stream["url"],
                "swfVfy": SWF_URL,
                "pageUrl": self.url,
            }

            if is_live:
                params["live"] = True
            else:
                params["playpath"] = "mp4:{0}".format(params["playpath"])

            stream = RTMPStream(self.session, params)
            yield stream_name, stream
Example #11
0
 def find_iframe(self, res):
     for url in self.iframe_re.findall(res.text):
         if url.startswith("//"):
             p = urlparse(self.url)
             return "{0}:{1}".format(p.scheme, url)
         else:
             return url
Example #12
0
 def _find_iframe(self, res):
     iframe = self.iframe_re.search(res.text)
     url = iframe and iframe.group(1)
     if url and url.startswith("//"):
         p = urlparse(self.url)
         url = "{0}:{1}".format(p.scheme, url)
     return url
Example #13
0
def prepend_www(url):
    """Changes google.com to www.google.com"""
    parsed = urlparse(url)
    if parsed.netloc.split(".")[0] != "www":
        return parsed.scheme + "://www." + parsed.netloc + parsed.path
    else:
        return url
Example #14
0
    def _get_streams(self):
        self.session.http.headers.update({'User-Agent': useragents.FIREFOX})

        iframe_url = None
        page = self.session.http.get(self.url)
        for a in itertags(page.text, 'a'):
            if a.attributes.get('class') == 'play-live':
                iframe_url = update_scheme(self.url, a.attributes['data-url'])
                break

        if not iframe_url:
            raise PluginError('Could not find iframe.')

        parsed = urlparse(iframe_url)
        path_list = parsed.path.split('/')
        if len(path_list) != 6:
            # only support a known iframe url style,
            # the video id might be on a different spot if the url changes
            raise PluginError('unsupported iframe URL: {0}'.format(iframe_url))

        res = self.session.http.get(
            self.API_URL.format(netloc=parsed.netloc, id=path_list[4]))

        data = self.session.http.json(res, schema=self._api_schema)
        log.trace('{0!r}'.format(data))

        url = self.PLAYLIST_URL.format(
            app=data['streamProperties']['application'],
            name=data['playStreamName'],
            netloc=data['cdnHost'],
        )
        return HLSStream.parse_variant_playlist(self.session, url)
Example #15
0
    def _get_streams(self):
        match = _url_re.match(self.url)
        video_id = match.group("video_id")
        res = http.get(ASSET_URL.format(video_id))
        assets = http.xml(res, schema=_asset_schema)

        streams = {}
        for asset in assets:
            base = asset["base"]
            url = asset["url"]

            if urlparse(url).path.endswith(".f4m"):
                streams.update(
                    HDSStream.parse_manifest(self.session, url, pvswf=SWF_URL)
                )
            elif base.startswith("rtmp"):
                name = "{0}k".format(asset["bitrate"])
                params = {
                    "rtmp": asset["base"],
                    "playpath": url,
                    "live": True
                }
                streams[name] = RTMPStream(self.session, params)

        return streams
Example #16
0
    def _get_streams(self):
        self.url = http.resolve_url(self.url)
        match = _url_re.match(self.url)
        parsed = urlparse(self.url)
        if parsed.fragment:
            channel_id = parsed.fragment
        elif parsed.path[:3] == '/v/':
            channel_id = parsed.path.split('/')[-1]
        else:
            channel_id = match.group("channel")

        if not channel_id:
            return

        channel_id = channel_id.lower().replace("/", "_")
        res = http.get(API_URL.format(channel_id))
        info = http.json(res, schema=_schema)

        if not info["success"]:
            return

        if info.get("isLive"):
            name = "live"
        else:
            name = "vod"

        stream = HTTPStream(self.session, info["payload"])
        # Wrap the stream in a FLVPlaylist to verify the FLV tags
        stream = FLVPlaylist(self.session, [stream])

        return {name: stream}
Example #17
0
    def _get_streams(self):
        match = self._url_re.match(self.url)
        channel = match.group("channel")

        self.session.http.headers.update({"User-Agent": useragents.CHROME})
        if channel:
            streams = self._get_live_streams(channel) or []
        else:
            streams = self._get_vod_streams() or []

        for video_url in streams:
            log.debug("Found stream: {0}".format(video_url))
            parsed = urlparse(video_url)
            if parsed.path.endswith(".f4m"):
                for s in HDSStream.parse_manifest(self.session,
                                                  video_url).items():
                    yield s
            elif parsed.path.endswith(".m3u8"):
                for s in HLSStream.parse_variant_playlist(
                        self.session, video_url).items():
                    yield s
            elif parsed.path.endswith(".mp4"):
                match = self._re_mp4_bitrate.match(video_url)
                bitrate = "vod" if match is None else "{0}k".format(
                    match.group('bitrate'))
                yield bitrate, HTTPStream(self.session, video_url)
Example #18
0
    def _get_stream_info(self, url):
        match = _url_re.match(url)
        user = match.group("user")
        live_channel = match.group("liveChannel")

        if user:
            video_id = self._find_channel_video()
        elif live_channel:
            return self._find_canonical_stream_info()
        else:
            video_id = match.group("video_id")
            if video_id == "live_stream":
                query_info = dict(parse_qsl(urlparse(url).query))
                if "channel" in query_info:
                    video_id = self._get_channel_video(query_info["channel"])

        if not video_id:
            return

        params = {
            "video_id": video_id,
            # CUSTOM: Remove all "el" lines but first one and uncomment it to restore
            # Issues when trying to download Youtube videos, looks like 'el' value is key here
            # https://github.com/Tyrrrz/YoutubeExplode/issues/66#issuecomment-348685419
            #"el": "player_embedded"
            "el": "detailpage"
            #"el": "embedded"
        }
        res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
        return parse_query(res.text, name="config", schema=_config_schema)
Example #19
0
 def find_iframe(self, res):
     for url in self.iframe_re.findall(res.text):
         if url.startswith("//"):
             p = urlparse(self.url)
             return "{0}:{1}".format(p.scheme, url)
         else:
             return url
Example #20
0
    def login(self, ptrt_url):
        """
        Create session using BBC ID. See https://www.bbc.co.uk/usingthebbc/account/

        :param ptrt_url: The snapback URL to redirect to after successful authentication
        :type ptrt_url: string
        :return: Whether authentication was successful
        :rtype: bool
        """

        def auth_check(res):
            return ptrt_url in ([h.url for h in res.history] + [res.url])

        # make the session request to get the correct cookies
        session_res = self.session.http.get(
            self.session_url,
            params=dict(ptrt=ptrt_url)
        )

        if auth_check(session_res):
            log.debug("Already authenticated, skipping authentication")
            return True

        res = self.session.http.post(
            self.auth_url,
            params=urlparse(session_res.url).query,
            data=dict(
                jsEnabled=True,
                username=self.get_option("username"),
                password=self.get_option('password'),
                attempts=0
            ),
            headers={"Referer": self.url})

        return auth_check(res)
Example #21
0
    def _get_streams(self):
        res = http.get(self.url)
        match = _info_re.search(res.text)
        if not match:
            return

        info = parse_json(match.group(1), schema=_schema)
        stream_name = info["mode"]
        mp4_url = info.get("mp4_url")
        ios_url = info.get("ios_url")
        swf_url = info.get("swf_url")

        if mp4_url:
            stream = HTTPStream(self.session, mp4_url)
            yield stream_name, stream

        if ios_url:
            if urlparse(ios_url).path.endswith(".m3u8"):
                streams = HLSStream.parse_variant_playlist(
                    self.session, ios_url)
                # TODO: Replace with "yield from" when dropping Python 2.
                for stream in streams.items():
                    yield stream

        if swf_url:
            stream = self._get_rtmp_stream(swf_url)
            if stream:
                yield stream_name, stream
Example #22
0
    def check_url(value):
        validate(str, value)
        parsed = urlparse(value)
        if not parsed.netloc:
            raise ValidationError(
                "{value} is not a valid URL",
                value=repr(value),
                schema="url",
            )

        for name, schema in attributes.items():
            if not hasattr(parsed, name):
                raise ValidationError(
                    "Invalid URL attribute {name}",
                    name=repr(name),
                    schema="url",
                )

            try:
                validate(schema, getattr(parsed, name))
            except ValidationError as err:
                raise ValidationError(
                    "Unable to validate URL attribute {name}",
                    name=repr(name),
                    schema="url",
                    context=err,
                )

        return True
Example #23
0
    def _get_streams(self):
        res = http.get(self.url)
        match = _info_re.search(res.text)
        if not match:
            return

        info = parse_json(match.group(1), schema=_schema)
        stream_name = info["mode"]
        mp4_url = info.get("mp4_url")
        ios_url = info.get("ios_url")
        swf_url = info.get("swf_url")

        if mp4_url:
            stream = HTTPStream(self.session, mp4_url)
            yield stream_name, stream

        if ios_url:
            if urlparse(ios_url).path.endswith(".m3u8"):
                streams = HLSStream.parse_variant_playlist(self.session, ios_url)
                # TODO: Replace with "yield from" when dropping Python 2.
                for stream in streams.items():
                    yield stream

        if swf_url:
            stream = self._get_rtmp_stream(swf_url)
            if stream:
                yield stream_name, stream
Example #24
0
    def _get_streams(self):
        http.headers.update({"User-Agent": useragents.CHROME,
                             "Referer": self.referer})
        fragment = dict(parse_qsl(urlparse(self.url).fragment))
        link = fragment.get("link")
        if not link:
            link = self._get_tv_link()

        if not link:
            self.logger.error("Missing link fragment: stream unavailable")
            return

        player_url = self._api_url.format(link)
        self.logger.debug("Requesting player API: {0} (referer={1})", player_url, self.referer)
        res = http.get(player_url,
                       params={"_": int(time.time() * 1000)},
                       headers={"X-Requested-With": "XMLHttpRequest"})

        try:
            data = http.json(res, schema=self.api_schema)
        except PluginError as e:
            print(e)
            self.logger.error("Cannot play this stream type")
        else:
            if data["status"]:
                if data["file"].startswith("<"):
                    self.logger.error("Cannot play embedded streams")
                else:
                    return HLSStream.parse_variant_playlist(self.session, data["file"])
            else:
                self.logger.error(data["text"])
Example #25
0
    def create_decryptor(self, key, sequence):
        if key.method != "AES-128":
            raise StreamError("Unable to decrypt cipher {0}", key.method)

        if not self.key_uri_override and not key.uri:
            raise StreamError("Missing URI to decryption key")

        if self.key_uri_override:
            p = urlparse(key.uri)
            key_uri = LazyFormatter.format(
                self.key_uri_override,
                url=key.uri,
                scheme=p.scheme,
                netloc=p.netloc,
                path=p.path,
                query=p.query,
            )
        else:
            key_uri = key.uri

        if self.key_uri != key_uri:
            res = self.session.http.get(key_uri, exception=StreamError,
                                        retries=self.retries,
                                        **self.reader.request_params)
            res.encoding = "binary/octet-stream"
            self.key_data = res.content
            self.key_uri = key_uri

        iv = key.iv or num_to_iv(sequence)

        # Pad IV if needed
        iv = b"\x00" * (16 - len(iv)) + iv

        return AES.new(self.key_data, AES.MODE_CBC, iv)
Example #26
0
    def fetch(self, segment, retries=None):
        if self.closed or not retries:
            return

        try:
            headers = {}
            now = datetime.datetime.now(tz=utc)
            if segment.available_at > now:
                time_to_wait = (segment.available_at - now).total_seconds()
                fname = os.path.basename(urlparse(segment.url).path)
                log.debug("Waiting for segment: {fname} ({wait:.01f}s)".format(
                    fname=fname, wait=time_to_wait))
                sleep_until(segment.available_at)

            if segment.range:
                start, length = segment.range
                if length:
                    end = start + length - 1
                else:
                    end = ""
                headers["Range"] = "bytes={0}-{1}".format(start, end)

            return self.session.http.get(segment.url,
                                         timeout=self.timeout,
                                         exception=StreamError,
                                         headers=headers)
        except StreamError as err:
            log.error("Failed to open segment {0}: {1}", segment.url, err)
            return self.fetch(segment, retries - 1)
Example #27
0
    def _get_streams(self):
        self.session.http.headers.update({"User-Agent": useragents.CHROME,
                             "Referer": self.referer})
        fragment = dict(parse_qsl(urlparse(self.url).fragment))
        link = fragment.get("link")
        if not link:
            link = self._get_tv_link()

        if not link:
            self.logger.error("Missing link fragment: stream unavailable")
            return

        player_url = self._api_url.format(link)
        self.logger.debug("Requesting player API: {0} (referer={1})", player_url, self.referer)
        res = self.session.http.get(player_url,
                       params={"_": int(time.time() * 1000)},
                       headers={"X-Requested-With": "XMLHttpRequest"})

        try:
            data = self.session.http.json(res, schema=self.api_schema)
        except PluginError as e:
            print(e)
            self.logger.error("Cannot play this stream type")
        else:
            if data["status"]:
                if data["file"].startswith("<"):
                    self.logger.error("Cannot play embedded streams")
                else:
                    return HLSStream.parse_variant_playlist(self.session, data["file"])
            else:
                self.logger.error(data["text"])
Example #28
0
 def uri(self, uri):
     if uri and urlparse(uri).scheme:
         return uri
     elif self.base_uri and uri:
         return urljoin(self.base_uri, uri)
     else:
         return uri
Example #29
0
    def _get_streams(self):
        streams = self.session.http.get(self.url, schema=self._stream_schema)
        if streams is None:
            return

        if streams['type'] != 'STATION':
            return

        stream_urls = set()
        for stream in streams['streams']:
            log.trace('{0!r}'.format(stream))
            url = stream['url']

            url_no_scheme = urlunparse(urlparse(url)._replace(scheme=''))
            if url_no_scheme in stream_urls:
                continue
            stream_urls.add(url_no_scheme)

            if stream['contentFormat'] in ('audio/mpeg', 'audio/aac'):
                yield 'live', HTTPStream(self.session, url, allow_redirects=True)
            elif stream['contentFormat'] == 'video/MP2T':
                streams = HLSStream.parse_variant_playlist(self.session, stream["url"])
                if not streams:
                    yield stream["quality"], HLSStream(self.session, stream["url"])
                else:
                    for s in streams.items():
                        yield s
Example #30
0
    def __init__(self, url):
        super(Twitch, self).__init__(url)
        match = self._re_url.match(url).groupdict()
        parsed = urlparse(url)
        self.params = parse_query(parsed.query)
        self.subdomain = match.get("subdomain")
        self.video_id = None
        self._channel_id = None
        self._channel = None
        self.clip_name = None
        self.title = None
        self.author = None
        self.category = None

        if self.subdomain == "player":
            # pop-out player
            if self.params.get("video"):
                self.video_id = self.params["video"]
            self._channel = self.params.get("channel")
        elif self.subdomain == "clips":
            # clip share URL
            self.clip_name = match.get("channel")
        else:
            self._channel = match.get("channel") and match.get("channel").lower()
            self.video_id = match.get("video_id") or match.get("videos_id")
            self.clip_name = match.get("clip_name")

        self.api = TwitchAPI(session=self.session)
        self.usher = UsherService(session=self.session)
Example #31
0
    def __init__(self, url):
        Plugin.__init__(self, url)
        self._hosted_chain = []
        match = _url_re.match(url).groupdict()
        parsed = urlparse(url)
        self.params = parse_query(parsed.query)
        self.subdomain = match.get("subdomain")
        self.video_id = None
        self.video_type = None
        self._channel_id = None
        self._channel = None
        self.clip_name = None

        if self.subdomain == "player":
            # pop-out player
            if self.params.get("video"):
                try:
                    self.video_type = self.params["video"][0]
                    self.video_id = self.params["video"][1:]
                except IndexError:
                    self.logger.debug("Invalid video param: {0}", self.params["video"])
            self._channel = self.params.get("channel")
        elif self.subdomain == "clips":
            # clip share URL
            self.clip_name = match.get("channel")
        else:
            self._channel = match.get("channel") and match.get("channel").lower()
            self.video_type = match.get("video_type")
            if match.get("videos_id"):
                self.video_type = "v"
            self.video_id = match.get("video_id") or match.get("videos_id")
            self.clip_name = match.get("clip_name")

        self.api = TwitchAPI(beta=self.subdomain == "beta", version=5)
        self.usher = UsherService()
Example #32
0
    def fetch(self, segment, retries=None):
        if self.closed or not retries:
            return

        try:
            headers = {}
            now = datetime.datetime.now(tz=utc)
            if segment.available_at > now:
                time_to_wait = (segment.available_at - now).total_seconds()
                fname = os.path.basename(urlparse(segment.url).path)
                log.debug("Waiting for segment: {fname} ({wait:.01f}s)".format(fname=fname, wait=time_to_wait))
                sleep_until(segment.available_at)

            if segment.range:
                start, length = segment.range
                if length:
                    end = start + length - 1
                else:
                    end = ""
                headers["Range"] = "bytes={0}-{1}".format(start, end)

            return self.session.http.get(segment.url,
                                         timeout=self.timeout,
                                         exception=StreamError,
                                         headers=headers)
        except StreamError as err:
            log.error("Failed to open segment {0}: {1}", segment.url, err)
            return self.fetch(segment, retries - 1)
Example #33
0
    def _get_streams_delfi(self, src):
        try:
            data = self.session.http.get(src, schema=validate.Schema(
                validate.parse_html(),
                validate.xml_xpath_string(".//script[contains(text(),'embedJs.setAttribute(')][1]/text()"),
                validate.any(None, validate.all(
                    validate.text,
                    validate.transform(re.compile(r"embedJs\.setAttribute\('src',\s*'(.+?)'").search),
                    validate.any(None, validate.all(
                        validate.get(1),
                        validate.transform(lambda url: parse_qsd(urlparse(url).fragment)),
                        {"stream": validate.text},
                        validate.get("stream"),
                        validate.parse_json(),
                        {"versions": [{
                            "hls": validate.text
                        }]},
                        validate.get("versions")
                    ))
                ))
            ))
        except PluginError:
            log.error("Failed to get streams from iframe")
            return

        for stream in data:
            src = update_scheme("https://", stream["hls"], force=False)
            for s in HLSStream.parse_variant_playlist(self.session, src).items():
                yield s
Example #34
0
 def uri(self, uri):
     if uri and urlparse(uri).scheme:
         return uri
     elif self.base_uri and uri:
         return urljoin(self.base_uri, uri)
     else:
         return uri
Example #35
0
    def _create_stream(self, stream, is_live):
        stream_name = "{0}p".format(stream["height"])
        stream_type = stream["mediaType"]
        stream_url = stream["url"]

        if stream_type in ("hls", "mp4"):
            if urlparse(stream_url).path.endswith("m3u8"):
                try:
                    streams = HLSStream.parse_variant_playlist(
                        self.session, stream_url)

                    # TODO: Replace with "yield from" when dropping Python 2.
                    for stream in streams.items():
                        yield stream
                except IOError as err:
                    self.logger.error("Failed to extract HLS streams: {0}",
                                      err)
            else:
                yield stream_name, HTTPStream(self.session, stream_url)

        elif stream_type == "rtmp":
            params = {
                "rtmp": stream["streamer"],
                "playpath": stream["url"],
                "swfVfy": SWF_URL,
                "pageUrl": self.url,
            }

            if is_live:
                params["live"] = True
            else:
                params["playpath"] = "mp4:{0}".format(params["playpath"])

            stream = RTMPStream(self.session, params)
            yield stream_name, stream
Example #36
0
    def __init__(self, url):
        Plugin.__init__(self, url)
        self._hosted_chain = []
        match = _url_re.match(url).groupdict()
        parsed = urlparse(url)
        self.params = parse_query(parsed.query)
        self.subdomain = match.get("subdomain")
        self.video_id = None
        self.video_type = None
        self._channel_id = None
        self._channel = None
        self.clip_name = None

        if self.subdomain == "player":
            # pop-out player
            if self.params.get("video"):
                try:
                    self.video_type = self.params["video"][0]
                    self.video_id = self.params["video"][1:]
                except IndexError:
                    self.logger.debug("Invalid video param: {0}", self.params["video"])
            self._channel = self.params.get("channel")
        elif self.subdomain == "clips":
            # clip share URL
            self.clip_name = match.get("channel")
        else:
            self._channel = match.get("channel") and match.get("channel").lower()
            self.video_type = match.get("video_type")
            if match.get("videos_id"):
                self.video_type = "v"
            self.video_id = match.get("video_id") or match.get("videos_id")
            self.clip_name = match.get("clip_name")

        self.api = TwitchAPI(beta=self.subdomain == "beta", version=5)
        self.usher = UsherService()
Example #37
0
    def _get_streams(self):
        self.url = http.resolve_url(self.url)
        match = _url_re.match(self.url)
        parsed = urlparse(self.url)
        if parsed.fragment:
            channel_id = parsed.fragment
        elif parsed.path[:3] == '/v/':
            channel_id = parsed.path.split('/')[-1]
        else:
            channel_id = match.group("channel")

        if not channel_id:
            return

        channel_id = channel_id.lower().replace("/", "_")
        res = http.get(API_URL.format(channel_id))
        info = http.json(res, schema=_schema)

        if not info["success"]:
            return

        if info.get("isLive"):
            name = "live"
        else:
            name = "vod"

        stream = HTTPStream(self.session, info["payload"])
        # Wrap the stream in a FLVPlaylist to verify the FLV tags
        stream = FLVPlaylist(self.session, [stream])

        return {name: stream}
Example #38
0
    def _get_stream_info(self, url):
        match = _url_re.match(url)
        user = match.group("user")
        live_channel = match.group("liveChannel")

        if user:
            video_id = self._find_channel_video()
        elif live_channel:
            return self._find_canonical_stream_info()
        else:
            video_id = match.group("video_id")
            if video_id == "live_stream":
                query_info = dict(parse_qsl(urlparse(url).query))
                if "channel" in query_info:
                    video_id = self._get_channel_video(query_info["channel"])

        if not video_id:
            return

        params = {
            "video_id": video_id,
            "el": "player_embedded"
        }
        res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
        return parse_query(res.text, name="config", schema=_config_schema)
Example #39
0
def prepend_www(url):
    """Changes google.com to www.google.com"""
    parsed = urlparse(url)
    if parsed.netloc.split(".")[0] != "www":
        return parsed.scheme + "://www." + parsed.netloc + parsed.path
    else:
        return url
Example #40
0
    def _get_vod(self, root):
        schema_vod = validate.Schema(
            validate.xml_xpath_string(".//script[@type='application/ld+json'][contains(text(),'VideoObject')][1]/text()"),
            validate.text,
            validate.transform(lambda jsonlike: re.sub(r"[\r\n]+", "", jsonlike)),
            validate.parse_json(),
            validate.any(
                validate.all(
                    {"@graph": [dict]},
                    validate.get("@graph"),
                    validate.filter(lambda obj: obj["@type"] == "VideoObject"),
                    validate.get(0)
                ),
                dict
            ),
            {"contentUrl": validate.url()},
            validate.get("contentUrl"),
            validate.transform(lambda content_url: update_scheme("https://", content_url))
        )
        try:
            vod = schema_vod.validate(root)
        except PluginError:
            return

        if urlparse(vod).path.endswith(".m3u8"):
            return HLSStream.parse_variant_playlist(self.session, vod)

        return {"vod": HTTPStream(self.session, vod)}
Example #41
0
    def _get_streams(self):
        self.session.http.headers.update({
           "Referer": self.url,
           "User-Agent": useragents.FIREFOX
        })

        iframe_url = None
        res = self.session.http.get(self.url)
        for iframe in itertags(res.text, "iframe"):
            if "embed.lsm.lv" in iframe.attributes.get("src"):
                iframe_url = iframe.attributes.get("src")
                break

        if not iframe_url:
            log.error("Could not find player iframe")
            return

        log.debug("Found iframe: {0}".format(iframe_url))
        res = self.session.http.get(iframe_url)
        for source in itertags(res.text, "source"):
            if source.attributes.get("src"):
                stream_url = source.attributes.get("src")
                url_path = urlparse(stream_url).path
                if url_path.endswith(".m3u8"):
                    for s in HLSStream.parse_variant_playlist(self.session,
                                                              stream_url).items():
                        yield s
                else:
                    log.debug("Not used URL path: {0}".format(url_path))
Example #42
0
    def _get_streams(self):
        api_url = self.session.http.get(self.url,
                                        schema=self._data_content_schema)
        if api_url and (api_url.startswith("/") or api_url.startswith("http")):
            api_url = urljoin(self.url, api_url)
            stream_url = self.session.http.get(api_url,
                                               schema=self._api_schema,
                                               headers={"Referer": self.url})
        elif api_url and api_url.startswith("[{"):
            stream_url = self._api_schema.validate(api_url)
        else:
            if api_url is not None:
                log.error(
                    "_data_content_schema returns invalid data: {0}".format(
                        api_url))
            return

        parsed = urlparse(stream_url)
        api_url = urljoin(
            self.url,
            self._token_api_path.format(url=stream_url,
                                        netloc="{0}://{1}".format(
                                            parsed.scheme, parsed.netloc),
                                        time=int(time())))
        stream_url = self.session.http.get(api_url,
                                           schema=self._stream_schema,
                                           headers={"Referer": self.url})
        return HLSStream.parse_variant_playlist(self.session, stream_url)
Example #43
0
    def _get_streams(self):
        if not self.login(self.get_option("email"),
                          self.get_option("password")):
            raise PluginError("Login failed")

        try:
            start_point = int(
                float(
                    dict(parse_qsl(urlparse(self.url).query)).get(
                        "startPoint", 0.0)))
            if start_point > 0:
                log.info("Stream will start at {0}".format(
                    seconds_to_hhmmss(start_point)))
        except ValueError:
            start_point = 0

        content_id = self._get_video_id()

        if content_id:
            log.debug("Found content ID: {0}".format(content_id))
            info = self._get_media_info(content_id)
            if info.get("hlsUrl"):
                for s in HLSStream.parse_variant_playlist(
                        self.session, info["hlsUrl"],
                        start_offset=start_point).items():
                    yield s
            else:
                log.error("Could not find the HLS URL")
Example #44
0
def prepend_www(url):
    parsed = urlparse(url)
    if not parsed.netloc.startswith("www."):
        # noinspection PyProtectedMember
        parsed = parsed._replace(netloc="www.{0}".format(parsed.netloc))

    return parsed.geturl()
Example #45
0
def update_qsd(url, qsd=None, remove=None):
    """
    Update or remove keys from a query string in a URL

    :param url: URL to update
    :param qsd: dict of keys to update, a None value leaves it unchanged
    :param remove: list of keys to remove, or "*" to remove all
                   note: updated keys are never removed, even if unchanged
    :return: updated URL
    """
    qsd = qsd or {}
    remove = remove or []

    # parse current query string
    parsed = urlparse(url)
    current_qsd = OrderedDict(parse_qsl(parsed.query))

    # * removes all possible keys
    if remove == "*":
        remove = list(current_qsd.keys())

    # remove keys before updating, but leave updated keys untouched
    for key in remove:
        if key not in qsd:
            del current_qsd[key]

    # and update the query string
    for key, value in qsd.items():
        if value:
            current_qsd[key] = value

    return parsed._replace(query=urlencode(current_qsd)).geturl()
Example #46
0
    def _parse_vod_streams(self, vod):
        for name, stream in vod["streams"].items():
            scheme = urlparse(stream["url"]).scheme

            if scheme == "http":
                yield name, HLSStream(self.session, stream["url"])
            elif scheme == "rtmp":
                yield name, self._create_rtmp_stream(stream, live=False)
Example #47
0
 def find_iframe(self, res):
     p = urlparse(self.url)
     for url in self.iframe_re.findall(res.text):
         if "googletagmanager" not in url:
             if url.startswith("//"):
                 return "{0}:{1}".format(p.scheme, url)
             else:
                 return url
Example #48
0
def update_scheme(current, target):
    """
    Take the scheme from the current URL and applies it to the
    target URL if the target URL startswith // or is missing a scheme
    :param current: current URL
    :param target: target URL
    :return: target URL with the current URLs scheme
    """
    target_p = urlparse(target)
    if not target_p.scheme and target_p.netloc:
        return "{0}:{1}".format(urlparse(current).scheme,
                                urlunparse(target_p))
    elif not target_p.scheme and not target_p.netloc:
        return "{0}://{1}".format(urlparse(current).scheme,
                                  urlunparse(target_p))
    else:
        return target
Example #49
0
    def _get_streams(self):
        res = http.get(self.url, schema=_live_schema)
        if not res:
            return

        if res["type"] == "hls" and urlparse(res["url"]).path.endswith("m3u8"):
            stream = HLSStream(self.session, res["url"])
            return dict(hls=stream)
Example #50
0
    def from_url(cls, session, url):
        purl = urlparse(url)
        querys = dict(parse_qsl(purl.query))

        account_id, player_id, _ = purl.path.lstrip("/").split("/", 3)
        video_id = querys.get("videoId")

        bp = cls(session, account_id=account_id, player_id=player_id)
        return bp.get_streams(video_id)
Example #51
0
    def _get_streams(self):
        res = http.get(self.url, schema=_schema)
        if not res:
            return

        if res["type"] == "channel" and urlparse(res["url"]).path.endswith("m3u8"):
            return HLSStream.parse_variant_playlist(self.session, res["url"])
        elif res["type"] == "video":
            stream = HTTPStream(self.session, res["url"])
            return dict(video=stream)
Example #52
0
    def _get_video_streams(self, player):
        base_url = player["clip"]["baseUrl"] or VOD_BASE_URL
        mapper = StreamMapper(
            cmp=lambda ext, bitrate: urlparse(bitrate["url"]).path.endswith(ext)
        )
        mapper.map(".m3u8", self._create_video_stream, HLSStream, base_url)
        mapper.map(".mp4", self._create_video_stream, HTTPStream, base_url)
        mapper.map(".flv", self._create_video_stream, HTTPStream, base_url)

        return mapper(player["clip"]["bitrates"])
Example #53
0
    def auth_url(self, url):
        parsed = urlparse(url)
        path, _ = parsed.path.rsplit("/", 1)
        token_res = self.session.http.get(self.token_url, params=dict(acl=path + "/*"))
        authparams = self.session.http.json(token_res, schema=self.token_schema)

        existing = dict(parse_qsl(parsed.query))
        existing.update(dict(parse_qsl(authparams)))

        return urlunparse(parsed._replace(query=urlencode(existing)))
Example #54
0
    def get_stream_url(self, event_id):
        url_m = self.url_re.match(self.url)
        site = url_m.group(1) or url_m.group(2)
        api_url = self.api_url.format(id=event_id, site=site.upper())
        self.logger.debug("Calling API: {0}", api_url)

        stream_url = http.get(api_url).text.strip("\"'")

        parsed = urlparse(stream_url)
        query = dict(parse_qsl(parsed.query))
        return urlunparse(parsed._replace(query="")), query
Example #55
0
    def _britecove_params(self, url):
        res = http.get(url, headers={"User-Agent": useragents.FIREFOX,
                                     "Referer": self.url})
        acc = self.account_id_re.search(res.text)
        pk = self.policy_key_re.search(res.text)

        query = dict(parse_qsl(urlparse(url).query))
        return {"video_id": query.get("videoId"),
                "account_id": acc and acc.group(1),
                "policy_key": pk and pk.group(1),
                }
Example #56
0
    def _create_rtmp_stream(self, cdn, stream_name):
        parsed = urlparse(cdn)
        params = {
            "rtmp": cdn,
            "app": parsed.path[1:],
            "playpath": stream_name,
            "pageUrl": self.url,
            "swfUrl": SWF_URL,
            "live": True
        }

        return RTMPStream(self.session, params)
Example #57
0
 def join(url, other):
     # if the other URL is an absolute url, then return that
     if urlparse(other).scheme:
         return other
     elif url:
         parts = list(urlsplit(url))
         if not parts[2].endswith("/"):
             parts[2] += "/"
         url = urlunsplit(parts)
         return urljoin(url, other)
     else:
         return other
Example #58
0
    def _extract_nonce(cls, http_result):
        """
        Given an HTTP response from the sessino endpoint, extract the nonce, so we can "sign" requests with it.
        We don't really sign the requests in the traditional sense of a nonce, we just incude them in the auth requests.

        :param http_result: HTTP response from the bbc session endpoint.
        :type http_result: requests.Response
        :return: nonce to "sign" url requests with
        :rtype: string
        """

        # Extract the redirect URL from the last call
        last_redirect_url = urlparse(http_result.history[-1].request.url)
        last_redirect_query = dict(parse_qsl(last_redirect_url.query))
        # Extract the nonce from the query string in the redirect URL
        final_url = urlparse(last_redirect_query['goto'])
        goto_url = dict(parse_qsl(final_url.query))
        goto_url_query = parse_json(goto_url['state'])

        # Return the nonce we can use for future queries
        return goto_url_query['nonce']
Example #59
0
    def _get_streams(self):
        params = dict(parse_qsl(urlparse(self.url).query))
        vod_id = params.get("vod")
        match = _url_re.match(self.url)
        channel = match.group("channel")

        if vod_id:
            self.logger.debug("Looking for VOD {0} from channel: {1}", vod_id, channel)
            return self._get_vod_stream(vod_id)
        else:
            self.logger.debug("Looking for channel: {0}", channel)
            return self._get_live_stream(channel)