Beispiel #1
0
    def auth_url(self, url):
        parsed = urlparse(url)
        path, _ = parsed.path.rsplit("/", 1)
        token_res = http.get(self.token_url, params=dict(acl=path + "/*"))
        authparams = http.json(token_res, schema=self.token_schema)

        existing = dict(parse_qsl(parsed.query))
        existing.update(dict(parse_qsl(authparams)))

        return urlunparse(parsed._replace(query=urlencode(existing)))
Beispiel #2
0
    def auth_url(self, url):
        parsed = urlparse(url)
        path, _ = parsed.path.rsplit("/", 1)
        token_res = self.session.http.get(self.token_url, params=dict(acl=path + "/*"))
        authparams = self.session.http.json(token_res, schema=self.token_schema)

        existing = dict(parse_qsl(parsed.query))
        existing.update(dict(parse_qsl(authparams)))

        return urlunparse(parsed._replace(query=urlencode(existing)))
Beispiel #3
0
    def _get_stream_info(self, url):
        match = _url_re.match(url)
        user = match.group("user")
        live_channel = match.group("liveChannel")

        if user:
            video_id = self._find_channel_video()
        elif live_channel:
            return self._find_canonical_stream_info()
        else:
            video_id = match.group("video_id")
            if video_id == "live_stream":
                query_info = dict(parse_qsl(urlparse(url).query))
                if "channel" in query_info:
                    video_id = self._get_channel_video(query_info["channel"])

        if not video_id:
            return

        for el in ("detailpage", "embedded"):
            params = {
                "video_id": video_id,
                "el": el
            }
            res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
            info_parsed = parse_query(res.text, name="config", schema=_config_schema)
            if info_parsed.get("status") == "fail":
                self.logger.debug("get_video_info - {0}: {1}".format(
                    el,
                    info_parsed.get("reason"))
                )
                continue
            break

        return info_parsed
Beispiel #4
0
    def _get_stream_info(self, url):
        match = _url_re.match(url)
        user = match.group("user")
        live_channel = match.group("liveChannel")

        if user:
            video_id = self._find_channel_video()
        elif live_channel:
            return self._find_canonical_stream_info()
        else:
            video_id = match.group("video_id")
            if video_id == "live_stream":
                query_info = dict(parse_qsl(urlparse(url).query))
                if "channel" in query_info:
                    video_id = self._get_channel_video(query_info["channel"])

        if not video_id:
            return

        params = {
            "video_id": video_id,
            # CUSTOM: Remove all "el" lines but first one and uncomment it to restore
            # Issues when trying to download Youtube videos, looks like 'el' value is key here
            # https://github.com/Tyrrrz/YoutubeExplode/issues/66#issuecomment-348685419
            #"el": "player_embedded"
            "el": "detailpage"
            #"el": "embedded"
        }
        res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
        return parse_query(res.text, name="config", schema=_config_schema)
Beispiel #5
0
    def _get_streams(self):
        self.session.http.headers.update({"User-Agent": useragents.CHROME,
                             "Referer": self.referer})
        fragment = dict(parse_qsl(urlparse(self.url).fragment))
        link = fragment.get("link")
        if not link:
            link = self._get_tv_link()

        if not link:
            self.logger.error("Missing link fragment: stream unavailable")
            return

        player_url = self._api_url.format(link)
        self.logger.debug("Requesting player API: {0} (referer={1})", player_url, self.referer)
        res = self.session.http.get(player_url,
                       params={"_": int(time.time() * 1000)},
                       headers={"X-Requested-With": "XMLHttpRequest"})

        try:
            data = self.session.http.json(res, schema=self.api_schema)
        except PluginError as e:
            print(e)
            self.logger.error("Cannot play this stream type")
        else:
            if data["status"]:
                if data["file"].startswith("<"):
                    self.logger.error("Cannot play embedded streams")
                else:
                    return HLSStream.parse_variant_playlist(self.session, data["file"])
            else:
                self.logger.error(data["text"])
Beispiel #6
0
    def __init__(self,
                 session,
                 baseurl,
                 url,
                 bootstrap,
                 metadata=None,
                 timeout=60,
                 **request_params):
        Stream.__init__(self, session)

        self.baseurl = baseurl
        self.url = url
        self.bootstrap = bootstrap
        self.metadata = metadata
        self.timeout = timeout

        # Deep copy request params to make it mutable
        self.request_params = deepcopy(request_params)

        parsed = urlparse(self.url)
        if parsed.query:
            params = parse_qsl(parsed.query)
            if params:
                if not self.request_params.get("params"):
                    self.request_params["params"] = {}

                self.request_params["params"].update(params)

        self.url = urlunparse(
            (parsed.scheme, parsed.netloc, parsed.path, None, None, None))
Beispiel #7
0
    def _get_streams(self):
        if not self.login(self.get_option("email"),
                          self.get_option("password")):
            raise PluginError("Login failed")

        try:
            start_point = int(
                float(
                    dict(parse_qsl(urlparse(self.url).query)).get(
                        "startPoint", 0.0)))
            if start_point > 0:
                log.info("Stream will start at {0}".format(
                    seconds_to_hhmmss(start_point)))
        except ValueError:
            start_point = 0

        content_id = self._get_video_id()

        if content_id:
            log.debug("Found content ID: {0}".format(content_id))
            info = self._get_media_info(content_id)
            if info.get("hlsUrl"):
                for s in HLSStream.parse_variant_playlist(
                        self.session, info["hlsUrl"],
                        start_offset=start_point).items():
                    yield s
            else:
                log.error("Could not find the HLS URL")
Beispiel #8
0
    def follow_vk_redirect(self):
        if self._has_video_id():
            return

        try:
            parsed_url = urlparse(self.url)
            true_path = next(
                unquote(v).split("/")[0]
                for k, v in parse_qsl(parsed_url.query)
                if k == "z" and len(v) > 0)
            self.url = "{0}://{1}/{2}".format(parsed_url.scheme,
                                              parsed_url.netloc, true_path)
            if self._has_video_id():
                return
        except StopIteration:
            pass

        try:
            self.url = self.session.http.get(
                self.url,
                schema=validate.Schema(
                    validate.parse_html(),
                    validate.xml_xpath_string(
                        ".//head/meta[@property='og:url'][@content]/@content"),
                    validate.text))
        except PluginError:
            pass
        if self._has_video_id():
            return

        raise NoStreamsError(self.url)
Beispiel #9
0
    def _get_streams(self):
        http.headers.update({"User-Agent": useragents.CHROME,
                             "Referer": self.referer})
        fragment = dict(parse_qsl(urlparse(self.url).fragment))
        link = fragment.get("link")
        if not link:
            link = self._get_tv_link()

        if not link:
            self.logger.error("Missing link fragment: stream unavailable")
            return

        player_url = self._api_url.format(link)
        self.logger.debug("Requesting player API: {0} (referer={1})", player_url, self.referer)
        res = http.get(player_url,
                       params={"_": int(time.time() * 1000)},
                       headers={"X-Requested-With": "XMLHttpRequest"})

        try:
            data = http.json(res, schema=self.api_schema)
        except PluginError as e:
            print(e)
            self.logger.error("Cannot play this stream type")
        else:
            if data["status"]:
                if data["file"].startswith("<"):
                    self.logger.error("Cannot play embedded streams")
                else:
                    return HLSStream.parse_variant_playlist(self.session, data["file"])
            else:
                self.logger.error(data["text"])
Beispiel #10
0
def update_qsd(url, qsd=None, remove=None):
    """
    Update or remove keys from a query string in a URL

    :param url: URL to update
    :param qsd: dict of keys to update, a None value leaves it unchanged
    :param remove: list of keys to remove, or "*" to remove all
                   note: updated keys are never removed, even if unchanged
    :return: updated URL
    """
    qsd = qsd or {}
    remove = remove or []

    # parse current query string
    parsed = urlparse(url)
    current_qsd = OrderedDict(parse_qsl(parsed.query))

    # * removes all possible keys
    if remove == "*":
        remove = list(current_qsd.keys())

    # remove keys before updating, but leave updated keys untouched
    for key in remove:
        if key not in qsd:
            del current_qsd[key]

    # and update the query string
    for key, value in qsd.items():
        if value:
            current_qsd[key] = value

    return parsed._replace(query=urlencode(current_qsd)).geturl()
Beispiel #11
0
    def _get_stream_info(self, url):
        match = _url_re.match(url)
        user = match.group("user")
        live_channel = match.group("liveChannel")

        if user:
            video_id = self._find_channel_video()
        elif live_channel:
            return self._find_canonical_stream_info()
        else:
            video_id = match.group("video_id")
            if video_id == "live_stream":
                query_info = dict(parse_qsl(urlparse(url).query))
                if "channel" in query_info:
                    video_id = self._get_channel_video(query_info["channel"])

        if not video_id:
            return

        params = {
            "video_id": video_id,
            "el": "player_embedded"
        }
        res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
        return parse_query(res.text, name="config", schema=_config_schema)
Beispiel #12
0
    def _get_streams(self):
        event_id = dict(parse_qsl(urlparse(self.url).query.lower())).get("eventid")
        if event_id is None:
            return

        stream_url, params = self.get_stream_url(event_id)
        return HLSStream.parse_variant_playlist(self.session,
                                                stream_url,
                                                params=params)
Beispiel #13
0
    def from_url(cls, session, url):
        purl = urlparse(url)
        querys = dict(parse_qsl(purl.query))

        account_id, player_id, _ = purl.path.lstrip("/").split("/", 3)
        video_id = querys.get("videoId")

        bp = cls(session, account_id=account_id, player_id=player_id)
        return bp.get_streams(video_id)
Beispiel #14
0
    def _get_streams(self):

        headers = {'User-Agent': CHROME}

        if 'web-tv-live' in self.url:
            live = True
        else:
            live = False
            self.url = self.url.replace('episodeinner',
                                        'episodes').replace('showID', 'show')

        res = self.session.http.get(self.url, headers=headers)

        if live:

            tags = list(itertags(res.text, 'script'))

            tag = [i for i in tags if 'm3u8' in i.text][0].text

            m3u8 = re.search(r'''["'](http.+?\.m3u8)['"]''', tag)

            if m3u8:
                m3u8 = m3u8.group(1)
            else:
                raise NoStreamsError('Ant1 CY Broadcast is currently disabled')

        else:

            eid = dict(parse_qsl(urlparse(self.url).query))['episodeID']

            tags = [
                i for i in list(itertags(res.text, 'a'))
                if eid in i.attributes.get('data-innerurl', '')
            ]

            tag = tags[0].attributes.get('data-video')

            m3u8 = re.search(r"&quot;(http.+?master\.m3u8)&quot;",
                             tag).group(1)

        stream = self.session.http.get(self._api_url.format(m3u8),
                                       headers=headers).text

        headers.update({"Referer": self.url})

        try:
            parse_hls = bool(strtobool(self.get_option('parse_hls')))
        except AttributeError:
            parse_hls = True

        if parse_hls:
            return HLSStream.parse_variant_playlist(self.session,
                                                    stream,
                                                    headers=headers)
        else:
            return dict(
                stream=HTTPStream(self.session, stream, headers=headers))
Beispiel #15
0
def update_qsd(url,
               qsd=None,
               remove=None,
               keep_blank_values=True,
               safe="",
               quote_via=quote_plus):
    """
    Update or remove keys from a query string in a URL

    :param url: URL to update
    :param qsd: dict of keys to update, a None value leaves it unchanged
    :param remove: list of keys to remove, or "*" to remove all
                   note: updated keys are never removed, even if unchanged
    :param keep_blank_values: whether params with blank values should be kept or not
    :param safe: string of reserved encoding characters, passed to the quote_via function
    :param quote_via: function which encodes query string keys and values. Default: urllib.parse.quote_plus
    :return: updated URL
    """
    qsd = qsd or {}
    remove = remove or []

    # parse current query string
    parsed = urlparse(url)
    current_qsd = OrderedDict(parse_qsl(parsed.query, keep_blank_values=True))

    # * removes all possible keys
    if remove == "*":
        remove = list(current_qsd.keys())

    # remove keys before updating, but leave updated keys untouched
    for key in remove:
        if key not in qsd:
            del current_qsd[key]

    # and update the query string
    for key, value in qsd.items():
        if value is not None:
            current_qsd[key] = value

    for key, value in list(current_qsd.items()
                           ):  # use list() to create a view of the current_qsd
        if not value and not keep_blank_values and key not in qsd:
            del current_qsd[key]

    if is_py3:
        query = urlencode(query=current_qsd, safe=safe, quote_via=quote_via)
    else:

        def dict2query(d):
            query = []
            for key in d.keys():
                query.append("{0}={1}".format(key, d[key]))
            return "&".join(query)

        query = quote_via(dict2query(current_qsd), safe="=&" + safe)

    return parsed._replace(query=query).geturl()
Beispiel #16
0
def arglist_from_query(path):
    old_data = parse_qsl(urlparse(path).query)
    arglist = []
    for k, v in old_data:
        if k == 'q':
            # backwards compatibility --q
            k = 'default-stream'
        arglist += ['--{0}'.format(unquote(k)), unquote(v)]
    return arglist
Beispiel #17
0
    def from_url(cls, session, url):
        purl = urlparse(url)
        querys = dict(parse_qsl(purl.query))

        account_id, player_id, _ = purl.path.lstrip("/").split("/", 3)
        video_id = querys.get("videoId")

        bp = cls(session, account_id=account_id, player_id=player_id)
        return bp.get_streams(video_id)
Beispiel #18
0
    def get_stream_url(self, event_id):
        site = self.match.group(1) or self.match.group(2)
        api_url = self.api_url.format(id=event_id, site=site.upper())
        log.debug("Calling API: {0}".format(api_url))

        stream_url = self.session.http.get(api_url).text.strip("\"'")

        parsed = urlparse(stream_url)
        query = dict(parse_qsl(parsed.query))
        return urlunparse(parsed._replace(query="")), query
    def _get_streams(self):
        self.session.http.headers.update({"User-Agent": useragents.FIREFOX})
        info_url = None

        channel = self.url_re.match(self.url).group('channel')
        if channel:
            log.debug('Channel: {0}'.format(channel))
            API_URL = 'https://live.russia.tv/api/now/channel/{0}'
            res = self.session.http.get(API_URL.format(channel))
            data = self.session.http.json(res)
            if data:
                data['domain'] = '//player.vgtrk.com'
                data['id'] = data['live_id']
                player_url = urlparse(data['player_url'])
                args = dict(parse_qsl(player_url.query))
                if args:
                    data['sid'] = args['sid']
                else:
                    data['sid'] = player_url.path.split('/')[-1]
                info_url = self.DATA_LIVE_URL.format(**data)
        else:
            iframe_url = self._get_iframe_url(self.url)
            if iframe_url:
                log.debug('Found iframe URL: {0}'.format(iframe_url))
                info_url = self._get_stream_info_url(iframe_url)

        if info_url:
            log.debug('Getting info from URL: {0}'.format(info_url))
            res = self.session.http.get(info_url,
                                        headers={'Referer': self.url})
            data = self.session.http.json(res)
            if data['status'] == 200:
                for media in data['data']['playlist']['medialist']:
                    if media['errors']:
                        log.error(media['errors'].replace('\n', '').replace(
                            '\r', ''))

                    for media_type in media.get('sources', []):
                        if media_type == 'm3u8':
                            hls_url = media['sources'][media_type]['auto']
                            log.debug('hls_url={0}'.format(hls_url))
                            for s in HLSStream.parse_variant_playlist(
                                    self.session, hls_url).items():
                                yield s
                        elif media_type == 'http':
                            for pix, http_url in media['sources'][
                                    media_type].items():
                                log.debug('http_url={0}'.format(http_url))
                                yield '{0}p'.format(pix), HTTPStream(
                                    self.session, http_url)
            else:
                log.error('An error occurred: {0}'.format(
                    data['errors'].replace('\n', '').replace('\r', '')))
        else:
            log.error('Unable to get stream info URL')
    def get_stream_url(self, event_id):
        url_m = self.url_re.match(self.url)
        site = url_m.group(1) or url_m.group(2)
        api_url = self.api_url.format(id=event_id, site=site.upper())
        self.logger.debug("Calling API: {0}", api_url)

        stream_url = http.get(api_url).text.strip("\"'")

        parsed = urlparse(stream_url)
        query = dict(parse_qsl(parsed.query))
        return urlunparse(parsed._replace(query="")), query
Beispiel #21
0
    def _britecove_params(self, url):
        res = http.get(url, headers={"User-Agent": useragents.FIREFOX,
                                     "Referer": self.url})
        acc = self.account_id_re.search(res.text)
        pk = self.policy_key_re.search(res.text)

        query = dict(parse_qsl(urlparse(url).query))
        return {"video_id": query.get("videoId"),
                "account_id": acc and acc.group(1),
                "policy_key": pk and pk.group(1),
                }
Beispiel #22
0
    def get_stream_url(self, event_id):
        url_m = self.url_re.match(self.url)
        site = url_m.group(1) or url_m.group(2)
        api_url = self.api_url.format(id=event_id, site=site.upper())
        self.logger.debug("Calling API: {0}", api_url)

        stream_url = http.get(api_url).text.strip("\"'")

        parsed = urlparse(stream_url)
        query = dict(parse_qsl(parsed.query))
        return urlunparse(parsed._replace(query="")), query
Beispiel #23
0
    def _extract_nonce(cls, http_result):
        """
        Given an HTTP response from the sessino endpoint, extract the nonce, so we can "sign" requests with it.
        We don't really sign the requests in the traditional sense of a nonce, we just incude them in the auth requests.

        :param http_result: HTTP response from the bbc session endpoint.
        :type http_result: requests.Response
        :return: nonce to "sign" url requests with
        :rtype: string
        """

        # Extract the redirect URL from the last call
        last_redirect_url = urlparse(http_result.history[-1].request.url)
        last_redirect_query = dict(parse_qsl(last_redirect_url.query))
        # Extract the nonce from the query string in the redirect URL
        final_url = urlparse(last_redirect_query['goto'])
        goto_url = dict(parse_qsl(final_url.query))
        goto_url_query = parse_json(goto_url['state'])

        # Return the nonce we can use for future queries
        return goto_url_query['nonce']
Beispiel #24
0
    def _extract_nonce(cls, http_result):
        """
        Given an HTTP response from the sessino endpoint, extract the nonce, so we can "sign" requests with it.
        We don't really sign the requests in the traditional sense of a nonce, we just incude them in the auth requests.

        :param http_result: HTTP response from the bbc session endpoint.
        :type http_result: requests.Response
        :return: nonce to "sign" url requests with
        :rtype: string
        """

        # Extract the redirect URL from the last call
        last_redirect_url = urlparse(http_result.history[-1].request.url)
        last_redirect_query = dict(parse_qsl(last_redirect_url.query))
        # Extract the nonce from the query string in the redirect URL
        final_url = urlparse(last_redirect_query['goto'])
        goto_url = dict(parse_qsl(final_url.query))
        goto_url_query = parse_json(goto_url['state'])

        # Return the nonce we can use for future queries
        return goto_url_query['nonce']
Beispiel #25
0
    def _get_streams(self):
        params = dict(parse_qsl(urlparse(self.url).query))
        vod_id = params.get("vod")
        match = _url_re.match(self.url)
        channel = match.group("channel")

        if vod_id:
            self.logger.debug("Looking for VOD {0} from channel: {1}", vod_id, channel)
            return self._get_vod_stream(vod_id)
        else:
            self.logger.debug("Looking for channel: {0}", channel)
            return self._get_live_stream(channel)
Beispiel #26
0
def parse_qsd(data, name="query string", exception=PluginError, schema=None, **params):
    """Parses a query string into a dict.

    Unlike parse_qs and parse_qsl, duplicate keys are not preserved in
    favor of a simpler return value.
    """

    value = dict(parse_qsl(data, **params))
    if schema:
        value = schema.validate(value, name=name, exception=exception)

    return value
Beispiel #27
0
    def _get_streams(self):
        params = dict(parse_qsl(urlparse(self.url).query))
        vod_id = params.get("vod")
        match = _url_re.match(self.url)
        channel = match.group("channel")

        if vod_id:
            self.logger.debug("Looking for VOD {0} from channel: {1}", vod_id, channel)
            return self._get_vod_stream(vod_id)
        else:
            self.logger.debug("Looking for channel: {0}", channel)
            return self._get_live_stream(channel)
Beispiel #28
0
def parse_qsd(data, name="query string", exception=PluginError, schema=None, **params):
    """Parses a query string into a dict.

    Unlike parse_qs and parse_qsl, duplicate keys are not preserved in
    favor of a simpler return value.
    """

    value = dict(parse_qsl(data, **params))
    if schema:
        value = schema.validate(value, name=name, exception=exception)

    return value
Beispiel #29
0
    def _extract_nonce(cls, http_result):
        """
        Given an HTTP response from the session endpoint, extract the nonce, so we can "sign" requests with it.
        We don't really sign the requests in the traditional sense of a nonce, we just include them in the auth requests.

        :param http_result: HTTP response from the bbc session endpoint.
        :type http_result: requests.Response
        :return: nonce to "sign" url requests with
        :rtype: string
        """

        p = urlparse(http_result.url)
        d = dict(parse_qsl(p.query))
        return d.get("nonce")
Beispiel #30
0
    def _get_streams(self):
        docid = self.url_re.match(self.url).group(1)
        self.logger.debug("Google Docs ID: {0}", docid)
        res = http.get(self.api_url, params=dict(docid=docid))
        data = dict(parse_qsl(res.text))

        if data["status"] == "ok":
            fmts = dict([s.split('/')[:2] for s in data["fmt_list"].split(",")])
            streams = [s.split('|') for s in data["fmt_stream_map"].split(",")]
            for qcode, url in streams:
                _, h = fmts[qcode].split("x")
                yield "{0}p".format(h), HTTPStream(self.session, url)
        else:
            self.logger.error("{0} (ID: {1})", data["reason"], docid)
Beispiel #31
0
def parse_qsd(data,
              name="query string",
              exception=PluginError,
              schema=None,
              *args,
              **kwargs):
    """Parses a query string into a dict.

    Provides these extra features:
     - Unlike parse_qs and parse_qsl, duplicate keys are not preserved in favor of a simpler return value
     - Wraps errors in custom exception with a snippet of the data in the message
    """
    return _parse(lambda d: dict(parse_qsl(d, *args, **kwargs)), data, name,
                  exception, schema)
Beispiel #32
0
    def _get_streams(self):
        url_params = dict(parse_qsl(urlparse(self.url).query))
        video_id = url_params.get("videoid")

        if video_id:
            self.logger.debug("Found Video ID: {}", video_id)
            res = http.get(self.api_url.format(id=video_id))
            data = http.json(res, schema=self.api_schema)
            hls = self._make_stream(data["video_info"]["hlsvideosource"])
            video = self._make_stream(data["video_info"]["videosource"])
            if hls:
                yield "live", hls
            if video:
                yield "live", video
Beispiel #33
0
    def _get_streams(self):
        self.session.http.headers.update({"User-Agent": useragents.FIREFOX})
        res = self.session.http.get(self.url)
        for script in itertags(res.text, 'script'):
            if script.attributes.get("id") == "playerScript":
                log.debug("Found the playerScript script tag")
                urlparts = urlparse(script.attributes.get("src"))
                i = 0

                for key, url in parse_qsl(urlparts.query):
                    if key == "streamUrl":
                        i += 1
                        for s in HLSStream.parse_variant_playlist(self.session, url, params=dict(id=i), verify=False).items():
                            yield s
Beispiel #34
0
    def _get_streams(self):
        res = self.session.http.get(self.url)
        for script in itertags(res.text, 'script'):
            if script.attributes.get("id") == "playerScript":
                log.debug("Found the playerScript script tag")
                urlparts = urlparse(script.attributes.get("src"))
                i = 0

                for key, url in parse_qsl(urlparts.query):
                    if key == "streamUrl":
                        i += 1
                        for s in HLSStream.parse_variant_playlist(
                                self.session, url, params=dict(id=i),
                                verify=False).items():
                            yield s
Beispiel #35
0
    def _britecove_params(self, url):
        res = http.get(url,
                       headers={
                           "User-Agent": useragents.FIREFOX,
                           "Referer": self.url
                       })
        acc = self.account_id_re.search(res.text)
        pk = self.policy_key_re.search(res.text)

        query = dict(parse_qsl(urlparse(url).query))
        return {
            "video_id": query.get("videoId"),
            "account_id": acc and acc.group(1),
            "policy_key": pk and pk.group(1),
        }
Beispiel #36
0
    def _isvp_to_m3u8(self, url):
        qs = dict(parse_qsl(urlparse(url).query))
        if "comm" not in qs:
            log.error("Missing `comm` value")
        if "filename" not in qs:
            log.error("Missing `filename` value")

        d = self.url_lookup.get(qs['comm'])
        if d:
            snumber, baseurl = d
            stream_url = self.hls_url.format(filename=qs['filename'], number=snumber, base=baseurl)
        else:
            stream_url = self.hlsarch_url.format(filename=qs['filename'])

        return stream_url, self.parse_stt(qs.get('stt', 0))
Beispiel #37
0
    def _get_streams(self):
        docid = self.url_re.match(self.url).group(1)
        self.logger.debug("Google Docs ID: {0}", docid)
        res = self.session.http.get(self.api_url, params=dict(docid=docid))
        data = dict(parse_qsl(res.text))

        if data["status"] == "ok":
            fmts = dict(
                [s.split('/')[:2] for s in data["fmt_list"].split(",")])
            streams = [s.split('|') for s in data["fmt_stream_map"].split(",")]
            for qcode, url in streams:
                _, h = fmts[qcode].split("x")
                yield "{0}p".format(h), HTTPStream(self.session, url)
        else:
            self.logger.error("{0} (ID: {1})", data["reason"], docid)
Beispiel #38
0
    def _isvp_to_m3u8(self, url):
        qs = dict(parse_qsl(urlparse(url).query))
        if "comm" not in qs:
            log.error("Missing `comm` value")
        if "filename" not in qs:
            log.error("Missing `filename` value")

        d = self.url_lookup.get(qs['comm'])
        if d:
            snumber, baseurl = d
            stream_url = self.hls_url.format(filename=qs['filename'], number=snumber, base=baseurl)
        else:
            stream_url = self.hlsarch_url.format(filename=qs['filename'])

        return stream_url, self.parse_stt(qs.get('stt', 0))
Beispiel #39
0
    def _pv_params(cls, session, pvswf, pv, **request_params):
        """Returns any parameters needed for Akamai HD player verification.

        Algorithm originally documented by KSV, source:
        http://stream-recorder.com/forum/showpost.php?p=43761&postcount=13
        """

        try:
            data, hdntl = pv.split(";")
        except ValueError:
            data = pv
            hdntl = ""

        cache = Cache(filename="stream.json")
        key = "akamaihd-player:" + pvswf
        cached = cache.get(key)

        request_params = deepcopy(request_params)
        headers = request_params.pop("headers", {})
        if cached:
            headers["If-Modified-Since"] = cached["modified"]
        swf = session.http.get(pvswf, headers=headers, **request_params)

        if cached and swf.status_code == 304:  # Server says not modified
            hash = cached["hash"]
        else:
            # Calculate SHA-256 hash of the uncompressed SWF file, base-64
            # encoded
            hash = sha256()
            hash.update(swfdecompress(swf.content))
            hash = base64.b64encode(hash.digest()).decode("ascii")
            modified = swf.headers.get("Last-Modified", "")

            # Only save in cache if a valid date is given
            if len(modified) < 40:
                cache.set(key, dict(hash=hash, modified=modified))

        msg = "st=0~exp=9999999999~acl=*~data={0}!{1}".format(data, hash)
        auth = hmac.new(AKAMAIHD_PV_KEY, msg.encode("ascii"), sha256)
        pvtoken = "{0}~hmac={1}".format(msg, auth.hexdigest())

        # The "hdntl" parameter can be accepted as a cookie or passed in the
        # query string, but the "pvtoken" parameter can only be in the query
        # string
        params = [("pvtoken", pvtoken)]
        params.extend(parse_qsl(hdntl, keep_blank_values=True))

        return params
Beispiel #40
0
    def _get_stream_info_url(self, url):
        data = {}
        res = self.session.http.get(url)
        for m in self._data_re.finditer(res.text):
            data[m.group(1)] = m.group(2)

        log.debug("Got pl_data={0}".format(data))

        if data:
            if data["isVod"] == '0':
                return "https:{domain}/iframe/datalive/id/{id}/sid/{sid}".format(**data)
            else:
                return "https:{domain}/iframe/datavideo/id/{id}/sid/{sid}".format(**data)
        else:
            args = dict(parse_qsl(urlparse(url).query))
            return "https://player.vgtrk.com/iframe/datalive/id/{id}/sid/{sid}".format(**args)
Beispiel #41
0
    def _get_hls_streams(self, channel):
        channel = self.hls_channel_remap.get(channel, channel)
        embed_url = self.embed_url.format(channel)
        self.logger.debug("Found embed URL: {0}", embed_url)
        # page needs to have a mobile user agent
        embed_page = self.session.http.get(embed_url, headers={"User-Agent": useragents.ANDROID})

        m = self.embed_re.search(embed_page.text)
        if m:
            o = urlparse(m.group(1))
            prms = dict(parse_qsl(o.query))
            hls_stream_url = "{0}://{1}{2}?hdnea={3}".format(o.scheme, o.netloc, o.path, prms["hdnea"])
            try:
                for s in HLSStream.parse_variant_playlist(self.session, hls_stream_url).items():
                    yield s
            except Exception:
                self.logger.error("Failed to load the HLS playlist for {0}", channel)
Beispiel #42
0
    def _get_hls_streams(self, channel):
        channel = self.hls_channel_remap.get(channel, channel)
        embed_url = self.embed_url.format(channel)
        self.logger.debug("Found embed URL: {0}", embed_url)
        # page needs to have a mobile user agent
        embed_page = http.get(embed_url, headers={"User-Agent": useragents.ANDROID})

        m = self.embed_re.search(embed_page.text)
        if m:
            o = urlparse(m.group(1))
            prms = dict(parse_qsl(o.query))
            hls_stream_url = "{0}://{1}{2}?hdnea={3}".format(o.scheme, o.netloc, o.path, prms["hdnea"])
            try:
                for s in HLSStream.parse_variant_playlist(self.session, hls_stream_url).items():
                    yield s
            except Exception:
                self.logger.error("Failed to load the HLS playlist for {0}", channel)
Beispiel #43
0
    def _get_live_streams(self, page):
        # check if a different language has been selected
        qs = dict(parse_qsl(urlparse(self.url).query))
        channel = qs.get("channel")

        if not channel:
            m = self.channel_re.search(page.text)
            channel = m and m.group(1)

        self.logger.debug("Using sub-channel ID: {0}", channel)

        # extract the streams from the page, mapping between channel-id and stream url
        media_items = self.live_stream_div.finditer(page.text)
        stream_map = dict([m.groups((1, 2)) for m in media_items])

        stream_url = stream_map.get(str(channel) or self.default_channel)
        if stream_url:
            return self._create_stream(stream_url)
Beispiel #44
0
    def _get_stream_info(self, url):
        match = _url_re.match(url)
        user = match.group("user")
        live_channel = match.group("liveChannel")

        if user:
            video_id = self._find_channel_video()
        elif live_channel:
            return self._find_canonical_stream_info()
        else:
            video_id = match.group("video_id")
            if video_id == "live_stream":
                query_info = dict(parse_qsl(urlparse(url).query))
                if "channel" in query_info:
                    video_id = self._get_channel_video(query_info["channel"])

        if not video_id:
            return

        # normal
        _params_1 = {"el": "detailpage"}
        # age restricted
        _params_2 = {"el": "embedded"}
        # embedded restricted
        _params_3 = {"eurl": "https://youtube.googleapis.com/v/{0}".format(video_id)}

        count = 0
        for _params in (_params_1, _params_2, _params_3):
            count += 1
            params = {"video_id": video_id}
            params.update(_params)

            res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
            info_parsed = parse_query(res.text, name="config", schema=_config_schema)
            if info_parsed.get("status") == "fail":
                self.logger.debug("get_video_info - {0}: {1}".format(
                    count, info_parsed.get("reason"))
                )
                continue
            self.logger.debug("get_video_info - {0}: Found data".format(count))
            break

        return info_parsed
Beispiel #45
0
    def get_video_id(self):
        parsed = urlparse(self.url)
        qinfo = dict(parse_qsl(parsed.query or parsed.fragment.lstrip("?")))

        site, video_id = None, None
        url_m = self.url_re.match(self.url)

        # look for the video id in the URL, otherwise find it in the page
        if "tvLiveId" in qinfo:
            video_id = qinfo["tvLiveId"]
            site = url_m.group(1)
        elif url_m.group(2):
            site, video_id = url_m.group(1), url_m.group(2)
        else:
            video_id_m = self.session.http.get(self.url, schema=self.video_id_schema)
            if video_id_m:
                site, video_id = video_id_m.groups()

        return site, video_id
Beispiel #46
0
    def _create_adaptive_streams(self, info, streams, protected):
        adaptive_streams = {}
        best_audio_itag = None

        # Extract audio streams from the DASH format list
        for stream_info in info.get("adaptive_fmts", []):
            if stream_info.get("s"):
                protected = True
                continue

            stream_params = dict(parse_qsl(stream_info["url"]))
            if "itag" not in stream_params:
                continue
            itag = int(stream_params["itag"])
            # extract any high quality streams only available in adaptive formats
            adaptive_streams[itag] = stream_info["url"]

            stream_type, stream_format = stream_info["type"]
            if stream_type == "audio":
                stream = HTTPStream(self.session, stream_info["url"])
                name = "audio_{0}".format(stream_format)
                streams[name] = stream

                # find the best quality audio stream m4a, opus or vorbis
                if best_audio_itag is None or self.adp_audio[itag] > self.adp_audio[best_audio_itag]:
                    best_audio_itag = itag

        if best_audio_itag and adaptive_streams and MuxedStream.is_usable(self.session):
            aurl = adaptive_streams[best_audio_itag]
            for itag, name in self.adp_video.items():
                if itag in adaptive_streams:
                    vurl = adaptive_streams[itag]
                    log.debug("MuxedStream: v {video} a {audio} = {name}".format(
                        audio=best_audio_itag,
                        name=name,
                        video=itag,
                    ))
                    streams[name] = MuxedStream(self.session,
                                                HTTPStream(self.session, vurl),
                                                HTTPStream(self.session, aurl))

        return streams, protected
Beispiel #47
0
    def _get_streams(self):
        args = dict(parse_qsl(urlparse(self.url).query))
        if "k" in args:
            self.logger.debug("Loading channel: {k}", **args)
            res = http.get(self.url)
            stream_data_m = self.stream_data_re.search(res.text)
            if stream_data_m:
                script_vars = b64decode(stream_data_m.group(1)).decode("utf8")
                url_m = self.m3u8_re.search(script_vars)

                hls_url = url_m and url_m.group("url")
                if hls_url:
                    for s in HLSStream.parse_variant_playlist(self.session, hls_url).items():
                        yield s

                f4m_m = self.f4mm_re.search(script_vars)
                f4m_url = f4m_m and f4m_m.group("url")
                if f4m_url:
                    for n, s in HDSStream.parse_manifest(self.session, f4m_url).items():
                        yield n, s
Beispiel #48
0
    def _get_streams(self):
        url_params = dict(parse_qsl(urlparse(self.url).query))
        video_id = url_params.get("videoid")

        if video_id:
            vali = '{0}l{1}m{2}'.format(self._random_t(4), self._random_t(4), self._random_t(5))
            data = {
                'userid': 1,
                'videoid': video_id,
                'area': '',
                'h5': 1,
                'vali': vali
            }
            self.logger.debug("Found Video ID: {0}".format(video_id))
            res = http.post(self.api_url, data=data)
            data = http.json(res, schema=self.api_schema)
            hls = self._make_stream(data["video_info"]["hlsvideosource"])
            video = self._make_stream(data["video_info"]["videosource"])
            if hls:
                yield "live", hls
            if video:
                yield "live", video
Beispiel #49
0
    def _get_streams(self):
        info = self._get_stream_info(self.url)
        if not info:
            return

        formats = info.get("fmt_list")
        streams = {}
        protected = False
        for stream_info in info.get("url_encoded_fmt_stream_map", []):
            if stream_info.get("s"):
                protected = True
                continue

            stream = HTTPStream(self.session, stream_info["url"])
            name = formats.get(stream_info["itag"]) or stream_info["quality"]

            if stream_info.get("stereo3d"):
                name += "_3d"

            streams[name] = stream

        adaptive_streams = {}
        best_audio_itag = None

        # Extract audio streams from the DASH format list
        for stream_info in info.get("adaptive_fmts", []):
            if stream_info.get("s"):
                protected = True
                continue

            stream_params = dict(parse_qsl(stream_info["url"]))
            if "itag" not in stream_params:
                continue
            itag = int(stream_params["itag"])
            # extract any high quality streams only available in adaptive formats
            adaptive_streams[itag] = stream_info["url"]

            stream_type, stream_format = stream_info["type"]
            if stream_type == "audio":
                stream = HTTPStream(self.session, stream_info["url"])
                name = "audio_{0}".format(stream_format)
                streams[name] = stream

                # find the best quality audio stream m4a, opus or vorbis
                if best_audio_itag is None or self.adp_audio[itag] > self.adp_audio[best_audio_itag]:
                    best_audio_itag = itag

        if best_audio_itag and adaptive_streams and MuxedStream.is_usable(self.session):
            aurl = adaptive_streams[best_audio_itag]
            for itag, name in self.adp_video.items():
                if itag in adaptive_streams:
                    vurl = adaptive_streams[itag]
                    streams[name] = MuxedStream(self.session,
                                                HTTPStream(self.session, vurl),
                                                HTTPStream(self.session, aurl))

        hls_playlist = info.get("hlsvp")
        if hls_playlist:
            try:
                hls_streams = HLSStream.parse_variant_playlist(
                    self.session, hls_playlist, headers=HLS_HEADERS, namekey="pixels"
                )
                streams.update(hls_streams)
            except IOError as err:
                self.logger.warning("Failed to extract HLS streams: {0}", err)

        if not streams and protected:
            raise PluginError("This plugin does not support protected videos, "
                              "try youtube-dl instead")

        return streams
Beispiel #50
0
 def get_event_id(cls, url):
     return dict(parse_qsl(urlparse(url).query.lower())).get("eventid")
Beispiel #51
0
 def can_handle_url(cls, url):
     if cls.url_re.match(url) is not None:
         args = dict(parse_qsl(urlparse(url).query))
         return args.get("y") == "tv"