Python xml_findallの例、livecli.plugin.api.validate.xml_findall Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_plugin_api_validate.py プロジェクト: longsack/livecli

    def test_xml_findall(self):
        """xml_findall("child") must return all children of the parent element."""
        parent = Element("parent")
        expected = [Element("child") for _ in range(10)]
        # Attach every child to the parent in one call.
        parent.extend(expected)

        assert validate(xml_findall("child"), parent) == expected

コード例 #2

0

ファイルを表示

ファイル: ard_live.py プロジェクト: persianpros/livecli

class ard_live(Plugin):
    """Plugin for pages on daserste.de.

    The page is searched for the player's ``dataURL``; the XML document found
    behind that URL lists the assets, which are turned into HLS, HDS or plain
    HTTP streams depending on what the asset URL contains.
    """

    swf_url = "http://live.daserste.de/lib/br-player/swf/main.swf"
    # The dots are escaped so that only the literal host name is accepted
    # (an unescaped "." would match any character).
    _url_re = re.compile(r"https?://(www\.)?daserste\.de/", re.I)
    _player_re = re.compile(r'''dataURL\s*:\s*(?P<q>['"])(?P<url>.*?)(?P=q)''')
    # Yields the "url" group of the dataURL match, or None when nothing matched.
    _player_url_schema = validate.Schema(
        validate.transform(_player_re.search),
        validate.any(None, validate.all(validate.get("url"), validate.text)))
    # Takes the first <assets> element whose type is not "subtitles" and
    # returns one {"url", "bitrate"} dict per <asset> below it.
    _livestream_schema = validate.Schema(
        validate.xml_findall(".//assets"),
        validate.filter(lambda x: x.attrib.get("type") != "subtitles"),
        validate.get(0), validate.xml_findall(".//asset"), [
            validate.union({
                "url": validate.xml_findtext("./fileName"),
                "bitrate": validate.xml_findtext("./bitrateVideo")
            })
        ])

    @classmethod
    def can_handle_url(cls, url):
        """Return True when ``url`` starts with a daserste.de address."""
        return cls._url_re.match(url) is not None

    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for every asset of the page.

        Nothing is yielded when the page does not contain a ``dataURL``.
        An ``IOError`` raised while handling a single asset is logged as a
        warning and the remaining assets are still processed.
        """
        data_url = http.get(self.url, schema=self._player_url_schema)
        if not data_url:
            return

        # The data URL may be relative to the page URL.
        res = http.get(urljoin(self.url, data_url))
        stream_info = http.xml(res, schema=self._livestream_schema)

        for stream in stream_info:
            url = stream["url"]
            try:
                if ".m3u8" in url:
                    for s in HLSStream.parse_variant_playlist(
                            self.session, url, name_key="bitrate").items():
                        yield s
                elif ".f4m" in url:
                    for s in HDSStream.parse_manifest(
                            self.session,
                            url,
                            pvswf=self.swf_url,
                            is_akamai=True).items():
                        yield s
                elif ".mp4" in url:
                    yield "{0}k".format(stream["bitrate"]), HTTPStream(
                        self.session, url)
            except IOError as err:
                self.logger.warning("Error parsing stream: {0}", err)

コード例 #3

0

ファイルを表示

class Rtve(Plugin):
    """Plugin for rtve.es pages.

    The content id is read from the page, the CDN list for that id is fetched
    through ``ZTNRClient`` and turned into HLS/HTTP streams; subtitles are
    muxed in when the ``mux_subtitles`` option is enabled.
    """

    secret_key = base64.b64decode("eWVMJmRhRDM=")
    content_id_re = re.compile(r'data-id\s*=\s*"(\d+)"')
    url_re = re.compile(
        r"""
        https?://(?:www\.)?rtve\.es/(?:directo|noticias|television|deportes|alacarta|drmn)/.*?/?
    """, re.VERBOSE)

    # One {"quality", "urls"} dict per <preset> element of the CDN document.
    cdn_schema = validate.Schema(
        validate.transform(partial(parse_xml, invalid_char_entities=True)),
        validate.xml_findall(".//preset"),
        [
            validate.union({
                "quality": validate.all(validate.getattr("attrib"),
                                        validate.get("type")),
                "urls": validate.all(validate.xml_findall(".//url"),
                                     [validate.getattr("text")]),
            })
        ])

    subtitles_api = "http://www.rtve.es/api/videos/{id}/subtitulos.json"
    # Reduces the response to the list found under page -> items.
    subtitles_schema = validate.Schema(
        {
            "page": {
                "items": [{
                    "src": validate.url(),
                    "lang": validate.text
                }]
            }
        },
        validate.get("page"),
        validate.get("items"))

    video_api = "http://www.rtve.es/api/videos/{id}.json"
    # Reduces the response to the first entry found under page -> items.
    video_schema = validate.Schema(
        {
            "page": {
                "items": [{
                    "qualities": [{
                        "preset": validate.text,
                        "height": int
                    }]
                }]
            }
        },
        validate.get("page"),
        validate.get("items"),
        validate.get(0))

    options = PluginOptions({"mux_subtitles": False})

    @classmethod
    def can_handle_url(cls, url):
        """Return True when ``url`` matches ``url_re``."""
        return cls.url_re.match(url) is not None

    def __init__(self, url):
        """Create the ZTNR client and set the User-Agent on the shared session."""
        Plugin.__init__(self, url)
        self.zclient = ZTNRClient(self.secret_key)
        http.headers = {"User-Agent": useragents.SAFARI_8}

    def _get_content_id(self):
        """Return the ``data-id`` found in the page as an int, or None."""
        page = http.get(self.url)
        found = self.content_id_re.search(page.text)
        if found is None:
            return None
        return int(found.group(1))

    def _get_subtitles(self, content_id):
        """Return the subtitle items (``src``/``lang`` dicts) for ``content_id``."""
        response = http.get(self.subtitles_api.format(id=content_id))
        return http.json(response, schema=self.subtitles_schema)

    def _get_quality_map(self, content_id):
        """Return a dict mapping quality names to ``"<height>p"`` labels.

        The API preset names MED/HIGH/ORIGINAL are translated to
        Media/Alta/Original; any other preset name is used unchanged.
        """
        response = http.get(self.video_api.format(id=content_id))
        video = http.json(response, schema=self.video_schema)
        preset_names = {
            "MED": "Media",
            "HIGH": "Alta",
            "ORIGINAL": "Original"
        }
        return {
            preset_names.get(entry["preset"], entry["preset"]):
            u"{0}p".format(entry["height"])
            for entry in video["qualities"]
        }

    def _get_streams(self):
        """Yield ``(quality, stream)`` pairs for the content on the page.

        Nothing is yielded when no content id can be found.
        """
        content_id = self._get_content_id()
        if not content_id:
            return

        self.logger.debug("Found content with id: {0}", content_id)
        cdn_presets = self.zclient.get_cdn_list(content_id,
                                                schema=self.cdn_schema)
        found = []
        quality_map = None

        for preset in cdn_presets:
            for url in preset["urls"]:
                if url.endswith("m3u8"):
                    try:
                        variants = HLSStream.parse_variant_playlist(
                            self.session, url)
                        found.extend(variants.items())
                    except (IOError, OSError):
                        self.logger.debug("Failed to load m3u8 url: {0}",
                                          url)
                    continue

                # The HEAD request is only sent for plain video files.
                if not (url.endswith(("mp4", "mov", "avi"))
                        and http.head(
                            url, raise_for_status=False).status_code == 200):
                    continue

                # only make the request when it is necessary
                if quality_map is None:
                    quality_map = self._get_quality_map(content_id)
                # rename the HTTP sources to match the HLS sources
                label = quality_map.get(preset["quality"], preset["quality"])
                found.append((label, HTTPStream(self.session, url)))

        if self.get_option("mux_subtitles"):
            subtitles = self._get_subtitles(content_id)
        else:
            subtitles = None

        if not subtitles:
            for pair in found:
                yield pair
            return

        # One HTTP stream per subtitle language.
        substreams = {
            sub["lang"]: HTTPStream(self.session, sub["src"])
            for sub in subtitles
        }
        for label, stream in found:
            yield label, MuxedStream(self.session, stream,
                                     subtitles=substreams)

コード例 #4

0

ファイルを表示

ファイル: adultswim.py プロジェクト: persianpros/livecli

class AdultSwim(Plugin):
    """Plugin for adultswim.com video pages (live streams and VOD).

    The page embeds a JSON blob (``AS_INITIAL_DATA``); depending on whether
    the URL contains ``/streams`` it is interpreted as live-stream data or as
    show/VOD data, and the stream URLs are then fetched from the site's APIs.
    """

    API_URL = "http://www.adultswim.com/videos/api/v2/videos/{id}?fields=stream"
    vod_api = "http://www.adultswim.com/videos/api/v0/assets"

    # Groups: 1 = "streams" marker (live), 2 = show name, 3 = episode name.
    url_re = re.compile(
        r"""https?://(?:www\.)?adultswim\.com/videos
            (?:/(streams))?
            (?:/([^/]+))?
            (?:/([^/]+))?
            """, re.VERBOSE)
    _stream_data_re = re.compile(r"(?:__)?AS_INITIAL_DATA(?:__)? = (\{.*?});",
                                 re.M | re.DOTALL)

    live_schema = validate.Schema({
        u"streams": {
            validate.text: {
                u"stream":
                validate.text,
                validate.optional(u"isLive"):
                bool,
                u"archiveEpisodes": [{
                    u"id": validate.text,
                    u"slug": validate.text,
                }]
            }
        }
    })
    # Reduces the page data to the id found at show -> sluggedVideo -> id.
    vod_id_schema = validate.Schema(
        {u"show": {
            u"sluggedVideo": {
                u"id": validate.text
            }
        }}, validate.transform(lambda x: x["show"]["sluggedVideo"]["id"]))
    _api_schema = validate.Schema({
        u'status': u'ok',
        u'data': {
            u'stream': {
                u'assets': [{
                    u'url': validate.url()
                }]
            }
        }
    })
    # One {"bitrate", "url"} dict per <file> element below <files>.
    _vod_api_schema = validate.Schema(
        validate.all(validate.xml_findall(".//files/file"), [
            validate.xml_element,
            validate.transform(lambda v: {
                "bitrate": v.attrib.get("bitrate"),
                "url": v.text
            })
        ]))

    @classmethod
    def can_handle_url(cls, url):
        """Return True when ``url`` matches ``url_re``."""
        match = AdultSwim.url_re.match(url)
        return match is not None

    def _make_hls_hds_stream(self, func, stream, *args, **kwargs):
        """Call ``func`` with the session and the URL of the ``stream`` dict."""
        return func(self.session, stream["url"], *args, **kwargs)

    def _get_show_streams(self,
                          stream_data,
                          show,
                          episode,
                          platform="desktop"):
        """Yield ``(quality, stream)`` pairs for a VOD page.

        :param stream_data: regex match whose group 1 is the page's JSON blob
        :param show: show name from the URL (not used by this method)
        :param episode: episode name from the URL (not used by this method)
        :param platform: value sent as the ``platform`` parameter to the VOD API
        """
        video_id = parse_json(stream_data.group(1), schema=self.vod_id_schema)
        res = http.get(self.vod_api,
                       params={
                           "platform": platform,
                           "id": video_id
                       })

        # create a unique list of the stream manifest URLs; a set gives
        # constant-time membership checks while the list keeps the order
        streams = []
        seen_urls = set()
        for stream in parse_xml(res.text, schema=self._vod_api_schema):
            if stream["url"] not in seen_urls:
                streams.append(stream)
                seen_urls.add(stream["url"])

        mapper = StreamMapper(lambda fmt, strm: strm["url"].endswith(fmt))
        mapper.map(".m3u8", self._make_hls_hds_stream,
                   HLSStream.parse_variant_playlist)
        mapper.map(".f4m",
                   self._make_hls_hds_stream,
                   HDSStream.parse_manifest,
                   is_akamai=True)
        mapper.map(
            ".mp4", lambda s:
            (s["bitrate"] + "k", HTTPStream(self.session, s["url"])))

        for q, s in mapper(streams):
            yield q, s

    def _get_live_stream(self, stream_data, show, episode=None):
        """Yield ``(quality, stream)`` pairs for a live-stream page.

        :param stream_data: regex match whose group 1 is the page's JSON blob
        :param show: key of the show inside the blob's ``streams`` mapping
        :param episode: slug of an archived episode to replay, if any

        The stream id is chosen as follows: the archived episode with the
        given slug when ``episode`` is set; the show's own stream when it is
        live or has no archived episodes; otherwise the first archived
        episode. An error is logged when no stream id could be determined.
        """
        # parse the stream info as json
        stream_info = parse_json(stream_data.group(1), schema=self.live_schema)
        # get the stream ID
        stream_id = None
        show_info = stream_info[u"streams"][show]
        archive = show_info[u"archiveEpisodes"]

        if episode:
            self.logger.debug("Loading replay of episode: {0}/{1}", show,
                              episode)
            for epi in archive:
                if epi[u"slug"] == episode:
                    stream_id = epi[u"id"]
        elif show_info.get("isLive") or not archive:
            self.logger.debug("Loading LIVE streams for: {0}", show)
            stream_id = show_info[u"stream"]
        else:
            # off-air: the branch above already handled an empty archive, so
            # at least one archived episode exists here
            epi = archive[0]
            self.logger.debug("Loading replay of episode: {0}/{1}", show,
                              epi[u"slug"])
            stream_id = epi[u"id"]

        if stream_id:
            api_url = self.API_URL.format(id=stream_id)

            res = http.get(api_url,
                           headers={"User-Agent": useragents.SAFARI_8})
            stream_data = http.json(res, schema=self._api_schema)

            mapper = StreamMapper(lambda fmt, surl: surl.endswith(fmt))
            mapper.map(".m3u8", HLSStream.parse_variant_playlist, self.session)
            mapper.map(".f4m", HDSStream.parse_manifest, self.session)

            stream_urls = [
                asset[u"url"]
                for asset in stream_data[u'data'][u'stream'][u'assets']
            ]
            for q, s in mapper(stream_urls):
                yield q, s

        else:
            self.logger.error(
                "Couldn't find the stream ID for this stream: {0}".format(
                    show))

    def _get_streams(self):
        """Yield ``(quality, stream)`` pairs for the plugin's URL.

        Streams that expose a URL in ``args`` are yielded only once per URL.
        An error is logged when the page contains no stream data blob.
        """
        # get the page
        url_match = self.url_re.match(self.url)
        live_stream, show_name, episode_name = url_match.groups()
        if live_stream:
            show_name = show_name or "live-stream"

        res = http.get(self.url, headers={"User-Agent": useragents.SAFARI_8})
        # find the big blob of stream info in the page
        stream_data = self._stream_data_re.search(res.text)

        if stream_data:
            if live_stream:
                streams = self._get_live_stream(stream_data, show_name,
                                                episode_name)
            else:
                self.logger.debug("Loading VOD streams for: {0}/{1}",
                                  show_name, episode_name)
                streams = self._get_show_streams(stream_data, show_name,
                                                 episode_name)

            # De-dup the streams, some of the mobile streams overlap the desktop streams
            dups = set()
            for q, s in streams:
                if hasattr(s, "args") and "url" in s.args:
                    if s.args["url"] not in dups:
                        yield q, s
                        dups.add(s.args["url"])
                else:
                    yield q, s

        else:
            self.logger.error(
                "Couldn't find the stream data for this stream: {0}".format(
                    show_name))

コード例 #5

0

ファイルを表示

}

# URL templates; the ``{0}`` placeholder of ASSET_URL is filled in by code
# outside this excerpt (presumably with the video id — confirm at the caller).
ASSET_URL = "http://prima.tv4play.se/api/web/asset/{0}/play"
SWF_URL = "http://www.tv4play.se/flash/tv4video.swf"

# Matches tv4play.se programme pages and fotbollskanalen.se video pages that
# carry a numeric ``video_id``/``videoid`` query value (group "video_id").
# The dots of the host names are escaped so they only match a literal ".".
_url_re = re.compile(
    r"""
    http(s)?://(www\.)?
    (?:
        tv4play\.se/program/[^\?/]+|
        fotbollskanalen\.se/video
    )
    .+(video_id|videoid)=(?P<video_id>\d+)
""", re.VERBOSE)

# Schema for the asset XML: every ``items/item`` element is converted into a
# dict built from its child elements (tag -> text). That dict must provide
# "base", "bitrate" (text, converted to int) and "url".
_asset_schema = validate.Schema(validate.xml_findall("items/item"), [
    validate.all(
        # Collect (tag, text) pairs of the child elements, then build a dict.
        validate.xml_findall("*"), validate.map(lambda e: (e.tag, e.text)),
        validate.transform(dict), {
            "base": validate.text,
            "bitrate": validate.all(validate.text, validate.transform(int)),
            "url": validate.text
        })
])


class TV4Play(Plugin):
    """Plugin whose URLs are recognised by the module-level ``_url_re``."""

    @classmethod
    def can_handle_url(cls, url):
        """Return the regex match for ``url``, or None when it does not match."""
        matched = _url_re.match(url)
        return matched

コード例 #6

0

ファイルを表示

class RTE(Plugin):
    """Plugin for the RTE player (rte.ie), covering VOD shows and live channels."""

    VOD_API_URL = 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id={0}'
    LIVE_API_URL = 'http://feeds.rasset.ie/livelistings/playlist'

    # Either the "video_id" group (show pages) or the "channel_id" group
    # (live pages) is set by a match.
    _url_re = re.compile(r'http://www\.rte\.ie/player/[a-z0-9]+/(?:show/[a-z-]+-[0-9]+/(?P<video_id>[0-9]+)|live/(?P<channel_id>[0-9]+))')

    # "shows" and "media:group" must each be a one-element list and are
    # replaced by that element; "media:group" ends up as [HLS URL, HDS URL].
    _vod_api_schema = validate.Schema({
        'current_date': validate.text,
        'shows': validate.Schema(
            list,
            validate.length(1),
            validate.get(0),
            validate.Schema({
                'valid_start': validate.text,
                'valid_end': validate.text,
                'media:group': validate.Schema(
                    list,
                    validate.length(1),
                    validate.get(0),
                    validate.Schema(
                        {
                            'hls_server': validate.url(),
                            'hls_url': validate.text,
                            'hds_server': validate.url(),
                            'hds_url': validate.text,
                            # API returns RTMP streams that don't seem to work, ignore them
                            # 'url': validate.any(
                            #     validate.url(scheme="rtmp"),
                            #     validate.url(scheme="rtmpe")
                            # )
                        },
                        validate.transform(lambda x: [x['hls_server'] + x['hls_url'], x['hds_server'] + x['hds_url']])
                    ),
                ),
            }),
        )
    })

    # List of the "url" attribute of every media:content element.
    _live_api_schema = validate.Schema(
        validate.xml_findall('.//{http://search.yahoo.com/mrss/}content'),
        [
            validate.all(
                validate.xml_element(attrib={'url': validate.url()}),
                validate.get('url')
            )
        ]
    )
    # The "fullUrl" of the single list entry: a URL or the literal 'none'.
    _live_api_iphone_schema = validate.Schema(
        list,
        validate.length(1),
        validate.get(0),
        validate.Schema(
            {'fullUrl': validate.any(validate.url(), 'none')},
            validate.get('fullUrl')
        )
    )

    @classmethod
    def can_handle_url(cls, url):
        """Return the regex match for ``url``, or None when it does not match."""
        return RTE._url_re.match(url)

    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for the VOD show or live channel.

        For VOD nothing is yielded (and an error is logged) when the API's
        current date lies outside the show's validity window.
        """
        url_match = self._url_re.match(self.url)
        video_id = url_match.group('video_id')

        if video_id is None:
            # Live
            channel_id = url_match.group('channel_id')
            # Get live streams for desktop
            desktop_res = http.get(self.LIVE_API_URL, params={'channelid': channel_id})
            stream_urls = http.xml(desktop_res, schema=self._live_api_schema)

            # Get HLS streams for Iphone
            iphone_res = http.get(self.LIVE_API_URL, params={'channelid': channel_id, 'platform': 'iphone'})
            iphone_url = http.json(iphone_res, schema=self._live_api_iphone_schema)
            if iphone_url != 'none':
                stream_urls.append(iphone_url)
        else:
            # VOD
            vod_res = http.get(self.VOD_API_URL.format(video_id))
            stream_data = http.json(vod_res, schema=self._vod_api_schema)
            show = stream_data['shows']

            # Check whether video format is expired
            current_date = datetime.strptime(stream_data['current_date'], '%Y-%m-%dT%H:%M:%S.%f')
            valid_start = datetime.strptime(show['valid_start'], '%Y-%m-%dT%H:%M:%S')
            valid_end = datetime.strptime(show['valid_end'], '%Y-%m-%dT%H:%M:%S')
            if not (valid_start <= current_date <= valid_end):
                self.logger.error('Failed to access stream, may be due to expired content')
                return

            stream_urls = show['media:group']

        for stream_url in stream_urls:
            # Both checks are independent: each one that applies is processed.
            if '.f4m' in stream_url:
                for item in HDSStream.parse_manifest(self.session, stream_url).items():
                    yield item
            if '.m3u8' in stream_url:
                for item in HLSStream.parse_variant_playlist(self.session, stream_url).items():
                    yield item

コード例 #7

0

ファイルを表示

ファイル: livestream.py プロジェクト: persianpros/livecli

    validate.optional("lsPlayerSwfUrl"): validate.text,
    validate.optional("hdPlayerSwfUrl"): validate.text
})
_smil_schema = validate.Schema(validate.union({
    "http_base": validate.all(
        validate.xml_find("{http://www.w3.org/2001/SMIL20/Language}head/"
                          "{http://www.w3.org/2001/SMIL20/Language}meta"
                          "[@name='httpBase']"),
        validate.xml_element(attrib={
            "content": validate.text
        }),
        validate.get("content")
    ),
    "videos": validate.all(
        validate.xml_findall("{http://www.w3.org/2001/SMIL20/Language}body/"
                             "{http://www.w3.org/2001/SMIL20/Language}switch/"
                             "{http://www.w3.org/2001/SMIL20/Language}video"),
        [
            validate.all(
                validate.xml_element(attrib={
                    "src": validate.text,
                    "system-bitrate": validate.all(
                        validate.text,
                        validate.transform(int)
                    )
                }),
                validate.transform(
                    lambda e: (e.attrib["src"], e.attrib["system-bitrate"])
                )
            )
        ],

コード例 #8

0

ファイルを表示

            "_stream":
            validate.any(validate.text, [validate.text]),
            "_quality":
            validate.any(int, validate.text)
        }]
    }]
})
# Schema for the SMIL document; the result is a dict with three entries:
#   "base"   - the "base" value of the head/meta element, which must be an
#              http URL
#   "cdn"    - the "cdn" value of the head/meta element
#   "videos" - the "src" value of every body/seq/video element
_smil_schema = validate.Schema(
    validate.union({
        "base":
        validate.all(validate.xml_find("head/meta"), validate.get("base"),
                     validate.url(scheme="http")),
        "cdn":
        validate.all(validate.xml_find("head/meta"), validate.get("cdn")),
        "videos":
        validate.all(validate.xml_findall("body/seq/video"),
                     [validate.get("src")])
    }))


class ard_mediathek(Plugin):
    @classmethod
    def can_handle_url(cls, url):
        """Return the ``_url_re`` match for ``url``, or None when it does not match."""
        matched = _url_re.match(url)
        return matched

    def _get_http_streams(self, info):
        name = QUALITY_MAP.get(info["_quality"], "vod")
        urls = info["_stream"]
        if not isinstance(info["_stream"], list):
            urls = [urls]

コード例 #9

0

ファイルを表示

            }
        ),
        "clientlibs": validate.all(
            validate.transform(_clientlibs_re.search),
            validate.get(2),
            validate.text
        )
    })
)

# Extracts the text of the ./country_code element from an XML document.
_language_schema = validate.Schema(
    validate.xml_findtext("./country_code")
)

_xml_to_srt_schema = validate.Schema(
    validate.xml_findall(".//body/div"),
    [
        validate.union([validate.all(
            validate.getattr("attrib"),
            validate.get("{http://www.w3.org/XML/1998/namespace}lang")
        ),
            validate.all(
                validate.xml_findall("./p"),
                validate.transform(lambda x: list(enumerate(x, 1))),
                [
                    validate.all(
                        validate.union({
                            "i": validate.get(0),
                            "begin": validate.all(
                                validate.get(1),
                                validate.getattr("attrib"),

コード例 #10

0

ファイルを表示

class WWENetwork(Plugin):
    """Plugin for network.wwe.com.

    Requires a login; the ``ipid``/``fprt`` session attributes and the session
    key are cached so that later runs can skip logging in again.
    """

    # The dots are escaped so that only the literal host name is accepted.
    url_re = re.compile(r"https?://network\.wwe\.com")
    content_id_re = re.compile(r'''"content_id" : "(\d+)"''')
    playback_scenario = "HTTP_CLOUD_WIRED"
    login_url = "https://secure.net.wwe.com/workflow.do"
    login_page_url = "https://secure.net.wwe.com/enterworkflow.do?flowId=account.login&forwardUrl=http%3A%2F%2Fnetwork.wwe.com"
    api_url = "https://ws.media.net.wwe.com/ws/media/mf/op-findUserVerifiedEvent/v-2.3"
    # Parsed media info: status code/message, stream URLs, the optional
    # updated fingerprint and session key, and the session attributes.
    _info_schema = validate.Schema(
        validate.union({
            "status": validate.union({
                "code": validate.all(validate.xml_findtext(".//status-code"), validate.transform(int)),
                "message": validate.xml_findtext(".//status-message"),
            }),
            "urls": validate.all(
                validate.xml_findall(".//url"),
                [validate.getattr("text")]
            ),
            validate.optional("fingerprint"): validate.xml_findtext(".//updated-fingerprint"),
            validate.optional("session_key"): validate.xml_findtext(".//session-key"),
            "session_attributes": validate.all(
                validate.xml_findall(".//session-attribute"),
                [validate.getattr("attrib"),
                 validate.union({
                     "name": validate.get("name"),
                     "value": validate.get("value")
                 })]
            )
        })
    )
    options = PluginOptions({
        "email": None,
        "password": None,
    })

    def __init__(self, url):
        """Set the User-Agent and restore cached session state."""
        super(WWENetwork, self).__init__(url)
        http.headers.update({"User-Agent": useragents.CHROME})
        self._session_attributes = Cache(filename="plugin-cache.json", key_prefix="wwenetwork:attributes")
        self._session_key = self.cache.get("session_key")
        # Considered authenticated when both cached attributes are present.
        self._authed = self._session_attributes.get("ipid") and self._session_attributes.get("fprt")

    @classmethod
    def can_handle_url(cls, url):
        """Return True when ``url`` starts with a network.wwe.com address."""
        return cls.url_re.match(url) is not None

    def login(self, email, password):
        """Log in with the given credentials.

        On success the ``ipid`` and ``fprt`` cookies of the response are
        cached for 1h30m.

        :param email: account e-mail address
        :param password: account password
        :return: True when the response does not contain
                 "Authentication Error", otherwise False
        """
        self.logger.debug("Attempting login as {0}", email)
        # sets some required cookies to login
        http.get(self.login_page_url)
        # login
        res = http.post(self.login_url, data=dict(registrationAction='identify',
                                                  emailAddress=email,
                                                  password=password,
                                                  submitButton=""),
                        headers={"Referer": self.login_page_url},
                        allow_redirects=False)

        self._authed = "Authentication Error" not in res.text
        if self._authed:
            self._session_attributes.set("ipid", res.cookies.get("ipid"), expires=3600 * 1.5)
            self._session_attributes.set("fprt", res.cookies.get("fprt"), expires=3600 * 1.5)

        return self._authed

    def _update_session_attribute(self, key, value):
        """Cache ``value`` under ``key`` and set it as a cookie; falsy values are ignored."""
        if value:
            self._session_attributes.set(key, value, expires=3600 * 1.5)  # 1h30m expiry
            http.cookies.set(key, value)

    @property
    def session_key(self):
        """The current session key (initially read from the plugin cache)."""
        return self._session_key

    @session_key.setter
    def session_key(self, value):
        # Persist the key in the plugin cache as well as on the instance.
        self.cache.set("session_key", value)
        self._session_key = value

    def _get_media_info(self, content_id):
        """Fetch the media info for the content with the given ID.

        :param content_id: content id as found in the page
        :return: the API response parsed with ``_info_schema``
        """
        params = {"identityPointId": self._session_attributes.get("ipid"),
                  "fingerprint": self._session_attributes.get("fprt"),
                  "contentId": content_id,
                  "playbackScenario": self.playback_scenario,
                  "platform": "WEB_MEDIAPLAYER_5",
                  "subject": "LIVE_EVENT_COVERAGE",
                  "frameworkURL": "https://ws.media.net.wwe.com",
                  "_": int(time.time())}
        if self.session_key:
            params["sessionKey"] = self.session_key
        url = self.api_url.format(id=content_id)
        res = http.get(url, params=params)
        return http.xml(res, ignore_ns=True, schema=self._info_schema)

    def _get_content_id(self):
        """Return the ``content_id`` found in the page, or None when absent."""
        #  check the page to find the contentId
        res = http.get(self.url)
        m = self.content_id_re.search(res.text)
        if m:
            return m.group(1)

    def _get_streams(self):
        """Yield the HLS variant streams for the content on the page.

        Logs an error and yields nothing when no cached login exists and the
        credentials are incomplete, or when the login fails.

        :raises PluginError: when the media info status code is not 1
        """
        email = self.get_option("email")
        password = self.get_option("password")

        # Both e-mail and password are needed for a login; a single one of
        # them is not enough to attempt it.
        if not self._authed and not (email and password):
            self.logger.error("A login for WWE Network is required, use --wwenetwork-email/"
                              "--wwenetwork-password to set them")
            return

        if not self._authed:
            if not self.login(email, password):
                self.logger.error("Failed to login, check your username/password")
                return

        content_id = self._get_content_id()
        if content_id:
            self.logger.debug("Found content ID: {0}", content_id)
            info = self._get_media_info(content_id)
            if info["status"]["code"] == 1:
                # update the session attributes
                self._update_session_attribute("fprt", info.get("fingerprint"))
                for attr in info["session_attributes"]:
                    self._update_session_attribute(attr["name"], attr["value"])

                if info.get("session_key"):
                    self.session_key = info.get("session_key")
                for url in info["urls"]:
                    for s in HLSStream.parse_variant_playlist(self.session, url, name_fmt="{pixels}_{bitrate}").items():
                        yield s
            else:
                raise PluginError("Could not load streams: {message} ({code})".format(**info["status"]))