def test_parse_xml_encoding(self): tree = parse_xml("""<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""") self.assertEqual(tree.xpath(".//text()"), ["ä"]) tree = parse_xml("""<test>ä</test>""") self.assertEqual(tree.xpath(".//text()"), ["ä"]) tree = parse_xml(b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""") self.assertEqual(tree.xpath(".//text()"), ["ä"]) tree = parse_xml(b"""<test>\xC3\xA4</test>""") self.assertEqual(tree.xpath(".//text()"), ["ä"])
def test_parse_xml_ns_ignore(self):
    """Check that ``ignore_ns=True`` drops the default namespace from the parsed root.

    The resulting element must have the plain tag and only the ``foo``
    attribute, regardless of how the attributes are separated in the input.
    """
    reference = Element("test", {"foo": "bar"})
    # NOTE(review): the first two documents are identical as written here;
    # possibly one was meant to be a whitespace variant - confirm.
    documents = (
        """<test foo="bar" xmlns="foo:bar"/>""",
        """<test foo="bar" xmlns="foo:bar"/>""",
        """<test\nfoo="bar"\nxmlns="foo:bar"/>""",
    )
    for document in documents:
        parsed = parse_xml(document, ignore_ns=True)
        self.assertEqual(reference.tag, parsed.tag)
        self.assertEqual(reference.attrib, parsed.attrib)
def test_parse_xml_validate(self):
    """Check that ``parse_xml`` returns the expected element when a schema is supplied."""
    reference = Element("test", {"foo": "bar"})
    element_schema = validate.Schema(xml_element(tag="test", attrib={"foo": str}))

    parsed = parse_xml("""<test foo="bar"/>""", schema=element_schema)

    self.assertEqual(reference.tag, parsed.tag)
    self.assertEqual(reference.attrib, parsed.attrib)
def test_parse_xml_entities(self):
    """Check that a bare ``&`` in an attribute is accepted with ``invalid_char_entities=True``."""
    reference = Element("test", {"foo": "bar &"})
    element_schema = validate.Schema(xml_element(tag="test", attrib={"foo": str}))

    parsed = parse_xml(
        """<test foo="bar &"/>""",
        schema=element_schema,
        invalid_char_entities=True,
    )

    self.assertEqual(reference.tag, parsed.tag)
    self.assertEqual(reference.attrib, parsed.attrib)
def parse_manifest(cls, session, url_or_manifest: str, **args) -> Dict[str, "DASHStream"]:
    """
    Parse a DASH manifest file and return its streams.

    Only the first period of the manifest is inspected. Every video
    representation is combined with every (language-filtered) audio
    representation; streams that end up with the same name are
    disambiguated with ``_alt`` / ``_altN`` suffixes, highest bandwidth first.

    :param streamlink.Streamlink session: Streamlink session instance
    :param url_or_manifest: URL of the manifest file or an XML manifest string
    :param args: Additional keyword arguments passed to :meth:`requests.Session.request`
    :return: Mapping of stream names to :class:`DASHStream` instances
    :raises PluginError: If an adaptation set of the first period is protected by DRM
    """
    if url_or_manifest.startswith('<?xml'):
        # An inline manifest has no URL, so use a fixed label for error messages
        # instead of referencing a variable that only exists in the HTTP branch.
        source = "MPD manifest"
        mpd = MPD(parse_xml(url_or_manifest, ignore_ns=True))
    else:
        res = session.http.get(url_or_manifest, **session.http.valid_request_args(**args))
        url = res.url
        source = url
        urlp = list(urlparse(url))
        # Strip the last path segment to get the base URL. Indexing the split
        # result (rather than unpacking two values) also copes with a path
        # that contains no "/" at all.
        urlp[2] = urlp[2].rsplit("/", 1)[0]
        mpd = MPD(session.http.xml(res, ignore_ns=True), base_url=urlunparse(urlp), url=url)

    video, audio = [], []

    # Search for suitable video and audio representations
    for aset in mpd.periods[0].adaptationSets:
        if aset.contentProtection:
            raise PluginError("{} is protected by DRM".format(source))
        for rep in aset.representations:
            if rep.mimeType.startswith("video"):
                video.append(rep)
            elif rep.mimeType.startswith("audio"):
                audio.append(rep)

    # ``None`` placeholders keep the product below non-empty for
    # video-only and audio-only manifests.
    if not video:
        video = [None]
    if not audio:
        audio = [None]

    locale = session.localization
    locale_lang = locale.language
    lang = None
    available_languages = set()

    # if the locale is explicitly set, prefer that language over others
    for aud in audio:
        # Skip the ``None`` placeholder and representations without a language;
        # dereferencing the placeholder would raise AttributeError.
        if not (aud and aud.lang):
            continue
        available_languages.add(aud.lang)
        try:
            if locale.explicit and Language.get(aud.lang) == locale_lang:
                lang = aud.lang
        except LookupError:
            continue

    if not lang:
        # filter by the first language that appears
        lang = audio[0] and audio[0].lang

    log.debug(
        "Available languages for DASH audio streams: {0} (using: {1})".format(
            ", ".join(available_languages) or "NONE",
            lang or "n/a",
        )
    )

    # if the language is given by the stream, filter out other languages that do not match
    if len(available_languages) > 1:
        audio = [a for a in audio if a.lang is None or a.lang == lang]

    ret = []
    for vid, aud in itertools.product(video, audio):
        stream = DASHStream(session, mpd, vid, aud, **args)
        stream_name = []

        if vid:
            # Named by height ("720p") when known, otherwise by bandwidth ("1500k").
            stream_name.append("{:0.0f}{}".format(
                vid.height or vid.bandwidth_rounded,
                "p" if vid.height else "k",
            ))
        if len(audio) > 1:
            # The audio bitrate is only part of the name when there is a choice.
            stream_name.append("a{:0.0f}k".format(aud.bandwidth))
        ret.append(('+'.join(stream_name), stream))

    # rename duplicate streams
    dict_value_list = defaultdict(list)
    for k, v in ret:
        dict_value_list[k].append(v)

    def sortby_bandwidth(dash_stream: DASHStream) -> int:
        if dash_stream.video_representation:
            return dash_stream.video_representation.bandwidth
        if dash_stream.audio_representation:
            return dash_stream.audio_representation.bandwidth
        return 0  # pragma: no cover

    ret_new = {}
    for q, items in dict_value_list.items():
        try:
            items = sorted(items, key=sortby_bandwidth, reverse=True)
        except AttributeError:
            # Best effort: keep the discovery order if a stream lacks the
            # representation attributes needed for sorting.
            pass

        for n, item in enumerate(items):
            if n == 0:
                ret_new[q] = item
            elif n == 1:
                ret_new[f'{q}_alt'] = item
            else:
                ret_new[f'{q}_alt{n}'] = item

    return ret_new
def xml(cls, res, *args, **kwargs):
    """
    Parse the text body of a response as XML.

    :param res: Response object; its ``text`` attribute is handed to ``parse_xml``
    :param args: Extra positional arguments forwarded to ``parse_xml``
    :param kwargs: Extra keyword arguments forwarded to ``parse_xml``
    :return: Whatever ``parse_xml`` returns for the response text
    """
    body = res.text
    return parse_xml(body, *args, **kwargs)
def test_parse_xml_ns(self):
    """Check that a prefixed namespace is kept in the parsed tag by default."""
    reference = Element("{foo:bar}test", {"foo": "bar"})

    parsed = parse_xml("""<h:test foo="bar" xmlns:h="foo:bar"/>""")

    self.assertEqual(reference.tag, parsed.tag)
    self.assertEqual(reference.attrib, parsed.attrib)