def test_xml_element(self):
    """Check ``xml_element`` against the tag, text and attributes of one element."""
    element = Element("tag", attrib={"key": "value"})
    element.text = "test"

    # Positional argument: the tag schema.
    checked_tag = validate(xml_element("tag"), element)
    assert checked_tag.tag == "tag"

    # Text-only schema.
    checked_text = validate(xml_element(text="test"), element)
    assert checked_text.text == "test"

    # Attribute schema: the key must be present and its value must satisfy ``text``.
    checked_attrib = validate(xml_element(attrib={"key": text}), element)
    assert checked_attrib.attrib == {"key": "value"}
def test_xml_element(self):
    """Exercise ``xml_element`` with transforming sub-schemas and with mismatching ones."""
    child_a = Element("childA")
    child_b = Element("childB")

    root = Element("tag")
    root.set("key", "value")
    root.text = "test"
    for child in (child_a, child_b):
        root.append(child)

    # The same upper-casing transform is applied to tag, text, attribute keys and values.
    to_upper = transform(str.upper)
    schema = xml_element(tag=to_upper, text=to_upper, attrib={to_upper: to_upper})
    newelem: Element = validate(schema, root)

    assert newelem.tag == "TAG"
    assert newelem.text == "TEST"
    assert newelem.attrib == {"KEY": "VALUE"}
    assert list(newelem.iterchildren()) == [child_a, child_b]

    # Each mismatching sub-schema must raise ValueError with its own message prefix.
    # The schema is built inside the ``with`` block, exactly where it may raise.
    failure_cases = (
        ({"tag": "invalid"}, "Unable to validate XML tag: "),
        ({"text": "invalid"}, "Unable to validate XML text: "),
        ({"attrib": {"key": "invalid"}}, "Unable to validate XML attributes: "),
    )
    for schema_kwargs, message_prefix in failure_cases:
        with self.assertRaises(ValueError) as cm:
            validate(xml_element(**schema_kwargs), root)
        assert str(cm.exception).startswith(message_prefix)
def test_parse_xml_validate(self):
    """Parse a single-element document through a schema and compare tag and attributes."""
    schema = validate.Schema(xml_element(tag="test", attrib={"foo": text}))
    wanted = ET.Element("test", {"foo": "bar"})

    parsed = parse_xml(u"""<test foo="bar"/>""", schema=schema)

    self.assertEqual(wanted.tag, parsed.tag)
    self.assertEqual(wanted.attrib, parsed.attrib)
def test_parse_xml_entities(self):
    """Parse a document containing a bare ``&`` with ``invalid_char_entities`` enabled."""
    schema = validate.Schema(xml_element(tag="test", attrib={"foo": text}))
    wanted = ET.Element("test", {"foo": "bar &"})

    parsed = parse_xml(
        u"""<test foo="bar &"/>""",
        schema=schema,
        invalid_char_entities=True,
    )

    self.assertEqual(wanted.tag, parsed.tag)
    self.assertEqual(wanted.attrib, parsed.attrib)
def test_parse_xml_entities(self):
    """Parse a document containing a bare ``&`` with ``invalid_char_entities`` enabled."""
    document = """<test foo="bar &"/>"""
    element_schema = xml_element(tag="test", attrib={"foo": text})
    wanted = ET.Element("test", {"foo": "bar &"})

    parsed = parse_xml(
        document,
        schema=validate.Schema(element_schema),
        invalid_char_entities=True,
    )

    self.assertEqual(wanted.tag, parsed.tag)
    self.assertEqual(wanted.attrib, parsed.attrib)
def test_failure_schema(self):
    """Validating a value that is not an XML element must raise ``ValidationError``."""
    # NOTE(review): the literal is reproduced exactly as it appears in this view;
    # confirm its whitespace against what ``assert_validationerror`` expects.
    expected_error = """ ValidationError(Callable): iselement('not-an-element') is not true """

    with pytest.raises(validate.ValidationError) as excinfo:
        validate.validate(validate.xml_element(), "not-an-element")

    assert_validationerror(excinfo.value, expected_error)
class RTE(Plugin):
    """Plugin that resolves RTE player URLs (on-demand shows and live channels)
    into HDS and HLS streams."""

    VOD_API_URL = 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id={0}'
    LIVE_API_URL = 'http://feeds.rasset.ie/livelistings/playlist'

    # Exactly one of the two named groups matches: ``video_id`` for a show
    # page, ``channel_id`` for a live page.
    _url_re = re.compile(
        r'http://www\.rte\.ie/player/[a-z0-9]+/(?:show/[a-z-]+-[0-9]+/(?P<video_id>[0-9]+)|live/(?P<channel_id>[0-9]+))'
    )

    # VOD playlist: a single show holding a single media group, which is
    # reduced to the list [HLS url, HDS url].
    _vod_api_schema = validate.Schema({
        'current_date': validate.text,
        'shows': validate.Schema(
            list,
            validate.length(1),
            validate.get(0),
            validate.Schema({
                'valid_start': validate.text,
                'valid_end': validate.text,
                'media:group': validate.Schema(
                    list,
                    validate.length(1),
                    validate.get(0),
                    validate.Schema(
                        {
                            'hls_server': validate.url(),
                            'hls_url': validate.text,
                            'hds_server': validate.url(),
                            'hds_url': validate.text,
                            # The API also returns RTMP streams under 'url';
                            # they don't seem to work, so they are ignored here.
                        },
                        validate.transform(lambda group: [
                            group['hls_server'] + group['hls_url'],
                            group['hds_server'] + group['hds_url'],
                        ]),
                    ),
                ),
            }),
        ),
    })

    # Live playlist (desktop): the ``url`` attribute of every media content element.
    _live_api_schema = validate.Schema(
        validate.xml_findall('.//{http://search.yahoo.com/mrss/}content'),
        [
            validate.all(
                validate.xml_element(attrib={'url': validate.url()}),
                validate.get('url'),
            ),
        ],
    )

    # Live playlist (iphone platform): a one-item list whose ``fullUrl`` is
    # either a URL or the literal string 'none'.
    _live_api_iphone_schema = validate.Schema(
        list,
        validate.length(1),
        validate.get(0),
        validate.Schema(
            {'fullUrl': validate.any(validate.url(), 'none')},
            validate.get('fullUrl'),
        ),
    )

    @classmethod
    def can_handle_url(cls, url):
        """Return the regex match for *url* (``None`` when it is not an RTE player URL)."""
        return RTE._url_re.match(url)

    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for the show or live channel in ``self.url``."""
        url_match = self._url_re.match(self.url)
        vod_id = url_match.group('video_id')

        if vod_id is None:
            # Live
            live_channel = url_match.group('channel_id')

            # Get live streams for desktop
            res = http.get(self.LIVE_API_URL, params={'channelid': live_channel})
            stream_urls = http.xml(res, schema=self._live_api_schema)

            # Get HLS streams for Iphone
            res = http.get(
                self.LIVE_API_URL,
                params={'channelid': live_channel, 'platform': 'iphone'},
            )
            iphone_url = http.json(res, schema=self._live_api_iphone_schema)
            if iphone_url != 'none':
                stream_urls.append(iphone_url)
        else:
            # VOD
            res = http.get(self.VOD_API_URL.format(vod_id))
            playlist = http.json(res, schema=self._vod_api_schema)
            show = playlist['shows']

            # Check whether video format is expired
            now = datetime.strptime(playlist['current_date'], '%Y-%m-%dT%H:%M:%S.%f')
            window_start = datetime.strptime(show['valid_start'], '%Y-%m-%dT%H:%M:%S')
            window_end = datetime.strptime(show['valid_end'], '%Y-%m-%dT%H:%M:%S')
            if not (window_start <= now <= window_end):
                self.logger.error(
                    'Failed to access stream, may be due to expired content')
                return

            stream_urls = show['media:group']

        for stream_url in stream_urls:
            if '.f4m' in stream_url:
                hds_streams = HDSStream.parse_manifest(self.session, stream_url)
                for item in hds_streams.items():
                    yield item
            if '.m3u8' in stream_url:
                hls_streams = HLSStream.parse_variant_playlist(self.session, stream_url)
                for item in hls_streams.items():
                    yield item
path=validate.endswith(".m3u8") ), }, None) }, validate.optional("playerUri"): validate.text, validate.optional("viewerPlusSwfUrl"): validate.url(scheme="http"), validate.optional("lsPlayerSwfUrl"): validate.text, validate.optional("hdPlayerSwfUrl"): validate.text }) _smil_schema = validate.Schema(validate.union({ "http_base": validate.all( validate.xml_find("{http://www.w3.org/2001/SMIL20/Language}head/" "{http://www.w3.org/2001/SMIL20/Language}meta" "[@name='httpBase']"), validate.xml_element(attrib={ "content": validate.text }), validate.get("content") ), "videos": validate.all( validate.xml_findall("{http://www.w3.org/2001/SMIL20/Language}body/" "{http://www.w3.org/2001/SMIL20/Language}switch/" "{http://www.w3.org/2001/SMIL20/Language}video"), [ validate.all( validate.xml_element(attrib={ "src": validate.text, "system-bitrate": validate.all( validate.text, validate.transform(int) )
class DeutscheWelle(Plugin):
    """Plugin for dw.com pages: live streams and on-demand video/audio."""

    # Channel used for live pages when neither the URL query nor the page
    # markup selects one.
    default_channel = "1"
    url_re = re.compile(r"https?://(?:www\.)?dw\.com/")

    channel_re = re.compile(r'''<a.*?data-id="(\d+)".*?class="ici"''')
    # Group 1: channel id, group 2: stream URL (whitespace is insignificant
    # because of re.VERBOSE).
    live_stream_div = re.compile(
        r''' <div\s+class="mediaItem"\s+data-channel-id="(\d+)".*?>.*? <input\s+type="hidden"\s+name="file_name"\s+value="(.*?)"\s*>.*?<div ''',
        re.DOTALL | re.VERBOSE)

    smil_api_url = "http://www.dw.com/smil/{}"
    html5_api_url = "http://www.dw.com/html5Resource/{}"
    vod_player_type_re = re.compile(
        r'<input type="hidden" name="player_type" value="(?P<stream_type>.+?)">'
    )
    stream_vod_data_re = re.compile(
        r'<input\s+type="hidden"\s+name="file_name"\s+value="(?P<stream_url>.+?)">.*?'
        r'<input\s+type="hidden"\s+name="media_id"\s+value="(?P<stream_id>\d+)">',
        re.DOTALL)

    # SMIL document -> {"base": <meta base attribute>, "streams": [attribute
    # dicts of every child of <switch>, with numeric fields converted to int]}.
    smil_schema = validate.Schema(
        validate.union({
            "base": validate.all(
                validate.xml_find(".//meta"),
                validate.xml_element(attrib={"base": validate.text}),
                validate.get("base"),
            ),
            "streams": validate.all(
                validate.xml_findall(".//switch/*"),
                [
                    validate.all(
                        validate.getattr("attrib"),
                        {
                            "src": validate.text,
                            "system-bitrate": validate.all(
                                validate.text,
                                validate.transform(int),
                            ),
                            validate.optional("width"): validate.all(
                                validate.text,
                                validate.transform(int),
                            ),
                        },
                    ),
                ],
            ),
        }))

    @classmethod
    def can_handle_url(cls, url):
        """Return True when *url* starts with a dw.com address."""
        return cls.url_re.match(url) is not None

    def _create_stream(self, url, quality=None):
        """Build the stream(s) for *url*.

        RTMP and plain HTTP URLs produce a single ``(quality, stream)`` tuple.
        ``.m3u8`` URLs produce the ``.items()`` of the parsed variant playlist
        and ignore *quality*.
        """
        # NOTE(review): the two return shapes differ (one pair vs. an iterable
        # of pairs) although both callers treat the result the same way --
        # confirm that each caller only ever sees the shape it can handle.
        if url.startswith('rtmp://'):
            return (quality, RTMPStream(self.session, {'rtmp': url}))
        if url.endswith('.m3u8'):
            return HLSStream.parse_variant_playlist(self.session, url).items()

        return (quality, HTTPStream(self.session, url))

    def _get_live_streams(self, page):
        """Return the stream(s) of the selected live channel found in *page*.

        The channel comes from the ``channel`` query parameter of ``self.url``,
        else from the page markup, else from ``default_channel``. Returns
        ``None`` when the page has no stream for that channel.
        """
        # check if a different language has been selected
        qs = dict(parse_qsl(urlparse(self.url).query))
        channel = qs.get("channel")

        if not channel:
            m = self.channel_re.search(page.text)
            channel = m.group(1) if m else None

        # Apply the fallback *before* stringifying: ``str(None)`` is the truthy
        # string "None", so ``str(channel) or default`` could never fall back.
        channel = channel or self.default_channel
        self.logger.debug("Using sub-channel ID: {0}", channel)

        # extract the streams from the page, mapping between channel-id and stream url
        media_items = self.live_stream_div.finditer(page.text)
        stream_map = dict(mi.group(1, 2) for mi in media_items)

        stream_url = stream_map.get(str(channel))
        if stream_url:
            return self._create_stream(stream_url)

    def _get_vod_streams(self, stream_type, page):
        """Yield the on-demand stream embedded in *page*, then those listed by the SMIL API.

        Yields nothing when the page carries no stream data or *stream_type* is
        neither ``"video"`` nor ``"audio"``.
        """
        m = self.stream_vod_data_re.search(page.text)
        if m is None:
            return
        stream_url, stream_id = m.groups()

        if stream_type == "video":
            stream_api_id = "v-{}".format(stream_id)
            default_quality = "vod"
        elif stream_type == "audio":
            stream_api_id = "a-{}".format(stream_id)
            default_quality = "audio"
        else:
            return

        # Retrieve stream embedded in web page
        yield self._create_stream(stream_url, default_quality)

        # Retrieve streams using API
        res = self.session.http.get(self.smil_api_url.format(stream_api_id))
        videos = self.session.http.xml(res, schema=self.smil_schema)

        for video in videos['streams']:
            url = videos["base"] + video["src"]
            # Skip the API entry that duplicates the stream embedded in the page.
            if url == stream_url or url.replace("_dwdownload.", ".") == stream_url:
                continue

            if video["system-bitrate"] > 0:
                # If width is available, use it to select the best stream
                # amongst those with same bitrate
                quality = "{}k".format(
                    (video["system-bitrate"] + video.get("width", 0)) // 1000)
            else:
                quality = default_quality

            yield self._create_stream(url, quality)

    def _get_streams(self):
        """Dispatch to the live or on-demand extractor based on the page's player type."""
        res = self.session.http.get(self.url)

        m = self.vod_player_type_re.search(res.text)
        if m is None:
            return

        stream_type = m.group("stream_type")
        if stream_type == "dwlivestream":
            return self._get_live_streams(res)

        return self._get_vod_streams(stream_type, res)
validate.optional("play_url"): validate.url(scheme="http"), validate.optional("m3u8_url"): validate.url( scheme="http", path=validate.endswith(".m3u8") ), }, None) }, validate.optional("playerUri"): validate.text }) _smil_schema = validate.Schema(validate.union({ "http_base": validate.all( validate.xml_find("{http://www.w3.org/2001/SMIL20/Language}head/" "{http://www.w3.org/2001/SMIL20/Language}meta" "[@name='httpBase']"), validate.xml_element(attrib={ "content": validate.text }), validate.get("content") ), "videos": validate.all( validate.xml_findall("{http://www.w3.org/2001/SMIL20/Language}body/" "{http://www.w3.org/2001/SMIL20/Language}switch/" "{http://www.w3.org/2001/SMIL20/Language}video"), [ validate.all( validate.xml_element(attrib={ "src": validate.text, "system-bitrate": validate.all( validate.text, validate.transform(int) )
class TestXmlElementSchema:
    """Tests for ``validate.xml_element``: successful validation, failing
    sub-schemas, and non-element input."""

    # Sub-schema shared by the "subschemas" case: upper-cases whatever it validates.
    upper = validate.transform(str.upper)

    @pytest.fixture(scope="function")
    def element(self):
        """Build ``parent`` holding ``childA`` and ``childB``; ``childB`` holds ``childC``."""
        root = Element("parent", {
            "attrkey1": "attrval1",
            "attrkey2": "attrval2"
        })
        root.text = "parenttext"
        root.tail = "parenttail"

        first = Element("childA", {"a": "1"})
        first.text = "childAtext"
        first.tail = "childAtail"

        second = Element("childB", {"b": "2"})
        second.text = "childBtext"
        second.tail = "childBtail"
        second.append(Element("childC"))

        root.append(first)
        root.append(second)

        return root

    @pytest.mark.parametrize(
        "schema, expected",
        [
            pytest.param(
                validate.xml_element(),
                ("<parent attrkey1=\"attrval1\" attrkey2=\"attrval2\">"
                 "parenttext"
                 "<childA a=\"1\">childAtext</childA>"
                 "childAtail"
                 "<childB b=\"2\">childBtext<childC/></childB>"
                 "childBtail"
                 "</parent>"
                 "parenttail"),
                id="empty",
            ),
            pytest.param(
                validate.xml_element(
                    tag=upper, attrib={upper: upper}, text=upper, tail=upper),
                ("<PARENT ATTRKEY1=\"ATTRVAL1\" ATTRKEY2=\"ATTRVAL2\">"
                 "PARENTTEXT"
                 "<childA a=\"1\">childAtext</childA>"
                 "childAtail"
                 "<childB b=\"2\">childBtext<childC/></childB>"
                 "childBtail"
                 "</PARENT>"
                 "PARENTTAIL"),
                id="subschemas",
            ),
        ],
    )
    def test_success(self, element, schema, expected):
        """The validated element serializes as expected and is a copy at every level."""
        validated = validate.validate(schema, element)
        serialized = etree_tostring(validated).decode("utf-8")

        assert serialized == expected
        # Neither the root nor any descendant may be the original object.
        assert validated is not element
        assert validated[0] is not element[0]
        assert validated[1] is not element[1]
        assert validated[1][0] is not element[1][0]

    # NOTE(review): the expected-error literals below are reproduced exactly as
    # they appear in this view; confirm their whitespace against what
    # ``assert_validationerror`` expects.
    @pytest.mark.parametrize("schema, error", [
        pytest.param(
            validate.xml_element(tag="invalid"),
            """ ValidationError(XmlElementSchema): Unable to validate XML tag Context(equality): 'parent' does not equal 'invalid' """,
            id="tag",
        ),
        pytest.param(
            validate.xml_element(attrib={"invalid": "invalid"}),
            """ ValidationError(XmlElementSchema): Unable to validate XML attributes Context(dict): Key 'invalid' not found in {'attrkey1': 'attrval1', 'attrkey2': 'attrval2'} """,
            id="attrib",
        ),
        pytest.param(
            validate.xml_element(text="invalid"),
            """ ValidationError(XmlElementSchema): Unable to validate XML text Context(equality): 'parenttext' does not equal 'invalid' """,
            id="text",
        ),
        pytest.param(
            validate.xml_element(tail="invalid"),
            """ ValidationError(XmlElementSchema): Unable to validate XML tail Context(equality): 'parenttail' does not equal 'invalid' """,
            id="tail",
        ),
    ])
    def test_failure(self, element, schema, error):
        """A mismatching sub-schema raises ``ValidationError`` with the expected report."""
        with pytest.raises(validate.ValidationError) as excinfo:
            validate.validate(schema, element)

        assert_validationerror(excinfo.value, error)

    def test_failure_schema(self):
        """Validating a value that is not an XML element raises ``ValidationError``."""
        expected_error = """ ValidationError(Callable): iselement('not-an-element') is not true """

        with pytest.raises(validate.ValidationError) as excinfo:
            validate.validate(validate.xml_element(), "not-an-element")

        assert_validationerror(excinfo.value, expected_error)