def test_youtube_object_with_a_tag(self):
    """extract_url_from_youtube_object() reads the URL from a nested <a> tag.

    The fixture is a Flash ``<object>`` embed whose fallback content is an
    ``<a href=...>`` pointing at the regular watch page; the helper is
    expected to return that ``href``.
    """
    # Adjacent literals are concatenated at compile time, so the markup fed
    # to the selector is exactly the same text as a single long literal.
    raw_html = (
        ' <html>'
        ' <body>'
        ' <div id="youtube-media">'
        ' <object type="application/x-shockwave-flash" height="200" width="300"'
        ' data="http://www.youtube.com/v/4tkHmGycfz4&amp;rel=0&amp;enablejsapi=1&amp;playerapiid=ytplayer&amp;fs=1"'
        ' id="media-youtube-default-external-object-1">'
        ' <param name="movie" value="http://www.youtube.com/v/4tkHmGycfz4&amp;rel=0&amp;enablejsapi=1&amp;playerapiid=ytplayer&amp;fs=1">'
        ' <param name="allowScriptAccess" value="sameDomain">'
        ' <param name="quality" value="best">'
        ' <param name="allowFullScreen" value="true">'
        ' <param name="bgcolor" value="#FFFFFF">'
        ' <param name="scale" value="noScale">'
        ' <param name="salign" value="TL">'
        ' <param name="FlashVars" value="playerMode=embedded">'
        ' <param name="wmode" value="transparent">'
        ' <!-- Fallback content -->'
        ' <a href="http://www.youtube.com/watch?v=4tkHmGycfz4">'
        '<img src="http://img.youtube.com/vi/4tkHmGycfz4/0.jpg" alt="See video" title="See video" height="200" width="300">'
        '</a>'
        ' </object>'
        ' </div>'
        ' </body>'
        ' </html> '
    )

    selector = HtmlXPathSelector(text=raw_html)
    object_node = selector.select("//div [@id='youtube-media']/object")

    extracted = hxs_media_utils.extract_url_from_youtube_object(object_node)

    # Expected value first, actual second, as in the rest of the suite.
    eq_("http://www.youtube.com/watch?v=4tkHmGycfz4", extracted)
def test_youtube_object_no_known_url(self):
    """extract_url_from_youtube_object() raises ValueError without a known URL source.

    The fixture ``<object>`` carries only ``type``/``height``/``width``
    attributes and ``<param>`` children: it has no ``data`` attribute and no
    fallback ``<a>`` tag. The test fails unless the helper raises
    ``ValueError`` for it.
    """
    # Adjacent literals are concatenated at compile time, so the markup fed
    # to the selector is exactly the same text as a single long literal.
    # NOTE: the <object> element is left unclosed in this fixture on purpose
    # of fidelity with the original markup; do not "fix" it here.
    raw_html = (
        ' <html>'
        ' <body>'
        ' <div id="youtube-media">'
        ' <object type="application/x-shockwave-flash" height="200" width="300" >'
        ' <param name="movie" value="http://www.youtube.com/v/4tkHmGycfz4&amp;rel=0&amp;enablejsapi=1&amp;playerapiid=ytplayer&amp;fs=1">'
        ' <param name="allowScriptAccess" value="sameDomain">'
        ' <param name="quality" value="best">'
        ' <param name="allowFullScreen" value="true">'
        ' <param name="bgcolor" value="#FFFFFF">'
        ' <param name="scale" value="noScale">'
        ' <param name="salign" value="TL">'
        ' <param name="FlashVars" value="playerMode=embedded">'
        ' <param name="wmode" value="transparent">'
        ' </div>'
        ' </body>'
        ' </html> '
    )

    hxs = HtmlXPathSelector(text=raw_html)
    youtube_object = hxs.select("//div [@id='youtube-media']/object")

    # Previously the call was made with no expectation attached, so the test
    # passed when nothing was raised and errored on the documented
    # ValueError. Assert the exception explicitly instead.
    try:
        hxs_media_utils.extract_url_from_youtube_object(youtube_object)
    except ValueError:
        pass  # expected: no supported way to derive the URL
    else:
        raise AssertionError(
            "extract_url_from_youtube_object() did not raise ValueError"
        )