def test_search_alternate_links(self):
    """Check the alternate links found in two parsed home pages.

    Each fixture is parsed with feedparser and passed to
    feedutil.search_alternate_links; the returned list must equal the
    expected list exactly.
    """
    cases = (
        ("bbc_homepage.html",
         ["http://newsrss.bbc.co.uk/rss/newsonline_world_edition/"
          "front_page/rss.xml"]),
        ("newsweek_homepage.html",
         ["http://feeds.newsweek.com/newsweek/TopNews"]),
    )
    # Fixtures are checked in order; the first mismatch fails the test.
    for fixture_name, expected in cases:
        parsed = feedparser.parse(get_data_file(fixture_name))
        found = feedutil.search_alternate_links(parsed)
        self.assertListEqual(found, expected)
def test_search_link_malformed(self):
    """ SGML and other parsers are not able to parse this page """
    page_url = "http://www.foxnews.com/"
    html = get_data_file("foxnews.com.html")
    expected = ['http://feeds.feedburner.com/foxnews/latest']
    self.assertListEqual(feedutil.search_links_url(page_url, html), expected)
def test_serach_links_join_url(self):
    """Check the absolute feed URLs returned for the chooseopera page.

    NOTE(review): presumably the fixture declares its feeds with relative
    hrefs that get joined with the page URL -- confirm against the fixture.
    """
    # NOTE: "serach" in the method name is a typo for "search"; the name is
    # kept so the test id stays stable.
    base_url = "http://my.opera.com/chooseopera/blog/"
    expected = [
        'http://my.opera.com/chooseopera/xml/rss/blog/',
        'http://my.opera.com/chooseopera/xml/atom/blog/',
    ]
    html = get_data_file("chooseopera.html")
    found = feedutil.search_links_url(base_url, html)
    self.assertListEqual(found, expected)
def test_search_links_order(self):
    """ The type and href attributes appear in a different order inside
    the link tags. The page is in UTF-8 (Russian) """
    expected = [
        'http://www.mk.ru/rss/news/index.xml',
        'http://www.mk.ru/rss/mk/index.xml',
    ]
    html = get_data_file("mk.ru.html")
    found = feedutil.search_links_url("http://www.mk.ru/", html)
    self.assertListEqual(found, expected)
def test_get_img(self):
    """ Check that find_post_content adds an image to the content if
    there's no img tag and there is a media namespace """
    feed = feedparser.parse(get_data_file("dailymotion.rss"))
    post = find_post_content(None, feed.entries[0])
    # The image URL plus two numeric strings that must all appear in the
    # generated content (presumably width and height -- verify against
    # the fixture).
    expected_fragments = (
        "http://ak2.static.dailymotion.com/static/video/454/"
        "695/26596454:jpeg_preview_large.jpg?20101129171226",
        "320",
        "240",
    )
    for fragment in expected_fragments:
        # assertNotEqual has the same pass/fail result as
        # assertTrue(... != -1) but reports the compared values on
        # failure; the fragment is still passed as the message.
        self.assertNotEqual(post.find(fragment), -1, fragment)
def test_search_alternate_links_double_function(self):
    """Check that both search functions return the same smp.no links.

    The fixture is searched once through feedparser with
    feedutil.search_alternate_links and once as a raw page with
    feedutil.search_links_url; both results must equal the same list.
    """
    expected = [
        u'http://www.smp.no/?service=rss',
        u'http://www.smp.no/?service=rss&t=0',
        u'http://www.smp.no/nyheter/?service=rss',
        u'http://www.smp.no/kultur/?service=rss',
    ]
    raw_page = get_data_file("smp.no.html")

    # First pass: links taken from the feedparser result.
    parsed = feedparser.parse(raw_page)
    from_parsed = feedutil.search_alternate_links(parsed)
    self.assertListEqual(from_parsed, expected)

    # Second pass: links searched directly in the raw page.
    from_raw = feedutil.search_links_url("http://www.smp.no/", raw_page)
    self.assertListEqual(from_raw, expected)
def test_search_links_mixed(self):
    """ The page mixes atom and rss link sentences """
    expected = [
        'http://www.elcomercio.pe/feed/portada.xml',
        'http://www.elcomercio.pe/feed/portada/politica.xml',
        'http://www.elcomercio.pe/feed/portada/lima.xml',
        'http://www.elcomercio.pe/feed/portada/peru.xml',
        'http://www.elcomercio.pe/feed/portada/mundo.xml',
        'http://www.elcomercio.pe/feed/portada/economia.xml',
        'http://www.elcomercio.pe/feed/portada/tecnologia.xml',
        'http://www.elcomercio.pe/feed/portada/deportes.xml',
        'http://www.elcomercio.pe/feed/portada/espectaculos.xml',
        'http://www.elcomercio.pe/feed/portada/ecologia.xml',
        'http://www.elcomercio.pe/feed/portada/opinion.xml',
    ]
    html = get_data_file("elcomercio.pe.html")
    found = feedutil.search_links_url("http://www.elcomercio.pe/", html)
    self.assertListEqual(found, expected)