def test_get_title_from_html(self):
    """Run get_title_from_html over a table of HTML snippets.

    Each entry pairs an input document with the title the extractor is
    expected to return for it; entries are checked in order.
    """
    from softwarecenter.utils import get_title_from_html

    full_page = """ <html> <head> <title>Title & text</title> </head> <body> <h1>header1</h1> </body> </html>"""
    cases = (
        # a <title> element is present, so its text is the result
        (full_page, "Title & text"),
        # no <title>: the first <h1> is used instead
        ("<body><h1>foo ></h1><h1>bar</h1></body>", "foo >"),
        # broken markup gives an empty title
        ("<sadfsa>dsf", ""),
        # nested tags inside the <h1> are not supported by the extractor
        ("<body><h1>foo <emph>bar</emph></h1></body>", ""),
        ("<body><h1>foo <emph>bar</emph> x</h1><h2>some text</h2></body>",
         ""),
    )
    for markup, expected in cases:
        self.assertEqual(get_title_from_html(markup), expected)
def _on_exhibits_data_available(self, spawner, exhibits):
    """Fill in missing exhibit titles, then forward the exhibits.

    Every exhibit that lacks a ``title_translated`` attribute gets one:
    the title extracted from its ``html`` when that is non-empty,
    otherwise the empty string. Exhibits that already carry the
    attribute are left untouched. Afterwards ``self.emit`` is called
    with ``"exhibits"`` and the same ``exhibits`` object.

    ``spawner`` is accepted but not used here.
    """
    for item in exhibits:
        # the server supplied a title already, nothing to do
        if hasattr(item, "title_translated"):
            continue
        markup = item.html
        if not markup:
            # no html to derive a title from
            item.title_translated = ""
            continue
        # imported lazily, only when a title has to be extracted
        from softwarecenter.utils import get_title_from_html
        item.title_translated = get_title_from_html(markup)
    self.emit("exhibits", exhibits)
def _on_exhibits_data_available(self, spawner, exhibits):
    """Normalise each exhibit in place, then forward the exhibits.

    For every exhibit:

    * if it has no ``title_translated`` attribute, one is set to the
      title extracted from its ``html`` when that is non-empty, else
      to the empty string;
    * if it has no ``click_url`` attribute, one is set to the empty
      string;
    * if ``package_names`` is truthy, it is replaced by its
      ``strip()``-ed value.

    Afterwards ``self.emit`` is called with ``"exhibits"`` and the same
    ``exhibits`` object. ``spawner`` is accepted but not used here.
    """
    for item in exhibits:
        # no title from the server: derive one from the html if possible
        if not hasattr(item, "title_translated"):
            markup = item.html
            if markup:
                # imported lazily, only when a title has to be extracted
                from softwarecenter.utils import get_title_from_html
                title = get_title_from_html(markup)
            else:
                title = ""
            item.title_translated = title

        # banners may carry a url to click on; default to none
        if not hasattr(item, "click_url"):
            item.click_url = ""

        # drop surrounding whitespace from the package names (#1004417)
        names = item.package_names
        if names:
            item.package_names = names.strip()

    self.emit("exhibits", exhibits)