def test_parsers():
    """Parse ex.HTML_1 with each parser name and check the result type.

    The parsers are tried in a fixed order: 'html.parser', 'lxml' and
    'html5lib'. Each call to bs.to_soup must return a BeautifulSoup
    instance.
    """
    for parser_name in ('html.parser', 'lxml', 'html5lib'):
        parsed = bs.to_soup(ex.HTML_1, parser=parser_name)
        assert isinstance(parsed, BeautifulSoup)
def test_get_links():
    """Check bs.get_links on ex.LINKS with and without a base URL.

    When a base URL is passed, every expected link is absolute; when it
    is omitted, the two '/games/...' links are expected unchanged.
    """
    base_url = 'http://retrogames.com'
    parsed = bs.to_soup(ex.LINKS)

    # Base URL supplied: all expected links start with it.
    with_base = bs.get_links(parsed, base_url)
    assert with_base == [
        'http://retrogames.com',
        'http://retrogames.com/games/elite',
        'http://retrogames.com/games/commando',
    ]

    # No base URL: the relative paths are expected as they are.
    without_base = bs.get_links(parsed)
    assert without_base == [
        'http://retrogames.com',
        '/games/elite',
        '/games/commando',
    ]
def test_make_links_absolute():
    """Check that bs.make_links_absolute rewrites the links of ex.LINKS.

    After the call with 'http://retrogames.com' as base, bs.get_links
    (called without a base URL) is expected to return only absolute links.
    """
    absolute_soup = bs.make_links_absolute(
        bs.to_soup(ex.LINKS),
        'http://retrogames.com',
    )
    expected = [
        'http://retrogames.com',
        'http://retrogames.com/games/elite',
        'http://retrogames.com/games/commando',
    ]
    assert bs.get_links(absolute_soup) == expected
def test_make_links_absolute():
    """Verify the links of ex.LINKS are absolute after make_links_absolute.

    The soup returned by bs.make_links_absolute is passed to bs.get_links
    without a base URL, and the result is compared with the expected
    list of absolute URLs.
    """
    base_url = 'http://retrogames.com'
    original = bs.to_soup(ex.LINKS)
    converted = bs.make_links_absolute(original, base_url)

    # No base URL is given here, so the soup itself must hold absolute links.
    found = bs.get_links(converted)
    assert found == [
        'http://retrogames.com',
        'http://retrogames.com/games/elite',
        'http://retrogames.com/games/commando',
    ]
def test_get_links():
    """Compare bs.get_links output for ex.LINKS against two expected lists.

    The first call passes 'http://retrogames.com' as the base URL and
    expects absolute links. The second call passes no base URL and expects
    the '/games/...' paths as they are.
    """
    expected_absolute = [
        'http://retrogames.com',
        'http://retrogames.com/games/elite',
        'http://retrogames.com/games/commando',
    ]
    expected_raw = [
        'http://retrogames.com',
        '/games/elite',
        '/games/commando',
    ]

    page = bs.to_soup(ex.LINKS)
    assert bs.get_links(page, 'http://retrogames.com') == expected_absolute
    assert bs.get_links(page) == expected_raw
def check():
    """Just an example how to use the BS4 library.

    Fetches the page at URL with get_page, parses it with bs.to_soup using
    PARSER and prints three values:

    * the href of the first ``<a href=...>`` inside the
      'module bookSmall' div,
    * the lower-cased text of the ``dc:title`` span inside the
      'module fullBook' div (empty string if it cannot be found),
    * the lower-cased text of the 'paragraph' ``<p>`` inside the last
      'tabModules' div (empty string if it cannot be found).

    The first lookup and the 'tabModules' lookup are not guarded: if the
    page lacks those elements, the resulting exception propagates.
    """
    text = str(get_page(URL))
    soup = bs.to_soup(text, PARSER)

    book = soup.find('div', {'class': 'module bookSmall'})
    link = book.find('a', href=True)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(link['href'])

    book = soup.find('div', {'class': 'module fullBook'})
    try:
        title = book.find('span', {'property': 'dc:title'}).text.lower()
    except AttributeError:
        # Presumably find() returned None (element missing), so the
        # attribute access failed; fall back to an empty title.
        title = ""
    print(title)

    tabs = soup.find_all('div', {'class': 'tabModules'})[-1]
    try:
        desc = tabs.find('p', {'class': 'paragraph'}).text.lower()
    except AttributeError:
        # Same fallback as for the title: no matching paragraph.
        desc = ""
    print(desc)
def test_to_soup(): soup = bs.to_soup(ex.HTML_1) assert isinstance(soup, BeautifulSoup) assert str(soup) == """
def test_prettify(): soup = bs.to_soup(ex.UGLY) assert soup.prettify() == """<html>