Ejemplo n.º 1
0
def test_parsers():
    """Check that ``bs.to_soup`` returns a ``BeautifulSoup`` for each parser.

    The same sample document (``ex.HTML_1``) is parsed once per parser name,
    in the order ``html.parser``, ``lxml``, ``html5lib``.
    """
    for parser_name in ('html.parser', 'lxml', 'html5lib'):
        parsed = bs.to_soup(ex.HTML_1, parser=parser_name)
        assert isinstance(parsed, BeautifulSoup)
Ejemplo n.º 2
0
def test_parsers():
    """Verify ``bs.to_soup`` builds a ``BeautifulSoup`` with every parser name.

    ``ex.HTML_1`` is converted three times, once for each of the parser
    names listed below, and the type of each result is asserted.
    """
    parser_names = ('html.parser', 'lxml', 'html5lib')
    for name in parser_names:
        result = bs.to_soup(ex.HTML_1, parser=name)
        assert isinstance(result, BeautifulSoup)
Ejemplo n.º 3
0
def test_get_links():
    """Check ``bs.get_links`` both with and without a base URL argument.

    With a base URL, the test expects every link to be absolute; without
    one, the test expects the relative hrefs to be returned unchanged.
    """
    document = bs.to_soup(ex.LINKS)
    base_url = 'http://retrogames.com'

    # Base URL supplied: relative hrefs are expected to come back absolute.
    expected_absolute = [
        'http://retrogames.com',
        'http://retrogames.com/games/elite',
        'http://retrogames.com/games/commando',
    ]
    assert bs.get_links(document, base_url) == expected_absolute

    # No base URL: hrefs are expected exactly as the assertions list them.
    expected_raw = ['http://retrogames.com', '/games/elite', '/games/commando']
    assert bs.get_links(document) == expected_raw
Ejemplo n.º 4
0
def test_make_links_absolute():
    """Check that ``bs.make_links_absolute`` rewrites the links in a soup.

    After the call, ``bs.get_links`` (invoked without a base URL) is
    expected to return only absolute URLs.
    """
    original = bs.to_soup(ex.LINKS)
    absolutized = bs.make_links_absolute(original, 'http://retrogames.com')

    expected = [
        'http://retrogames.com',
        'http://retrogames.com/games/elite',
        'http://retrogames.com/games/commando',
    ]
    assert bs.get_links(absolutized) == expected
    
Ejemplo n.º 5
0
def test_make_links_absolute():
    """Verify ``bs.make_links_absolute`` against the ``ex.LINKS`` sample.

    The soup returned by ``bs.make_links_absolute`` is passed to
    ``bs.get_links`` with no base URL; the result must already be absolute.
    """
    base_url = 'http://retrogames.com'
    converted = bs.make_links_absolute(bs.to_soup(ex.LINKS), base_url)

    found = bs.get_links(converted)
    wanted = [
        'http://retrogames.com',
        'http://retrogames.com/games/elite',
        'http://retrogames.com/games/commando',
    ]
    assert found == wanted
Ejemplo n.º 6
0
def test_get_links():
    """Verify ``bs.get_links`` on the ``ex.LINKS`` sample document.

    Two calls are checked: one passing a base URL (absolute links
    expected) and one without (hrefs expected as listed below).
    """
    page = bs.to_soup(ex.LINKS)

    # First call: a base URL is given.
    with_base = bs.get_links(page, 'http://retrogames.com')
    wanted_with_base = [
        'http://retrogames.com',
        'http://retrogames.com/games/elite',
        'http://retrogames.com/games/commando',
    ]
    assert with_base == wanted_with_base

    # Second call: no base URL.
    without_base = bs.get_links(page)
    wanted_without_base = [
        'http://retrogames.com',
        '/games/elite',
        '/games/commando',
    ]
    assert without_base == wanted_without_base
Ejemplo n.º 7
0
def check():
    """Just an example how to use the BS4 library.

    Fetches the page at ``URL`` via ``get_page``, parses it with
    ``bs.to_soup`` using ``PARSER``, and prints three things in order:

    1. the ``href`` of the first link inside the ``module bookSmall`` div,
    2. the lower-cased ``dc:title`` text from the ``module fullBook`` div,
    3. the lower-cased ``paragraph`` text from the last ``tabModules`` div.

    For items 2 and 3 an empty string is printed when the element cannot
    be found. Item 1 and the ``tabModules`` lookup are not guarded, so a
    page without those elements still raises, as before.
    """
    text = str(get_page(URL))
    soup = bs.to_soup(text, PARSER)
    book = soup.find('div', {'class' : 'module bookSmall'})
    link = book.find('a', href=True)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(link['href'])
    #
    book = soup.find('div', {'class' : 'module fullBook'})
    try:
        title = book.find('span', {'property': 'dc:title'}).text.lower()
    except AttributeError:
        # find() returned None (missing div or span); fall back to "".
        title = ""
    print(title)
    tabs = soup.find_all('div', {'class' : 'tabModules'})[-1]
    try:
        desc = tabs.find('p', {'class' : 'paragraph'}).text.lower()
    except AttributeError:
        # No matching paragraph in the last tab module; fall back to "".
        desc = ""
    print(desc)
Ejemplo n.º 8
0
def test_to_soup():
    soup = bs.to_soup(ex.HTML_1)
    assert isinstance(soup, BeautifulSoup)
    assert str(soup) == """
Ejemplo n.º 9
0
def test_prettify():
    soup = bs.to_soup(ex.UGLY)
    assert soup.prettify() == """<html>
Ejemplo n.º 10
0
def test_to_soup():
    soup = bs.to_soup(ex.HTML_1)
    assert isinstance(soup, BeautifulSoup)
    assert str(soup) == """
Ejemplo n.º 11
0
def test_prettify():
    soup = bs.to_soup(ex.UGLY)
    assert soup.prettify() == """<html>