l.success()


def loop(xml):
    """Recursively collect user-agent strings from *xml* into ``uas``.

    Each child of *xml* is inspected by its ``name``:

    * ``'useragent'`` -- its ``'useragent'`` attribute is stripped of
      surrounding whitespace and added to the module-level ``uas`` set.
    * ``'folder'`` -- descended into recursively, unless its
      ``'description'`` attribute is ``'UA List :: About'``.
    * anything else is ignored.

    Returns ``None``; the result is the side effect on ``uas``.
    """
    for node in xml:
        kind = node.name
        if kind == 'useragent':
            uas.add(node['useragent'].strip())
            continue
        if kind != 'folder':
            continue
        # The "About" folder is deliberately not descended into.
        if node['description'] == 'UA List :: About':
            continue
        loop(node)

# Collect user agents from the `soup` object bound earlier in the script.
with log.waitfor('Parsing list') as progress:
    loop(soup.useragentswitcher)
    progress.success()

# Download the second list and rebind `soup` to its parsed form.
with log.waitfor('Fetching from http://www.user-agents.org') as progress:
    xml = getxml('http://www.user-agents.org/allagents.xml')
    soup = BeautifulSoup(xml)
    progress.success()

with log.waitfor('Parsing list') as progress:
    # 'user-agents' is not a valid identifier, so it cannot be written as
    # `soup.user-agents`; getattr() performs the same attribute lookup.
    for entry in getattr(soup, 'user-agents'):
        if entry.name != 'user-agent':
            continue
        # Use the first 'string' match inside the entry, whitespace-trimmed.
        uas.add(entry.select('string')[0].string.strip())
    progress.success()

total = len(uas)
log.info('Fetched %d user agents' % total)

# One user agent per line; the newline is appended before sorting.
sorted_lines = sorted(ua + '\n' for ua in uas)
write('useragents.txt', ''.join(sorted_lines))
Example #2
0

def loop(xml):
    """Walk *xml* depth-first and record every user agent in ``uas``.

    A child named ``'folder'`` is recursed into unless its
    ``'description'`` attribute equals ``'UA List :: About'``.  A child
    named ``'useragent'`` contributes its ``'useragent'`` attribute, with
    surrounding whitespace stripped, to the module-level ``uas`` set.
    Children with any other name are skipped.  Returns ``None``.
    """
    for child in xml:
        tag = child.name
        # A skipped "About" folder falls through to the elif, which cannot
        # match because its name is 'folder'.
        if tag == 'folder' and child['description'] != 'UA List :: About':
            loop(child)
        elif tag == 'useragent':
            uas.add(child['useragent'].strip())


# First source: the `soup` object bound before this point in the script.
with log.waitfor('Parsing list') as status:
    loop(soup.useragentswitcher)
    status.success()

# Second source: fetch the XML and rebind `soup` to the newly parsed document.
with log.waitfor('Fetching from http://www.user-agents.org') as status:
    xml = getxml('http://www.user-agents.org/allagents.xml')
    soup = BeautifulSoup(xml)
    status.success()

with log.waitfor('Parsing list') as status:
    # The name contains a hyphen, so it is looked up with getattr() rather
    # than with dotted attribute syntax.
    agents_root = getattr(soup, 'user-agents')
    for record in agents_root:
        if record.name != 'user-agent':
            continue
        first_string = record.select('string')[0]
        uas.add(first_string.string.strip())
    status.success()

count = len(uas)
log.info('Fetched %d user agents' % count)

# Each entry gets its trailing newline before the sort, then all are joined.
output_lines = sorted(ua + '\n' for ua in uas)
write('useragents.txt', ''.join(output_lines))
Example #3
0
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

soup = BeautifulSoup(html_doc, "html.parser")

#print(soup.prettify())

# The same filter expressed twice: tags whose name starts with "b", first via
# a compiled regex, then via a predicate function.
for tag in soup.find_all(re.compile("^b")):
    print(tag.name)
for tag in soup.find_all(lambda x: x.name.startswith('b')):
    print(tag.name)

# Parse input containing HTML entities and print the document back out.
soup2 = BeautifulSoup("&ldquo;Dammit!&rdquo; he said.", "html5lib")
print(str(soup2))

# Probe how `soup.title` is resolved: is it listed by dir(), visible to
# hasattr(), stored in the instance __dict__, and what does calling
# __getattr__ directly return?
print('title' in dir(soup))
print(hasattr(soup, 'title'))
print('title' in soup.__dict__)
print(soup.__getattr__('title'))

# The escapes below are the UTF-8 encoding of "é", so the markup must be a
# bytes literal.  As a plain str literal on Python 3 it would be the two
# characters "Ã©" and the parser would have no byte encoding to detect.
# On Python 2 the b"" prefix does not change the value.
markup = b"<h1>Sacr\xc3\xa9 bleu!</h1>"
soup = BeautifulSoup(markup, "html5lib")
print(soup.h1)
# <h1>Sacré bleu!</h1>
print(soup.h1.string)
# Sacré bleu!
print(soup.original_encoding)
print(soup.contains_replacement_characters)
# Passing an encoding asks prettify() for output encoded as Latin-1.
print(soup.prettify("latin-1"))