from lxml.html import fromstring
from advanced_link_crawler import download

from urllib.parse import urljoin

# Country page whose national-flag image URL is extracted below.
url = 'http://example.webscraping.com/places/default/view/Afghanistan-1'
# NOTE(review): download() is defined in advanced_link_crawler, which is not
# visible here; if it can return None on failure, fromstring() will raise.
html = download(url)

tree = fromstring(html)
# Every src attribute found under the value cell of the flag row.
flag_srcs = tree.xpath(
    '//tr[@id="places_national_flag__row"]/td[@class="w2p_fw"]//@src')
if not flag_srcs:
    # Same exception type as the unguarded [0] lookup, but with a message
    # that says what was missing and where.
    raise IndexError('no national flag image found at %s' % url)
img = flag_srcs[0]
# Resolve against the page URL so root-relative, page-relative and absolute
# src values all produce a valid link (plain concatenation only handled the
# root-relative case).
print(urljoin(url, img))


# Example no. 2
0
import time
import re
from all_scrapers import re_scraper, bs_scraper, \
    lxml_scraper, lxml_xpath_scraper
from advanced_link_crawler import download

# How many times every scraper is run against the same downloaded page.
NUM_ITERATIONS = 1000
html = download(
    'http://example.webscraping.com/places/default/view/Singapore-203')

# (label, callable) pairs; they are timed in this order.
scrapers = [('Regular expressions', re_scraper),
            ('BeautifulSoup', bs_scraper),
            ('Lxml', lxml_scraper),
            ('Xpath', lxml_xpath_scraper)]

for label, scrape in scrapers:
    # Wall-clock timestamp taken just before the first run of this scraper.
    started_at = time.time()
    for _ in range(NUM_ITERATIONS):
        # re.purge() clears the re module's pattern cache; presumably done so
        # the regex scraper recompiles its patterns on every run.
        if scrape == re_scraper:
            re.purge()
        scraped = scrape(html)
        # Sanity check: every scraper must extract the same known value.
        assert scraped['area'] == '692 square kilometres'
    # Total time for all iterations of this scraper, reported in seconds.
    elapsed = time.time() - started_at
    print('%s: %.2f seconds' % (label, elapsed))
# Example no. 3
0
import time
import re
from all_scrapers import re_scraper, bs_scraper, \
    lxml_scraper, lxml_xpath_scraper
from advanced_link_crawler import download

# How many times every scraper is run against the same downloaded page.
NUM_ITERATIONS = 1000
html = download('http://example.webscraping.com/places/default/view/Afghanistan-1')

# (label, callable) pairs; they are timed in this order.
scrapers = [
    ('Regular expressions', re_scraper),
    ('BeautifulSoup', bs_scraper),
    ('Lxml', lxml_scraper),
    ('Xpath', lxml_xpath_scraper),
]

for label, scrape in scrapers:
    # Wall-clock timestamp taken just before the first run of this scraper.
    started_at = time.time()
    for _ in range(NUM_ITERATIONS):
        # re.purge() clears the re module's pattern cache; presumably done so
        # the regex scraper recompiles its patterns on every run.
        if scrape == re_scraper:
            re.purge()
        scraped = scrape(html)
        # Sanity check: every scraper must extract the same known value.
        assert scraped['area'] == '647,500 square kilometres'
    # Total time for all iterations of this scraper, reported in seconds.
    elapsed = time.time() - started_at
    print('%s: %.2f seconds' % (label, elapsed))
def fetch_youtube_url(watch_id, downloader=None):
    """Download the YouTube watch page for ``watch_id``.

    Args:
        watch_id: Value placed in the ``v`` query parameter. An empty string
            or ``None`` selects the built-in default video ``'0uUoqD8a0V4'``.
        downloader: Optional callable that takes the watch URL and returns
            the fetched page. Defaults to the module-level ``download``.

    Returns:
        Whatever the downloader returns for the watch URL.
    """
    # Treat None like the empty string so a missing id falls back to the
    # default instead of failing on string concatenation.
    if not watch_id:
        watch_id = '0uUoqD8a0V4'
    url = "https://www.youtube.com/watch?v=" + watch_id
    # Resolve the default lazily so `download` is only looked up when needed.
    fetch = download if downloader is None else downloader
    return fetch(url)