#! /usr/bin/python
# -*- coding: utf-8 -*-
"""Scrape the station listings linked from canadianwebradio.com.

The front page holds an index table (id ``thetable``).  The first cell of
each row links to a listing page whose station table (id ``thetable3``) is
handed to ``scrape_mike.scrape`` with country "Canada".
"""
from BeautifulSoup import BeautifulSoup

import db_wrapper
import scrape_mike as ssm

url = "http://www.canadianwebradio.com/"

soup = ssm.get_page(url)
countries = soup.find("table", {"id": "thetable"})

db_wrapper.connect()
try:
    # Visit the page linked from the first cell of every index row.
    for row in countries.findAll('tr'):
        cells = row.findAll('td')
        anchors = cells[0].findAll('a') if cells else []
        if not anchors:
            # No <td> or no link in the first cell (e.g. a header row):
            # there is no listing page to follow for this row.
            continue
        link = anchors[0]

        page = ssm.get_page(url + link['href'])
        data = page.find("table", {"id": "thetable3"})
        ssm.scrape(
            data,
            name_td_id=0,
            location_td_id=2,
            stream_td_id=4,
            categ_td_id=5,
            country="Canada")
finally:
    # Always release the database connection, even if a fetch or a scrape
    # raised part-way through the run.
    db_wrapper.disconnect()
#! /usr/bin/python
# -*- coding: utf-8 -*-
"""Scrape the station listing on the usliveradio.com front page.

The station table (id ``thetable3``) is read straight from the front page
and handed to ``scrape_mike.scrape`` with country "US".
"""
from BeautifulSoup import BeautifulSoup

import db_wrapper
import scrape_mike as ssm

db_wrapper.connect()
try:
    url = "http://www.usliveradio.com/"
    soup = ssm.get_page(url)
    data = soup.find("table", {"id": "thetable3"})
    ssm.scrape(
        data,
        name_td_id=0,
        location_td_id=1,
        stream_td_id=4,
        categ_td_id=5,
        country="US")
finally:
    # Always release the database connection, even if the fetch or the
    # scrape raised.
    db_wrapper.disconnect()
"Uruguay", "Uzbekistan", "Vanuatu", "Vatican City", "Venezuela", "Vietnam", "Wallis-Futuna Islands", "Western Sahara", "Yemen", "Zambia", "Zimbabwe" ] db_wrapper.connect() soup = ssm.get_page("http://www.listenlive.eu/index.html") countries = soup.find("table", {"id" : "thetable"}) for row in countries.findAll('tr'): link = row.findAll('td')[0].findAll('a')[0] country_name = link.text found = False if "Vatican State" in country_name: country_name = "Vatican City" for cnt in COUNTRIES: if cnt in country_name: found = True if found == False: print "Country (%s) not found in list. Moving on." % country_name sys.exit(0)