def process (name, date): newdate = date[8:10] + "_" + date[5:7] + "_" + date[0:4] url = r"http://www.lloydsbankinggroup.com/media/excel/2010/%s_historic_data.xls" % newdate print url url = r"http://www.lloydsbankinggroup.com/media/excel/2010/04_06_10_historic_data.xls" book = xlrd.open_workbook(file_contents=scrape(url)) sheet = book.sheet_by_name (name) months=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] data = [] i = 1 while i < 500: try: month = sheet.cell_value (i, 0) year = sheet.cell_value (i, 1) level = sheet.cell_value (i, 2) except: break when= "%04d-%02d-01" % (int(year), months.index (month) + 1) i = i + 1 data.append (level) sqlite.save(unique_keys=["Date"], data={"Date":when, "Index":level}) chart = SimpleLineChart(500, 255, y_range=[0, 700]) chart.add_data (data) metadata.save("chart", chart.get_url())
def historic(): try: from scraperwiki import metadata hmark = metadata.get('hist_marker', default=(date.today() - timedelta(days=1)).strftime("%d/%m/%Y")) hmark = strpdate(hmark) if hmark < date(2005, 1, 1): print "History marker is %s (earlier than 2005), stopping." % hmark.strptime("%d/%m/%Y") fdate = hmark - timedelta(days=5) # XXX should retreive record and update only fields that arn't # present, or it will overwrite more recent data print "Historic: scraping %s to %s" % (fdate.strftime("%d/%m/%Y"), hmark.strftime("%d/%m/%Y")) for rec in (clean(r) for r in walk(fdate, hmark)): sw_save(rec) metadata.save('hist_marker', fdate.strftime('%d/%m/%Y')) except ImportError: print "no historic"
def historic(): try: from scraperwiki import metadata hmark = metadata.get('hist_marker', default=(date.today() - timedelta(days=1)).strftime("%d/%m/%Y")) hmark = strpdate(hmark) if hmark < date(2005, 1, 1): print "History marker is %s (earlier than 2005), stopping." % hmark.strptime( "%d/%m/%Y") fdate = hmark - timedelta(days=5) # XXX should retreive record and update only fields that arn't # present, or it will overwrite more recent data print "Historic: scraping %s to %s" % (fdate.strftime("%d/%m/%Y"), hmark.strftime("%d/%m/%Y")) for rec in (clean(r) for r in walk(fdate, hmark)): sw_save(rec) metadata.save('hist_marker', fdate.strftime('%d/%m/%Y')) except ImportError: print "no historic"
# NOTE(review): collapsed chunk.  The leading `try` sits inside a retry loop
# (loop variable `attempt`, plus the pagination loop the `break`s belong to)
# whose headers are outside this view — indentation below is reconstructed,
# not original, and the fragment is not valid standalone Python.
try:
    # Follow the results pagination's "next" link; leave the loop on success.
    res2 = br.follow_link(text_regex='^next')
    break
except mechanize.LinkNotFoundError:
    # No "next" link: last page of results reached.
    link_not_found = True
    break
except URLError:
    # try again
    # Transient network failure: give up (re-raise) on the 5th attempt.
    # Presumably `attempt` counts from 0 in the unseen loop — confirm.
    if attempt == 4:
        raise
    print 'retry, attempt:', attempt
if link_not_found:
    break
# Sanity check: browser should be on an HTML page before any parsing.
assert br.viewing_html()

# Prefixes recorded by do_search below; persisted at the bottom.
bad_towns = []

def do_search(base):
    # Depth-first search over town-name prefixes: try `base` plus each
    # lowercase letter; when a lookup raises SearchErrors, recurse one
    # letter deeper.
    error = False
    for l in string.lowercase:
        town = base + l
        try:
            read_town(town)
        except SearchErrors:
            error = True
            do_search(town)
    # Record a non-empty prefix when none of its 26 extensions errored.
    # NOTE(review): the exact meaning of "bad town" here isn't visible from
    # this chunk — verify against read_town/SearchErrors before relying on it.
    if not error and base != '':
        bad_towns.append(base)

print "start"
# Kick off from the empty prefix, then persist the collected prefixes.
do_search('')
metadata.save('bad_towns', bad_towns)
# NOTE(review): collapsed chunk beginning mid-`except` handler — the `try`
# and the preceding `except` clause (the one that sets link_not_found on a
# missing "next" link in the sibling revisions of this scraper) are outside
# this view.  Indentation below is reconstructed; the fragment is not valid
# standalone Python.
link_not_found = True
break
except URLError:
    # try again
    # Transient network failure: re-raise on the 5th attempt.  `attempt`
    # presumably comes from the unseen enclosing retry loop — confirm.
    if attempt == 4:
        raise
    print 'retry, attempt:', attempt
if link_not_found:
    break
# Sanity check: browser should be on an HTML page before any parsing.
assert br.viewing_html()

# Prefixes recorded by do_search below; persisted at the bottom.
bad_towns = []

def do_search(base):
    # Depth-first search over town-name prefixes: try `base` plus each
    # lowercase letter; on SearchErrors recurse one letter deeper.
    error = False
    for l in string.lowercase:
        town = base + l
        try:
            read_town(town)
        except SearchErrors:
            error = True
            do_search(town)
    # Record a non-empty prefix when none of its extensions errored.
    # NOTE(review): semantics of "bad town" not visible from this chunk.
    if not error and base != '':
        bad_towns.append(base)

print "start"
# Kick off from the empty prefix, then persist the collected prefixes.
do_search('')
metadata.save('bad_towns', bad_towns)
# NOTE(review): collapsed chunk opening mid-definition — the lines down to
# `bad_towns.append(base)` are the body of a do_search(base) whose `def`
# line precedes this view (see the sibling copies of this function in the
# file).  Indentation below is reconstructed, not original.
    error = False
    for l in string.lowercase:
        town = base + l
        try:
            print "Trying %s" % town
            read_town(town)
        except SearchErrors:
            # Lookup failed for this prefix: recurse one letter deeper.
            error = True
            do_search(town)
    # Record a non-empty prefix when none of its extensions errored.
    if not error and base != "":
        bad_towns.append(base)

print "start"
do_search("")
metadata.save("bad_towns", bad_towns)

# NOTE(review): imports appearing *after* code — this chunk looks like two
# concatenated revisions of the scraper; below is the header of the next one.
import mechanize, string, sys, re, os
print "start"
from lxml.html import parse, tostring, fromstring
from scraperwiki import sqlite, metadata
from scraperwiki.geo import gb_postcode_to_latlng

# Opening/closing times: an optional "HH:MM" / "HH.MM" pair separated by a
# non-breaking space (\xa0), either side possibly blank.
re_time = re.compile("^\s*([\d ]\d[:.]\d\d)?\xa0\s*(\d\d[:.]\d\d)?\s*$")
days = set(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"])

# Shared mechanize browser for the whole scrape.
br = mechanize.Browser()
br.addheaders = [("Accept", "text/html")]
br.set_handle_robots(False)  # deliberately ignore robots.txt
# Form-field name for the postcode input on the finder page.
postcode = "/rmg/finder/po/POFindTheNearestFormHandler.value.postcode"
# Prefixes recorded by do_search below; persisted after the search runs.
bad_towns = []

def do_search(base):
    # Depth-first search over town-name prefixes: try `base` plus each
    # lowercase letter; on SearchErrors recurse one letter deeper.  Record
    # a non-empty prefix when none of its 26 extensions errored.
    # NOTE(review): semantics of "bad town" not visible from this chunk —
    # verify against read_town/SearchErrors.
    error = False
    for l in string.lowercase:
        town = base + l
        try:
            read_town(town)
        except SearchErrors:
            error = True
            do_search(town)
    if not error and base != '':
        bad_towns.append(base)

print "start"
do_search('')
metadata.save('bad_towns', bad_towns)

# NOTE(review): the original text fuses `...bad_towns)import mechanize` with
# no separator — two concatenated revisions; the next one's header follows.
import mechanize, string, sys, re, os
print "start"
from lxml.html import parse, tostring, fromstring
from scraperwiki import metadata, sqlite
from scraperwiki.geo import gb_postcode_to_latlng
from time import sleep

# Opening/closing times: an optional "HH:MM" / "HH.MM" pair separated by a
# non-breaking space (\xa0), either side possibly blank.
re_time = re.compile('^\s*([\d ]\d[:.]\d\d)?\xa0\s*(\d\d[:.]\d\d)?\s*$')
days = set(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"])

# Shared mechanize browser for the whole scrape.
br = mechanize.Browser()
br.addheaders = [("Accept", "text/html"),]
br.set_handle_robots(False)  # deliberately ignore robots.txt
# Search-form field name and the finder page URL.
postcode = 'postcode_or_town'
url = 'http://www.postoffice.co.uk/portal/po/finder'