# ScraperWiki view: serve the most recent combined spreadsheet as a CSV download.
try:
    from scraperwiki.utils import httpresponseheader
    from scraperwiki.sqlite import attach, select
except ImportError:
    # Stub the header call when running outside ScraperWiki.
    def httpresponseheader(a, b):
        pass

attach('combine_mix_scraper_spreadsheets_1')

httpresponseheader("Content-Type", "text/csv")
httpresponseheader("Content-Disposition", "attachment; filename=combined_spreadsheets.csv")

# Emit the spreadsheet column of the newest row.
print select('spreadsheet from combined_spreadsheets where time = (select max(time) from combined_spreadsheets)')[0]['spreadsheet']
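# The scraper this view reads from is not shown. A hypothetical sketch of how
# such a table could be populated with the classic ScraperWiki API; the
# 'combined_spreadsheets' column layout here is an assumption for
# illustration, not the original scraper's code.
import time
import scraperwiki

csv_text = "a,b\n1,2\n"  # placeholder for the real combined CSV body
scraperwiki.sqlite.save(
    unique_keys=['time'],
    data={'time': time.time(), 'spreadsheet': csv_text},
    table_name='combined_spreadsheets')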
''' Send a query string '''
import os
from json import dumps
from urllib2 import urlopen, URLError
from urllib import urlencode
from urlparse import parse_qs
from lxml.html import fromstring

# Usage documentation served when no query string is given; only the tail of
# this string survives in the source.
DOCS = '''
] }
</pre> '''

address = parse_qs(os.environ.get('QUERY_STRING', ''))
if address == {}:
    print DOCS
else:
    try:
        from scraperwiki.utils import httpresponseheader
    except ImportError:
        # Running outside ScraperWiki: fall back to a plain CGI header.
        print "Content-Type: application/json"
        print
    else:
        httpresponseheader('Content-Type', 'application/json')
    # check_and_format and do_request are defined elsewhere in this view
    # (their definitions are not shown here).
    try:
        address = check_and_format(address)
    except ValueError, msg:
        print dumps({"status": str(msg)})
    else:
        print do_request(address)
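# Hypothetical sketches of the two helpers whose definitions are missing
# above. The upstream URL and the returned fields are assumptions for
# illustration, not the original code.
def check_and_format(address):
    # parse_qs yields {key: [value, ...]}; flatten to single values and
    # reject an effectively empty query.
    flat = dict((k, v[0]) for k, v in address.items() if v)
    if not flat:
        raise ValueError('no usable query parameters given')
    return flat

def do_request(address):
    url = 'http://example.com/lookup?' + urlencode(address)  # assumed endpoint
    try:
        page = fromstring(urlopen(url).read())
    except URLError, msg:
        return dumps({'status': str(msg)})
    return dumps({'status': 'success', 'title': page.findtext('.//title')})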
# ScraperWiki view: chart Parkrun winning and median times and return the
# plot as a base64-encoded PNG.
import datetime as dt
import dateutil.parser
from StringIO import StringIO
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib as mpl
from matplotlib.ticker import FuncFormatter as ff
import numpy
import scraperwiki
from scraperwiki import dumpMessage, sqlite, utils

# The code that loads the results and computes x, winners, medians and
# spreads is not shown in the source.

fig = plt.figure()
ax = fig.add_subplot(111)
ax.yaxis.set_major_formatter(ff(sec2minsec))
plt.xlabel("Date of Parkrun")
plt.ylabel("Time (min:sec)")
ax.plot_date(x, winners, 'ro', label="Winning time")
ax.errorbar(x, medians, yerr=spreads, label="Median time (and spread)")
fig.autofmt_xdate()
legend = plt.legend()

format = "png"
imagedata = StringIO()
plt.savefig(imagedata, format=format, dpi=96)
utils.httpresponseheader("Content-Type", "image/%s" % format)
dumpMessage({"content": imagedata.getvalue().encode("base64"),
             "message_type": "console",
             "encoding": "base64"})
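# The sec2minsec tick formatter used above is also missing from the source.
# A plausible definition, assuming times are stored as seconds (FuncFormatter
# callbacks receive the tick value and its position):
def sec2minsec(x, pos):
    '''Format a tick value in seconds as min:sec, e.g. 1234 -> 20:34.'''
    return '%d:%02d' % (int(x) // 60, int(x) % 60)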
# ScraperWiki view: RSS feed of new Philadelphia legislative documents.
# This feed was validated by http://feedvalidator.org/
import scraperwiki
from scraperwiki import utils
from time import strptime, strftime, gmtime
from cgi import escape

# It's an RSS feed, so serve it as such
utils.httpresponseheader("Content-Type", "application/rss+xml")
#scraperwiki.dumpMessage({'message_type': 'httpresponseheader', 'headerkey': "Content-Type", 'headervalue': "application/rss+xml"})

sourcescraper = 'philadelphia_legislative_files'
view = 'feed_of_philadelphia_legislative_files'

print '<?xml version="1.0" encoding="UTF-8" ?>'
print
print ' <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">'
print ' <channel>'
print ' <title>Philadelphia Legislative Documents</title>'
print ' <description>RSS feed of new Philadelphia legislative documents. Search through the documents at http://legislation.phila.gov/. Do even more with the data at http://scraperwiki.com/scrapers/philadelphia_legislative_files/.</description>'
print ' <link>http://scraperwiki.com/scrapers/%s/</link>' % sourcescraper
#print ' <lastBuildDate>%s</lastBuildDate>\n' % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
print ' <pubDate>%s</pubDate>' % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
print ' <atom:link href="http://scraperwikiviews.com/run/%s/" rel="self" type="application/rss+xml" />' % view

# rows
scraperwiki.sqlite.attach('philadelphia_legislative_files')
rows = scraperwiki.sqlite.select('* from `philadelphia_legislative_files`.swdata')
# Since I did not store the dates as datetime fields in the database, I have to
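# The item loop is truncated above at the date-handling comment. A
# hypothetical continuation, assuming each row carries 'title', 'url' and a
# 'date' string like '2011-05-03' (the real column names are not shown):
for row in rows:
    pubdate = strftime("%a, %d %b %Y %H:%M:%S +0000",
                       strptime(row['date'], "%Y-%m-%d"))
    print ' <item>'
    print ' <title>%s</title>' % escape(row['title'])
    print ' <link>%s</link>' % escape(row['url'])
    print ' <pubDate>%s</pubDate>' % pubdate
    print ' </item>'
print ' </channel>'
print ' </rss>'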
# ScraperWiki view: bundle the raw source of two scrapers into a gzipped
# tarball and serve it as a download.
from scraperwiki.utils import httpresponseheader
import os

slugs = ['mix_backup', 'mix_scraper_spreadsheets']
curls = ['curl --insecure https://scraperwiki.com/editor/raw/{0} > {0}.py 2> /dev/null'.format(slug) for slug in slugs]

# Download the raw sources into a fresh directory.
os.system('rm -R mix_backup 2>/dev/null; mkdir mix_backup 2> /dev/null; cd mix_backup 2> /dev/null;' + ';'.join(curls))

# Tar and gzip
os.system('tar czf mix_backup.tar.gz mix_backup 2> /dev/null')

# Serve the archive.
httpresponseheader('Content-Type', 'application/x-gzip')
print open('mix_backup.tar.gz').read()
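# The same bundle can be built without shelling out to curl and tar; a
# minimal sketch using only the standard library (old urllib2 skips
# certificate checks much as curl --insecure does):
import tarfile
import urllib2
from StringIO import StringIO

tar = tarfile.open('mix_backup.tar.gz', 'w:gz')
for slug in ['mix_backup', 'mix_scraper_spreadsheets']:
    body = urllib2.urlopen('https://scraperwiki.com/editor/raw/' + slug).read()
    info = tarfile.TarInfo('mix_backup/%s.py' % slug)
    info.size = len(body)
    tar.addfile(info, StringIO(body))
tar.close()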