Code example #1
sourcescraper = "city-of-ottawa-development-applications"

limit  = 15

print '<?xml version="1.0" encoding="UTF-8" ?>\n'
print ' <rss version="2.0">\n'
print '  <channel>\n'
print '    <title>City of Ottawa Development Applications</title>\n'
print '    <description>Unofficial RSS feed for City of Ottawa Development Applications</description>\n'
print '    <link>http://scraperwiki.com/scrapers/%s/</link>\n' % sourcescraper
print '    <lastBuildDate>%s</lastBuildDate>\n' % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
print '    <pubDate>%s</pubDate>\n'  % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
 
# rows
current = 0
for row in sorted(getData(sourcescraper, 0, 0), reverse=True, key=lambda row: strptime(row.get('Status_Date'), '%b %d, %Y')):
    current += 1
    if current >= limit:
        break

    title = row.get('Application_Number') + ' - ' + row.get('Primary_Address') + ' - ' + row.get('Review_Status')

    print '    <item>\n'
    print '      <title>%s</title>\n' % title
    print '      <description>%s</description>\n' % row.get('Description')
    print '      <link>%s</link>\n' % escape(row.get('Application_Link'))
    print '      <guid>%s</guid>\n' % row.get('Application_Number')
    print '      <pubDate>%s</pubDate>\n' %  strftime("%a, %d %b %Y %H:%M:%S +0000", strptime(row.get('Status_Date'), '%b %d, %Y'))
    print '    </item>\n'

print ' </channel>\n'
print '</rss>\n'
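Only the link is XML-escaped in the feed above, so a title or description containing & or < would produce invalid XML. A minimal sketch of a small helper (rss_text is a name introduced here) that escapes every text field before it is printed, using the same xml.sax.saxutils.escape the example already relies on:

from xml.sax.saxutils import escape

def rss_text(value):
    # coerce None to an empty string and escape &, < and > for XML output
    return escape(value or '')

# usage inside the item loop, e.g.:
#   print '      <title>%s</title>\n' % rss_text(title)
#   print '      <description>%s</description>\n' % rss_text(row.get('Description'))
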
Code example #2
limit = 100  #Number of records
offset = 0

keys = getKeys(sourcescraper)
#keys.sort()  # alphabetically

print '<h2>Some data from scraper: %s  (%d columns)</h2>' % (sourcescraper,
                                                             len(keys))
print '<table border="4" style="border-collapse:collapse;">'

print "<tr>",  #This scetion interprets column headings
for key in keys:
    print "<th>%s</th>" % key,
print "</tr>"

for row in getData(sourcescraper, limit, offset):  # this section prints the data rows
    print "<tr>",
    for key in keys:
        print "<td>%s</td>" % row.get(key),
    print "</tr>"

print "</table>"

from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation

sourcescraper = "pl"  #Data table

limit = 100  #Number of records
offset = 0

keys = getKeys(sourcescraper)
sourcescraper = "dun-laoghaire-rathdown-county-council-planning-app"

keys = getKeys(sourcescraper)
keys.sort()  # alphabetically

print '<h2>Data from scraper: %s</h2>' % sourcescraper
print '<table border="1" style="border-collapse:collapse;">'

# column headings
print "<tr>",
for key in keys:
    print "<th>%s</th>" % key,
print "</tr>"

# rows
for row in getData(sourcescraper):
    print "<tr>",
    for key in keys:
        print "<td>%s</td>" % row.get(key),
    print "</tr>"
    
print "</table>"
#########################################
# Simple table of values from one scraper
#########################################
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation

sourcescraper = "dun-laoghaire-rathdown-county-council-planning-app"

keys = getKeys(sourcescraper)
keys.sort()  # alphabetically
Code example #4
#                                            USP - Courses                                               #
#                                Information collected from the JupiterWeb system                        #
#                                                                                                        #
#                                                                                                        #
##########################################################################################################

sourcescraper = "usp-departamentos" #listagem de todos os departamentos da USP de unidades que possuem Comissão de Graduação

import scraperwiki
import re
from BeautifulSoup import BeautifulSoup
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation
from operator import itemgetter, attrgetter

keys = getKeys(sourcescraper)
departamentos = getData(sourcescraper)
total_disciplinas = 0
for departamento in departamentos:
    ##################### Collecting the courses of one department #####################
    codigoUnidade = departamento.get(keys[2])
    print codigoUnidade
    siglaDepartamento = departamento.get(keys[1])
    print siglaDepartamento
    nomeDepartamento = departamento.get(keys[3])
    ## search URL
    starting_pagina_url = 'http://sistemas2.usp.br/jupiterweb/jupDisciplinaLista?codcg=' + codigoUnidade + '&pfxdisval=' + siglaDepartamento + '&tipo=D'
    ## fetch the page HTML
    html_pagina = scraperwiki.scrape(starting_pagina_url)
    ## turn it into a BeautifulSoup object
    paginaSoup = BeautifulSoup(html_pagina)
    ## use BeautifulSoup to get the list of lecturers and their codes
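The snippet above stops just before the BeautifulSoup step. A minimal sketch of how the listing might be walked, assuming BeautifulSoup 3 (as imported above) and assuming the entries of interest are plain <a> links on the JupiterWeb page; the real page structure is not shown in this excerpt:

    # hypothetical continuation: collect the text and href of every link on the listing page
    disciplinas = []
    for link in paginaSoup.findAll('a', href=True):
        titulo = ''.join(link.findAll(text=True)).strip()
        disciplinas.append((titulo, link['href']))
    total_disciplinas += len(disciplinas)
    print nomeDepartamento, len(disciplinas)
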
Code example #5
#########################################
# Simple table of values from one scraper
#########################################
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation

sourcescraper = "environment-agency-river-level-monitors"

# a bigger limit will get a more representative sample.  
# or set a high offset to get a different sample
limit = 200  
offset = 0

m = list(getData(sourcescraper, limit, offset))

def ks(a):
    # sort key: negate 'proportion' so rows sort from highest to lowest proportion
    v = a.get('proportion')
    if not v:
        return 0.0
    return -float(v)

m.sort(key=ks)
for a in m[:200]:
    if not a.get('low'):
        continue
    low = float(a.get('low'))
    high = float(a.get('high'))
    level = float(a.get('level'))
    proportion = float(a.get('proportion'))
    letter = level > 0.5 and "W" or "L"
    # '%d' placeholder assumed below; the original line had no format specifier for the substitution
    img = "http://www.ocean30.us/idx/prop-list.html?page=%d" % int(proportion * 100)
Code example #6

limit  = 15

print '<?xml version="1.0" encoding="UTF-8" ?>\n'
print ' <rss version="2.0">\n'
print '  <channel>\n'
print '    <title>National Policing Improvement Agency Press Releases</title>\n'
print '    <description>RSS feed for Press Releases from the National Policing Improvement Agency</description>\n'
print '    <link>http://scraperwiki.com/scrapers/%s/</link>\n' % sourcescraper
print '    <lastBuildDate>%s</lastBuildDate>\n' % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
print '    <pubDate>%s</pubDate>\n'  % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
 
# rows
current = 0
for row in sorted(getData(sourcescraper, 0, 0), reverse=True, key=lambda row: strptime(row.get('date'), '%Y-%m-%d')):
    current += 1
    if current >= limit:
        break

    print '    <item>\n'
    print '      <title>%s</title>\n' % row.get('title')
    print '      <link>%s</link>\n' % escape(row.get('url'))
    print '      <pubDate>%s</pubDate>\n' %  strftime("%a, %d %b %Y %H:%M:%S +0000", strptime(row.get('date'), '%Y-%m-%d'))
    print '    </item>\n'

print ' </channel>\n'
print '</rss> \n'
# Here we're going to build an RSS feed from the NPIA press release data
# this view is based on the Lincoln Council Committee RSS view
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation
Code example #7
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation
import urllib
import csv
import datetime

days4 = datetime.date.today() - datetime.timedelta(2)
days8 = datetime.date.today() - datetime.timedelta(4)

sourcescraper = "takeover-panel-info"

# get from the primary data
limit = 90
offset = 0
rows = list(getData(sourcescraper, limit, offset))

# join the human generated data from the spreadsheet

# couldn't download from google directly!!!  so had to cache the result
#url = "http://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=0Aju6C2vCJrTNdEJkRkNGR1V4ZHJzWHRGR2x6cmEzN1E&exportFormat=csv"
url = 'http://seagrass.goatchurch.org.uk/~julian/takeoverpanel.csv'
url = 'https://spreadsheets.google.com/pub?key=0Aju6C2vCJrTNdEJkRkNGR1V4ZHJzWHRGR2x6cmEzN1E&output=csv'
fin = urllib.urlopen(url)

hdata = {}
lines = fin.readlines()
clist = list(csv.reader(lines))
headers = clist.pop(0)
for row in clist:
    hrow = dict(zip(headers, row))
    stockcode = hrow.get('OffereeStockCode')
    hrow['articles'] = []
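The loop above is cut off while it is indexing the spreadsheet rows. A minimal sketch of how the join might be finished: store each spreadsheet row in hdata under its OffereeStockCode, then attach matching scraped rows to its articles list. The field name used on the scraped side below is hypothetical; the excerpt does not show which column the scraper uses for the stock code.

    # index the spreadsheet row by its stock code (continuing the loop above)
    if stockcode:
        hdata[stockcode] = hrow

# attach each scraped row to the matching spreadsheet entry
for row in rows:
    code = row.get('OffereeStockCode')  # hypothetical: the scraped-side column name is not shown
    if code and code in hdata:
        hdata[code]['articles'].append(row)
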
offset = 0

keys = getKeys(sourcescraper)
keys.sort()  # alphabetically

print '<h2>Some data from scraper: %s  (%d columns)</h2>' % (sourcescraper, len(keys))
print '<table border="1" style="border-collapse:collapse;">'

# column headings
print "<tr>",
for key in keys:
    print "<th>%s</th>" % key,
print "</tr>"

# rows
for row in getData(sourcescraper, limit, offset):
    print "<tr>",
    for key in keys:
        print "<td>%s</td>" % row.get(key),
    print "</tr>"
    
print "</table>"
#########################################
# Simple table of values from one scraper
#########################################
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation

sourcescraper = "cost_of_gov_websites"

limit = 20
offset = 0
Code example #9
sourcescraper = "dun-laoghaire-rathdown-county-council-planning-app"

keys = getKeys(sourcescraper)
keys.sort()  # alphabetically

print '<h2>Data from scraper: %s</h2>' % sourcescraper
print '<table border="1" style="border-collapse:collapse;">'

# column headings
print "<tr>",
for key in keys:
    print "<th>%s</th>" % key,
print "</tr>"

# rows
for row in getData(sourcescraper):
    print "<tr>",
    for key in keys:
        print "<td>%s</td>" % row.get(key),
    print "</tr>"

print "</table>"
#########################################
# Simple table of values from one scraper
#########################################
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation

sourcescraper = "dun-laoghaire-rathdown-county-council-planning-app"

keys = getKeys(sourcescraper)
keys.sort()  # alphabetically
Code example #10
limit = 100  # number of records
offset = 0

keys = getKeys(sourcescraper)
#keys.sort()  # alphabetically

print '<h2>Some data from scraper: %s  (%d columns)</h2>' % (sourcescraper, len(keys))
print '<table border="4" style="border-collapse:collapse;">'

print "<tr>",#This scetion interprets column headings
for key in keys:
    print "<th>%s</th>" % key,
print "</tr>"


for row in getData(sourcescraper, limit, offset):  # this section prints the data rows
    print "<tr>",
    for key in keys:
        print "<td>%s</td>" % row.get(key),
    print "</tr>"
    
print "</table>"

from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation

sourcescraper = "pl" #Data table

limit = 100#Number of records
offset = 0

keys = getKeys(sourcescraper)
Code example #11
#                                            USP - Courses                                               #
#                                Information collected from the JupiterWeb system                        #
#                                                                                                        #
#                                                                                                        #
##########################################################################################################

sourcescraper = "usp-departamentos"  #listagem de todos os departamentos da USP de unidades que possuem Comissão de Graduação

import scraperwiki
import re
from BeautifulSoup import BeautifulSoup
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation
from operator import itemgetter, attrgetter

keys = getKeys(sourcescraper)
departamentos = getData(sourcescraper)
total_disciplinas = 0
for departamento in departamentos:
    ##################### Collecting the courses of one department #####################
    codigoUnidade = departamento.get(keys[2])
    print codigoUnidade
    siglaDepartamento = departamento.get(keys[1])
    print siglaDepartamento
    nomeDepartamento = departamento.get(keys[3])
    ## search URL
    starting_pagina_url = 'http://sistemas2.usp.br/jupiterweb/jupDisciplinaLista?codcg=' + codigoUnidade + '&pfxdisval=' + siglaDepartamento + '&tipo=D'
    ## fetch the page HTML
    html_pagina = scraperwiki.scrape(starting_pagina_url)
    ## turn it into a BeautifulSoup object
    paginaSoup = BeautifulSoup(html_pagina)
    ## use BeautifulSoup to get the list of lecturers and their codes
Code example #12
#########################################
# Simple table of values from one scraper
#########################################
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation

sourcescraper = "environment-agency-river-level-monitors"

# a bigger limit will get a more representative sample.
# or set a high offset to get a different sample
limit = 200
offset = 0

m = list(getData(sourcescraper, limit, offset))


def ks(a):
    v = a.get('proportion')
    if not v:
        return 0.0
    return -float(v)


m.sort(key=ks)
for a in m[:200]:
    if not a.get('low'):
        continue
    low = float(a.get('low'))
    high = float(a.get('high'))
    level = float(a.get('level'))
    proportion = float(a.get('proportion'))
    letter = level > 0.5 and "W" or "L"
sourcescraper = "lisburn-city-council-minutes"

limit = 15

print '<?xml version="1.0" encoding="UTF-8" ?>\n'
print ' <rss version="2.0">\n'
print "  <channel>\n"
print "    <title>Lisburn City Council minutes</title>\n"
print "    <description>RSS feed for the latest public minutes published by Lisburn City Council</description>\n"
print "    <link>http://scraperwiki.com/scrapers/%s/</link>\n" % sourcescraper
print "    <lastBuildDate>%s</lastBuildDate>\n" % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
print "    <pubDate>%s</pubDate>\n" % strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())

# rows
current = 0
for row in sorted(getData(sourcescraper, 0, 0), reverse=True, key=lambda row: strptime(row.get("Date"), "%d %B %Y")):
    current += 1
    if current >= limit:
        break

    print "    <item>\n"
    print "      <title>%s</title>\n" % row.get("Title")
    print "      <description>%s</description>\n" % row.get("Committee")
    print "      <link>%s</link>\n" % escape(row.get("Link"))
    print "      <pubDate>%s</pubDate>\n" % strftime(
        "%a, %d %b %Y %H:%M:%S +0000", strptime(row.get("Date"), "%d %B %Y")
    )
    print "    </item>\n"

print " </channel>\n"
print "</rss> \n"  # Creating an RSS feed from the 'Lisburn City Council minutes' scraper
Code example #14
import os
import urllib
from scraperwiki.apiwrapper import getData

# sourcescraper is assumed to be set earlier in the full view (not shown in this excerpt)
limit = 200
offset = 0

cin = { }
cco = { }

memberkeys = [ 'advisors', 'banks', 'contractor', 'members', 'private_advisors', 'private_contractors' ]

pfis = [ ]
memberquery = os.getenv('URLQUERY')
if memberquery:
    memberquery = urllib.unquote_plus(memberquery[2:])
print memberquery


for i, row in enumerate(getData(sourcescraper, limit, offset)):

    # debug: show the first couple of rows so the available columns are visible
    if i <= 1:
        print row

    members = set()
    for k in memberkeys:
        try:
            members = members.union(eval(row.get(k, '[]')))
        except SyntaxError:
            pass
        except NameError:
            pass

    value = float(row.get('Capital_Value', 0))
    members = sorted(members)
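The member lists above are parsed with eval, which will execute anything that happens to be valid Python. A minimal sketch of the same parsing with ast.literal_eval, which only accepts literals (parse_members is a name introduced here; assumes Python 2.6+ for the ast module):

import ast

def parse_members(row, columns):
    # union the list-valued columns, skipping values that are not valid Python literals
    members = set()
    for k in columns:
        try:
            members.update(ast.literal_eval(row.get(k) or '[]'))
        except (ValueError, SyntaxError):
            pass
    return sorted(members)

# usage, e.g.:  members = parse_members(row, memberkeys)
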
Code example #15
from scraperwiki.apiwrapper import getData
from time import strftime, strptime, gmtime
from xml.sax.saxutils import escape

# sourcescraper is assumed to be set earlier in the full view (not shown in this excerpt)
limit = 15

print '<?xml version="1.0" encoding="UTF-8" ?>\n'
print ' <rss version="2.0">\n'
print '  <channel>\n'
print '    <title>City of Lincoln Council Committee Updates</title>\n'
print '    <description>RSS feed for Monthly Committee Updates from the City of Lincoln Council</description>\n'
print '    <link>http://scraperwiki.com/scrapers/%s/</link>\n' % sourcescraper
print '    <lastBuildDate>%s</lastBuildDate>\n' % strftime(
    "%a, %d %b %Y %H:%M:%S +0000", gmtime())
print '    <pubDate>%s</pubDate>\n' % strftime("%a, %d %b %Y %H:%M:%S +0000",
                                               gmtime())

# rows
current = 0
for row in sorted(getData(sourcescraper, 0, 0),
                  reverse=True,
                  key=lambda row: strptime(row.get('Date'), '%d %B %Y')):
    current += 1
    if current >= limit:
        break

    print '    <item>\n'
    print '      <title>%s</title>\n' % row.get('Title')
    print '      <description>%s</description>\n' % row.get('Event')
    print '      <link>%s</link>\n' % escape(row.get('Link'))
    print '      <pubDate>%s</pubDate>\n' % strftime(
        "%a, %d %b %Y %H:%M:%S +0000", strptime(row.get('Date'), '%d %B %Y'))
    print '    </item>\n'

print ' </channel>\n'
print '</rss>\n'
print '<br/>'
print '<tr><th><font color="red" face="verdana" size="5">POINTS TABLE</font></th></tr>'
print '<br/>'
print '<br/>'
print '<a href="http://scraperwiki.com/views/jan11-match-result-premier-league/full/">January 2011 Results</a>'
print '<br/>'
print '<br/>'

print '<table border="5" bgcolor ="CCCCFF" cellpadding="15" style="border-collapse:collapse;">'

print "<tr>",#column headings section
for key in keys:
    print "<th><bgcolor=003366>%s</th>" % key,
print "</tr>"

for row in getData(sourcescraper, limit, offset):  # data rows section
    print "<tr>",
    for key in keys:
        print "<td>%s</td>" % row.get(key),
        print "**"
    print "</tr>"
print "</table>"

print '<h4>Result from scraper: %s</h4>' % (sourcescraper)
print '<h5>Creator:1019053-Yomal A. Mudalige</h5>'
##################
# Note from the creator (Yomal Mudalige): I have tried to change the column order, but it is not finalized yet, hence the 'A', 'B', ... prefixes. Thanks.
# References for this scraper: ScraperWiki Tutorial 3 and "Python Power!: The Comprehensive Guide" by Matt Telles, p. 333.
##################

from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation 
Code example #18
#########################################
# Simple table of values from one scraper
#########################################
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation
import re

sourcescraper = "swale-democratic-services-events-diary"

limit = 30
offset = 0

keys = getKeys(sourcescraper)
data = getData(sourcescraper, 1, offset)

# regular expressions: ure captures an http...uid= link, nre captures the text following a <strong> tag
ure = re.compile(".*(http.*uid=[0-9]*)", re.DOTALL)
nre = re.compile(".*strong.(.*strong)", re.DOTALL)
allre = re.compile(".*", re.DOTALL)

# feed metadata: use the datetime of the first returned row as the feed's <updated> timestamp
for row in getData(sourcescraper, limit, offset):
    updated = "%s%s" % (row.get('datetime'), "Z")
    break

print """<?xml version="1.0" encoding="utf-8"?>

<feed xmlns='http://www.w3.org/2005/Atom'>
    <title type='text'>Swale Democratic Services Calendar</title>
    <subtitle type='html'>Open Swale</subtitle>
    <updated>%s</updated>
    <id>http://scraperwikiviews.com/run/feed/?</id>
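The feed header above is cut off mid-string. A minimal sketch of how the per-event entries might be emitted, reusing the precompiled ure and nre expressions; the 'datetime' field follows the snippet, but 'description' and 'title' and the overall entry layout are assumptions rather than the original view's code:

for row in getData(sourcescraper, limit, offset):
    desc = row.get('description') or ''                               # assumed field name
    lmatch = ure.match(desc)
    nmatch = nre.match(desc)
    link = lmatch and lmatch.group(1) or ''
    name = nmatch and nmatch.group(1) or row.get('title', '')         # assumed field name
    print "    <entry>"
    print "        <title type='text'>%s</title>" % name
    print "        <link href='%s'/>" % link
    print "        <updated>%s%s</updated>" % (row.get('datetime'), "Z")
    print "    </entry>"

print "</feed>"
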
Code example #19
from scraperwiki.apiwrapper import getKeys, getData, getDataByDate, getDataByLocation
import urllib
import csv
import datetime

days4 = datetime.date.today() - datetime.timedelta(2)
days8 = datetime.date.today() - datetime.timedelta(4)

sourcescraper = "takeover-panel-info"

# get from the primary data
limit = 90
offset = 0
rows = list(getData(sourcescraper, limit, offset))


# join the human generated data from the spreadsheet

# couldn't download from google directly!!!  so had to cache the result
#url = "http://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=0Aju6C2vCJrTNdEJkRkNGR1V4ZHJzWHRGR2x6cmEzN1E&exportFormat=csv"
url = 'http://seagrass.goatchurch.org.uk/~julian/takeoverpanel.csv'
url = 'https://spreadsheets.google.com/pub?key=0Aju6C2vCJrTNdEJkRkNGR1V4ZHJzWHRGR2x6cmEzN1E&output=csv'
fin = urllib.urlopen(url)

hdata = { }
lines = fin.readlines()
clist = list(csv.reader(lines))
headers = clist.pop(0)
for row in clist:
    hrow = dict(zip(headers, row))
    stockcode = hrow.get('OffereeStockCode')