Example #1
from scraperwiki.utils import swimport

# swimport loads the code of another ScraperWiki scraper by its short name
dbget = swimport('dbgetpy')

# Save: fetch each page and store it in the scraper's datastore
urls = ["https://scraperwiki.com/scrapers/dbgetpy", "https://scraperwiki.com"]
for url in urls:
  dbget.save_page(url)

# Retrieve: read a previously saved page back out of the datastore
html = dbget.get_page("https://scraperwiki.com/scrapers/dbgetpy")
print html
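
For context, a minimal sketch of what dbgetpy presumably does, assuming ScraperWiki's classic sqlite.save/sqlite.select API; save_page_sketch and get_page_sketch are illustrative names, not dbgetpy's actual code:

from urllib2 import urlopen
from scraperwiki import sqlite

def save_page_sketch(url):
  # Fetch the page and upsert its HTML, keyed on the URL
  sqlite.save(['url'], {"url": url, "html": urlopen(url).read()}, table_name='pages')

def get_page_sketch(url):
  # Look the URL up again; returns None if the page was never saved
  rows = sqlite.select("html from pages where url=?", [url])
  return rows[0]['html'] if rows else None
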
Example #2

#!/usr/bin/env python2
from urllib2 import urlopen, build_opener, HTTPCookieProcessor
from lxml.html import fromstring
from httplib import BadStatusLine
from json import loads

# Use a local htmltable2matrix if one exists, otherwise pull it from the
# htmltable2matrix scraper on ScraperWiki
try:
  from htmltable2matrix import htmltable2matrix
except ImportError:
  from scraperwiki.utils import swimport
  htmltable2matrix = swimport('htmltable2matrix').htmltable2matrix

from scraperwiki.sqlite import save, get_var, save_var

def main():
  # Initialise the resume counter on the first run
  if get_var('skip') is None:
    save_var('skip', 0)
  routesTable = getroutes()  # getroutes() is defined elsewhere in the full scraper
  for row in routesTable:
    # Skip routes whose origin and destination city codes match
    if row['key'][0:2] != row['key'][2:4]:
      get_route_schedules(row['id'], row['key'])

#------------------------------------------

def get_route_schedules(routeId, route):
  # Check that it's not a route within one city
  assert route[0:2] != route[2:4]

  xml, theurl = grab(route)  # grab() is defined elsewhere in the full scraper
  save(['routeId', 'url'], {
    "routeId": routeId,
    "url": theurl,  # remaining fields were truncated in the source excerpt
  })
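
main() above initialises a 'skip' counter with get_var/save_var, but the code that consumes it was truncated out of this excerpt. A minimal sketch of the usual resume pattern, assuming get_var accepts a default value:

from scraperwiki.sqlite import get_var, save_var

def process_all(items):
  skip = get_var('skip', 0)  # how many items earlier runs already handled
  for i, item in enumerate(items):
    if i < skip:
      continue  # already processed on a previous run
    # ... process the item here ...
    save_var('skip', i + 1)  # checkpoint so the next run resumes here
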
Example #3

#Load Chainsaw
from scraperwiki.utils import swimport

chainsaw = swimport('chainsaw')

#Load other stuff
from urllib2 import urlopen
from lxml.html import fromstring

# Parse the ScraperWiki homepage into an lxml document
xml = fromstring(urlopen('http://scraperwiki.com').read())


def example_htmltable2matrix():
    # Grab the first <table> on the page and flatten it into a matrix
    table = xml.xpath('//table')[0]
    print chainsaw.htmltable2matrix(table)


def main():
    example_htmltable2matrix()


main()
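
htmltable2matrix comes from the chainsaw scraper, so its exact behaviour is not shown in this example. A minimal sketch of what such a helper might look like, assuming it flattens an lxml <table> element into a list of row lists (a real implementation would also need to handle colspan and rowspan):

def htmltable2matrix_sketch(table):
    # One output row per <tr>, one entry per <td> or <th>, whitespace stripped
    return [[cell.text_content().strip() for cell in tr.xpath('./td|./th')]
            for tr in table.xpath('.//tr')]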