Beispiel #1
0
def main():
	"""Scrape veterinarian listings for Torino from paginegialle.it.

	Fetches result pages 1 to 3. For every address block of every listing
	a "title, street locality, tel" line is built, echoed to stderr and
	collected in a per-page list. Fields that cannot be found in the page
	are left empty and the failure is reported on stderr.

	NOTE(review): in the code visible here the collected lines are never
	appended to ``table`` or written to disk -- confirm whether that step
	is missing from this function.
	"""
	table = Datasheet()
	for page in range(1, 4):
		url = URL("http://torino.paginegialle.it/pgol/4-veterinari/3-torino/p-%s?mr=50" % page)
		print("collecting from %s" % url)
		connection = url.open()
		try:
			html = connection.read()
		finally:
			# Release the HTTP connection even if the read fails.
			connection.close()
		doc = Document(html)
		row = []
		for j, item in enumerate(doc.by_class('item_sx')):
			# Reset per listing so that a failed lookup cannot silently
			# reuse the title of the previous listing.
			title = ''
			try:
				title = item.by_class('item_head')[0].by_tag('a')[0].content
			except IndexError as e:
				sys.stderr.write("%s %s\n" % (j, e))
			for z, div in enumerate(item.by_class('address')):
				if div is None:
					continue
				# Reset per address for the same reason: a missing field
				# must stay empty instead of inheriting the previous
				# record's street, locality or phone number.
				street = ''
				locality = ''
				tel = ''
				try:
					street = div.by_class('street-address')[0].content
					locality = div.by_class('locality')[0].content
					tel = div.by_class('tel')[0].by_class('value')[0].content
				except IndexError as e:
					sys.stderr.write("%s %s\n" % (z, e))
				# Commas are stripped from street and phone because the
				# comma is used as the field separator of the output line.
				save = "%s, %s %s, %s \n" % (
					plaintext(title),
					plaintext(street).replace(",", ""),
					plaintext(locality).replace('(TO)', ''),
					plaintext(tel).replace(",", ""),
				)
				sys.stderr.write(save + "\n")
				row.append(save)
Beispiel #2
0
def main():
	"""Scrape the list of Torino hospitals from comuniecitta.it into a file.

	Every ``li`` element of each ``ulamm`` list (the first such list is
	skipped) becomes an "entry text, kind" line, where kind is the plain
	text of ``ul.previous`` and newlines inside the entry text are turned
	into commas. All lines are stored as one single Datasheet row, which
	is then saved to ``files/h_torino.txt``.
	"""
	table = Datasheet()

	url = URL("http://www.comuniecitta.it/torino/elenco-ospedali-di-torino.html")
	connection = url.open()
	try:
		html = connection.read()
	finally:
		# Release the HTTP connection even if the read fails.
		connection.close()
	doc = Document(html)

	row = []
	for ul in doc.by_class('ulamm')[1:]:
		kind = plaintext(ul.previous.content)
		for el in ul.by_tag('li'):
			if el is not None:
				row.append("%s, %s \n" % (plaintext(el.content).replace('\n', ','), kind))
	table.append(row)

	table.save("files/h_torino.txt")