Beispiel #1
0
	def extractItems(response):
		"""Parse the listing table in ``response`` and store one item per row.

		The body of ``response`` is parsed with BeautifulSoup and the first
		``<table width="90%">`` is scanned row by row. For every row that has
		a ``<td>`` cell, a new ``DliMetaItem`` is filled from that cell:

		* ``metadataLink`` -- ``href`` of the first ``<a>`` in the cell, only
		  set when the cell contains an anchor.
		* ``title`` -- the first text node of the cell.
		* ``barcode`` -- the second text node with leading ``','`` / ``' '``
		  characters stripped.
		* ``pages`` -- the second-to-last ``'.'``-separated piece of the
		  second text node.

		Each item is stored in ``self.temp_items`` keyed by its barcode.

		NOTE(review): ``self`` is not a parameter of this function; it is
		assumed to be available from the enclosing scope -- confirm against
		the surrounding code.

		Args:
			response: object whose ``body`` attribute holds the HTML to parse.

		Returns:
			None. Results are written to ``self.temp_items``.
		"""
		soup = BeautifulSoup.BeautifulSoup(response.body)
		table = soup.find('table', width='90%')
		if table is None:
			# No listing table on this page, so there is nothing to extract.
			return
		for row in table.findAll('tr'):
			cell = row.find('td')
			if not cell:
				continue
			metaText = cell.findAll(text=True)
			if len(metaText) < 2:
				# Title and barcode/pages text are both required; skip cells
				# (e.g. spacers) that do not carry them.
				continue
			# A fresh item per row: reusing one instance would make every
			# entry in temp_items alias the same object holding only the
			# values of the last row.
			item = DliMetaItem()
			anchorTag = cell.find('a')
			if anchorTag:
				item.metadataLink = anchorTag.attrMap['href']
			item.pages = metaText[1].split('.')[-2]  # -1 is empty since there is a dot at the end.
			item.title = metaText[0]
			item.barcode = metaText[1].lstrip(', ')
			self.temp_items[item.barcode] = item