Beispiel #1
0
	def extractItems(self, response):
		"""Parse the listing table of ``response`` into ``self.temp_items``.

		The page is expected to contain a ``<table width="90%">`` whose rows
		each hold one cell with two text nodes: the title, followed by a
		metadata string that starts with the barcode and ends with a
		dot-terminated page count.  Optionally the cell contains an ``<a>``
		whose ``href`` is the metadata link.

		One new ``DliMetaItem`` is created per usable row and stored in
		``self.temp_items`` under its barcode.  Rows without a cell, or with
		fewer than two text nodes, are skipped.

		:param response: object exposing the raw HTML as ``response.body``.
		:returns: ``None``; results are stored on ``self.temp_items``.
		"""
		soup = BeautifulSoup.BeautifulSoup(response.body)
		table = soup.find('table', width='90%')
		if table is None:
			# No listing table on this page, so there is nothing to extract.
			return
		for row in table.findAll('tr'):
			cell = row.find('td')
			if cell is None:
				continue
			metaText = cell.findAll(text=True)
			if len(metaText) < 2:
				# Header/spacer rows do not carry the title + metadata pair.
				continue
			# A fresh item per row; reusing one instance would make every
			# entry of temp_items point at the data of the last row.
			item = DliMetaItem()
			anchorTag = cell.find('a')
			if anchorTag is not None:
				# Tag.get() is the public accessor for attributes; attrMap is
				# an internal map that may not have been populated yet.
				href = anchorTag.get('href')
				if href:
					item.metadataLink = href
			# -1 is empty since there is a dot at the end.
			item.pages = metaText[1].split('.')[-2].strip('\n \t')
			item.title = metaText[0]
			item.barcode = metaText[1].lstrip(', ')
			self.temp_items[item.barcode] = item
Beispiel #2
0
	def extractItems(self, response):
		"""Parse the listing table of ``response`` into items keyed by barcode.

		The page is expected to contain a ``<table width="90%">`` whose rows
		each hold one cell with two text nodes: the title, followed by a
		metadata string that starts with the barcode and ends with a
		dot-terminated page count.  Optionally the cell contains an ``<a>``
		whose ``href`` is the metadata link.

		Rows without a cell, or with fewer than two text nodes, are skipped.

		:param response: object exposing the raw HTML as ``response.body``.
		:returns: dict mapping barcode -> partially filled ``DliMetaItem``
			(empty when the page has no matching table).
		"""
		# we keep this dictionary for partially filled items.
		temp_items = {}
		soup = BeautifulSoup.BeautifulSoup(response.body)
		table = soup.find('table', width='90%')
		if table is None:
			return temp_items
		for row in table.findAll('tr'):
			cell = row.find('td')
			if cell is None:
				continue
			metaText = cell.findAll(text=True)
			if len(metaText) < 2:
				# Header/spacer rows do not carry the title + metadata pair.
				continue
			# A fresh item per row; reusing one instance would make every
			# dictionary entry point at the data of the last row.
			item = DliMetaItem()
			anchorTag = cell.find('a')
			if anchorTag is not None:
				# Tag.get() is the public accessor for attributes; attrMap is
				# an internal map that may not have been populated yet, which
				# made the previous truthiness check drop the link.
				href = anchorTag.get('href')
				if href:
					# Same item[...] access as the other fields.
					# NOTE(review): assumes DliMetaItem declares a
					# 'metadataLink' field - confirm against its definition.
					item['metadataLink'] = href
			# -1 is empty since there is a dot at the end.
			item['pages'] = metaText[1].split('.')[-2].strip('\n \t')
			item['title'] = metaText[0]
			item['barcode'] = metaText[1].lstrip(', ')
			temp_items[item['barcode']] = item
		# Earlier code stored the items on self.temp_items while returning the
		# (always empty) local dict.  Fill the returned dict, and keep the
		# instance-level side effect for callers that rely on it.
		shared_items = getattr(self, 'temp_items', None)
		if shared_items is not None:
			shared_items.update(temp_items)
		return temp_items
Beispiel #3
0
 def extractItems(self, response):
     """Parse the listing table of ``response`` into items keyed by barcode.

     The page is expected to contain a ``<table width="90%">`` whose rows
     each hold one cell with two text nodes: the title, followed by a
     metadata string that starts with the barcode and ends with a
     dot-terminated page count.  Optionally the cell contains an ``<a>``
     whose ``href`` is the metadata link.

     Rows without a cell, or with fewer than two text nodes, are skipped.

     :param response: object exposing the raw HTML as ``response.body``.
     :returns: dict mapping barcode -> partially filled ``DliMetaItem``
         (empty when the page has no matching table).
     """
     # we keep this dictionary for partially filled items.
     temp_items = {}
     soup = BeautifulSoup.BeautifulSoup(response.body)
     table = soup.find('table', width='90%')
     if table is None:
         return temp_items
     for row in table.findAll('tr'):
         cell = row.find('td')
         if cell is None:
             continue
         metaText = cell.findAll(text=True)
         if len(metaText) < 2:
             # Header/spacer rows do not carry the title + metadata pair.
             continue
         # A fresh item per row; reusing one instance would make every
         # dictionary entry point at the data of the last row.
         item = DliMetaItem()
         anchorTag = cell.find('a')
         if anchorTag is not None:
             # Tag.get() is the public accessor for attributes; attrMap is
             # an internal map that may not have been populated yet, which
             # made the previous truthiness check drop the link.
             href = anchorTag.get('href')
             if href:
                 # Same item[...] access as the other fields.
                 # NOTE(review): assumes DliMetaItem declares a
                 # 'metadataLink' field - confirm against its definition.
                 item['metadataLink'] = href
         # -1 is empty since there is a dot at the end.
         item['pages'] = metaText[1].split('.')[-2].strip('\n \t')
         item['title'] = metaText[0]
         item['barcode'] = metaText[1].lstrip(', ')
         temp_items[item['barcode']] = item
     # Earlier code stored the items on self.temp_items while returning the
     # (always empty) local dict.  Fill the returned dict, and keep the
     # instance-level side effect for callers that rely on it.
     shared_items = getattr(self, 'temp_items', None)
     if shared_items is not None:
         shared_items.update(temp_items)
     return temp_items