def parse_item(self, response):
    """Turn a fetched page into a one-element list holding a recipe item.

    The item starts from whatever ``parse_recipe`` collects for the page
    (seeded with the response URL and ``self.source``); image, description
    and name values found on the page are then added through the loader.
    """
    selector = HtmlXPathSelector(response)

    # Seed dict that parse_recipe fills in and hands back.
    seed = {"url": response.url, "source": self.source}
    parsed = parse_recipe(selector, seed)
    base_item = RecipeItem.from_dict(parsed)
    item_loader = RecipeItemLoader(item=base_item)

    image_nodes = select_class(selector, "post_image")
    item_loader.add_value("image", image_nodes.select("@src").extract())

    meta_description = selector.select('//meta[@name="description"]/@content')
    item_loader.add_value("description", meta_description.extract())

    title_nodes = select_class(selector, "entry-title")
    item_loader.add_value("name", title_nodes.select("text()").extract())

    return [item_loader.load_item()]
def parse_item(self, response):
    """Build and return a single recipe item for the given response.

    ``parse_recipe`` supplies the initial fields (starting from the page URL
    and ``self.source``); the loader then receives extra image, description
    and name values pulled from the page. The loaded item is returned
    directly, not wrapped in a list.
    """
    page = HtmlXPathSelector(response)
    initial_fields = {'url': response.url, 'source': self.source}
    recipe_item = RecipeItem.from_dict(parse_recipe(page, initial_fields))
    recipe_loader = RecipeItemLoader(item=recipe_item)

    # Image: ``src`` attribute of whatever select_class yields for
    # 'post_image'.
    image_src = select_class(page, 'post_image').select('@src').extract()
    recipe_loader.add_value('image', image_src)

    # Description: content of the page's <meta name="description"> tag.
    description = page.select(
        '//meta[@name="description"]/@content'
    ).extract()
    recipe_loader.add_value('description', description)

    # Name: direct text of the 'entry-title' selection.
    title_text = select_class(page, 'entry-title').select('text()').extract()
    recipe_loader.add_value('name', title_text)

    return recipe_loader.load_item()
def parse_item(self, response):
    """Parse one response into a list containing a single loaded recipe item.

    Fields gathered by ``parse_recipe`` (seeded with the URL and
    ``self.source``) form the base item. Image, description and name values
    are then added via the item loader, in that order.
    """
    doc = HtmlXPathSelector(response)
    recipe_fields = parse_recipe(
        doc,
        {'url': response.url, 'source': self.source},
    )
    item = RecipeItem.from_dict(recipe_fields)
    populate = RecipeItemLoader(item=item)

    images = select_class(doc, 'post_image').select('@src')
    populate.add_value('image', images.extract())

    summary = doc.select('//meta[@name="description"]/@content')
    populate.add_value('description', summary.extract())

    titles = select_class(doc, 'entry-title').select('text()')
    populate.add_value('name', titles.extract())

    loaded = populate.load_item()
    return [loaded]
def parse_recipe(scope, data=None):
    """Collect recipe fields from the ``hrecipe`` selection under *scope*.

    Args:
        scope: Selector to search; it is passed to ``select_class`` to locate
            the ``hrecipe`` root.
        data: Optional dict to fill in. When given, it is updated in place
            and returned, so pre-seeded keys (e.g. ``url``) are kept unless
            overwritten by one of the keys below. When omitted, a new dict
            is created for this call.

    Returns:
        The dict, with these keys set: ``name``, ``yield``, ``published``,
        ``description``, ``duration``, ``prepTime``, ``cookTime`` (each the
        result of ``.extract()`` on the descendant text nodes of the
        matching class) and ``ingredients`` (a list with one joined text
        string per ``ingredient`` node).
    """
    # A ``{}`` default would be created once and shared by every call that
    # omits *data*, leaking fields from one recipe into the next.
    if data is None:
        data = {}

    root = select_class(scope, 'hrecipe')

    # Output key -> class name looked up beneath the root.
    text_fields = (
        ('name', 'fn'),
        ('yield', 'yield'),
        ('published', 'published'),
        ('description', 'summary'),
        ('duration', 'duration'),
        ('prepTime', 'preptime'),
        ('cookTime', 'cooktime'),
    )
    for key, class_name in text_fields:
        data[key] = select_class(root, class_name).select('.//text()').extract()

    # Each ingredient's text fragments are concatenated into one string.
    data['ingredients'] = [
        ''.join(ingredient.select('.//text()').extract())
        for ingredient in select_class(root, 'ingredient')
    ]
    return data