コード例 #1
0
    def parse_item(self, response):
        """Build a recipe item from ``response``.

        The page is first run through ``parse_recipe`` and the result is
        then topped up with the image, description and name values read
        directly from the page.

        NOTE(review): presumably a Scrapy spider callback -- confirm
        against the enclosing class, which is not visible here.

        Returns:
            A one-element list holding the loaded item.
        """
        selector = HtmlXPathSelector(response)

        # Seed values handed to parse_recipe together with the selector.
        seed = {"url": response.url, "source": self.source}
        parsed = parse_recipe(selector, seed)
        item_loader = RecipeItemLoader(item=RecipeItem.from_dict(parsed))

        image_src = select_class(selector, "post_image").select("@src").extract()
        item_loader.add_value("image", image_src)

        meta_description = selector.select('//meta[@name="description"]/@content').extract()
        item_loader.add_value("description", meta_description)

        title_text = select_class(selector, "entry-title").select("text()").extract()
        item_loader.add_value("name", title_text)

        return [item_loader.load_item()]
コード例 #2
0
    def parse_item(self, response):
        """Build and return a single recipe item for ``response``.

        ``parse_recipe`` supplies the initial field values; the image,
        description and name values read from the page are then added
        through the item loader.

        Returns:
            The loaded item itself (not wrapped in a list).
        """
        page = HtmlXPathSelector(response)
        initial = {'url': response.url, 'source': self.source}
        item_loader = RecipeItemLoader(
            item=RecipeItem.from_dict(parse_recipe(page, initial)))

        # (loader field, nodes whose extracted values feed that field)
        field_nodes = (
            ('image', select_class(page, 'post_image').select('@src')),
            ('description', page.select('//meta[@name="description"]/@content')),
            ('name', select_class(page, 'entry-title').select('text()')),
        )
        for field_name, nodes in field_nodes:
            item_loader.add_value(field_name, nodes.extract())

        return item_loader.load_item()
コード例 #3
0
    def parse_item(self, response):
        """Turn ``response`` into a recipe item wrapped in a list.

        Starts from the dict produced by ``parse_recipe`` and adds the
        image, description and name values found on the page.

        Returns:
            A list containing the single loaded item.
        """
        doc = HtmlXPathSelector(response)
        base_fields = {'url': response.url, 'source': self.source}
        parsed_fields = parse_recipe(doc, base_fields)
        item_loader = RecipeItemLoader(item=RecipeItem.from_dict(parsed_fields))

        image_nodes = select_class(doc, 'post_image')
        item_loader.add_value('image', image_nodes.select('@src').extract())

        description_nodes = doc.select('//meta[@name="description"]/@content')
        item_loader.add_value('description', description_nodes.extract())

        title_nodes = select_class(doc, 'entry-title')
        item_loader.add_value('name', title_nodes.select('text()').extract())

        loaded = item_loader.load_item()
        return [loaded]
コード例 #4
0
ファイル: hrecipe_parser.py プロジェクト: jterskine/my_stuff
def parse_recipe(scope, data=None):
    """Collect hRecipe fields found under ``scope`` into ``data``.

    Args:
        scope: Selector-like object that is passed to ``select_class`` to
            locate the ``hrecipe`` root.
        data: Optional dict to fill in. When given, it is updated in place
            and returned, so any keys the caller already set (for example
            ``url``/``source``) are kept unless overwritten here. When
            omitted, a new dict is created for this call.

    Returns:
        The dict with the keys ``name``, ``yield``, ``published``,
        ``description``, ``duration``, ``prepTime`` and ``cookTime`` set to
        whatever ``.extract()`` yields for the matching class, plus
        ``ingredients`` as a list with one joined text string per
        ``ingredient`` node.
    """
    # A ``{}`` default would be created once and shared by every call that
    # omits ``data``, leaking fields from one recipe into the next.
    if data is None:
        data = {}

    root = select_class(scope, 'hrecipe')

    # (output key, class name looked up under the hrecipe root)
    text_fields = (
        ('name', 'fn'),
        ('yield', 'yield'),
        ('published', 'published'),
        ('description', 'summary'),
        ('duration', 'duration'),
        ('prepTime', 'preptime'),
        ('cookTime', 'cooktime'),
    )
    for key, class_name in text_fields:
        data[key] = select_class(root, class_name).select('.//text()').extract()

    # Each ingredient may span several text nodes; join them into one string.
    data['ingredients'] = [
        ''.join(ingredient.select('.//text()').extract())
        for ingredient in select_class(root, 'ingredient')
    ]
    return data