Python parse_recipes Beispiele

Programmiersprache: Python

Namespace / Paketname: openrecipes.schema_org_parser

Methode / Funktion: parse_recipes

Beispiele auf hotexamples.com: 7

Python parse_recipes – 7 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion openrecipes.schema_org_parser.parse_recipes, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um deren Qualität zu verbessern.

Beispiel #1

Datei anzeigen

    def parse_item(self, response):
        """Turn one recipe page into recipe items.

        ``parse_recipes`` is tried first.  When it yields results, each one
        gets the extracted ``data-lazy-src`` value(s) of the first
        ``wp-image`` ``<img>`` attached under ``image`` and is converted with
        ``RecipeItem.from_dict``.  When it yields nothing, the name and the
        ingredients are collected by XPath through a ``RecipeItemLoader``.

        Returns a list of items in the first case and the single loaded item
        in the second.
        """
        selector = HtmlXPathSelector(response)
        lazy_images = selector.select("descendant-or-self::img[@class and contains(@class, 'wp-image')][1]/@data-lazy-src").extract()

        schema_recipes = parse_recipes(selector, {'source': self.source, 'url': response.url})
        if schema_recipes:
            # The page carries schema.org markup: only the image is added.
            for entry in schema_recipes:
                entry['image'] = lazy_images
            return [RecipeItem.from_dict(entry) for entry in schema_recipes]

        # No schema.org markup: scrape the individual fields by hand.
        loader = RecipeItemLoader(item=RecipeItem())
        loader.add_value('source', self.source)
        loader.add_value('url', response.url)
        loader.add_value('image', lazy_images)

        title_nodes = selector.select('//*[@class="post-title"]/h1/text()')
        loader.add_value('name', title_nodes.extract())

        # Ingredients may sit in paragraphs or in list items; both kinds of
        # candidate node are filtered through is_ingredient_container.
        candidate_xpaths = (
            '//div[@id="recipe" or @class="span9"]/p',
            '//*[@class="span9"]//ul/li',
        )
        for candidate_xpath in candidate_xpaths:
            for node in selector.select(candidate_xpath):
                if is_ingredient_container(node):
                    loader.add_value('ingredients', node.select('text()').extract())

        # List items explicitly classed "ingredient" are taken unfiltered.
        for text_node in selector.select('//li[@class="ingredient"]/text()'):
            loader.add_value('ingredients', text_node.extract())

        return loader.load_item()

Beispiel #2

Datei anzeigen

Datei: marthastewart_sitemapspider.py Projekt: rkroll/openrecipes

    def parse_item(self, response):
        """Return one ``RecipeItem`` per recipe ``parse_recipes`` finds.

        The spider's ``source`` and the response URL are handed to
        ``parse_recipes`` together with the page selector.
        """
        selector = HtmlXPathSelector(response)
        page_data = {'source': self.source, 'url': response.url}
        parsed = parse_recipes(selector, page_data)

        return [RecipeItem.from_dict(entry) for entry in parsed]

Beispiel #3

Datei anzeigen

Datei: food_spider.py Projekt: eleclerc/openrecipes

    def parse_item(self, response):
        """Convert a recipe page into a list of ``RecipeItem`` objects.

        URLs ending in ``/review`` produce an empty list: the link extractor
        regex cannot easily tell review pages from recipe pages, so they are
        filtered out here.  For every parsed recipe, the ``photo`` and
        ``image`` values (when present) are passed through ``flatten``.
        """
        page_url = response.url
        if page_url.endswith('/review'):
            return []

        selector = HtmlXPathSelector(response)
        parsed = parse_recipes(selector, {'source': self.source})
        for entry in parsed:
            for field in ('photo', 'image'):
                if field in entry:
                    entry[field] = flatten(entry[field])

        items = []
        for entry in parsed:
            items.append(RecipeItem.from_dict(entry))
        return items

Beispiel #4

Datei anzeigen

Datei: food_spider.py Projekt: jterskine/my_stuff

    def parse_item(self, response):
        """Build ``RecipeItem`` objects from the recipes found on a page.

        Review pages (URL ending in ``/review``) are hard to exclude in the
        link extractor regex, so they are dropped here by returning an empty
        list.  Any ``photo`` or ``image`` entry on a parsed recipe is
        replaced by the result of ``flatten`` before conversion.
        """
        is_review_page = response.url.endswith('/review')
        if is_review_page:
            return []

        page = HtmlXPathSelector(response)
        found = parse_recipes(page, {'source': self.source})

        picture_keys = ('photo', 'image')
        for data in found:
            for key in picture_keys:
                if key in data:
                    data[key] = flatten(data[key])

        return [RecipeItem.from_dict(data) for data in found]

Beispiel #5

Datei anzeigen

Datei: foodnetwork_spider.py Projekt: eleclerc/openrecipes

    def parse_item(self, response):
        """Convert a recipe page into a list of ``RecipeItem`` objects.

        URLs containing ``/reviews/`` yield an empty list, because the link
        extractor regex cannot easily separate review pages from recipe
        pages.  For each parsed recipe, the ``photo`` and ``image`` values
        (when present) are flattened and every ``_med.`` in them is replaced
        with ``_lg.``.
        """
        page_url = response.url
        if '/reviews/' in page_url:
            return []

        selector = HtmlXPathSelector(response)
        parsed = parse_recipes(selector, {'source': self.source, 'url': page_url})
        for entry in parsed:
            for field in ('photo', 'image'):
                if field not in entry:
                    continue
                entry[field] = flatten(entry[field])
                entry[field] = entry[field].replace('_med.', '_lg.')

        items = []
        for entry in parsed:
            items.append(RecipeItem.from_dict(entry))
        return items

Beispiel #6

Datei anzeigen

    def parse_item(self, response):
        """Build ``RecipeItem`` objects from the recipes found on a page.

        Review pages (URL containing ``/reviews/``) are hard to exclude in
        the link extractor regex, so they are dropped here by returning an
        empty list.  Any ``photo`` or ``image`` entry on a parsed recipe is
        flattened and has ``_med.`` replaced with ``_lg.`` before conversion.
        """
        is_review_page = '/reviews/' in response.url
        if is_review_page:
            return []

        page = HtmlXPathSelector(response)
        found = parse_recipes(page, {'source': self.source, 'url': response.url})

        picture_keys = ('photo', 'image')
        for data in found:
            for key in picture_keys:
                if key in data:
                    data[key] = flatten(data[key])
                    data[key] = data[key].replace('_med.', '_lg.')

        return [RecipeItem.from_dict(data) for data in found]

Beispiel #7

Datei anzeigen

Datei: marthastewart_spider.py Projekt: rkroll/openrecipes

    def parse_item(self, response):
        """Return a ``RecipeItem`` for every recipe ``parse_recipes`` yields.

        The page selector is passed to ``parse_recipes`` along with the
        spider's ``source`` and the response URL.
        """
        selector = HtmlXPathSelector(response)
        page_data = {'source': self.source, 'url': response.url}
        parsed = parse_recipes(selector, page_data)

        items = []
        for entry in parsed:
            items.append(RecipeItem.from_dict(entry))
        return items