Python flatten Exemples, openrecipes.util.flatten Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : food_spider.py Projet : eleclerc/openrecipes

    def parse_item(self, response):
        """Build ``RecipeItem`` objects from a recipe page response.

        Responses whose URL ends with ``/review`` produce an empty list.
        For every parsed recipe, the ``photo`` and ``image`` fields (when
        present) are replaced by the result of ``flatten`` before the
        items are created with ``RecipeItem.from_dict``.
        """
        # Review pages are hard to tell apart from recipe pages in the
        # link extractor regex, so they are filtered out here instead.
        if response.url.endswith('/review'):
            return []

        selector = HtmlXPathSelector(response)
        parsed = parse_recipes(selector, {'source': self.source})
        for entry in parsed:
            for field in ('photo', 'image'):
                if field in entry:
                    entry[field] = flatten(entry[field])

        return list(map(RecipeItem.from_dict, parsed))

Exemple #2

0

Afficher le fichier

Fichier : food_spider.py Projet : jterskine/my_stuff

    def parse_item(self, response):
        """Turn a recipe page response into a list of ``RecipeItem`` objects.

        Returns an empty list when the response URL ends with ``/review``.
        Otherwise the recipes returned by ``parse_recipes`` have their
        ``photo`` / ``image`` values (if any) run through ``flatten`` and
        are then converted with ``RecipeItem.from_dict``.
        """
        # The link extractor regex cannot reliably exclude review pages,
        # so bail out on them explicitly.
        is_review_page = response.url.endswith('/review')
        if is_review_page:
            return []

        page = HtmlXPathSelector(response)
        recipes = parse_recipes(page, {'source': self.source})

        picture_keys = ('photo', 'image')
        for recipe_data in recipes:
            for name in picture_keys:
                if name not in recipe_data:
                    continue
                recipe_data[name] = flatten(recipe_data[name])

        items = []
        for recipe_data in recipes:
            items.append(RecipeItem.from_dict(recipe_data))
        return items

Exemple #3

0

Afficher le fichier

Fichier : foodnetwork_spider.py Projet : eleclerc/openrecipes

    def parse_item(self, response):
        """Build ``RecipeItem`` objects from a recipe page response.

        Responses whose URL contains ``/reviews/`` produce an empty list.
        For each parsed recipe, the ``photo`` and ``image`` fields (when
        present) are passed through ``flatten`` and then have ``'_med.'``
        replaced by ``'_lg.'``.
        """
        # Review pages are hard to tell apart from recipe pages in the
        # link extractor regex, so they are filtered out here instead.
        if '/reviews/' in response.url:
            return []

        selector = HtmlXPathSelector(response)
        parsed = parse_recipes(
            selector,
            {'source': self.source, 'url': response.url},
        )
        for entry in parsed:
            for field in ('photo', 'image'):
                if field not in entry:
                    continue
                entry[field] = flatten(entry[field])
                # presumably swaps the medium-size image name for the
                # large one -- confirm against the site's URL scheme
                entry[field] = entry[field].replace('_med.', '_lg.')

        return list(map(RecipeItem.from_dict, parsed))

Exemple #4

0

Afficher le fichier

    def parse_item(self, response):
        """Turn a recipe page response into a list of ``RecipeItem`` objects.

        Returns an empty list when ``/reviews/`` occurs in the response
        URL. Otherwise each recipe from ``parse_recipes`` has its
        ``photo`` / ``image`` value (if any) flattened and its ``'_med.'``
        substring replaced with ``'_lg.'`` before conversion through
        ``RecipeItem.from_dict``.
        """
        # The link extractor regex cannot reliably exclude review pages,
        # so bail out on them explicitly.
        on_review_page = '/reviews/' in response.url
        if on_review_page:
            return []

        page = HtmlXPathSelector(response)
        recipes = parse_recipes(page, {'source': self.source, 'url': response.url})

        picture_keys = ('photo', 'image')
        medium, large = '_med.', '_lg.'
        for recipe_data in recipes:
            for name in picture_keys:
                if name in recipe_data:
                    recipe_data[name] = flatten(recipe_data[name])
                    recipe_data[name] = recipe_data[name].replace(medium, large)

        items = []
        for recipe_data in recipes:
            items.append(RecipeItem.from_dict(recipe_data))
        return items

Exemple #5

0

Afficher le fichier

Fichier : tasteofhome_spider.py Projet : rkroll/openrecipes

  def parse_item(self, response):
    """Build ``RecipeItem`` objects from a recipe page response.

    For each recipe returned by ``self.parse_recipes``, the ``photo``
    and ``image`` fields (when present) are passed through ``flatten``;
    a resulting value that starts with ``//`` gets ``http:`` prepended.
    """
    selector = HtmlXPathSelector(response)
    parsed = self.parse_recipes(
      selector,
      {'source': self.source, 'url': response.url},
    )
    for entry in parsed:
      for field in ('photo', 'image'):
        if field in entry:
          link = flatten(entry[field])
          # A leading '//' means the URL carries no scheme of its own.
          entry[field] = 'http:' + link if link.startswith('//') else link

    return list(map(RecipeItem.from_dict, parsed))

Exemple #6

0

Afficher le fichier

    def parse_item(self, response):
        """Turn a recipe page response into a list of ``RecipeItem`` objects.

        Each recipe produced by ``self.parse_recipes`` has its ``photo``
        and ``image`` values (if any) run through ``flatten``. When the
        flattened value begins with ``//``, ``http:`` is put in front of
        it. The recipes are then converted with ``RecipeItem.from_dict``.
        """
        page = HtmlXPathSelector(response)
        context = {'source': self.source, 'url': response.url}
        recipes = self.parse_recipes(page, context)

        for recipe_data in recipes:
            for name in ('photo', 'image'):
                if name not in recipe_data:
                    continue
                address = flatten(recipe_data[name])
                # A leading '//' means the URL has no scheme; add one.
                if address.startswith('//'):
                    address = 'http:' + address
                recipe_data[name] = address

        items = []
        for recipe_data in recipes:
            items.append(RecipeItem.from_dict(recipe_data))
        return items