Example #1
0
    def iter_recipes(self):
        """Yield a ``Recipe`` for every search result found in the document.

        A result is a ``div.m_search_result`` element holding exactly two
        ``td`` cells: the first is searched for the thumbnail image, the
        second for the title link and the summary text.  Results with any
        other number of cells are ignored.

        Only the id, title, ``thumbnail_url`` and ``short_description`` are
        filled in here; every other field is set to ``NotLoaded``.
        ``thumbnail_url`` and ``short_description`` are ``NotAvailable`` when
        the corresponding markup is missing.
        """
        root = self.document.getroot()
        for result in self.parser.select(root, 'div.m_search_result'):
            cells = self.parser.select(result, 'td')
            if len(cells) != 2:
                continue
            picture_cell, text_cell = cells

            thumbnail_url = NotAvailable
            imgs = self.parser.select(picture_cell, 'img')
            if imgs:
                thumbnail_url = unicode(imgs[0].attrib.get('src', ''))

            link = self.parser.select(text_cell,
                                      'div.m_search_titre_recette a', 1)
            title = unicode(link.text)
            # The id is the href stripped of its '.aspx' suffix and of the
            # '/recettes/recette_' prefix.
            href = link.attrib.get('href', '')
            recipe_id = unicode(
                href.replace('.aspx', '').replace('/recettes/recette_', ''))

            # ``.text`` can be None (lxml returns None for an element with no
            # leading text); fall back to NotAvailable instead of raising
            # AttributeError on ``.strip()``.
            short_description = NotAvailable
            summary = self.parser.select(text_cell,
                                         'div.m_search_result_part4', 1).text
            if summary is not None:
                short_description = unicode(
                    ' '.join(summary.strip().split('\n')))

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            recipe.instructions = NotLoaded
            recipe.author = NotLoaded
            recipe.ingredients = NotLoaded
            recipe.nb_person = NotLoaded
            recipe.cooking_time = NotLoaded
            recipe.preparation_time = NotLoaded
            yield recipe
Example #2
0
    def iter_recipes(self):
        """Iterate over the search results of the page as ``Recipe`` objects.

        Each ``div.m_search_result`` element that contains exactly two ``td``
        cells produces one recipe; elements with another cell count are
        skipped.  The thumbnail comes from the first ``img`` of the first
        cell, the title and id from the ``div.m_search_titre_recette a`` link
        of the second cell, and the short description from the text of
        ``div.m_search_result_part4``.

        Fields other than id, title, thumbnail and short description are set
        to ``NotLoaded``.
        """
        for div in self.parser.select(self.document.getroot(), 'div.m_search_result'):
            tds = self.parser.select(div, 'td')
            if len(tds) != 2:
                continue

            thumbnail_url = NotAvailable
            imgs = self.parser.select(tds[0], 'img')
            if imgs:
                thumbnail_url = unicode(imgs[0].attrib.get('src', ''))

            link = self.parser.select(tds[1], 'div.m_search_titre_recette a', 1)
            title = unicode(link.text)
            # Strip the '.aspx' suffix and the '/recettes/recette_' prefix
            # from the href to obtain the recipe id.
            recipe_id = unicode(
                link.attrib.get('href', '')
                .replace('.aspx', '')
                .replace('/recettes/recette_', ''))

            # The description element may carry no leading text, in which
            # case ``.text`` is None and calling ``.strip()`` on it would
            # raise AttributeError; keep NotAvailable for that case.
            short_description = NotAvailable
            description = self.parser.select(tds[1], 'div.m_search_result_part4', 1)
            if description.text is not None:
                lines = description.text.strip().split('\n')
                short_description = unicode(' '.join(lines))

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            recipe.instructions = NotLoaded
            recipe.author = NotLoaded
            recipe.ingredients = NotLoaded
            recipe.nb_person = NotLoaded
            recipe.cooking_time = NotLoaded
            recipe.preparation_time = NotLoaded
            yield recipe
Example #3
0
    def iter_recipes(self):
        """Yield a ``Recipe`` for each ``div.recipe-info`` element.

        The thumbnail is the first ``img`` found under the parent of the
        ``div.recipe-info`` element, kept only when its ``src`` is an
        absolute ``http://`` or ``https://`` URL; otherwise it is
        ``NotAvailable``.  The title and id come from the ``a.title`` link.
        ``short_description`` is always ``NotAvailable`` and the remaining
        fields are set to ``NotLoaded``.

        Raises IndexError when the link's href has fewer than three
        '/'-separated components.
        """
        for div in self.parser.select(self.document.getroot(),
                                      'div.recipe-info'):
            thumbnail_url = NotAvailable
            imgs = self.parser.select(div.getparent(), 'img')
            if imgs:
                url = unicode(imgs[0].attrib.get('src', ''))
                # Accept both schemes: checking 'http://' alone drops every
                # thumbnail that is served over https.
                if url.startswith(('http://', 'https://')):
                    thumbnail_url = url

            link = self.parser.select(div, 'a.title', 1)
            title = unicode(link.text)
            # The id is the third '/'-separated component of the href.
            recipe_id = unicode(link.attrib.get('href', '').split('/')[2])

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = NotAvailable
            recipe.instructions = NotLoaded
            recipe.ingredients = NotLoaded
            recipe.nb_person = NotLoaded
            recipe.cooking_time = NotLoaded
            recipe.preparation_time = NotLoaded
            recipe.author = NotLoaded
            yield recipe
Example #4
0
    def iter_recipes(self):
        """Yield a ``Recipe`` for each ``div.result-recipe`` element.

        The thumbnail is the first ``a.pull-image-left img`` of the result,
        kept only when its ``src`` is an absolute ``http://`` or ``https://``
        URL; otherwise it is ``NotAvailable``.  The title and id come from
        the ``div.result-text a`` link and the short description from the
        text content of ``div.result-text p``.  The remaining fields are set
        to ``NotLoaded``.

        Raises IndexError when the link's href has fewer than three
        '/'-separated components.
        """
        for div in self.parser.select(self.document.getroot(), 'div.result-recipe'):
            thumbnail_url = NotAvailable
            imgs = self.parser.select(div, 'a.pull-image-left img')
            if imgs:
                url = unicode(imgs[0].attrib.get('src', ''))
                # Accept both schemes: checking 'http://' alone drops every
                # thumbnail that is served over https.
                if url.startswith(('http://', 'https://')):
                    thumbnail_url = url

            link = self.parser.select(div, 'div.result-text a', 1)
            title = unicode(link.text)
            # The id is the third '/'-separated component of the href.
            recipe_id = unicode(link.attrib.get('href', '').split('/')[2])

            txt = self.parser.select(div, 'div.result-text p', 1)
            short_description = unicode(txt.text_content())

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            recipe.instructions = NotLoaded
            recipe.ingredients = NotLoaded
            recipe.nb_person = NotLoaded
            recipe.cooking_time = NotLoaded
            recipe.preparation_time = NotLoaded
            recipe.author = NotLoaded
            yield recipe
Example #5
0
    def iter_recipes(self):
        """Yield a ``Recipe`` for each result block that has a title link.

        Result blocks are the ``div.recette_description > div.data``
        elements; blocks without a ``div.info > p.title > a.fn`` link are
        skipped.  The short description is the whitespace-normalised text of
        ``div.infos_column`` followed by a ``<on>/<total>`` cost figure, where
        ``total`` counts the images in that column and ``on`` those whose
        ``src`` ends with ``euro_on.png``.  Fields other than id, title,
        thumbnail and short description are set to ``NotLoaded``.
        """
        root = self.document.getroot()
        for data in self.parser.select(root, 'div.recette_description > div.data'):
            title_links = self.parser.select(data, 'div.info > p.title > a.fn')
            if not title_links:
                continue

            title = unicode(title_links[0].text)
            # The id is whatever follows the last '=' in the href of the
            # 'div.carnet-add a' link, not the href of the title link.
            add_link = self.parser.select(data, 'div.carnet-add a', 1)
            recipe_id = unicode(add_link.attrib.get('href', '').split('=')[-1])

            pictures = self.parser.select(data, 'img.recipe-image')
            if pictures:
                thumbnail_url = unicode(pictures[0].attrib.get('src', ''))
            else:
                thumbnail_url = NotAvailable

            infos = self.parser.select(data, 'div.infos_column', 1)
            summary = unicode(' '.join(infos.text_content().split()).strip())

            cost_imgs = self.parser.select(data, 'div.infos_column img')
            lit = sum(1 for img in cost_imgs
                      if img.attrib.get('src', '').endswith('euro_on.png'))
            summary += u' %s/%s' % (lit, len(cost_imgs))

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = summary
            for field in ('instructions', 'ingredients', 'nb_person',
                          'cooking_time', 'preparation_time', 'author'):
                setattr(recipe, field, NotLoaded)
            yield recipe
Example #6
0
    def iter_recipes(self):
        """Yield a ``Recipe`` for each ``div.rechRecette`` element.

        The thumbnail is the first ``img`` of the result, kept only when its
        ``src`` is an absolute ``http://`` or ``https://`` URL; otherwise it
        is ``NotAvailable``.  The title and id come from the
        ``a.rechRecetTitle`` link; the id is the last '/'-separated component
        of the href with ``.aspx`` removed.

        The short description is made of, in order: an optional
        ``Cost: <n>/<total> ; `` prefix when a ``div.prix`` is present, the
        whitespace-normalised text of ``div.rechResume`` with euro signs
        removed, a space, and the whitespace-normalised text of
        ``div.rechIngredients``.  The remaining fields are set to
        ``NotLoaded``.
        """
        for div in self.parser.select(self.document.getroot(), 'div.rechRecette'):
            thumbnail_url = NotAvailable
            imgs = self.parser.select(div, 'img')
            if imgs:
                url = unicode(imgs[0].attrib.get('src', ''))
                # Accept both schemes: checking 'http://' alone drops every
                # thumbnail that is served over https.
                if url.startswith(('http://', 'https://')):
                    thumbnail_url = url

            link = self.parser.select(div, 'a.rechRecetTitle', 1)
            title = unicode(link.text)
            recipe_id = unicode(link.attrib.get('href', '').split(
                '/')[-1].replace('.aspx', ''))

            short_description = u''
            ldivprix = self.parser.select(div, 'div.prix')
            if ldivprix:
                divprix = ldivprix[0]
                # Euro signs inside the first <span> are subtracted from the
                # total number of euro signs in the whole price div.
                nbprixneg = 0
                spanprix = self.parser.select(divprix, 'span')
                if spanprix:
                    nbprixneg = unicode(spanprix[0].text).count(u'€')
                nbprixtot = unicode(divprix.text_content()).count(u'€')
                short_description += u'Cost: %s/%s ; ' % (
                    nbprixtot - nbprixneg, nbprixtot)

            resume = self.parser.select(div, 'div.rechResume', 1)
            short_description += unicode(
                ' '.join(resume.text_content().split()).strip()).replace(u'€', '')
            short_description += u' '
            ingredients = self.parser.select(div, 'div.rechIngredients', 1)
            short_description += unicode(
                ' '.join(ingredients.text_content().split()).strip())

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = short_description
            recipe.instructions = NotLoaded
            recipe.ingredients = NotLoaded
            recipe.nb_person = NotLoaded
            recipe.cooking_time = NotLoaded
            recipe.preparation_time = NotLoaded
            recipe.author = NotLoaded
            yield recipe
Example #7
0
    def iter_recipes(self):
        """Iterate over the listed recipes, yielding one ``Recipe`` each.

        Every ``div.recette_description > div.data`` element that contains a
        ``div.info > p.title > a.fn`` link produces a recipe; elements
        without such a link are skipped.  The short description combines the
        whitespace-normalised text of ``div.infos_column`` with a
        ``<on>/<total>`` figure computed from the images of that column
        (``on`` counts the ones whose ``src`` ends with ``euro_on.png``).
        All fields besides id, title, thumbnail and short description are set
        to ``NotLoaded``.
        """
        blocks = self.parser.select(self.document.getroot(),
                                    'div.recette_description > div.data')
        for block in blocks:
            anchors = self.parser.select(block, 'div.info > p.title > a.fn')
            if not anchors:
                continue

            title = unicode(anchors[0].text)
            # The id is the part of the 'div.carnet-add a' href that follows
            # its last '='; the title link's href is not used.
            carnet_href = self.parser.select(
                block, 'div.carnet-add a', 1).attrib.get('href', '')
            recipe_id = unicode(carnet_href.split('=')[-1])

            thumbnail_url = NotAvailable
            images = self.parser.select(block, 'img.recipe-image')
            if images:
                thumbnail_url = unicode(images[0].attrib.get('src', ''))

            column = self.parser.select(block, 'div.infos_column', 1)
            words = column.text_content().split()
            description = unicode(' '.join(words).strip())

            euros = self.parser.select(block, 'div.infos_column img')
            euros_on = len([
                img for img in euros
                if img.attrib.get('src', '').endswith('euro_on.png')
            ])
            description += u' %s/%s' % (euros_on, len(euros))

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = description
            for name in ('instructions', 'ingredients', 'nb_person',
                         'cooking_time', 'preparation_time', 'author'):
                setattr(recipe, name, NotLoaded)
            yield recipe
Example #8
0
    def iter_recipes(self):
        """Yield a ``Recipe`` for each ``div.recipe-info`` element.

        The thumbnail is the first ``img`` found under the parent of the
        ``div.recipe-info`` element, kept only when its ``src`` is an
        absolute ``http://`` or ``https://`` URL; otherwise it is
        ``NotAvailable``.  The title and id come from the ``a.title`` link.
        ``short_description`` is always ``NotAvailable`` and the remaining
        fields are set to ``NotLoaded``.

        Raises IndexError when the link's href has fewer than three
        "/"-separated components.
        """
        for div in self.parser.select(self.document.getroot(), "div.recipe-info"):
            thumbnail_url = NotAvailable
            imgs = self.parser.select(div.getparent(), "img")
            if imgs:
                url = unicode(imgs[0].attrib.get("src", ""))
                # Accept both schemes: checking "http://" alone drops every
                # thumbnail that is served over https.
                if url.startswith(("http://", "https://")):
                    thumbnail_url = url

            link = self.parser.select(div, "a.title", 1)
            title = unicode(link.text)
            # The id is the third "/"-separated component of the href.
            recipe_id = unicode(link.attrib.get("href", "").split("/")[2])

            recipe = Recipe(recipe_id, title)
            recipe.thumbnail_url = thumbnail_url
            recipe.short_description = NotAvailable
            recipe.instructions = NotLoaded
            recipe.ingredients = NotLoaded
            recipe.nb_person = NotLoaded
            recipe.cooking_time = NotLoaded
            recipe.preparation_time = NotLoaded
            recipe.author = NotLoaded
            yield recipe