def iter_recipes(self):
    """Yield a summary ``Recipe`` for every ``div.recipe-info`` in the page.

    Only the id, title and thumbnail URL are extracted here; the short
    description stays ``NotAvailable`` and the detail fields are set to
    ``NotLoaded``.
    """
    select = self.parser.select
    for info_div in select(self.document.getroot(), 'div.recipe-info'):
        # Images are looked up from the parent of the info block.
        thumbnail_url = NotAvailable
        pictures = select(info_div.getparent(), 'img')
        if pictures:
            src = unicode(pictures[0].attrib.get('src', ''))
            # Only sources starting with http:// are kept as thumbnails.
            if src.startswith('http://'):
                thumbnail_url = src

        # select(..., 1) is used here as returning the single matching element.
        anchor = select(info_div, 'a.title', 1)
        title = unicode(anchor.text)
        # The id is the third '/'-separated piece of the link href.
        recipe_id = unicode(anchor.attrib.get('href', '').split('/')[2])

        recipe = Recipe(recipe_id, title)
        recipe.thumbnail_url = thumbnail_url
        recipe.short_description = NotAvailable
        for field in ('instructions', 'ingredients', 'nb_person',
                      'cooking_time', 'preparation_time', 'author'):
            setattr(recipe, field, NotLoaded)
        yield recipe
def iter_recipes(self):
    """Yield a summary ``Recipe`` for each two-cell ``div.m_search_result``.

    Result blocks that do not contain exactly two ``td`` cells are skipped.
    The first cell provides the thumbnail, the second one the title link and
    the short description; detail fields are set to ``NotLoaded``.
    """
    select = self.parser.select
    for result in select(self.document.getroot(), 'div.m_search_result'):
        cells = select(result, 'td')
        if len(cells) != 2:
            continue
        picture_cell, text_cell = cells

        thumbnail_url = NotAvailable
        pictures = select(picture_cell, 'img')
        if pictures:
            thumbnail_url = unicode(pictures[0].attrib.get('src', ''))

        # select(..., 1) is used here as returning the single matching element.
        anchor = select(text_cell, 'div.m_search_titre_recette a', 1)
        title = unicode(anchor.text)
        # The id is the href with '.aspx' and '/recettes/recette_' removed.
        href = anchor.attrib.get('href', '')
        recipe_id = href.replace('.aspx', '').replace('/recettes/recette_', '')

        summary = select(text_cell, 'div.m_search_result_part4', 1)
        # Newlines inside the summary text are turned into single spaces.
        short_description = unicode(summary.text.strip().replace('\n', ' '))

        recipe = Recipe(recipe_id, title)
        recipe.thumbnail_url = thumbnail_url
        recipe.short_description = short_description
        for field in ('instructions', 'author', 'ingredients',
                      'nb_person', 'cooking_time', 'preparation_time'):
            setattr(recipe, field, NotLoaded)
        yield recipe
def iter_recipes(self):
    """Yield a summary ``Recipe`` for every ``div.result-recipe`` in the page.

    Extracts the id, title, thumbnail URL and short description; the detail
    fields are set to ``NotLoaded``.
    """
    select = self.parser.select
    for result in select(self.document.getroot(), 'div.result-recipe'):
        thumbnail_url = NotAvailable
        pictures = select(result, 'a.pull-image-left img')
        if pictures:
            src = unicode(pictures[0].attrib.get('src', ''))
            # Only sources starting with http:// are kept as thumbnails.
            if src.startswith('http://'):
                thumbnail_url = src

        # select(..., 1) is used here as returning the single matching element.
        anchor = select(result, 'div.result-text a', 1)
        title = unicode(anchor.text)
        # The id is the third '/'-separated piece of the link href.
        recipe_id = unicode(anchor.attrib.get('href', '').split('/')[2])

        paragraph = select(result, 'div.result-text p', 1)
        short_description = unicode(paragraph.text_content())

        recipe = Recipe(recipe_id, title)
        recipe.thumbnail_url = thumbnail_url
        recipe.short_description = short_description
        for field in ('instructions', 'ingredients', 'nb_person',
                      'cooking_time', 'preparation_time', 'author'):
            setattr(recipe, field, NotLoaded)
        yield recipe
def iter_recipes(self):
    """Yield a summary ``Recipe`` per ``div.recette_description > div.data``.

    Entries without a ``div.info > p.title > a.fn`` title link are skipped.
    The short description is the whitespace-normalised text of
    ``div.infos_column`` followed by a ``lit/total`` count of its cost images.
    """
    select = self.parser.select
    entries = select(self.document.getroot(),
                     'div.recette_description > div.data')
    for entry in entries:
        title_links = select(entry, 'div.info > p.title > a.fn')
        if not title_links:
            continue
        title = unicode(title_links[0].text)

        # The id is whatever follows the last '=' in the carnet-add link href.
        # select(..., 1) is used here as returning the single matching element.
        carnet_link = select(entry, 'div.carnet-add a', 1)
        recipe_id = unicode(carnet_link.attrib.get('href', '').split('=')[-1])

        thumbnail_url = NotAvailable
        pictures = select(entry, 'img.recipe-image')
        if pictures:
            thumbnail_url = unicode(pictures[0].attrib.get('src', ''))

        infos_text = select(entry, 'div.infos_column', 1).text_content()
        short_description = unicode(' '.join(infos_text.split()).strip())

        # Cost images whose src ends with 'euro_on.png' count as "on".
        cost_images = select(entry, 'div.infos_column img')
        lit = sum(1 for image in cost_images
                  if image.attrib.get('src', '').endswith('euro_on.png'))
        short_description += u' %s/%s' % (lit, len(cost_images))

        recipe = Recipe(recipe_id, title)
        recipe.thumbnail_url = thumbnail_url
        recipe.short_description = short_description
        for field in ('instructions', 'ingredients', 'nb_person',
                      'cooking_time', 'preparation_time', 'author'):
            setattr(recipe, field, NotLoaded)
        yield recipe
def iter_recipes(self):
    """Yield a summary ``Recipe`` for each two-cell ``div.m_search_result``.

    Result blocks that do not contain exactly two ``td`` cells are skipped.
    The first cell provides the thumbnail, the second one the title link and
    the short description; detail fields are set to ``NotLoaded``.
    """
    select = self.parser.select
    for block in select(self.document.getroot(), 'div.m_search_result'):
        cells = select(block, 'td')
        if len(cells) != 2:
            continue
        image_td, info_td = cells

        thumbnail_url = NotAvailable
        found_images = select(image_td, 'img')
        if found_images:
            thumbnail_url = unicode(found_images[0].attrib.get('src', ''))

        # select(..., 1) is used here as returning the single matching element.
        title_link = select(info_td, 'div.m_search_titre_recette a', 1)
        title = unicode(title_link.text)
        # The id is the href with '.aspx' and '/recettes/recette_' removed.
        raw_href = title_link.attrib.get('href', '')
        recipe_id = raw_href.replace('.aspx', '')
        recipe_id = recipe_id.replace('/recettes/recette_', '')

        part4 = select(info_td, 'div.m_search_result_part4', 1)
        # Newlines inside the summary text are turned into single spaces.
        short_description = unicode(part4.text.strip().replace('\n', ' '))

        recipe = Recipe(recipe_id, title)
        recipe.thumbnail_url = thumbnail_url
        recipe.short_description = short_description
        for field in ('instructions', 'author', 'ingredients',
                      'nb_person', 'cooking_time', 'preparation_time'):
            setattr(recipe, field, NotLoaded)
        yield recipe
def get_recipe(self, id):
    """Build a detailed ``Recipe`` from the current document.

    :param id: identifier passed through to ``Recipe``
    :returns: a ``Recipe`` carrying title, preparation/cooking times,
              ingredients and instructions, plus servings, picture URL and
              comments when the page has them (``NotAvailable`` otherwise);
              ``thumbnail_url`` is ``NotLoaded`` and ``author`` is
              ``NotAvailable``
    """
    select = self.parser.select
    root = self.document.getroot()

    # select(..., 1) is used here as returning the single matching element.
    title = unicode(select(root, 'h1.m_title', 1).text_content().strip())
    main = select(root, 'div.m_content_recette_main', 1)

    prep_span = select(main, 'p.m_content_recette_info span.preptime', 1)
    preparation_time = int(prep_span.text_content())
    cook_span = select(main, 'p.m_content_recette_info span.cooktime', 1)
    cooking_time = int(cook_span.text_content())

    # Servings are read from a "(pour N personnes)" header when it is there.
    nb_person = NotAvailable
    header = select(main, 'p.m_content_recette_ingredients span',
                    1).text_content()
    if '(pour' in header and ')' in header:
        after_pour = header.split('pour ')[-1]
        before_unit = after_pour.split('personnes)')[0]
        nb_person = [int(before_unit.split()[0])]

    # Ingredients are '- ' separated; the chunk before the first one is
    # dropped.
    ingredients_text = select(main, 'p.m_content_recette_ingredients',
                              1).text_content()
    ingredients = ingredients_text.strip().split('- ')[1:]

    # One stripped line per instruction, no leading/trailing empty lines.
    todo_text = select(main, 'div.m_content_recette_todo',
                       1).text_content().strip()
    instructions = u'\n'.join(
        line.strip() for line in todo_text.split('\n')).strip('\n')

    picture_url = NotAvailable
    pictures = select(root, 'a.m_content_recette_illu img')
    if pictures:
        picture_url = unicode(pictures[0].attrib.get('src', ''))

    comments = NotAvailable
    comment_rows = select(root, 'div.m_commentaire_row')
    if comment_rows:
        comments = []
        for row in comment_rows:
            rating = select(row, 'div.m_commentaire_note span', 1).text.strip()
            poster = select(row, 'div.m_commentaire_content span',
                            1).text.strip()
            body = select(row, 'div.m_commentaire_content p', 1).text.strip()
            comments.append(Comment(author=poster, rate=rating, text=body))

    recipe = Recipe(id, title)
    recipe.preparation_time = preparation_time
    recipe.cooking_time = cooking_time
    recipe.nb_person = nb_person
    recipe.ingredients = ingredients
    recipe.instructions = instructions
    recipe.picture_url = picture_url
    recipe.comments = comments
    recipe.thumbnail_url = NotLoaded
    recipe.author = NotAvailable
    return recipe
def iter_recipes(self):
    """Yield a summary ``Recipe`` for every ``div.rechRecette`` in the page.

    The short description is assembled from an optional ``Cost: x/y ; ``
    prefix (derived from euro signs in ``div.prix``), the text of
    ``div.rechResume`` with euro signs removed, a space, and the text of
    ``div.rechIngredients``.
    """
    select = self.parser.select
    for result in select(self.document.getroot(), 'div.rechRecette'):
        thumbnail_url = NotAvailable
        pictures = select(result, 'img')
        if pictures:
            src = unicode(pictures[0].attrib.get('src', ''))
            # Only sources starting with http:// are kept as thumbnails.
            if src.startswith('http://'):
                thumbnail_url = src

        # select(..., 1) is used here as returning the single matching element.
        anchor = select(result, 'a.rechRecetTitle', 1)
        title = unicode(anchor.text)
        # The id is the last path segment of the href without '.aspx'.
        last_segment = anchor.attrib.get('href', '').split('/')[-1]
        recipe_id = unicode(last_segment.replace('.aspx', ''))

        description_parts = []
        price_divs = select(result, 'div.prix')
        if price_divs:
            price_div = price_divs[0]
            price_spans = select(price_div, 'span')
            if price_spans:
                span_euros = unicode(price_spans[0].text).count(u'€')
            else:
                span_euros = 0
            all_euros = unicode(price_div.text_content()).count(u'€')
            # Euro signs outside the first span, over all euro signs.
            description_parts.append(
                u'Cost: %s/%s ; ' % (all_euros - span_euros, all_euros))

        resume_text = select(result, 'div.rechResume', 1).text_content()
        description_parts.append(
            unicode(' '.join(resume_text.split()).strip()).replace(u'€', ''))
        description_parts.append(u' ')
        ingredients_text = select(result, 'div.rechIngredients',
                                  1).text_content()
        description_parts.append(
            unicode(' '.join(ingredients_text.split()).strip()))
        short_description = u''.join(description_parts)

        recipe = Recipe(recipe_id, title)
        recipe.thumbnail_url = thumbnail_url
        recipe.short_description = short_description
        for field in ('instructions', 'ingredients', 'nb_person',
                      'cooking_time', 'preparation_time', 'author'):
            setattr(recipe, field, NotLoaded)
        yield recipe
def iter_recipes(self):
    """Yield a summary ``Recipe`` per ``div.recette_description > div.data``.

    Entries without a ``div.info > p.title > a.fn`` title link are skipped.
    The short description is the whitespace-normalised text of
    ``div.infos_column`` followed by a ``lit/total`` count of its cost images.
    """
    select = self.parser.select
    data_divs = select(self.document.getroot(),
                       'div.recette_description > div.data')
    for data_div in data_divs:
        name_links = select(data_div, 'div.info > p.title > a.fn')
        if not name_links:
            continue
        title = unicode(name_links[0].text)

        # The id is whatever follows the last '=' in the carnet-add link href.
        # select(..., 1) is used here as returning the single matching element.
        add_link = select(data_div, 'div.carnet-add a', 1)
        recipe_id = unicode(add_link.attrib.get('href', '').split('=')[-1])

        thumbnail_url = NotAvailable
        recipe_images = select(data_div, 'img.recipe-image')
        if recipe_images:
            thumbnail_url = unicode(recipe_images[0].attrib.get('src', ''))

        column_text = select(data_div, 'div.infos_column', 1).text_content()
        short_description = unicode(' '.join(column_text.split()).strip())

        # Cost images whose src ends with 'euro_on.png' count as "on".
        euro_images = select(data_div, 'div.infos_column img')
        euros_on = sum(1 for euro in euro_images
                       if euro.attrib.get('src', '').endswith('euro_on.png'))
        short_description += u' %s/%s' % (euros_on, len(euro_images))

        recipe = Recipe(recipe_id, title)
        recipe.thumbnail_url = thumbnail_url
        recipe.short_description = short_description
        for field in ('instructions', 'ingredients', 'nb_person',
                      'cooking_time', 'preparation_time', 'author'):
            setattr(recipe, field, NotLoaded)
        yield recipe
def iter_recipes(self):
    """Yield a summary ``Recipe`` for every ``div.recipe-info`` in the page.

    Only the id, title and thumbnail URL are extracted here; the short
    description stays ``NotAvailable`` and the detail fields are set to
    ``NotLoaded``.
    """
    select = self.parser.select
    for block in select(self.document.getroot(), "div.recipe-info"):
        # Images are looked up from the parent of the info block.
        thumbnail_url = NotAvailable
        found_images = select(block.getparent(), "img")
        if found_images:
            candidate = unicode(found_images[0].attrib.get("src", ""))
            # Only sources starting with http:// are kept as thumbnails.
            if candidate.startswith("http://"):
                thumbnail_url = candidate

        # select(..., 1) is used here as returning the single matching element.
        title_link = select(block, "a.title", 1)
        title = unicode(title_link.text)
        # The id is the third '/'-separated piece of the link href.
        href_pieces = title_link.attrib.get("href", "").split("/")
        recipe_id = unicode(href_pieces[2])

        recipe = Recipe(recipe_id, title)
        recipe.thumbnail_url = thumbnail_url
        recipe.short_description = NotAvailable
        for field in ("instructions", "ingredients", "nb_person",
                      "cooking_time", "preparation_time", "author"):
            setattr(recipe, field, NotLoaded)
        yield recipe
def get_recipe(self, id):
    """Build a detailed ``Recipe`` from the current document.

    :param id: identifier passed through to ``Recipe``
    :returns: a ``Recipe`` with title, ingredients and numbered instructions,
              plus picture URL, preparation/cooking times (minutes) and
              servings when found (``NotAvailable`` otherwise); comments and
              author are ``NotAvailable``, ``thumbnail_url`` is ``NotLoaded``
    """
    select = self.parser.select
    root = self.document.getroot()

    def _total_minutes(hours_selector, minutes_selector):
        # Sum the optional hours (x60) and minutes values found in the page.
        total = 0
        hours_em = select(root, hours_selector)
        if hours_em:
            total += int(hours_em[0].text) * 60
        minutes_em = select(root, minutes_selector)
        if minutes_em:
            total += int(minutes_em[0].text)
        return total

    # select(..., 1) is used here as returning the single matching element.
    title = unicode(select(root, 'h1#itemTitle', 1).text)

    picture_url = NotAvailable
    pictures = select(root, 'img#imgPhoto')
    if pictures:
        picture_url = unicode(pictures[0].attrib.get('src', ''))

    # Keep only the ingredient items that have some text.
    ingredient_texts = (unicode(item.text_content().strip())
                        for item in select(root, 'li#liIngredient'))
    ingredients = [text for text in ingredient_texts if text != '']

    # Steps are numbered from 1, one "N: text" line each.
    steps = select(root, 'div.directLeft li')
    instructions = u''.join(
        '%s: %s\n' % (number, step.text_content())
        for number, step in enumerate(steps, start=1))

    # A total of zero minutes is reported as NotAvailable.
    preparation_time = NotAvailable
    prep_minutes = _total_minutes('span#prepHoursSpan em',
                                  'span#prepMinsSpan em')
    if prep_minutes != 0:
        preparation_time = prep_minutes

    cooking_time = NotAvailable
    cook_minutes = _total_minutes('span#cookHoursSpan em',
                                  'span#cookMinsSpan em')
    if cook_minutes != 0:
        cooking_time = cook_minutes

    nb_person = NotAvailable
    yields = select(root, 'span#lblYield[itemprop=recipeYield]')
    if yields and 'servings' in yields[0].text:
        nb_person = [int(yields[0].text.split()[0])]

    recipe = Recipe(id, title)
    recipe.preparation_time = preparation_time
    recipe.cooking_time = cooking_time
    recipe.nb_person = nb_person
    recipe.ingredients = ingredients
    recipe.instructions = instructions
    recipe.picture_url = picture_url
    recipe.comments = NotAvailable
    recipe.author = NotAvailable
    recipe.thumbnail_url = NotLoaded
    return recipe
def get_recipe(self, id):
    """Build a detailed ``Recipe`` from the current document.

    :param id: identifier passed through to ``Recipe``
    :returns: a ``Recipe`` with title, ingredients and numbered instructions,
              plus picture URL, preparation/cooking times (minutes) and
              servings when found (``NotAvailable`` otherwise); comments and
              author are ``NotAvailable``, ``thumbnail_url`` is ``NotLoaded``
    """
    select = self.parser.select
    root = self.document.getroot()

    def _duration_minutes(content):
        # Add up the 'H' (x60) and 'M' parts found after 'PT' in *content*.
        minutes = 0
        after_pt = content.split('PT')[-1]
        if 'H' in content:
            minutes += 60 * int(after_pt.split('H')[0])
        if 'M' in content:
            minutes += int(after_pt.split('H')[-1].split('M')[0])
        return minutes

    # select(..., 1) is used here as returning the single matching element.
    title = unicode(select(root, 'h1 span[property$=name]', 1).text)
    main = select(root, 'div[typeof$=Recipe]', 1)

    picture_url = NotAvailable
    pictures = select(main, 'div.image-with-credit img')
    if pictures:
        picture_url = unicode(pictures[0].attrib.get('src', ''))

    # The prep time span is searched in the whole document, the cook time
    # span only inside the main recipe element.
    preparation_time = NotAvailable
    prep_spans = select(root, 'span.preptime[property$=prepTime]')
    if prep_spans:
        preparation_time = _duration_minutes(
            prep_spans[0].attrib.get('content', ''))

    cooking_time = NotAvailable
    cook_spans = select(main, 'span.cooktime[property$=cookTime]')
    if cook_spans:
        cooking_time = _duration_minutes(
            cook_spans[0].attrib.get('content', ''))

    nb_person = NotAvailable
    servings = select(main, 'div.ingredients p.servings')
    if servings:
        # Strip surrounding letters and spaces to leave 'N' or 'N/M'.
        raw_count = servings[0].text.strip(string.letters + ' ')
        if '/' in raw_count:
            nb_person = [int(part) for part in raw_count.split('/')[:2]]
        else:
            nb_person = [int(raw_count)]

    ingredients = []
    for dotlist in select(main, 'div.ingredients ul.dotlist'):
        for item in select(dotlist, 'li'):
            item_text = unicode(item.text_content().strip())
            if item_text != '':
                # Collapse internal whitespace runs to single spaces.
                ingredients.append(' '.join(item_text.split()))

    # Steps are numbered from 1, one "N: text" line each.
    steps = select(root, 'div#recipe-steps-list p.step-details')
    instructions = u''.join(
        '%s: %s\n' % (number, step.text_content())
        for number, step in enumerate(steps, 1))

    recipe = Recipe(id, title)
    recipe.preparation_time = preparation_time
    recipe.cooking_time = cooking_time
    recipe.nb_person = nb_person
    recipe.ingredients = ingredients
    recipe.instructions = instructions
    recipe.picture_url = picture_url
    recipe.comments = NotAvailable
    recipe.author = NotAvailable
    recipe.thumbnail_url = NotLoaded
    return recipe
def get_recipe(self, id):
    """Build a detailed ``Recipe`` from the current document.

    The title is the second `` - ``-separated part of the page ``<title>``.
    Times, servings, instructions, picture URL, comments and author stay
    ``NotAvailable`` when the page does not provide them.

    :param id: identifier passed through to ``Recipe``
    :returns: the populated ``Recipe``; ``thumbnail_url`` is ``NotLoaded``
    """
    select = self.parser.select
    root = self.document.getroot()

    preparation_time = NotAvailable
    cooking_time = NotAvailable
    nb_person = NotAvailable
    picture_url = NotAvailable
    instructions = NotAvailable
    author = NotAvailable
    comments = NotAvailable

    # select(..., 1) is used here as returning the single matching element.
    title = unicode(select(root, 'head > title', 1).text.split(' - ')[1])
    main = select(root, 'div.recette_description', 1)

    info_labels = select(root, 'div.recette_infos div.infos_column strong')
    for info_label in info_labels:
        # lxml reports missing text/tail as None: normalise both so that an
        # empty <strong> or a label without a value is skipped, not a crash.
        label = unicode(info_label.text)
        value = info_label.tail or ''
        if value.strip() == '':
            continue
        if u'Temps de préparation' in label:
            preparation_time = int(value.split()[0])
            # A value mentioning 'h' is expressed in hours.
            if 'h' in value:
                preparation_time = 60 * preparation_time
        if 'Temps de cuisson' in label:
            cooking_time = int(value.split()[0])
            if 'h' in value:
                cooking_time = 60 * cooking_time
        if 'Nombre de personnes' in label:
            nb_person = [int(value)]

    # Keep only the ingredient paragraphs that have some text.
    ingredient_texts = (
        unicode(paragraph.text_content().strip())
        for paragraph in select(main, 'div.data.top.left > div.content p'))
    ingredients = [text for text in ingredient_texts if text != '']

    instruction_items = select(main, 'div.data.top.right div.content li')
    if instruction_items:
        # One whitespace-normalised line per step, no outer empty lines.
        instructions = u'\n'.join(
            ' '.join(item.text_content().strip().split())
            for item in instruction_items).strip('\n')

    pictures = select(root, 'div.resume_recette_illustree img.photo')
    if pictures:
        picture_url = unicode(pictures[0].attrib.get('src', ''))

    comment_divs = select(root, 'div.comment-outer')
    if comment_divs:
        comments = []
        for comment_div in comment_divs:
            comment_text = unicode(
                ' '.join(comment_div.text_content().strip().split()))
            if u'| Répondre' in comment_text:
                comment_text = comment_text.strip('0123456789').replace(
                    u' | Répondre', '')
            # Use a dedicated name: reusing ``author`` here would leak the
            # last comment's author into recipe.author when the page has no
            # recipe author link.
            comment_author = None
            if 'par ' in comment_text:
                comment_author = comment_text.split('par ')[-1].split('|')[0]
                comment_text = comment_text.replace(
                    'par %s' % comment_author, '')
            comments.append(Comment(text=comment_text, author=comment_author))

    author_links = select(root, 'p.auteur a.couleur_membre')
    if author_links:
        author = unicode(author_links[0].text.strip())

    recipe = Recipe(id, title)
    recipe.preparation_time = preparation_time
    recipe.cooking_time = cooking_time
    recipe.nb_person = nb_person
    recipe.ingredients = ingredients
    recipe.instructions = instructions
    recipe.picture_url = picture_url
    recipe.comments = comments
    recipe.author = author
    recipe.thumbnail_url = NotLoaded
    return recipe
def get_recipe(self, id):
    """Build a detailed ``Recipe`` from the current document.

    :param id: identifier passed through to ``Recipe``
    :returns: a ``Recipe`` with title, ingredients and numbered instructions,
              plus picture URL, preparation/cooking times (minutes) and
              servings when found (``NotAvailable`` otherwise); comments and
              author are ``NotAvailable``, ``thumbnail_url`` is ``NotLoaded``
    """
    select = self.parser.select
    root = self.document.getroot()

    def _minutes_from(content):
        # Add up the 'H' (x60) and 'M' parts found after 'PT' in *content*.
        total = 0
        tail = content.split('PT')[-1]
        if 'H' in content:
            total += 60 * int(tail.split('H')[0])
        if 'M' in content:
            total += int(tail.split('H')[-1].split('M')[0])
        return total

    # select(..., 1) is used here as returning the single matching element.
    title = unicode(select(root, 'h1 span[property$=name]', 1).text)
    main = select(root, 'div[typeof$=Recipe]', 1)

    picture_url = NotAvailable
    illustrations = select(main, 'div.image-with-credit img')
    if illustrations:
        picture_url = unicode(illustrations[0].attrib.get('src', ''))

    # The prep time span is searched in the whole document, the cook time
    # span only inside the main recipe element.
    preparation_time = NotAvailable
    prep_nodes = select(root, 'span.preptime[property$=prepTime]')
    if prep_nodes:
        preparation_time = _minutes_from(
            prep_nodes[0].attrib.get('content', ''))

    cooking_time = NotAvailable
    cook_nodes = select(main, 'span.cooktime[property$=cookTime]')
    if cook_nodes:
        cooking_time = _minutes_from(
            cook_nodes[0].attrib.get('content', ''))

    nb_person = NotAvailable
    serving_nodes = select(main, 'div.ingredients p.servings')
    if serving_nodes:
        # Strip surrounding letters and spaces to leave 'N' or 'N/M'.
        digits = serving_nodes[0].text.strip(string.letters + ' ')
        if '/' in digits:
            nb_person = [int(piece) for piece in digits.split('/')[:2]]
        else:
            nb_person = [int(digits)]

    ingredients = []
    for group in select(main, 'div.ingredients ul.dotlist'):
        for entry in select(group, 'li'):
            entry_text = unicode(entry.text_content().strip())
            if entry_text != '':
                # Collapse internal whitespace runs to single spaces.
                ingredients.append(' '.join(entry_text.split()))

    # Steps are numbered from 1, one "N: text" line each.
    step_nodes = select(root, 'div#recipe-steps-list p.step-details')
    instructions = u''.join(
        '%s: %s\n' % (position, node.text_content())
        for position, node in enumerate(step_nodes, 1))

    recipe = Recipe(id, title)
    recipe.preparation_time = preparation_time
    recipe.cooking_time = cooking_time
    recipe.nb_person = nb_person
    recipe.ingredients = ingredients
    recipe.instructions = instructions
    recipe.picture_url = picture_url
    recipe.comments = NotAvailable
    recipe.author = NotAvailable
    recipe.thumbnail_url = NotLoaded
    return recipe
def get_recipe(self, id):
    """Build a detailed ``Recipe`` from the current document.

    The title is the second `` - ``-separated part of the page ``<title>``.
    Times, servings, instructions, picture URL, comments and author stay
    ``NotAvailable`` when the page does not provide them.

    :param id: identifier passed through to ``Recipe``
    :returns: the populated ``Recipe``; ``thumbnail_url`` is ``NotLoaded``
    """
    select = self.parser.select
    root = self.document.getroot()

    preparation_time = NotAvailable
    cooking_time = NotAvailable
    nb_person = NotAvailable
    picture_url = NotAvailable
    instructions = NotAvailable
    author = NotAvailable
    comments = NotAvailable

    # select(..., 1) is used here as returning the single matching element.
    title = unicode(select(root, 'head > title', 1).text.split(' - ')[1])
    main = select(root, 'div.recette_description', 1)

    rec_infos = select(root, 'div.recette_infos div.infos_column strong')
    for info_title in rec_infos:
        # lxml reports missing text/tail as None: normalise both so that an
        # empty <strong> or a label without a value is skipped, not a crash.
        heading = unicode(info_title.text)
        detail = info_title.tail or ''
        if detail.strip() == '':
            continue
        if u'Temps de préparation' in heading:
            preparation_time = int(detail.split()[0])
            # A value mentioning 'h' is expressed in hours.
            if 'h' in detail:
                preparation_time = 60 * preparation_time
        if 'Temps de cuisson' in heading:
            cooking_time = int(detail.split()[0])
            if 'h' in detail:
                cooking_time = 60 * cooking_time
        if 'Nombre de personnes' in heading:
            nb_person = [int(detail)]

    # Keep only the ingredient paragraphs that have some text.
    ingredients = []
    for p_ing in select(main, 'div.data.top.left > div.content p'):
        ingtxt = unicode(p_ing.text_content().strip())
        if ingtxt != '':
            ingredients.append(ingtxt)

    lines_instr = select(main, 'div.data.top.right div.content li')
    if lines_instr:
        # One whitespace-normalised line per step, no outer empty lines.
        instructions = u'\n'.join(
            ' '.join(line.text_content().strip().split())
            for line in lines_instr).strip('\n')

    imgillu = select(root, 'div.resume_recette_illustree img.photo')
    if imgillu:
        picture_url = unicode(imgillu[0].attrib.get('src', ''))

    divcoms = select(root, 'div.comment-outer')
    if divcoms:
        comments = []
        for divcom in divcoms:
            comtxt = unicode(' '.join(divcom.text_content().strip().split()))
            if u'| Répondre' in comtxt:
                comtxt = comtxt.strip('0123456789').replace(
                    u' | Répondre', '')
            # Use a dedicated name: reusing ``author`` here would leak the
            # last comment's author into recipe.author when the page has no
            # recipe author link.
            com_author = None
            if 'par ' in comtxt:
                com_author = comtxt.split('par ')[-1].split('|')[0]
                comtxt = comtxt.replace('par %s' % com_author, '')
            comments.append(Comment(text=comtxt, author=com_author))

    links_author = select(root, 'p.auteur a.couleur_membre')
    if links_author:
        author = unicode(links_author[0].text.strip())

    recipe = Recipe(id, title)
    recipe.preparation_time = preparation_time
    recipe.cooking_time = cooking_time
    recipe.nb_person = nb_person
    recipe.ingredients = ingredients
    recipe.instructions = instructions
    recipe.picture_url = picture_url
    recipe.comments = comments
    recipe.author = author
    recipe.thumbnail_url = NotLoaded
    return recipe
def get_recipe(self, id):
    """Build a detailed ``Recipe`` from the current document.

    :param id: identifier passed through to ``Recipe``
    :returns: a ``Recipe`` with title, ingredients and numbered instructions,
              plus picture URL, preparation/cooking times (minutes) and
              servings when found (``NotAvailable`` otherwise); comments and
              author are ``NotAvailable``, ``thumbnail_url`` is ``NotLoaded``
    """
    select = self.parser.select
    root = self.document.getroot()

    def _span_minutes(hours_css, minutes_css):
        # Sum the optional hours (x60) and minutes values found in the page.
        minutes = 0
        hour_nodes = select(root, hours_css)
        if hour_nodes:
            minutes += int(hour_nodes[0].text) * 60
        minute_nodes = select(root, minutes_css)
        if minute_nodes:
            minutes += int(minute_nodes[0].text)
        return minutes

    # select(..., 1) is used here as returning the single matching element.
    title = unicode(select(root, 'h1#itemTitle', 1).text)

    picture_url = NotAvailable
    photo_nodes = select(root, 'img#imgPhoto')
    if photo_nodes:
        picture_url = unicode(photo_nodes[0].attrib.get('src', ''))

    # Keep only the ingredient items that have some text.
    stripped_items = (unicode(node.text_content().strip())
                      for node in select(root, 'li#liIngredient'))
    ingredients = [entry for entry in stripped_items if entry != '']

    # Steps are numbered from 1, one "N: text" line each.
    direction_nodes = select(root, 'div.directLeft li')
    instructions = u''.join(
        '%s: %s\n' % (rank, node.text_content())
        for rank, node in enumerate(direction_nodes, 1))

    # A total of zero minutes is reported as NotAvailable.
    preparation_time = NotAvailable
    prepmin = _span_minutes('span#prepHoursSpan em', 'span#prepMinsSpan em')
    if prepmin != 0:
        preparation_time = prepmin

    cooking_time = NotAvailable
    cookmin = _span_minutes('span#cookHoursSpan em', 'span#cookMinsSpan em')
    if cookmin != 0:
        cooking_time = cookmin

    nb_person = NotAvailable
    yield_nodes = select(root, 'span#lblYield[itemprop=recipeYield]')
    if yield_nodes and 'servings' in yield_nodes[0].text:
        nb_person = [int(yield_nodes[0].text.split()[0])]

    recipe = Recipe(id, title)
    recipe.preparation_time = preparation_time
    recipe.cooking_time = cooking_time
    recipe.nb_person = nb_person
    recipe.ingredients = ingredients
    recipe.instructions = instructions
    recipe.picture_url = picture_url
    recipe.comments = NotAvailable
    recipe.author = NotAvailable
    recipe.thumbnail_url = NotLoaded
    return recipe
def get_recipe(self, id):
    """Build a detailed ``Recipe`` from the current document.

    :param id: identifier passed through to ``Recipe``
    :returns: a ``Recipe`` carrying title, preparation/cooking times,
              ingredients and instructions, plus servings, picture URL and
              comments when the page has them (``NotAvailable`` otherwise);
              ``thumbnail_url`` is ``NotLoaded`` and ``author`` is
              ``NotAvailable``
    """
    select = self.parser.select
    root = self.document.getroot()

    # select(..., 1) is used here as returning the single matching element.
    heading = select(root, 'h1.m_title', 1)
    title = unicode(heading.text_content().strip())
    main = select(root, 'div.m_content_recette_main', 1)

    preparation_time = int(select(
        main, 'p.m_content_recette_info span.preptime', 1).text_content())
    cooking_time = int(select(
        main, 'p.m_content_recette_info span.cooktime', 1).text_content())

    # Servings are read from a "(pour N personnes)" header when it is there.
    nb_person = NotAvailable
    ing_header = select(
        main, 'p.m_content_recette_ingredients span', 1).text_content()
    if '(pour' in ing_header and ')' in ing_header:
        count_part = ing_header.split('pour ')[-1].split('personnes)')[0]
        nb_person = [int(count_part.split()[0])]

    # Ingredients are '- ' separated; the chunk before the first one is
    # dropped.
    raw_ingredients = select(
        main, 'p.m_content_recette_ingredients', 1).text_content().strip()
    ingredients = raw_ingredients.split('- ')[1:]

    # One stripped line per instruction, no leading/trailing empty lines.
    raw_todo = select(
        main, 'div.m_content_recette_todo', 1).text_content().strip()
    stripped_lines = [line.strip() for line in raw_todo.split('\n')]
    instructions = u'\n'.join(stripped_lines).strip('\n')

    picture_url = NotAvailable
    illustrations = select(root, 'a.m_content_recette_illu img')
    if illustrations:
        picture_url = unicode(illustrations[0].attrib.get('src', ''))

    comments = NotAvailable
    rows = select(root, 'div.m_commentaire_row')
    if rows:
        comments = []
        for row in rows:
            rate = select(row, 'div.m_commentaire_note span', 1).text.strip()
            who = select(row, 'div.m_commentaire_content span',
                         1).text.strip()
            what = select(row, 'div.m_commentaire_content p', 1).text.strip()
            comments.append(Comment(author=who, rate=rate, text=what))

    recipe = Recipe(id, title)
    recipe.preparation_time = preparation_time
    recipe.cooking_time = cooking_time
    recipe.nb_person = nb_person
    recipe.ingredients = ingredients
    recipe.instructions = instructions
    recipe.picture_url = picture_url
    recipe.comments = comments
    recipe.thumbnail_url = NotLoaded
    recipe.author = NotAvailable
    return recipe
def get_recipe(self, id):
    """Build a detailed ``Recipe`` from the current document.

    Picture URL, times, servings, comments and author stay ``NotAvailable``
    when the page does not provide them.

    :param id: identifier passed through to ``Recipe``
    :returns: the populated ``Recipe``; ``thumbnail_url`` is ``NotLoaded``
    """
    select = self.parser.select
    root = self.document.getroot()

    preparation_time = NotAvailable
    cooking_time = NotAvailable
    author = NotAvailable
    nb_person = NotAvailable
    picture_url = NotAvailable
    comments = NotAvailable

    # select(..., 1) is used here as returning the single matching element.
    title = unicode(select(root, 'div#ficheRecette h1.fn.recetteH1', 1).text)
    main = select(root, 'div#ficheRecette', 1)

    imgillu = select(main, 'div#recetteLeft img.photo')
    if imgillu:
        picture_url = unicode(imgillu[0].attrib.get('src', ''))

    # The first whitespace-separated token of each time span is the value.
    l_spanprep = select(main, 'span.preptime')
    if l_spanprep:
        preparation_time = int(
            self.parser.tocleanstring(l_spanprep[0]).split()[0])

    l_cooktime = select(main, 'span.cooktime')
    if l_cooktime:
        cooking_time = int(
            self.parser.tocleanstring(l_cooktime[0]).split()[0])

    l_nbpers = select(main, 'td#recipeQuantity span')
    if l_nbpers:
        # The quantity is either 'N' or a 'N/M' pair.
        rawnb = l_nbpers[0].text.split()[0]
        if '/' in rawnb:
            nbs = rawnb.split('/')
            nb_person = [int(nbs[0]), int(nbs[1])]
        else:
            nb_person = [int(rawnb)]

    # Keep only the ingredient items that have some text.
    ingredient_texts = (
        unicode(ing.text_content().strip())
        for ing in select(main, 'div#ingredients li.ingredient'))
    ingredients = [ingtxt for ingtxt in ingredient_texts if ingtxt != '']

    # Each matched div supplies the step label and its next sibling the text.
    l_divinst = select(main, 'div#preparation span.instructions div')
    instructions = u''.join(
        '%s: %s\n' % (inst.text, inst.getnext().text) for inst in l_divinst)

    divcoms = select(root, 'div.comment')
    if divcoms:
        comments = []
        for divcom in divcoms:
            # Use a dedicated name: assigning ``author`` here would leak the
            # last comment's author into recipe.author when the page has no
            # span.author element.
            com_author = unicode(
                select(divcom, 'div.commentAuthor span', 1).text)
            comtxt = unicode(select(divcom, 'p', 1).text_content().strip())
            comments.append(Comment(author=com_author, text=comtxt))

    spans_author = select(root, 'span.author')
    if spans_author:
        author = unicode(spans_author[0].text_content().strip())

    recipe = Recipe(id, title)
    recipe.preparation_time = preparation_time
    recipe.cooking_time = cooking_time
    recipe.nb_person = nb_person
    recipe.ingredients = ingredients
    recipe.instructions = instructions
    recipe.picture_url = picture_url
    recipe.comments = comments
    recipe.author = author
    recipe.thumbnail_url = NotLoaded
    return recipe