def scrape(url):
    """Scrape every recipe tile on a listing page and store each one.

    Fetches ``url`` and, for each ``div.txt-container`` tile, reads the
    title, follows the ``href`` of the tile's parent element (relative to
    ``http://www.allthecooks.com``) to read the rating and review count,
    collects the tile's ingredient strings, and passes the result to
    ``helper.store``.

    Args:
        url: Address of the listing page; also recorded as the ``source``
            of every recipe stored.

    Returns:
        None. Each recipe is handed to ``helper.store``.
    """
    # Rewrites applied, in order, to each ingredient's text: newlines and
    # bullet characters are dropped, vulgar fractions become ASCII fractions.
    replacements = (
        ("\n", ""),
        (u"·", ""),
        (u"½", "1/2"),
        (u"¾", "3/4"),
        (u"¼", "1/4"),
        (u"⅓", "1/3"),
    )

    listing = BeautifulSoup(requests.get(url).text)
    for tile in listing.find_all("div", {"class": "txt-container"}):
        title = tile.find("div", {"class": "title"}).text.strip()

        link = tile.parent.get("href")
        if link is None:
            # No recipe page to follow; concatenating None onto the base
            # URL would raise TypeError and abort the remaining tiles.
            continue

        recipe_page = requests.get("http://www.allthecooks.com" + link)
        recipe_soup = BeautifulSoup(recipe_page.text)

        # Recipes without rating markup are stored with 0 for both fields.
        rating_tag = recipe_soup.find("span", {"itemprop": "ratingValue"})
        rating = rating_tag.text if rating_tag is not None else 0
        count_tag = recipe_soup.find("span", {"itemprop": "reviewCount"})
        numRatings = count_tag.text if count_tag is not None else 0

        # NOTE(review): ingredients are read from the listing tile, not from
        # the recipe page fetched above -- confirm this is intended.
        ingredients = []
        for ingredient in tile.find_all("div", {"class": "ingredient"}):
            text = ingredient.text
            for old, new in replacements:
                text = text.replace(old, new)
            ingredients.append(text)

        helper.store(
            recipeName=title,
            rating=rating,
            numRatings=numRatings,
            source=url,
            ingredients=ingredients,
        )
def scrapeRecipe(url, title):
    """Scrape a single recipe page and store its rating and ingredients.

    Reads the rating from the ``content`` attribute of the ``<meta>`` tag
    inside the rating-stars div, the rating count from the first word of the
    ``p#pRatings`` text (commas removed, ``0`` if the text is empty), and one
    ingredient string per ``li#liIngredient`` element, then passes
    everything to ``helper.store``.

    Args:
        url: Address of the recipe page; also recorded as ``source``.
        title: Recipe name to store (not read from the page).

    Returns:
        None. The recipe is handed to ``helper.store``.

    Raises:
        AttributeError: If the rating div, its ``<meta>`` tag, or the
            ``pRatings`` paragraph is missing from the page.
    """
    soup = BeautifulSoup(requests.get(url).text)

    stars = soup.find("div", {"class": "rating-stars stars113x20 fl-left"})
    rating = stars.find("meta").get("content")

    count_words = soup.find("p", {"id": "pRatings"}).text.split()
    numRatings = count_words[0].replace(",", "") if count_words else 0

    ingredients = []
    for item in soup.find_all("li", {"id": "liIngredient"}):
        name_tag = item.find("span", {"class": "ingredient-name"})
        if name_tag is None:
            # Entries without a name span carry no ingredient; skip them.
            continue
        # BeautifulSoup already returns Unicode text, so no decode step is
        # needed. The previous ``.decode("utf-8")`` raised inside a bare
        # ``except`` and silently dropped ingredients (all of them on
        # Python 3, non-ASCII ones on Python 2).
        name = name_tag.text.strip()

        amount_tag = item.find("span", {"class": "ingredient-amount"})
        if amount_tag is not None:
            name = amount_tag.text.strip() + " " + name
        ingredients.append(name)

    helper.store(
        recipeName=title,
        rating=rating,
        numRatings=numRatings,
        source=url,
        ingredients=ingredients,
    )
def scrape(url):
    """Scrape one recipe page and store its title, rating and ingredients.

    Reads the title from ``h1[itemprop=name]``, the average rating and the
    rating count from the first word of the ``average`` and ``total`` spans
    inside ``div.post-ratings``, and one ingredient string per
    ``span[itemprop=ingredient]``, then passes everything to
    ``helper.store``. The title and ingredients are ASCII-encoded with
    non-ASCII characters dropped.

    Args:
        url: Address of the recipe page; also recorded as ``source``.

    Returns:
        None. The recipe is handed to ``helper.store``.

    Raises:
        AttributeError: If the title heading, the ratings div, or either
            rating span is missing from the page.
        IndexError: If a rating span contains no text.
    """
    soup = BeautifulSoup(requests.get(url).text)

    title = soup.find("h1", {"itemprop": "name"}).text.strip()
    title = title.encode("ascii", "ignore")

    ratings_div = soup.find("div", {"class": "post-ratings"})
    # The attribute filters must be dicts. The previous set literals
    # ({"class", "average"}) were treated by BeautifulSoup as a list of
    # candidate class names, so they also matched class="class".
    rating = ratings_div.find("span", {"class": "average"}).text.split()[0]
    numRatings = ratings_div.find("span", {"class": "total"}).text.split()[0]

    ingredients = []
    for item in soup.find_all("span", {"itemprop": "ingredient"}):
        parts = item.find_all("span")
        if len(parts) > 1:
            # Two or more nested spans: first is the amount, second the name.
            amount = parts[0].text
            name = parts[1].text
        elif parts:
            amount = ""
            name = parts[0].text
        else:
            # No nested spans at all: use the ingredient span's own text
            # instead of failing with IndexError.
            amount = ""
            name = item.text
        ingredients.append(
            (amount + " " + name).strip().encode("ascii", "ignore")
        )

    helper.store(
        recipeName=title,
        rating=rating,
        numRatings=numRatings,
        source=url,
        ingredients=ingredients,
    )