def scrape(url):
    """Fetch a JSON recipe listing and hand each recipe to its publisher's scraper.

    The response body is parsed as JSON and its "recipes" list is walked.
    Entries published by "All Recipes" go to ``AllRecipes.scrapeRecipe`` and
    entries published by "Tasty Kitchen" go to ``TastyKitchen.scrape``;
    entries from any other publisher are skipped.

    Args:
        url: Address of the JSON endpoint to request.

    Raises:
        requests.exceptions.Timeout: The server did not respond in time.
        ValueError: The response body is not valid JSON.
        KeyError: The payload lacks "recipes", or an entry lacks "publisher".
    """
    # requests never times out on its own; bound the wait so one stalled
    # server cannot hang the whole crawl.
    rawJSON = requests.get(url, timeout=30)
    data = json.loads(rawJSON.text)

    for recipe in data["recipes"]:
        publisher = recipe["publisher"]
        # A recipe has exactly one publisher, so the branches are exclusive.
        if publisher == "All Recipes":
            AllRecipes.scrapeRecipe(recipe["source_url"], recipe["title"])
        elif publisher == "Tasty Kitchen":
            TastyKitchen.scrape(recipe["source_url"])
def scrape(url):
    """Fetch a JSON recipe listing and hand each recipe to its publisher's scraper.

    The response body is parsed as JSON and its "recipes" list is walked.
    Entries published by "All Recipes" go to ``AllRecipes.scrapeRecipe`` and
    entries published by "Tasty Kitchen" go to ``TastyKitchen.scrape``;
    entries from any other publisher are skipped.

    Args:
        url: Address of the JSON endpoint to request.

    Raises:
        requests.exceptions.Timeout: The server did not respond in time.
        ValueError: The response body is not valid JSON.
        KeyError: The payload lacks "recipes", or an entry lacks "publisher".
    """
    # requests never times out on its own; bound the wait so one stalled
    # server cannot hang the whole crawl.
    rawJSON = requests.get(url, timeout=30)
    data = json.loads(rawJSON.text)

    for recipe in data["recipes"]:
        publisher = recipe["publisher"]
        # A recipe has exactly one publisher, so the branches are exclusive.
        if publisher == "All Recipes":
            AllRecipes.scrapeRecipe(recipe["source_url"], recipe["title"])
        elif publisher == "Tasty Kitchen":
            TastyKitchen.scrape(recipe["source_url"])


# Manual invocation, kept disabled so importing this module does no network I/O:
# scrape("http://food2fork.com/api/search?key=b86b8857efb9349636cdf6f2e8f75503&q=sandwich&page=1")
# Driver script: crawl every sandwich search-result page of each recipe site
# and report how long each site took.

# NOTE(review): the API key is embedded in this URL; consider loading it from
# configuration rather than keeping it in source control.
food2forkUrl = "http://food2fork.com/api/search?key=b86b8857efb9349636cdf6f2e8f75503&q=sandwich&page=%d"


def _report_elapsed(started_at):
    """Print the number of seconds that have passed since ``started_at``.

    The message is formatted into a single string before printing so the
    call emits the same text under the Python 2 print statement and is also
    valid with the Python 3 print function.
    """
    print("Finshed executing after %s seconds" % (time.time() - started_at,))


# Scrape Food2Fork result pages 1 through 58.
start = time.time()
for x in range(1, 59):
    Food2Fork.scrape(food2forkUrl % x)
_report_elapsed(start)

# Scrape all the pages I could find with relevant sandwich recipes from allrecipes.com
# ("%%" is a literal percent sign; "%d" receives the page number.)
allRecipeUrls = [
    "http://allrecipes.com/search/default.aspx?qt=k&wt=sandwich&rt=r&origin=Recipe%%20Search%%20Results&Page=%d&vm=l&p34=SR_ListView",
    "http://allrecipes.com/search/default.aspx?qt=k&wt=sandwich&rt=r&origin=Search%%20Results&vm=l&p40=SR_ListView&hb=76&p34=SR_FilterByAppetizer&Page=%d",
    "http://allrecipes.com/search/default.aspx?qt=k&wt=sandwich&rt=r&origin=Search%%20Results&vm=l&p40=SR_ListView&hb=78&p34=SR_FilterByBreakfast&Page=%d",
    "http://allrecipes.com/search/default.aspx?qt=k&wt=sandwich&rt=r&origin=Search%%20Results&vm=l&p40=SR_ListView&hb=80&p34=SR_FilterByMainDish&Page=%d",
    "http://allrecipes.com/search/default.aspx?qt=k&wt=sandwich&rt=r&origin=Search%%20Results&vm=l&p40=SR_ListView&hb=96&p34=SR_FilterBySalad&Page=%d",
]
start = time.time()
for url in allRecipeUrls:
    # Pages 1 through 5 of each search listing.
    for x in range(1, 6):
        AllRecipes.scrape(url % x)
_report_elapsed(start)

# Scrape allthecooks.com
start = time.time()
for x in range(0, 8):
    # The "start" query parameter advances in steps of 19 (0, 19, ..., 133).
    AllTheCooks.scrape("http://www.allthecooks.com/search/?q=sandwich&start=%d&fullpath=&categories=&filters=&dietaryConstraints=&ordering=#sr" % (x * 19))
_report_elapsed(start)