def web_crawler(enter_url, limit=20):
    """
    Crawl a web site for recipes and insert them, with their ingredients,
    in the database.

    Starting from enter_url, pages are fetched one at a time with
    get_recipe(). Every page that yields a recipe (its result holds a
    'name' key) produces one insert request for the recipe and one insert
    request per ingredient not seen before. The links reported by each page
    are queued for later treatment. Once the crawl stops, the collected
    requests are executed, the ingredient select boxes of the search form
    are refreshed, and the recipe/ingredient links are recorded.

    Pages that fail with urllib2.HTTPError are skipped.

    @param enter_url base url of a web site (http://marmiton.org)
    @param limit limit number of recipes before ending the search
    """
    _base = enter_url
    url_to_treat = [enter_url]  # stack of urls still to treat
    url_treated = []  # urls already treated (or that failed to load)
    requests = []  # sql requests inserting recipes and ingredients
    ingr_list = []  # ingredients for which a request was already built
    recipe_info_list = []  # recipes kept for the recipe/ingredient links
    recipe_found = 0

    # Create a dictionary mapping each recipe type name to its id
    # (second column -> first column of the types table).
    type_id = {}
    for row in db_execute_out("SELECT * FROM types"):
        type_id[row[1]] = row[0]

    while url_to_treat and recipe_found < limit:
        current_url = url_to_treat.pop()
        try:
            # get the recipe in a dictionary
            res = get_recipe(current_url, _base)
        except urllib2.HTTPError:
            # The page could not be fetched. Skip it instead of falling
            # through: `res` would otherwise be undefined (first iteration)
            # or still hold the previous page, which would be handled twice.
            # Remember the url so other pages linking to it do not re-queue it.
            url_treated.append(current_url)
            continue

        if 'name' in res:
            # replace the type name by its id
            # NOTE(review): raises KeyError when the type is not in the
            # types table -- confirm get_recipe() only returns known types.
            res['type'] = type_id[res['type']]
            # sql request inserting the recipe
            requests.append(get_recipe_request(res))
            # sql requests inserting the ingredients not seen yet
            for _ingr in res['ingredients']:
                if _ingr not in ingr_list:
                    requests.append(get_ingr_request(_ingr))
                    ingr_list.append(_ingr)
            # show the number of recipes found so far
            recipe_found += 1
            print('{0}/{1} recipes found'.format(str(recipe_found), str(limit)))
            # keep recipe info to fill the recipe_has_ingredients table
            recipe_info_list.append(res)

        # Add the urls found on the page to the stack of urls to treat
        url_treated.append(res['url'])
        for new_url in res['add_urls']:
            if new_url not in url_treated and new_url not in url_to_treat:
                url_to_treat.append(new_url)

    # record all the recipes and ingredients in the database
    db_execute_in(requests)
    add_options_to_form('ingredients', 'search_form_path', 'select#ingr-like')
    add_options_to_form('ingredients', 'search_form_path', 'select#ingr-dislike')
    # the links are built after the first batch has been executed
    requests = get_recipe_ingr_request(recipe_info_list)
    db_execute_in(requests)
from db.db_module import add_user, db_execute_in
from formatter import format_recipes, format_form_result
# from r_engine import recommander
from recommandation_engine import get_recipes

# Show tracebacks in the browser while the script runs as a CGI.
cgitb.enable()

# Read the submitted form.
FORM = cgi.FormContentDict()

# Register the user by e-mail address and keep the returned id as a string.
MAIL = FORM['email'][0]
USER_ID = str(add_user(MAIL)[0])

# Open a search entry for this user; no recipe is attached to it yet.
REQ = "INSERT INTO search(user_id, recipe_id) VALUES ({}, NULL);".format(USER_ID)
db_execute_in([REQ])

# Shape the form content the way the recommendation engine expects it.
CLEAN_FORM = format_form_result(FORM, USER_ID)

# Ask the engine for recipes matching the user's choices
# (previous engine call: RECOMMANDATION = recommander(CLEAN_FORM)).
RECOMMANDATION = get_recipes(
    *(CLEAN_FORM[_key]
      for _key in ('user_id', 'recipe_type', 'ingr_like', 'ingr_dislike'))
)

# Turn the recommendation into its displayable form.
RESULT = format_recipes(RECOMMANDATION)
from db.db_module import db_execute_in import re cgitb.enable() # Retrieving informations from the form FORM = cgi.FormContentDict() # search : add recipe_id if 'search' in FORM.keys(): RECI_ID = re.sub(r'_url', '', FORM['search'][0]) REQ = """ UPDATE search SET recipe_id={0} WHERE user_id={1} AND recipe_id IS NULL; """.format(RECI_ID.split('_')[1], RECI_ID.split('_')[0]) db_execute_in([REQ]) # fav if 'fav' in FORM.keys(): FAV_ID = re.sub(r'fav_', '', FORM['fav'][0]) REQ = """ INSERT INTO user_has_favorite_recipes VALUES ({}, {}); """.format(FAV_ID.split('_')[0], FAV_ID.split('_')[1]) db_execute_in([REQ]) # unfav if 'unfav' in FORM.keys(): UNFAV_ID = re.sub(r'unfav_', '', FORM['unfav'][0]) REQ = """ DELETE FROM user_has_favorite_recipes