def predict_rating_naive(user_id, business_id, city):
    """
    Predict the rating `user_id` would give `business_id` in `city`.

    The prediction is a similarity-weighted mean over every business in
    ``BUSINESSES[city]`` whose ``cos_similarity`` to the target business is
    positive. A neighbour contributes the user's own star rating when the
    user reviewed it (taken from ``REVIEWS[city]``) and its ``'stars'``
    value otherwise.

    Previously the numerator summed *unweighted* ratings of the target
    business itself while the denominator summed similarities, so the
    result was not a mean and could fall outside the rating scale.

    Args:
        user_id: id of the user to predict for.
        business_id: id of the business whose rating is predicted.
        city: key into ``BUSINESSES`` and ``REVIEWS``.

    Returns:
        The weighted mean rating. When no business has a positive
        similarity to the target, the target's own ``'stars'`` value is
        returned instead of raising ``ZeroDivisionError``.
    """
    # Loop-invariant: fetch the target business once.
    target = data.get_business(city, business_id)

    # Ratings this user already gave in this city, keyed by business id.
    # If the user reviewed a business more than once, the last review wins.
    user_reviews = {
        review['business_id']: review['stars']
        for review in REVIEWS[city]
        if review['user_id'] == user_id
    }

    total_rating = 0.0
    total_weight = 0.0
    for neighbour in BUSINESSES[city]:
        weight = cos_similarity(target, neighbour, city)
        # Written as "not >" so that a NaN similarity is skipped as well.
        if not weight > 0.0:
            continue
        rating = user_reviews.get(neighbour['business_id'], neighbour['stars'])
        total_rating += weight * rating
        total_weight += weight

    if total_weight == 0.0:
        # No comparable business: fall back to the business' own average.
        return target['stars']
    return total_rating / total_weight
def recommend(user_id=None, business_id=None, city=None, n=10):
    """
    Build a list of recommended businesses for a city.

    A random city from ``CITIES`` is used when `city` is not given. Ratings
    are predicted with ``predict_ratings`` from a category similarity
    matrix, the `n` rows with the highest ``'predicted rating'`` are looked
    up with ``get_business``, and one randomly chosen business of the city
    is appended at the end.

    `business_id` is accepted but not used by this implementation.

    Returns:
        A list of the values returned by ``get_business``.
    """
    if not city:
        city = random.choice(CITIES)

    ratings = dfmakersuited(city, user_id)
    category_utility = pivot_genres(dfmakercategories())
    category_similarity = create_similarity_matrix_categories(category_utility)
    rating_utility = pivot_ratings(ratings)
    predictions = predict_ratings(
        category_similarity,
        rating_utility,
        ratings[['user_id', 'business_id', 'rating']],
    )

    # Keep the n best predictions; going through a dict keyed by
    # business_id collapses repeated ids while preserving the ranking.
    best_rows = predictions.sort_values(
        by='predicted rating', ascending=False).iloc[0:n]
    best_by_id = best_rows.set_index(
        'business_id')['predicted rating'].to_dict()

    recommendations = [get_business(city, found_id) for found_id in best_by_id]

    # NOTE(review): the indentation of the original was lost; the random
    # extra business is assumed to be appended once, after the loop over
    # the predicted businesses - confirm against the original file.
    loaded = load([city], "business")
    random_pick = random.choice(list(loaded.values())[0])
    recommendations.append(get_business(city, random_pick["business_id"]))

    return recommendations
def business(city, id):
    """Render the business page: the business, its reviews and 10 recommendations.

    The logged-in user (if any) is read from the session under ``"user"``.
    The business is looked up with the lower-cased `city` and `id`; reviews
    and recommendations are then requested using the lower-cased city
    stored on the business itself.
    """
    # The session holds the logged-in user, or nothing for anonymous visits.
    current_user = session.get("user")
    current_user_id = None
    if current_user:
        current_user_id = current_user["user_id"]

    found = data.get_business(city.lower(), id)
    found_city = found["city"].lower()

    page_reviews = data.get_reviews(city=found_city,
                                    business_id=found["business_id"])

    suggestions = recommender.recommend3(user_id=current_user_id,
                                         business_id=id,
                                         city=found_city,
                                         n=10)

    return render_template("business.html",
                           business=found,
                           recommendations=suggestions,
                           reviews=page_reviews,
                           user=current_user)
def recommend(user_id=None, business_id=None, city=None, n=10):
    """
    Return up to `n` recommendations as a list of business records.

    * With a `business_id` (business page): the `n` businesses whose
      category similarity to that business is highest, excluding the
      business itself.
    * Without a `user_id` (anonymous home page): a random sample of
      businesses from `city`.
    * With a `user_id` (home page): the `n` businesses in `city` with the
      highest item-based predicted rating that the user has not rated yet.
      Users without a column in the utility matrix get the random sample.

    A random city from ``CITIES`` is used when `city` is missing and no
    `business_id` is given. Each returned element is whatever
    ``get_business`` (or ``BUSINESSES[city]``) holds for that business.

    Both personalised branches also print an MSE figure computed on a test
    split; that evaluation is kept from the original implementation.
    """
    # Recommendation for business page
    if business_id:
        print("Business id found!", business_id)
        category_utility = pivot_categories(
            extract_genres(make_business_matrix(city)))
        df_similarity_genres = create_similarity_matrix_categories(
            category_utility)

        # Remove the business itself by label instead of assuming it sorts
        # first (ties at the top similarity would break that assumption),
        # and honour n instead of a hard-coded 10.
        df_categories = (
            df_similarity_genres[business_id]
            .drop(labels=[business_id], errors="ignore")
            .sort_values(ascending=False)
            .head(n)
        )
        print(df_categories)

        top_n = [get_business(city, i) for i in df_categories.index.values]

        # MSE testing
        _, df_ratings_test = split_data(make_rating_matrix(), d=0.9)
        utility_matrix = pivot_ratings(make_rating_matrix())
        predicted_ratings = predict_ratings(
            df_similarity_genres, utility_matrix,
            df_ratings_test[['user_id', 'business_id', 'rating']])
        print("MSE content based:", mse(predicted_ratings))

        return top_n

    # Recommendation for home page of user
    if not city:
        city = random.choice(CITIES)

    def random_businesses():
        # random.sample raises ValueError when asked for more than exists.
        pool = BUSINESSES[city]
        return random.sample(pool, min(n, len(pool)))

    # If user is not logged in return random businesses
    if not user_id:
        return random_businesses()

    utility_matrix = pivot_ratings(make_rating_matrix())
    similarity = create_similarity_matrix_cosine(utility_matrix)

    # MSE testing
    _, df_ratings_test = split_data(make_rating_matrix(), d=0.9)
    df_predicted_cf_item_based = predict_ratings(
        similarity, utility_matrix,
        df_ratings_test[['user_id', 'business_id', 'rating']])
    print("MSE item based:", mse(df_predicted_cf_item_based))

    # A user without a column in the utility matrix cannot be predicted for.
    if user_id not in utility_matrix.columns:
        return random_businesses()

    # Only this user's predictions are needed. The previous version filled
    # the matrix for every user x business pair through chained indexing
    # (frame[user][business] = ...), which is slow and is not guaranteed
    # to write into the frame at all.
    already_rated = set(utility_matrix[user_id].dropna().index)
    scored = []
    for candidate in BUSINESSES[city]:
        candidate_id = candidate["business_id"]
        if candidate_id in already_rated:
            continue
        score = predict_ids(similarity, utility_matrix, user_id, candidate_id)
        # NaN is the only value not equal to itself; skip it because it
        # cannot be ranked.
        if score == score:
            scored.append((score, candidate_id))

    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [get_business(city, found_id) for _, found_id in scored[:n]]
def recommend(user_id=None, business_id=None, city=None, n=10):
    """
    Return up to `n` recommendations as a list of business records.

    * With a `business_id`: the `n` businesses from the category-based
      neighbourhood of that business (the business itself excluded),
      returned as row dicts of ``BUSINESSES[city]`` ordered by ``'stars'``
      descending.
    * Otherwise: the city is taken from the first entry in ``USERS`` that
      contains `user_id` (this overrides the `city` argument), or chosen at
      random from ``CITIES`` when still unknown. Businesses belonging to
      one of the five most common categories are scored with ``la_place``
      and the `n` with the highest ``'lapscore'`` are returned via
      ``get_business``.
    """
    if business_id:
        df_bus = pd.DataFrame(BUSINESSES[city])
        df_bus_utility = pivot_genres(extract_genres(df_bus))
        df_similarity_categories = create_similarity_matrix_categories(
            df_bus_utility)

        neighbourhood = select_neighborhood(
            df_similarity_categories, df_bus_utility, business_id)
        # errors="ignore": do not fail when the neighbourhood already
        # leaves out the business itself.
        top_ids = (
            neighbourhood
            .drop(labels=[business_id], errors="ignore")
            .sort_values(ascending=False)[:n]
            .index
        )

        best_first = df_bus[df_bus['business_id'].isin(top_ids)].sort_values(
            by='stars', ascending=False)
        return list(best_first.transpose().to_dict().values())

    # Select the city based on the user. Stop at the first match: the old
    # `break` only left the inner loop, so every remaining city was still
    # scanned and the last match silently won.
    if user_id:
        home_city = next(
            (stad for stad, users in USERS.items()
             if any(user['user_id'] == user_id for user in users)),
            None)
        if home_city is not None:
            city = home_city

    # Otherwise pick a random city.
    if not city:
        city = random.choice(CITIES)

    df_bus = pd.DataFrame(BUSINESSES[city])
    df_bus_cats = extract_genres(df_bus)
    df_rev = pd.DataFrame(REVIEWS[city])
    df_bus_utility = pivot_genres(df_bus_cats)

    # The five categories with the largest column sums.
    category_totals = {
        category: df_bus_utility[category].sum()
        for category in df_bus_utility
    }
    top5_cats = sorted(category_totals.items(), key=lambda item: item[1])[-5:]
    names = [name for name, _ in top5_cats]

    # Keep the businesses that have at least one of those categories.
    in_top_categories = df_bus_utility[names].loc[
        ~(df_bus_utility[names] == 0).all(axis=1)]
    candidate_ids = in_top_categories.index.values.tolist()

    df_rev_with_business = df_rev[df_rev['business_id'].isin(candidate_ids)]
    df_rev_with_business = df_rev_with_business.drop(
        columns=['cool', 'date', 'funny', 'text', 'useful', 'user_id'
                 ]).sort_values(by=['business_id'])

    ratings = la_place(df_rev_with_business).sort_values(
        'lapscore', ascending=False)

    # Honour n instead of a hard-coded 10.
    return [get_business(city, found_id)
            for found_id in ratings['business_id'][:n]]
def recommend(user_id=None, business_id=None, city=None, n=10):
    """
    Return at most `n` recommendations as a list of dicts.

    A random city from ``CITIES`` is used when `city` is not given. The
    strategy depends on which ids are supplied:

    * `user_id` and `business_id`: up to ``n // 2`` businesses with a
      positive ``cos_similarity`` to the given business, ranked by
      ``predict_rating`` for the user; the rest comes from
      ``find_best_businesses_by_category`` and, if still short, from the
      best rated businesses of the city. The result is shuffled.
    * only `business_id`: up to ``n // 2`` most similar businesses, then
      ``find_businesses_by_category`` results, then best rated businesses.
      The result is shuffled.
    * only `user_id`: the `n` businesses with the highest predicted rating.
    * neither: the `n` businesses with the highest ``'stars'``.

    Each recommendation has the form::

        {
            business_id:str
            stars:str
            name:str
            city:str
            adress:str
        }

    The ``adress`` spelling is kept on purpose: it is part of the returned
    structure and existing consumers may depend on it.
    """
    if not city:
        city = random.choice(CITIES)

    city_businesses = list(BUSINESSES[city])

    def top_rated(count, skip_ids):
        """Best `count` businesses by 'stars' whose id is not in `skip_ids`."""
        if count <= 0:
            return []
        ranked = sorted(
            (b for b in city_businesses if b['business_id'] not in skip_ids),
            key=lambda b: b['stars'],
            reverse=True)
        return ranked[:count]

    def positive_neighbours(target):
        """(similarity, business) pairs with similarity > 0, target excluded."""
        pairs = []
        for other in city_businesses:
            if other == target or other['business_id'] == business_id:
                continue
            similarity = cos_similarity(target, other, city)
            if similarity > 0.0:
                pairs.append((similarity, other))
        return pairs

    businesses_to_recommend = []

    if user_id and business_id:
        # Fetch the viewed business once instead of inside every loop step.
        target = data.get_business(city, business_id)

        # Similar businesses, ranked by the rating predicted for this user.
        predicted = sorted(
            ((predict_rating(user_id, other['business_id'], city), other)
             for _, other in positive_neighbours(target)),
            key=lambda pair: pair[0],
            reverse=True)
        similar_businesses = [other for _, other in predicted][:n // 2]

        # Fill up with businesses sharing a category with the given one.
        already_found_ids = [b['business_id'] for b in similar_businesses]
        already_found_ids.append(target['business_id'])
        fillers = list(find_best_businesses_by_category(
            user_id, target['categories'].split(', '), already_found_ids,
            city, n - len(similar_businesses)))

        # Still short: top up with well rated businesses. The old code
        # *appended the whole list as one element*, which crashed later on
        # business['business_id']; extend() adds the businesses themselves.
        chosen_ids = set(already_found_ids)
        chosen_ids.update(b['business_id'] for b in fillers)
        fillers.extend(
            top_rated(n - len(similar_businesses) - len(fillers), chosen_ids))

        similar_businesses.extend(fillers)
        # Shuffle to intersperse the two kinds of suggestions.
        random.shuffle(similar_businesses)
        businesses_to_recommend = similar_businesses

    elif business_id:
        target = data.get_business(city, business_id)

        ranked_neighbours = sorted(
            positive_neighbours(target),
            key=lambda pair: pair[0],
            reverse=True)
        similar_businesses = [other for _, other in ranked_neighbours][:n // 2]

        # Filter first, then slice. Slicing one extra and filtering
        # afterwards could leave n + 1 results whenever the viewed business
        # was not inside the slice. Businesses already chosen are skipped so
        # nothing is recommended twice.
        chosen_ids = {b['business_id'] for b in similar_businesses}
        chosen_ids.add(business_id)
        fillers = []
        wanted = n - len(similar_businesses)
        for val in find_businesses_by_category(
                target['categories'].split(', '), city):
            if len(fillers) >= wanted:
                break
            candidate = val[0]
            if candidate['business_id'] in chosen_ids:
                continue
            chosen_ids.add(candidate['business_id'])
            fillers.append(candidate)

        fillers.extend(top_rated(wanted - len(fillers), chosen_ids))

        similar_businesses.extend(fillers)
        # Shuffle to intersperse the two kinds of suggestions.
        random.shuffle(similar_businesses)
        businesses_to_recommend = similar_businesses

    elif user_id:
        # Predict the rating the given user would give each business and
        # keep the businesses with the highest predictions.
        predicted = sorted(
            ((predict_rating(user_id, candidate['business_id'], city),
              candidate) for candidate in city_businesses),
            key=lambda pair: pair[0],
            reverse=True)
        businesses_to_recommend = [candidate for _, candidate in predicted][:n]

    else:
        businesses_to_recommend = top_rated(n, ())

    # Restructure the found businesses into the documented format.
    return [
        {
            'business_id': found['business_id'],
            'stars': str(found['stars']),
            'name': found['name'],
            'city': found['city'],
            'adress': found['address'],
        }
        for found in businesses_to_recommend
    ]
def _rank_by_category_overlap(city, categories, exclude_id, keep_zero):
    """Rank the businesses of `city` by category overlap with `categories`.

    The similarity of a business is the number of shared categories divided
    by the larger of the two category counts. Businesses are ordered by
    similarity, then ``'stars'``, then ``'review_count'``, all descending.

    Returns:
        ``(ranked_ids, similarities)``: the ordered business ids without
        `exclude_id`, and a dict mapping business id to similarity.
        Zero-similarity businesses are only included when `keep_zero`.
    """
    wanted = set(categories)
    similarities = {}
    sort_keys = {}
    for bus in BUSINESSES[city]:
        own = bus['categories'].split(", ")
        similarity = len(set(own) & wanted) / max(len(own), len(categories))
        if similarity != 0 or keep_zero:
            similarities[bus['business_id']] = similarity
            sort_keys[bus['business_id']] = (
                similarity, bus['stars'], bus['review_count'])

    # One stable sort on (similarity, stars, review_count) gives the order
    # of "group equal similarities, then sort each group", without the old
    # grouping loop that never emitted the final group.
    ranked = sorted(sort_keys, key=sort_keys.get, reverse=True)
    return [found for found in ranked if found != exclude_id], similarities


def _print_recommendation_rows(rows, similarities):
    """Print one debug line per recommended business (terminal check)."""
    for row in rows:
        print(row['name'], " | ", "similarity = ",
              similarities[row['business_id']], " | ", "Average rating =",
              row['stars'], " | ", "Review count =", row['review_count'],
              " | ", "City =", row['city'], "\n")


def _print_item_based_mse_check(utility_matrix):
    """Print the MSE of item-based predictions next to a random baseline.

    For testing purposes only: the melted utility matrix is split 80/20,
    predictions for the test part are compared with uniformly random ones.
    """
    re_frame = utility_matrix.reset_index()
    re_frame = re_frame.melt(id_vars=['index'],
                             var_name='users',
                             value_name='rating')
    print("reframed", list(re_frame.columns.values))
    print(re_frame)
    train, test = train_test_split(re_frame, test_size=0.2)
    print(len(train), len(test))
    train = train.dropna()
    test = test.dropna()
    print(test)
    utility_matrix_train = data.pivot_ratings(train)
    mean_center_train = data.mean_center_columns(utility_matrix_train)
    sim_cos_train = data.create_similarity_matrix_cosine(mean_center_train)
    predicted_ratings = data.predict_ratings_item_based(
        sim_cos_train, utility_matrix_train, test)
    predicted_ratings = predicted_ratings[
        predicted_ratings['predicted rating'] != 0]
    random_test = predicted_ratings.copy()
    random_test['predicted rating'] = np.random.uniform(
        0.5, 5.0, random_test.shape[0])
    print(predicted_ratings)
    print("RANDOM", random_test)
    print(data.mse(predicted_ratings))
    print(data.mse(random_test))


def recommend(user_id=None, business_id=None, city=None, n=10, scenario=None):
    """
    Return up to `n` recommendations as a list of business dicts.

    A random city from ``CITIES`` is used when `city` is not given.
    `scenario` selects the strategy:

    * 2: item-based collaborative filtering for `user_id` in the city where
      that user wrote most reviews; businesses the user has not rated are
      ranked by predicted rating. Falls back to a random sample when the
      user has no reviews or no column in the utility matrix.
    * 3: businesses of `city` sharing categories with `business_id`, ranked
      by category overlap, then stars, then review count. When fewer than
      `n` are found, the list is topped up from a randomly chosen other
      city that has enough businesses.
    * anything else (including 1, 4 and the default ``None``): a random
      sample of businesses from `city`.

    Scenarios 2 and 3 print debugging output to the terminal.
    """
    if not city:
        city = random.choice(CITIES)

    def random_businesses():
        # random.sample raises ValueError when asked for more than exists.
        pool = BUSINESSES[city]
        return random.sample(pool, min(n, len(pool)))

    if scenario == 2:
        print("start recommending scenario 2")

        # Count the reviews the user placed in each city. A separate loop
        # name is used so the `city` argument is not overwritten.
        reviews_per_city = collections.Counter(
            review_city
            for review_city, reviews in REVIEWS.items()
            for review in reviews
            if review['user_id'] == user_id)
        if not reviews_per_city:
            # No reviews at all: nothing to base a prediction on.
            return random_businesses()

        # The most reviewed city is taken to be where the user comes from.
        most_reviewed_city = reviews_per_city.most_common()[0][0]
        print("city determined", most_reviewed_city)
        utility_matrix = data.create_frame(most_reviewed_city)
        print(" utility matrix created")
        print("utility", utility_matrix)

        _print_item_based_mse_check(utility_matrix)

        if user_id not in utility_matrix.columns:
            return random_businesses()

        # Rows without a rating in the user's column.
        not_seen = list(utility_matrix.index[utility_matrix[user_id].isna()])
        print("determined what user hasn't seen yet")
        print(not_seen)

        mean_centered_matrix = data.mean_center_columns(utility_matrix)
        print("matrix mean centered")
        similarity = data.create_similarity_matrix_cosine(mean_centered_matrix)
        print("similarity", similarity)

        pred = {}
        for item in not_seen:
            neighborhood = data.select_neighborhood(similarity, utility_matrix,
                                                    user_id, item)
            rating_prediction = data.weighted_mean(neighborhood,
                                                   utility_matrix, user_id)
            # Predictions of exactly 0 are left out, as before.
            if rating_prediction != 0:
                pred[item] = rating_prediction
        print(pred, len(pred))

        sorted_pred_rating = sorted(pred.items(),
                                    key=lambda kv: kv[1],
                                    reverse=True)
        print(sorted_pred_rating)
        item_col_rec = [item for item, _ in sorted_pred_rating]
        print(item_col_rec)

        # Return what is available instead of nothing when fewer than n
        # predictions exist.
        return [data.get_business(most_reviewed_city, found)
                for found in item_col_rec[:n]]

    if scenario == 3:
        print("start recommending scenario 3")

        # Categories of the selected business, as a list.
        business_data = data.get_business(city, business_id)
        categories_split = business_data['categories'].split(", ")

        ranked_ids, similarities = _rank_by_category_overlap(
            city, categories_split, business_id, keep_zero=False)
        recommendation = [data.get_business(city, found)
                          for found in ranked_ids[:n]]

        # For testing purposes. Check the terminal
        print("\n")
        print(
            "NAME | SIMILARITY | AVERAGE RATING | REVIEW COUNT | CITY \n")
        _print_recommendation_rows(recommendation, similarities)

        needed_rec = n - len(recommendation)
        if needed_rec > 0:
            # Top up from another city holding enough businesses. Choosing
            # from the eligible cities only cannot loop forever when no
            # city qualifies.
            eligible = [stad for stad in CITIES
                        if stad != city and len(BUSINESSES[stad]) >= needed_rec]
            if eligible:
                other_city = random.choice(eligible)
                # Zero-overlap businesses are allowed here so the list can
                # always be completed.
                other_ids, other_similarities = _rank_by_category_overlap(
                    other_city, categories_split, business_id, keep_zero=True)
                extra = [data.get_business(other_city, found)
                         for found in other_ids[:needed_rec]]
                _print_recommendation_rows(extra, other_similarities)
                recommendation.extend(extra)

        return recommendation

    if scenario == 1:
        # TODO: scenario 1 was never finished. The original only collected
        # the businesses with more than 10 reviews and all user ids, used
        # neither, and returned None. It now uses the random fallback.
        print("start recommending scenario 1")
    elif scenario == 4:
        print("start recommending scenario 4")

    return random_businesses()