Esempio n. 1
0
def predict_rating_naive(user_id, business_id, city):
    """
    Predict the rating a user would give a business in a city.

    The prediction is the similarity-weighted mean over every business in
    the city that has a positive `cos_similarity` with the target business.
    For each such neighbour the user's own review score is used when the
    user reviewed it; otherwise the neighbour's average 'stars' is used.

    Args:
        user_id: id of the user to predict for.
        business_id: id of the business whose rating is predicted.
        city: key into BUSINESSES / REVIEWS.

    Returns:
        The predicted rating. When no business has a positive similarity
        with the target, the target's own average 'stars' is returned
        instead of dividing by zero.
    """
    # The target record is loop-invariant, so look it up once.
    target = data.get_business(city, business_id)

    # Stars given by this user, keyed by the reviewed business id.
    user_reviews = {
        review['business_id']: review['stars']
        for review in REVIEWS[city]
        if review['user_id'] == user_id
    }

    total_rating = 0.0
    total_weight = 0.0
    for neighbour in BUSINESSES[city]:
        similarity = cos_similarity(target, neighbour, city)
        if similarity <= 0.0:
            continue
        # NOTE(review): assumes business records carry 'business_id' and
        # 'stars' keys, like the record returned by data.get_business.
        rating = user_reviews.get(neighbour['business_id'],
                                  neighbour['stars'])
        # Weight the rating itself, not only the denominator.
        total_rating += similarity * rating
        total_weight += similarity

    if total_weight == 0.0:
        return target['stars']
    return total_rating / total_weight
Esempio n. 2
0
def recommend(user_id=None, business_id=None, city=None, n=10):
    """
    Build content-based recommendations for a user in a city.

    Ratings produced by `dfmakersuited(city, user_id)` are combined with a
    category similarity matrix to predict ratings; the businesses with the
    highest predicted rating are returned, followed by one randomly chosen
    business from the same city.

    Args:
        user_id: optional user id, forwarded to `dfmakersuited`.
        business_id: accepted for interface compatibility; not used here.
        city: optional city; a random entry of CITIES is used when falsy.
        n: number of top predicted rows to keep.

    Returns:
        A list of business records as returned by `get_business`: at most
        n predicted businesses plus one random business (so up to n + 1
        items). A recommendation is a dictionary in the form of:
            {
                business_id:str
                stars:str
                name:str
                city:str
                adress:str
            }
    """
    city = city or random.choice(CITIES)

    ratings_frame = dfmakersuited(city, user_id)
    category_similarity = create_similarity_matrix_categories(
        pivot_genres(dfmakercategories()))
    ratings_utility = pivot_ratings(ratings_frame)

    predictions = predict_ratings(
        category_similarity, ratings_utility,
        ratings_frame[['user_id', 'business_id', 'rating']])

    # Highest predicted ratings first, capped at n rows.
    best_rows = predictions.sort_values(by='predicted rating',
                                        ascending=False).iloc[0:n]
    # Going through a dict collapses repeated business ids.
    rating_by_business = best_rows.set_index(
        'business_id')['predicted rating'].to_dict()

    recommendlist = [get_business(city, found_id)
                     for found_id in rating_by_business]

    # Finish with one random business from the same city.
    city_businesses = list(load([city], "business").values())[0]
    recommendlist.append(
        get_business(city, random.choice(city_businesses)["business_id"]))
    return recommendlist
Esempio n. 3
0
def business(city, id):
    """
    Render the page of a single business.

    The page shows the business itself, its reviews and 10 recommendations
    obtained from `recommender.recommend3`.

    Args:
        city: city of the business; lower-cased before the lookup.
        id: business id.

    Returns:
        The rendered "business.html" template.
    """
    # The logged-in user, if any, comes from the session.
    user = session.get("user")
    user_id = None
    if user:
        user_id = user["user_id"]

    record = data.get_business(city.lower(), id)
    # Reviews and recommendations are keyed on the record's own city.
    city_key = record["city"].lower()

    reviews = data.get_reviews(city=city_key,
                               business_id=record["business_id"])
    recommendations = recommender.recommend3(user_id=user_id,
                                             business_id=id,
                                             city=city_key,
                                             n=10)

    return render_template("business.html",
                           business=record,
                           recommendations=recommendations,
                           reviews=reviews,
                           user=user)
Esempio n. 4
0
def recommend(user_id=None, business_id=None, city=None, n=10):
    """
    Returns n recommendations as a list of dicts.
    Optionally takes in a user_id, business_id and/or city.

    Behaviour by input:
      * business_id given: the n businesses whose category similarity to
        that business is highest (content based), excluding the business
        itself.
      * no user_id: n random businesses from the city.
      * user_id given: the n businesses with the highest item-based
        collaborative-filtering prediction that the user has not rated.

    In the two model-based paths the mean squared error on a hold-out
    split is printed to the terminal as a diagnostic.

    A recommendation is a dictionary in the form of:
        {
            business_id:str
            stars:str
            name:str
            city:str
            adress:str
        }
    """

    # Recommendation for business page
    if business_id:
        print("Business id found!", business_id)
        category_matrix = pivot_categories(
            extract_genres(make_business_matrix(city)))
        df_similarity_genres = create_similarity_matrix_categories(
            category_matrix)
        # Drop the queried business by label: relying on it being the first
        # row after sorting breaks when another business ties with it.
        df_categories = (df_similarity_genres[business_id]
                         .drop(business_id, errors='ignore')
                         .sort_values(ascending=False)
                         .head(n))
        print(df_categories)
        top_n = [get_business(city, i) for i in df_categories.index.values]

        # MSE testing
        _, df_ratings_test = split_data(
            make_rating_matrix(), d=0.9)
        utility_matrix = pivot_ratings(make_rating_matrix())
        predicted_ratings = predict_ratings(
            df_similarity_genres, utility_matrix,
            df_ratings_test[['user_id', 'business_id', 'rating']])
        mse_genres = mse(predicted_ratings)
        print("MSE content based:", mse_genres)

        return top_n

    # Recommendation for home page of user
    if not city:
        city = random.choice(CITIES)

    # If user is not logged in return random business
    if not user_id:
        return random.sample(BUSINESSES[city], n)

    utility_matrix = pivot_ratings(make_rating_matrix())
    similarity = create_similarity_matrix_cosine(utility_matrix)

    # MSE testing
    _, df_ratings_test = split_data(
        make_rating_matrix(), d=0.9)
    df_predicted_cf_item_based = predict_ratings(
        similarity, utility_matrix,
        df_ratings_test[['user_id', 'business_id', 'rating']])
    mse_item = mse(df_predicted_cf_item_based)
    print("MSE item based:", mse_item)

    # Only this user's column is read below, so predict for this user only.
    # Working on a private copy of the column and assigning through .loc
    # avoids chained indexing on the utility matrix.
    user_scores = utility_matrix[user_id].copy()
    for business in BUSINESSES[city]:
        candidate_id = business["business_id"]
        user_scores.loc[candidate_id] = predict_ids(
            similarity, utility_matrix, user_id, candidate_id)

    # Remove already rated business
    already_rated = utility_matrix[user_id].dropna()
    best = (user_scores.sort_values(ascending=False)
            .drop(already_rated.index.values)
            .head(n))

    # Get the top businesses with info
    return [get_business(city, i) for i in best.index.values]
Esempio n. 5
0
def recommend(user_id=None, business_id=None, city=None, n=10):
    """
    Returns n recommendations as a list of dicts.
    Optionally takes in a user_id, business_id and/or city.

    Behaviour by input:
      * business_id given: the n businesses of `city` closest to it in
        category similarity, returned sorted by 'stars' (descending).
      * otherwise: the city is the first city in USERS that lists
        `user_id` (falling back to the `city` argument, then to a random
        city). Within that city the businesses belonging to one of the five
        most frequent categories are ranked by the 'lapscore' computed by
        `la_place` over their reviews, and the n best are returned.

    A recommendation is a dictionary in the form of:
        {
            business_id:str
            stars:str
            name:str
            city:str
            adress:str
        }
    """
    if business_id:
        df_bus = pd.DataFrame(BUSINESSES[city])
        df_bus_utility = pivot_genres(extract_genres(df_bus))
        df_similarity_categories = create_similarity_matrix_categories(
            df_bus_utility)
        neighbourhood = select_neighborhood(df_similarity_categories,
                                            df_bus_utility, business_id)
        top_ids = neighbourhood.drop(business_id).sort_values(
            ascending=False)[:n].index
        top_rows = df_bus[df_bus['business_id'].isin(top_ids)].sort_values(
            by='stars', ascending=False)
        # One dict per row, without transposing the whole frame.
        return top_rows.to_dict('records')

    # Select the city based on the user: stop at the first city that lists
    # this user.
    if user_id:
        city = next(
            (candidate for candidate, users in USERS.items()
             if any(user['user_id'] == user_id for user in users)),
            city)

    # Otherwise pick a random city.
    if not city:
        city = random.choice(CITIES)

    df_bus = pd.DataFrame(BUSINESSES[city])
    df_rev = pd.DataFrame(REVIEWS[city])
    df_bus_utility = pivot_genres(extract_genres(df_bus))

    # The five categories with the highest column totals.
    category_totals = {
        category: df_bus_utility[category].sum()
        for category in df_bus_utility
    }
    top5_cats = sorted(category_totals.items(), key=lambda x: x[1])[-5:]
    names = [name for name, _ in top5_cats]

    # Keep the businesses that have at least one of those categories.
    in_top_categories = df_bus_utility[names].loc[~(
        df_bus_utility[names] == 0).all(axis=1)]
    candidate_ids = in_top_categories.index.values.tolist()

    df_rev_with_business = df_rev[df_rev['business_id'].isin(candidate_ids)]
    df_rev_with_business = df_rev_with_business.drop(
        columns=['cool', 'date', 'funny', 'text', 'useful', 'user_id'
                 ]).sort_values(by=['business_id'])
    ratings = la_place(df_rev_with_business).sort_values('lapscore',
                                                         ascending=False)

    # Honour n instead of a fixed top 10.
    return [get_business(city, found_id)
            for found_id in ratings['business_id'][:n]]
Esempio n. 6
0
def _to_recommendation(business):
    """Project a business record onto the documented recommendation shape."""
    return {
        'business_id': business['business_id'],
        'stars': str(business['stars']),
        'name': business['name'],
        'city': business['city'],
        'adress': business['address'],
    }


def _top_rated(city, excluded_ids, count):
    """Return up to `count` best-starred businesses of `city` not in `excluded_ids`."""
    if count <= 0:
        return []
    candidates = [b for b in BUSINESSES[city]
                  if b['business_id'] not in excluded_ids]
    return sorted(candidates, key=lambda b: b['stars'], reverse=True)[:count]


def _positive_neighbours(target, city):
    """Return (similarity, business) pairs with similarity > 0 to `target`, excluding it."""
    target_id = target['business_id']
    scored = ((cos_similarity(target, other, city), other)
              for other in BUSINESSES[city]
              if other['business_id'] != target_id)
    return [(similarity, other) for similarity, other in scored
            if similarity > 0.0]


def recommend(user_id=None, business_id=None, city=None, n=10):
    """
    Returns n recommendations as a list of dicts.
    Optionally takes in a user_id, business_id and/or city.

    Behaviour by input:
      * business_id given: up to n // 2 businesses come from the
        positive-similarity neighbourhood of that business, ranked by the
        predicted rating for `user_id` when a user is given and by
        similarity otherwise. The remainder is filled with businesses
        sharing a category with it and, if still short, with the
        best-starred businesses of the city. The result is shuffled to
        intersperse the kinds of suggestion; it never contains the given
        business or duplicates.
      * only user_id given: the n businesses with the highest predicted
        rating for that user.
      * neither: the n best-starred businesses of the city.

    A recommendation is a dictionary in the form of:
        {
            business_id:str
            stars:str
            name:str
            city:str
            adress:str
        }
    """
    if not city:
        city = random.choice(CITIES)

    if business_id:
        # The target record is needed repeatedly, so look it up once.
        target = data.get_business(city, business_id)
        neighbours = _positive_neighbours(target, city)
        if user_id:
            ranked = sorted(
                ((predict_rating(user_id, other['business_id'], city), other)
                 for _, other in neighbours),
                key=lambda pair: pair[0],
                reverse=True)
        else:
            ranked = sorted(neighbours, key=lambda pair: pair[0],
                            reverse=True)
        chosen = [other for _, other in ranked][:n // 2]

        categories = target['categories'].split(', ')
        if user_id:
            already_found = [b['business_id'] for b in chosen]
            already_found.append(business_id)
            by_category = find_best_businesses_by_category(
                user_id, categories, already_found, city, n - len(chosen))
        else:
            by_category = [
                entry[0]
                for entry in find_businesses_by_category(categories, city)
            ]

        # Add category matches one by one, skipping the target and anything
        # already chosen, until n businesses are collected.
        seen = {b['business_id'] for b in chosen}
        seen.add(business_id)
        for candidate in by_category:
            if len(chosen) >= n:
                break
            if candidate['business_id'] in seen:
                continue
            chosen.append(candidate)
            seen.add(candidate['business_id'])

        # Still short: top up with individual well-rated businesses.
        chosen.extend(_top_rated(city, seen, n - len(chosen)))

        # Randomly shuffle the businesses, to intersperse the kinds of
        # suggestions.
        random.shuffle(chosen)
        businesses_to_recommend = chosen
    elif user_id:
        # Predict the rating the given user will give each business, then
        # keep the businesses with the highest predicted ratings.
        predicted_ratings = sorted(
            ((predict_rating(user_id, business['business_id'], city),
              business) for business in BUSINESSES[city]),
            key=lambda pair: pair[0],
            reverse=True)
        businesses_to_recommend = [
            business for _, business in predicted_ratings
        ][:n]
    else:
        businesses_to_recommend = _top_rated(city, set(), n)

    # Restructure the found businesses into correct recommendations.
    return [_to_recommendation(business)
            for business in businesses_to_recommend]
Esempio n. 7
0
def _print_item_based_evaluation(utility_matrix):
    """Print hold-out MSE of item-based predictions next to a random baseline (terminal diagnostics only)."""
    re_frame = utility_matrix.reset_index()
    re_frame = re_frame.melt(id_vars=['index'],
                             var_name='users',
                             value_name='rating')
    print("reframed", list(re_frame.columns.values))
    print(re_frame)

    train, test = train_test_split(re_frame, test_size=0.2)
    print(len(train), len(test))
    train = train.dropna()
    test = test.dropna()
    print(test)

    utility_matrix_train = data.pivot_ratings(train)
    mean_center_train = data.mean_center_columns(utility_matrix_train)
    sim_cos_train = data.create_similarity_matrix_cosine(mean_center_train)

    predicted_ratings = data.predict_ratings_item_based(
        sim_cos_train, utility_matrix_train, test)
    predicted_ratings = predicted_ratings[
        predicted_ratings['predicted rating'] != 0]

    # Same rows with uniformly random predictions, as a baseline.
    random_test = predicted_ratings.copy()
    random_test['predicted rating'] = np.random.uniform(
        0.5, 5.0, random_test.shape[0])

    print(predicted_ratings)
    print("RANDOM", random_test)
    print(data.mse(predicted_ratings))
    print(data.mse(random_test))


def _recommend_item_based(user_id, n):
    """Return n item-based CF recommendations for `user_id`, [] if fewer than n exist, None if the user has no reviews."""
    # Count the reviews mentioning the user per city.
    reviews_per_city = collections.Counter(
        review_city
        for review_city, city_reviews in REVIEWS.items()
        for review in city_reviews
        if user_id in review.values())
    if not reviews_per_city:
        return None

    # The city the user reviewed most is taken as the user's city.
    most_reviewed_city = reviews_per_city.most_common()[0][0]
    print("city determined", most_reviewed_city)

    utility_matrix = data.create_frame(most_reviewed_city)
    print(" utility matrix created")
    print("utility", utility_matrix)

    _print_item_based_evaluation(utility_matrix)

    # Rows of the utility matrix for which the user has no rating yet.
    not_seen = [
        index for index, rating in utility_matrix[user_id].items()
        if math.isnan(rating)
    ]
    print("determined what user hasn't seen yet")
    print(not_seen)

    mean_centered_matrix = data.mean_center_columns(utility_matrix)
    print("matrix mean centered")

    similarity = data.create_similarity_matrix_cosine(mean_centered_matrix)
    print("similarity", similarity)

    pred = {}
    for item in not_seen:
        neighborhood = data.select_neighborhood(similarity, utility_matrix,
                                                user_id, item)
        rating_prediction = data.weighted_mean(neighborhood, utility_matrix,
                                               user_id)
        if rating_prediction != 0:
            pred[item] = rating_prediction
    print(pred, len(pred))

    sorted_pred_rating = sorted(pred.items(),
                                key=lambda kv: kv[1],
                                reverse=True)
    print(sorted_pred_rating)

    item_col_rec = [item for item, _ in sorted_pred_rating]
    print(item_col_rec)

    # All or nothing: an incomplete list is not returned.
    if len(item_col_rec) < n:
        return []
    return [
        data.get_business(most_reviewed_city, item)
        for item in item_col_rec[:n]
    ]


def _category_ranking(city, categories_split, business_id, keep_unrelated):
    """Rank the businesses of `city` by category overlap with `categories_split`.

    Similarity is the number of shared categories divided by the larger of
    the two category counts. The ranking is by similarity, then 'stars',
    then 'review_count', all descending, and never contains `business_id`.
    Businesses with zero similarity are only kept when `keep_unrelated`.

    Returns (ranked business records, similarity keyed by business id).
    """
    reference = set(categories_split)
    categories_by_id = {
        bus['business_id']: bus['categories'].split(", ")
        for bus in BUSINESSES[city]
    }

    similarity_by_id = {}
    for found_id, own_categories in categories_by_id.items():
        overlap = len(set(own_categories) & reference)
        most_cat = max(len(own_categories), len(categories_split))
        similarity = overlap / most_cat
        if keep_unrelated or similarity != 0:
            similarity_by_id[found_id] = similarity

    ranked = []
    for found_id, similarity in similarity_by_id.items():
        if found_id == business_id:
            continue
        record = data.get_business(city, found_id)
        ranked.append(
            (similarity, record['stars'], record['review_count'], record))
    # One stable sort on the composite key; no grouping step that could
    # lose the last group.
    ranked.sort(key=lambda entry: entry[:3], reverse=True)
    return [entry[3] for entry in ranked], similarity_by_id


def _print_category_ranking(records, similarity_by_id):
    """Print one diagnostic line per recommended business."""
    for i in records:
        print(i['name'], " | ", "similarity = ",
              similarity_by_id[i['business_id']], " | ", "Average rating =",
              i['stars'], " | ", "Review count =", i['review_count'],
              " | ", "City =", i['city'], "\n")


def _recommend_by_category(business_id, city, n):
    """Return up to n businesses similar in category to `business_id`, topping up from another city when short."""
    # Categories of the selected business, as a list.
    categories_split = data.get_business(
        city, business_id)['categories'].split(", ")

    ranked, similarity_by_id = _category_ranking(
        city, categories_split, business_id, keep_unrelated=False)
    recommendation = ranked[:n]

    # for testing purposes. Check the terminal
    print("\n")
    print("NAME | SIMILARITY | AVERAGE RATING | REVIEW COUNT | CITY \n")
    _print_category_ranking(recommendation, similarity_by_id)

    # How many recommendations are still needed to complete the top n.
    needed_rec = n - len(recommendation)
    if needed_rec <= 0:
        return recommendation

    # Pick among the other cities that hold enough businesses; when there is
    # none, return what was found instead of searching forever.
    other_cities = [
        stad for stad in CITIES
        if stad != city and len(BUSINESSES[stad]) >= needed_rec
    ]
    if not other_cities:
        return recommendation
    fallback_city = random.choice(other_cities)

    # In the fallback city zero-similarity businesses are kept so the list
    # can always be completed.
    extra_ranked, extra_similarity = _category_ranking(
        fallback_city, categories_split, business_id, keep_unrelated=True)
    extra = extra_ranked[:needed_rec]

    # for testing purposes. Check the terminal
    _print_category_ranking(extra, extra_similarity)

    return recommendation + extra


def recommend(user_id=None, business_id=None, city=None, n=10, scenario=None):
    """
    Returns n recommendations as a list of dicts.
    Optionally takes in a user_id, business_id and/or city.

    `scenario` selects the strategy:
      * 2: item-based collaborative filtering for `user_id` in the city the
        user reviewed most. Returns [] when fewer than n predictions are
        available; a user without any review gets the random fallback.
      * 3: businesses sharing categories with `business_id`, ranked by
        overlap, then stars, then review count; topped up from a random
        other city when the own city yields fewer than n.
      * anything else (including 1 and 4): random fallback.

    The random fallback is a random sample of the city's businesses, capped
    at the number of businesses available. Several scenarios print
    diagnostics to the terminal.

    A recommendation is a dictionary in the form of:
        {
            business_id:str
            stars:str
            name:str
            city:str
            adress:str
        }
    """
    if not city:
        city = random.choice(CITIES)

    if scenario == 1:
        print("start recommending scenario 1")

    elif scenario == 2:
        print("start recommending scenario 2")
        recom = _recommend_item_based(user_id, n)
        if recom is not None:
            return recom

    elif scenario == 3:
        print("start recommending scenario 3")
        return _recommend_by_category(business_id, city, n)

    elif scenario == 4:
        print("start recommending scenario 4")

    pool = BUSINESSES[city]
    # random.sample raises when asked for more items than exist.
    return random.sample(pool, min(n, len(pool)))