Beispiel #1
0
def generate_prediction(training_file, testing_file, all_songs):
    """Queue one recommendation job per user found in the testing file.

    A predictor is built from ``training_file``, wrapped in a
    ``recommender.Recommender`` together with ``all_songs``, and then
    ``parallel_rec_worker`` is scheduled on a pool of 4 workers for every
    key of the mapping loaded from ``testing_file``. Each finished job is
    handed to ``log_result``. Nothing is returned; progress is printed.
    """
    # To generate UserBasedPredictions instead of ItemBasedPredictions, swap
    # which of the two pairs of lines below is commented out.
    songs_to_users = utilities.song_to_users(training_file)  # dict songs:{users}
    predictor = prediction.ItemBasedPrediction(songs_to_users, _sim=0)
    # users_to_songs = utilities.user_to_songs(training_file)
    # predictor = prediction.UserBasedPrediction(users_to_songs)

    # the recommender
    song_recommender = recommender.Recommender(all_songs, predictor, _k=500)

    test_users_to_songs = utilities.user_to_songs(testing_file)

    workers = Pool(4)
    # recommend for each user -- songs they would like to listen to based on
    # our recommender
    for user in test_users_to_songs:
        workers.apply_async(
            parallel_rec_worker,
            args=(user, song_recommender, test_users_to_songs),
            callback=log_result,
        )

    print('finished applying')
    # No further jobs are submitted; join() then waits for the queued ones.
    workers.close()
    print('will join when finished evaluating all users...')
    workers.join()
    print('finished jobs...')
Beispiel #2
0
def test_all():
    """Print ``recommend_me`` rankings for a fixed set of sample profiles.

    The ranking is printed once with ``intersection=True`` and once with
    ``intersection=False``; afterwards the top-ranked profile is looked up
    and its shared interests with ``me`` are printed.
    """
    engine = recommender.Recommender()

    # Positions 0 and 4 are deliberately empty profiles.
    users = [
        {},
        {"vegan": 5, "halal": 1},
        {"american": 4, "mexican": 3, "japanese": 2},
        {"thai": 4, "mexican": 1, "japanese": 4},
        {},
        {"thai": -3, "japanese": -2, "american": -1},
        {"thai": 113, "japanese": 112, "american": 111},
        {"thai": 12, "japanese": 8, "american": 4},
    ]
    me = {"thai": 5, "japanese": 2, "american": 1}

    for use_intersection in (True, False):
        print(
            engine.recommend_me(me,
                                users,
                                8,
                                intersection=use_intersection,
                                with_keys=False))

    # most similar user with strongest shared interests
    ranking = engine.recommend_me(me,
                                  users,
                                  1,
                                  intersection=True,
                                  with_keys=False)
    best = ranking[0]
    print(best, engine.find_most_shared_interests(me, users[best], 3))
Beispiel #3
0
def test_find_similarity():
    """Print ``find_similarity`` for an overlapping and for an empty profile."""
    engine = recommender.Recommender()

    # Both profiles contain "vegan".
    print(engine.find_similarity({"vegan": 1}, {"vegan": 5, "halal": 1}))

    # The second profile has no entries at all.
    print("empty case:", engine.find_similarity({"vegan": 1}, {}))
Beispiel #4
0
 def test_array_average(self):
     """Check ``Recommender.array_average`` on a small hand-made array.

     The expectations below read as if the first column of each row is an
     index and the second a value to average per index (index 23 has the
     values 1 and 4, giving 2.5) -- presumably so; confirm against
     ``array_average``. Indices that never occur must come back as NaN,
     and the result must hold ``total_entries + 1`` slots.
     """
     array = np.array([[2, 4], [1, 5], [67, 2], [23, 1], [23, 4]])
     total_entries = 100
     test = recommender.Recommender(None)
     result = test.array_average(array, total_entries)
     self.assertEqual(len(result), total_entries + 1)
     self.assertEqual(result[23], 2.5)
     self.assertEqual(result[2], 4)
     # Indices absent from the first column (including both ends of the
     # range) are expected to be NaN.
     for missing in (3, 0, 100):
         self.assertTrue(np.isnan(result[missing]))
Beispiel #5
0
def test_find_most_shared_interests():
    """Print ``find_most_shared_interests`` for three sample profile pairs."""
    engine = recommender.Recommender()

    # Each case is (first profile, second profile, third positional argument).
    cases = (
        # only "japanese" is common to both profiles
        ({"thai": 3, "japanese": 2, "american": 1},
         {"vegan": 1, "japanese": 4},
         1),
        # the second profile is empty
        ({"thai": 3, "japanese": 2, "american": 1},
         {},
         3),
        # "american", "mexican" and "japanese" are common to both profiles
        ({"american": 4, "mexican": 3, "japanese": 2},
         {"thai": 4, "mexican": 1, "japanese": 4, "american": 4},
         3),
    )
    for first, second, count in cases:
        print(engine.find_most_shared_interests(first, second, count))
import numpy as np
import time
import recommender

# Reference point for the elapsed-time stamps printed below.
start_time = time.time()

# initializing
print('[%.2fs] Initializing...' % (time.time() - start_time))

# Built with no arguments; the attributes read below (ratings_train,
# num_user_factors) are accessed right after construction.
rec_system = recommender.Recommender()

# training
# ratings_train is read through .data / .row / .col -- presumably a
# scipy.sparse COO matrix of ratings with users on rows and items on
# columns; TODO confirm against recommender.py.
print('\nTraining data:')
print('-> Number of ratings:  %s' % len(rec_system.ratings_train.data))
print('-> Number of distinct users:  %s' %
      len(np.unique(rec_system.ratings_train.row)))
print('-> Number of distinct items:  %s' %
      len(np.unique(rec_system.ratings_train.col)))
print('-> Number of latent factors:  %d' % (rec_system.num_user_factors))

print('\n[%.2fs] Training...' % (time.time() - start_time))

rec_system.train()

# Dump the factor attributes after train() has run.
print('\nLearned values:')
print('\n-> User factors:')
print(rec_system.user_factors)
print('\n-> Item factors:')
print(rec_system.item_factors)

# testing
Beispiel #7
0
from flask import Flask, request, render_template, session
# Application object; the @app.route decorators below register views on it.
app = Flask(__name__)
import pandas as pd
import numpy as np
import graphlab as gl
from pymongo import MongoClient
import time
from pprint import pprint
# NOTE(review): Flask is already imported on the first line; this repeat is
# redundant.
from flask import Flask
import recommender as rec
import info as info
from dispatcher import add_job
# Flask uses this key to sign the session cookie that index() writes to.
# NOTE(review): the key is hard-coded in source, so anyone who can read the
# code can forge sessions -- consider loading it from configuration or the
# environment instead.
app.secret_key = 'datascience'

# Single Recommender instance created once, at import time.
model = rec.Recommender()


#Post Data Request:
#@app.route("/")
@app.route('/')
@app.route('/index')
def index():
    """Reset the per-visitor session fields and render the home page."""
    for field in ("user_id", "profile", "rated"):
        session[field] = None
    return render_template('home.html')


@app.route('/recs', methods=['GET'])
def show_five():
    if session["user_id"] == None:
Beispiel #8
0
from flask import Flask, render_template, request
import recommender

# Application object that the @app.route decorators below register views on.
app = Flask(__name__)
# One Recommender instance, created at import time and shared by every view.
rcmdr = recommender.Recommender()


@app.route('/')
def main():
    """Render ``index.html`` with the recommender's reader list.

    The list returned by ``rcmdr.getListOfReaders()`` is passed to the
    template as ``option_list``.
    """
    return render_template('index.html',
                           option_list=rcmdr.getListOfReaders())


@app.route("/get_purchased", methods=['POST'])
def get_input():
    """Return the purchased items for the customer ID sent as the POST body.

    The raw request body is forwarded unchanged to
    ``rcmdr.getPurchased_Items`` and its result is returned as the response.
    An empty body is answered with HTTP 400.
    """
    customerID = request.data
    # request.data can be a bytes object, which never compares equal to '',
    # so emptiness is checked by truthiness instead.
    if not customerID:
        # A view must not fall through and return None; Flask treats that
        # as an error. Report the bad request explicitly.
        return 'Missing customer ID in request body', 400
    return rcmdr.getPurchased_Items(customerID)


@app.route("/get_recommender", methods=['POST'])
def get_recommender():

    customerID = request.data
    # print("customer ID is {} type is {}".format(customerID, type(customerID)))
    if (customerID != ''):
        recommended_list = rcmdr.getRec_Items(customerID)
        # actualPurchased_list = rcmdr.getPurchased_Items(customerID)
    b = recommendObject.addClub('Not in it club', 'something', 3,"")
    c = recommendObject.addClub("Random Other Club", 'something', 4,"")

    u101.addClub("Common Club", recommendObject)
    u101.addClub("Random Other Club", recommendObject)

    u102.addClub("Common Club", recommendObject)
    u102.addClub("Not in it club", recommendObject)
    u102.addClub("Random Other Club", recommendObject)

    c = recommendObject.createClubRecommendation(101)
    assert(c.getDestination().getClubName() == "Not in it club")
    print("The assertion passed")
    return None

def tryAddingExcelClubs():
    """Call ``addExcelClubs`` on the module-level recommender and report it.

    Always returns None.
    """
    recommendObject.addExcelClubs()
    print("Added the clubs from excel")

def clubBasedTests():
    """Run every club-based scenario in a fixed order and return 0."""
    scenarios = (
        miniDataSet,
        smallDataSet,
        largeDataSet,
        caseForNoRelated,
        checkNotReturningClubAlreadyIn,
    )
    for scenario in scenarios:
        scenario()
    return 0

# Module-level recommender that the helper functions above use as a global.
recommendObject = recommender.Recommender()
# Runs the whole club-based suite as soon as this module is executed.
clubBasedTests()
Beispiel #10
0
    options = parser.parse_args()
    do_ingest = options.ingest
    user_id = options.user_id
    top_rated_songs = options.top_rated_songs
    top_played_songs = options.top_played_songs
    get_added_songs = options.get_added_songs
    recommend_songs = options.recommend_songs

    if do_ingest:
        print '\n' * 2 + '*' * 10 + ' DATA INGESTION ' + '*' * 10
        ingesta.ingest()

    if top_rated_songs:
        print '\n' * 2 + '*' * 10 + ' TOP RATED SONGS ' + '*' * 10
        r = recommender.Recommender()
        results = r.top_rated(top_rated_songs)
        for result in results:
            print result

    if top_played_songs:
        print '\n' * 2 + '*' * 10 + ' TOP PLAYED SONGS ' + '*' * 10
        r = recommender.Recommender()
        results = r.top_listened(top_played_songs)
        for result in results:
            print result

    if get_added_songs and user_id:
        print '\n' * 2 + '*' * 10 + ' SONGS ADDED BY USER ' + '*' * 10
        r = recommender.Recommender()
        playlists = r.user_based(user_id)[0].get('playlists')
Beispiel #11
0
class Recommender:
    """
    This Recommender uses FunkSVD to make predictions of exact ratings.
    And uses either FunkSVD or a Knowledge Based recommendation (highest ranked)
    to make recommendations for users.  Finally, if given a movie, the recommender
    will provide movies that are most similar as a Content Based Recommender.
    """
    def __init__(self):
        """Create an unfitted recommender; fit() populates every attribute."""

    def fit(self,
            reviews_pth,
            movies_pth,
            latent_features=12,
            learning_rate=0.0001,
            iters=100):
        """
        This function performs matrix factorization using a basic form of FunkSVD with no regularization

        INPUT:
        reviews_pth - path to csv with at least the four columns: 'user_id', 'movie_id', 'rating', 'timestamp'
        movies_pth - path to csv with each movie and movie information in each row
        latent_features - (int) the number of latent features used
        learning_rate - (float) the learning rate
        iters - (int) the number of iterations

        OUTPUT:
        None - stores the following as attributes:
        n_users - the number of users (int)
        n_movies - the number of movies (int)
        num_ratings - the number of ratings made (int)
        reviews - dataframe read from reviews_pth
        movies - dataframe read from movies_pth
        user_item_df - user by movie dataframe of ratings (nan where unrated)
        user_item_mat - (np array) a user by item numpy array with ratings and nans for values
        user_ids_series - (np array) user ids in the row order of user_item_mat
        movie_ids_series - (np array) movie ids in the column order of user_item_mat
        latent_features - (int) the number of latent features used
        learning_rate - (float) the learning rate
        iters - (int) the number of iterations
        user_mat - (np array) learned users by latent_features matrix
        movie_mat - (np array) learned latent_features by movies matrix
        ranked_movies - result of rf.create_ranked_df(movies, reviews)
        """

        # Store inputs as attributes
        self.reviews = pd.read_csv(reviews_pth)
        self.movies = pd.read_csv(movies_pth)

        # Create user-item matrix; when a user rated the same movie more
        # than once, the highest rating is kept
        usr_itm = self.reviews[['user_id', 'movie_id', 'rating', 'timestamp']]
        self.user_item_df = usr_itm.groupby(['user_id', 'movie_id'
                                             ])['rating'].max().unstack()
        self.user_item_mat = np.array(self.user_item_df)

        # Store more inputs
        self.latent_features = latent_features
        self.learning_rate = learning_rate
        self.iters = iters

        # Set up useful values to be used through the rest of the function
        self.n_users = self.user_item_mat.shape[0]
        self.n_movies = self.user_item_mat.shape[1]
        self.num_ratings = np.count_nonzero(~np.isnan(self.user_item_mat))
        self.user_ids_series = np.array(self.user_item_df.index)
        self.movie_ids_series = np.array(self.user_item_df.columns)

        # initialize the user and movie matrices with random values
        user_mat = np.random.rand(self.n_users, self.latent_features)
        movie_mat = np.random.rand(self.latent_features, self.n_movies)

        # The set of rated cells is the same in every iteration, so it is
        # located once here instead of scanning the full matrix each pass.
        # argwhere returns the cells in row-major order, i.e. the order a
        # nested user/movie loop would visit them. nan (unrated) compares
        # False; errstate silences the warning some numpy versions emit.
        # NOTE(review): a rating of exactly 0 is skipped here although it
        # is counted in num_ratings -- confirm ratings are always positive.
        with np.errstate(invalid='ignore'):
            rated_pairs = np.argwhere(self.user_item_mat > 0)

        # keep track of iteration and MSE
        print("Optimizaiton Statistics")
        print("Iterations | Mean Squared Error ")

        # for each iteration
        for iteration in range(self.iters):

            # restart the sum of squared errors for this pass
            sse_accum = 0

            # For each user-movie pair that has a rating
            for i, j in rated_pairs:

                # compute the error as the actual minus the dot product of the user and movie latent features
                diff = self.user_item_mat[i, j] - np.dot(
                    user_mat[i, :], movie_mat[:, j])

                # Keep track of the sum of squared errors for the matrix
                sse_accum += diff**2

                # update the values in each matrix in the direction of the gradient
                # (the movie update deliberately sees the already-updated
                # user value, so the two statements must stay in this order)
                for k in range(self.latent_features):
                    user_mat[i, k] += self.learning_rate * (
                        2 * diff * movie_mat[k, j])
                    movie_mat[k, j] += self.learning_rate * (
                        2 * diff * user_mat[i, k])

            # print results
            print("%d \t\t %f" % (iteration + 1, sse_accum / self.num_ratings))

        # SVD based fit
        # Keep user_mat and movie_mat for safe keeping
        self.user_mat = user_mat
        self.movie_mat = movie_mat

        # Knowledge based fit
        self.ranked_movies = rf.create_ranked_df(self.movies, self.reviews)

    def predict_rating(self, user_id, movie_id):
        """
        INPUT:
        user_id - the user_id from the reviews df
        movie_id - the movie_id according the movies df

        OUTPUT:
        pred - the predicted rating for user_id-movie_id according to FunkSVD,
               or None when no prediction can be made (a message is printed
               in both cases)
        """

        try:  # User row and Movie Column
            # [0][0] raises IndexError when the id is not in the fitted data
            user_row = np.where(self.user_ids_series == user_id)[0][0]
            movie_col = np.where(self.movie_ids_series == movie_id)[0][0]

            # Take dot product of that row and column in U and V to make prediction
            pred = np.dot(self.user_mat[user_row, :],
                          self.movie_mat[:, movie_col])

            # Read the title from the column value itself; slicing the
            # printed form of the Series depends on how wide its index is.
            titles = self.movies.loc[self.movies['movie_id'] == movie_id,
                                     'movie']
            if len(titles) > 0:
                movie_name = str(titles.iloc[0])
            else:
                # rated movie that is missing from the movies table
                movie_name = 'unknown title'
            print(
                "For user {} we predict a {} rating for the movie {}.".format(
                    user_id, round(pred, 2), movie_name))

            return pred

        except Exception:
            # Best effort by design: any failure is reported and answered
            # with None, but KeyboardInterrupt/SystemExit still propagate.
            print(
                "I'm sorry, but a prediction cannot be made for this user-movie pair.  It looks like one of these "
                "items does not exist in our current database.")

            return None

    def make_recommendations(self, _id, _id_type='movie', rec_num=5):
        """
        INPUT:
        _id - either a user or movie id (int)
        _id_type - "movie" or "user" (str)
        rec_num - number of recommendations to return (int)

        OUTPUT:
        rec_ids - recommended movie ids; only set for a user that is part of
                  the fitted data, otherwise None
        rec_names - (array) a list or numpy array of recommended movies like the
                    given movie, or recs for a user_id given; None when the
                    given movie is unknown
        """

        rec_ids, rec_names = None, None
        if _id_type == 'user':
            # if the user is available from the matrix factorization data,
            # rank movies based on the predicted values
            if _id in self.user_ids_series:
                # Get the index of which row the user is in for use in U matrix
                idx = np.where(self.user_ids_series == _id)[0][0]

                # take the dot product of that row and the V matrix
                preds = np.dot(self.user_mat[idx, :], self.movie_mat)

                # pull the top movies according to the prediction
                indices = preds.argsort()[-rec_num:][::-1]  # indices
                rec_ids = self.movie_ids_series[indices]
                rec_names = rf.get_movie_names(rec_ids, self.movies)

            else:
                # if we don't have this user, give just top ratings back
                rec_names = rf.popular_recommendations(_id, rec_num,
                                                       self.ranked_movies)
                print(
                    "Because this user wasn't in our database, we are giving back the top movie recommendations for "
                    "all users.")

        else:
            # Find similar movies if it is a movie that is passed
            if _id in self.movie_ids_series:
                rec_names = list(rf.find_similar_movies(_id,
                                                        self.movies))[:rec_num]
            else:
                print(
                    "That movie doesn't exist in our database.  Sorry, we don't have any recommendations for you."
                )

        return rec_ids, rec_names


# Demo run. It lives at module level so that executing the file does not run
# it in the middle of the class definition or leave its variables behind as
# class attributes.
if __name__ == '__main__':
    # instantiate recommender
    rec = Recommender()

    # fit recommender
    rec.fit(reviews_pth='data/train_data.csv',
            movies_pth='data/movies_clean.csv',
            learning_rate=.01,
            iters=1)

    # predict
    rec.predict_rating(user_id=8, movie_id=2844)

    # make recommendations
    print(rec.make_recommendations(8, 'user'))  # user in the dataset
    print(rec.make_recommendations(1, 'user'))  # user not in dataset
    print(rec.make_recommendations(1853728))  # movie in the dataset
    print(rec.make_recommendations(1))  # movie not in dataset
    print(rec.n_users)
    print(rec.n_movies)
    print(rec.num_ratings)
Beispiel #12
0
# File-name suffixes shared by the 'train_' and 'test_' CSV files.
__ACTIVITY = "activity_v2.csv"
__DEAL_ITEMS = "dealitems.csv"
__DEAL_DETAILS = "deal_details.csv"

# Params
N_dealitems = 10

# load raw data: one frame per suffix, first for the train files, then for
# the test files
activity_train, deal_items_train, deal_details_train = (
    pd.read_csv('train_' + suffix)
    for suffix in (__ACTIVITY, __DEAL_ITEMS, __DEAL_DETAILS))

activity_test, deal_items_test, deal_details_test = (
    pd.read_csv('test_' + suffix)
    for suffix in (__ACTIVITY, __DEAL_ITEMS, __DEAL_DETAILS))

# The same preprocessing is applied to both splits; only the train split's
# first return value is kept.
(full_data,
 grouped_by_users_train,
 grouped_by_dealitem_id_train) = processing.get_proceed_data(
     activity_train, deal_items_train, deal_details_train)
(_,
 grouped_by_users_test,
 grouped_by_dealitem_id_test) = processing.get_proceed_data(
     activity_test, deal_items_test, deal_details_test)

# The recommender is constructed with the latest 'create_time' found in the
# training activity.
actual_time = activity_train['create_time'].max()
# actual_time = 1406852020
model = r.Recommender(actual_time)
model.fit(
    full_data,
    grouped_by_users_train,
    grouped_by_dealitem_id_train,
    deal_items_train,
    deal_details_train,
    top_N_items=N_dealitems,
)
model.predict(activity_train, grouped_by_users_test, distance_treshold=0.4)