def test_Collaborative_Filtering_Cost_Function(self):
    # Evaluates cofiCostFunc on a small slice of the loaded parameters and
    # compares the cost with the reference values for lambda = 0 (22.22) and
    # lambda = 1.5 (31.34). After each comparison checkCostFunction is run
    # with the same lambda.

    # Reduce the data set size so that this runs faster
    n_users, n_movies, n_features = 4, 5, 3

    loaded = scipy.io.loadmat("resource/ex8_movieParams.mat")
    movie_features = loaded["X"][:n_movies, :n_features]
    user_params = loaded["Theta"][:n_users, :n_features]
    ratings = self.movies_Y[:n_movies, :n_users]
    rated_mask = self.movies_R[:n_movies, :n_users]

    from ex8_Anomaly_Detection_and_Recommender_Systems.cofiCostFunc import cofiCostFunc

    # Unroll both matrices column by column into a single parameter vector;
    # everything except lambda is shared by the two cost evaluations below.
    unrolled = np.concatenate(
        (movie_features.ravel(order='F'), user_params.ravel(order='F')))
    shared_args = (ratings, rated_mask, n_users, n_movies, n_features)

    # Evaluate cost function (no regularization)
    plain_cost, _ = cofiCostFunc(unrolled, *shared_args, 0)
    print("Cost at loaded parameters: {cost}".format(cost=plain_cost))
    print("(this value should be about 22.22)")
    self.assertAlmostEqual(plain_cost, 22.22, delta=0.01)

    # ============== Part 3: Collaborative Filtering Gradient ==============
    #  The gradient returned by cofiCostFunc is checked against a numerical
    #  estimate by checkCostFunction.
    #
    print('Checking Gradients (without regularization) ... ')
    from ex8_Anomaly_Detection_and_Recommender_Systems.checkCostFunction import checkCostFunction
    checkCostFunction()

    # ========= Part 4: Collaborative Filtering Cost Regularization ========
    #  Same cost evaluation as above, now with the regularization term
    #  switched on (lambda = 1.5).
    #
    regularized_cost, _ = cofiCostFunc(unrolled, *shared_args, 1.5)
    print(
        "'Cost at loaded parameters (lambda = 1.5): {cost}".format(
            cost=regularized_cost))
    print("(this value should be about 31.34)")
    self.assertAlmostEqual(regularized_cost, 31.34, delta=0.01)

    # ======= Part 5: Collaborative Filtering Gradient Regularization ======
    #  Repeat the gradient check with the same lambda as the cost above.
    #
    print('Checking Gradients (with regularization) ... ')
    checkCostFunction(1.5)
def checkCostFunction(_lambda=0):
    """Compare the analytical gradient of cofiCostFunc with a numerical one.

    Creates a small random collaborative filtering problem, evaluates the
    gradient numerically (computeNumericalGradient) and analytically (second
    value returned by cofiCostFunc), prints both side by side and prints
    their relative difference. The two gradients should be very similar.

    Args:
        _lambda: Regularization strength forwarded to cofiCostFunc.

    Returns:
        The relative difference
        ``norm(numgrad - grad) / norm(numgrad + grad)``. It is returned in
        addition to being printed so that callers can assert on it.
    """
    from ex8_Anomaly_Detection_and_Recommender_Systems.computerNumericalGradient import computeNumericalGradient
    from ex8_Anomaly_Detection_and_Recommender_Systems.cofiCostFunc import cofiCostFunc

    # Create small problem: ratings are generated from known random factors.
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zero out the entries whose random draw exceeds 0.5; R flags the
    # ratings that remain.
    Y = np.dot(X_t, Theta_t.T)
    Y[np.random.rand(*Y.shape) > 0.5] = 0
    R = np.zeros(Y.shape)
    R[Y != 0] = 1

    # Run gradient checking from a random point with the same shapes as the
    # generating factors.
    X = np.random.randn(*X_t.shape)
    Theta = np.random.randn(*Theta_t.shape)
    num_movies, num_users = Y.shape
    num_features = Theta_t.shape[1]

    params = np.concatenate((X.reshape(X.size, order='F'),
                             Theta.reshape(Theta.size, order='F')))

    # Short hand for cost function
    def cost_func(p):
        return cofiCostFunc(p, Y, R, num_users, num_movies, num_features,
                            _lambda)

    numgrad = computeNumericalGradient(cost_func, params)
    _, grad = cost_func(params)

    print(np.column_stack((numgrad, grad)))
    print("The above two columns you get should be very similar.")
    print("(Left-Your Numerical Gradient, Right-Analytical Gradient)")

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print("If your backpropagation implementation is correct, then")
    print("the relative difference will be small (less than 1e-9). ")
    print("Relative Difference: {diff}".format(diff=diff))
    return diff
def gf(_p):
    """Return the second element of cofiCostFunc's result at ``_p``.

    Y, R, num_users, num_movies, num_features and _lambda are not
    parameters; they are read from the surrounding scope.
    """
    outputs = cofiCostFunc(
        _p, Y, R, num_users, num_movies, num_features, _lambda)
    return outputs[1]
def test_Entering_ratings_for_a_new_user(self):
    # Adds a new user's ratings to the movie data set, trains the
    # collaborative filtering model with L-BFGS-B and prints the ten movies
    # with the highest predicted rating for that user, followed by the
    # ratings the user originally entered.
    from ex8_Anomaly_Detection_and_Recommender_Systems.loadMovieList import loadMovieList
    movieList = loadMovieList()

    # Initialize my ratings (one column so it can be stacked onto Y below)
    my_ratings = np.zeros((1682, 1))

    # Check the file movie_idx.txt for id of each movie in our dataset
    # For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
    my_ratings[0] = 4
    # Or suppose did not enjoy Silence of the Lambs (1991), you can set
    my_ratings[97] = 2
    # We have selected a few movies we liked / did not like and the ratings we
    # gave are as follows:
    my_ratings[6] = 3
    my_ratings[11] = 5
    my_ratings[53] = 4
    my_ratings[63] = 5
    my_ratings[65] = 3
    my_ratings[68] = 5
    my_ratings[182] = 4
    my_ratings[225] = 5
    my_ratings[354] = 5

    print('New user ratings:')
    # Iterate over the scalar column so the rating prints as a number
    # rather than as a one-element array.
    for i, rating in enumerate(my_ratings[:, 0]):
        if rating > 0:
            print("Rated {rate} for {movie}".format(rate=rating,
                                                    movie=movieList[i]))

    # ================== Part 7: Learning Movie Ratings ====================
    #  Now, you will train the collaborative filtering model on a movie rating
    #  dataset of 1682 movies and 943 users
    #
    print('Training collaborative filtering...')

    # Load data
    #  Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by
    #  943 users
    #
    #  R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
    #  rating to movie i
    mat = scipy.io.loadmat("resource/ex8_movies.mat")
    Y = mat["Y"]
    R = mat["R"]

    # Add our own ratings to the data matrix (the new user becomes column 0)
    Y = np.column_stack((my_ratings, Y))
    R = np.column_stack(((my_ratings != 0).astype(int), R))

    from ex8_Anomaly_Detection_and_Recommender_Systems.normalizeRatings import normalizeRatings
    Ynorm, Ymean = normalizeRatings(Y, R)

    # Useful values
    num_movies, num_users = Y.shape
    num_features = 10

    # Set initial parameters (Theta, X)
    X = np.random.randn(num_movies, num_features)
    Theta = np.random.randn(num_users, num_features)
    params = np.concatenate(
        (X.reshape(X.size, order='F'), Theta.reshape(Theta.size, order='F')))

    # Set regularization
    _lambda = 10

    # Set options
    maxiter = 100
    options = {'disp': True, 'maxiter': maxiter}

    from ex8_Anomaly_Detection_and_Recommender_Systems.cofiCostFunc import cofiCostFunc
    from scipy.optimize import minimize

    # Train on the mean-normalized ratings: Ymean is added back when
    # predicting, so fitting the raw Y would count the mean twice.
    # jac=True lets one cofiCostFunc call supply both cost and gradient, and
    # a single L-BFGS-B run replaces the two equivalent runs done before.
    result = minimize(
        lambda _p: cofiCostFunc(_p, Ynorm, R, num_users, num_movies,
                                num_features, _lambda),
        x0=params, options=options, method='L-BFGS-B', jac=True)
    print(result)

    # Fold the learned parameter vector back into X and Theta
    r = result["x"]
    split = num_movies * num_features
    X = np.reshape(r[:split], (num_movies, num_features), order='F')
    Theta = np.reshape(r[split:], (num_users, num_features), order='F')

    print('Recommender system learning completed.')

    # ================== Part 8: Recommendation for you ====================
    #  After training the model, you can now make recommendations by computing
    #  the predictions matrix.
    #
    predictions = np.dot(X, Theta.T)
    # Column 0 is the user added above. Ymean is flattened so the sum stays
    # one value per movie whether normalizeRatings returns a column vector
    # or a 1-D array -- TODO confirm its shape.
    my_predictions = predictions[:, 0] + np.ravel(Ymean)

    # Movie indices ordered from highest to lowest predicted rating. The
    # predictions themselves are left unsorted so that my_predictions[j]
    # and movieList[j] refer to the same movie.
    ranking = np.argsort(my_predictions)[::-1]

    print('Top recommendations for you:')
    for j in ranking[:10]:
        print('Predicting rating {p} for movie {name}'.format(
            p=my_predictions[j], name=movieList[j]))

    print('Original ratings provided:')
    for i, rating in enumerate(my_ratings[:, 0]):
        if rating > 0:
            print('Rated {:d} for {:s}'.format(int(rating), movieList[i]))