예제 #1
0
    def fit(self, latent_features=12, learning_rate=0.0001, iters=100):
        """
        Build the user-item matrix from ``self.df_reviews`` and factorize
        it with FunkSVD (gradient descent over the observed ratings only).

        The reviews are grouped by (user id, item id), the maximum rating
        of each pair is kept, and item ids are unstacked into columns, so
        pairs without a rating are NaN. A user matrix
        (user x latent feature) and an item matrix (latent feature x item)
        are initialized with random values and updated once per observed
        rating on every iteration.

        Every non-missing (non-NaN) rating is trained on, so the trained
        cells are exactly the cells counted in ``self.num_ratings``, which
        is the denominator of the printed mean squared error.

        Input:
        - latent_features: number of latent features (int), Default:12
        - learning_rate: gradient descent step size (int/float),
        Default:0.0001
        - iters: number of passes over the observed ratings (int),
        Default:100

        Output:
        - None. The results are stored on the instance: user_item_df,
        user_item_mat, n_users, n_items, num_ratings, user_ids_series,
        items_ids_series, user_mat, item_mat and ranked_items.
        """

        self.latent_features = latent_features
        self.learning_rate = learning_rate
        self.iters = iters

        print('Create User-Item matrix...')

        # Create user-item matrix
        user_item = self.df_reviews[[
            self.user_id_colname, self.item_id_colname, self.rating_col_name,
            self.date_col_name
        ]]

        self.user_item_df = (user_item.groupby(
            [self.user_id_colname,
             self.item_id_colname])[self.rating_col_name].max().unstack())
        self.user_item_mat = np.array(self.user_item_df)

        # Set up some useful values for later
        self.n_users = self.user_item_mat.shape[0]
        self.n_items = self.user_item_mat.shape[1]
        observed = ~np.isnan(self.user_item_mat)
        self.num_ratings = np.count_nonzero(observed)
        self.user_ids_series = np.array(self.user_item_df.index)
        self.items_ids_series = np.array(self.user_item_df.columns)

        # The set of rated cells never changes during training, so locate
        # it once instead of scanning the whole matrix on every iteration.
        # argwhere yields (row, column) pairs in row-major order, i.e. the
        # same visiting order as a nested user/item loop.
        rated_cells = np.argwhere(observed)

        print('Train data with Funk Sigular Value Decomposition...')
        #### FunkSVD ####

        # initialize the user and item matrices with random values
        user_mat = np.random.rand(self.n_users, self.latent_features)
        item_mat = np.random.rand(self.latent_features, self.n_items)

        print("Iterations \t\t Mean Squared Error ")

        for iteration in range(self.iters):
            sse_accum = 0

            for i, j in rated_cells:
                # error = actual rating minus the dot product of the user
                # and item latent features
                diff = (self.user_item_mat[i, j] -
                        np.dot(user_mat[i, :], item_mat[:, j]))
                # Keep track of the sum of squared errors for the matrix
                sse_accum += diff**2

                # Gradient step on all latent features at once. The item
                # update deliberately reads the user row that was just
                # updated, which is what a per-feature loop updating the
                # user entry first and the item entry second computes.
                user_mat[i, :] += (self.learning_rate *
                                   (2 * diff * item_mat[:, j]))
                item_mat[:, j] += (self.learning_rate *
                                   (2 * diff * user_mat[i, :]))

            # No ratings at all: report NaN instead of dividing by zero
            mse = (sse_accum / self.num_ratings
                   if self.num_ratings else float('nan'))
            print(f"{iteration+1} \t\t {mse} ")

        # Keep these matrices for later
        self.user_mat = user_mat
        self.item_mat = item_mat

        # Create ranked items (computed by the rf helper module)
        self.ranked_items = rf.ranked_df(self.df_reviews, self.item_id_colname,
                                         self.rating_col_name,
                                         self.date_col_name)
    def fit(self, latent_features=12, learning_rate=0.0001, iters=100):
        """
        Train on the existing user-item matrix with FunkSVD (gradient
        descent over the observed ratings only).

        ``self.user_item_df`` is read, not built, by this method: it must
        already hold the user-by-item ratings with NaN where a rating is
        missing. A user matrix (user x latent feature) and an item matrix
        (latent feature x item) are initialized with random values and
        updated once per observed rating on every iteration. No Sigma
        (diagonal) matrix is produced.

        Every non-missing (non-NaN) rating is trained on, so the trained
        cells are exactly the cells counted in ``self.num_ratings``, which
        is the denominator of the printed mean squared error.

        Input:
        - latent_features: number of latent features (int), Default:12
        - learning_rate: the learning rate (int/float), Default:0.0001
        - iters: number of iterations (int), Default:100

        Output:
        - None. The results are stored on the instance: user_item_mat,
        n_users, n_items, num_ratings, user_ids_series,
        items_ids_series, user_mat, item_mat and ranked_items.
        """

        self.latent_features = latent_features
        self.learning_rate = learning_rate
        self.iters = iters
        self.user_item_mat = np.array(self.user_item_df)

        # Set up some useful values for later
        self.n_users, self.n_items = self.user_item_mat.shape[:2]
        has_rating = ~np.isnan(self.user_item_mat)
        self.num_ratings = np.count_nonzero(has_rating)
        self.user_ids_series = np.array(self.user_item_df.index)
        self.items_ids_series = np.array(self.user_item_df.columns)

        # The rated cells are fixed for the whole training run, so find
        # them once rather than re-testing every cell on every iteration.
        # argwhere returns (row, column) pairs in row-major order, which is
        # the order a nested user/item loop would visit them in.
        rated_cells = np.argwhere(has_rating)

        print('Train data with Funk Sigular Value Decomposition...')
        #### FunkSVD ####

        # initialize the user and item matrices with random values
        user_mat = np.random.rand(self.n_users, self.latent_features)
        item_mat = np.random.rand(self.latent_features, self.n_items)

        print("Iterations \t\t Mean Squared Error ")

        for iteration in range(self.iters):
            sse_accum = 0

            for i, j in rated_cells:
                # error = actual rating minus the dot product of the user
                # and item latent features
                diff = (self.user_item_mat[i, j] -
                        np.dot(user_mat[i, :], item_mat[:, j]))
                # Keep track of the sum of squared errors for the matrix
                sse_accum += diff**2

                # Gradient step on all latent features at once. The item
                # update deliberately reads the freshly updated user row,
                # matching a per-feature loop that updates the user entry
                # before the item entry.
                user_mat[i, :] += (self.learning_rate *
                                   (2 * diff * item_mat[:, j]))
                item_mat[:, j] += (self.learning_rate *
                                   (2 * diff * user_mat[i, :]))

            # No ratings at all: report NaN instead of dividing by zero
            mse = (sse_accum / self.num_ratings
                   if self.num_ratings else float('nan'))
            print(f"\t{iteration+1} \t\t {mse} ")

        # Keep these matrices for later
        self.user_mat = user_mat
        self.item_mat = item_mat

        # Create ranked items (computed by the rf helper module)
        self.ranked_items = rf.ranked_df(self.df_reviews, self.item_id_colname,
                                         self.rating_col_name,
                                         self.date_col_name)
예제 #3
0
	def make_recommendations(self, _id, _id_type='item', rec_num=5, latent_features=12, learning_rate=0.001, iters=10):
		"""
		Recommend items for the user whose id is ``_id``.

		If ``_id`` is found among the user ids of
		``self.user_item_grouped``, the ratings of that user are
		factorized with FunkSVD and the items with the highest
		predicted rating are returned. Otherwise the user is treated
		as new: no training is done and the recommendation comes from
		``rf.popular_recommendations`` on the ranked items
		(Ranked based recommendation).

		Input:
		- _id: a user id (int)
		- _id_type: kept in the signature for existing callers; it is
		not consulted by this method and ``_id`` is always handled as
		a user id. Default:'item' (str)
		- rec_num: number of recommendation that you want
		Default:5 (int)
		- latent_features: number of latent features (int), Default:12
		- learning_rate: gradient descent step size (int/float),
		Default:0.001
		- iters: number of training iterations (int), Default:10

		Output:
		- recommendation ids
		- recommendation names
		- and a personalized message
		"""

		self.latent_features = latent_features
		self.learning_rate = learning_rate
		self.iters = iters

		user_item_reset = self.user_item_grouped.reset_index()
		self.user_ids = user_item_reset[self.user_id_colname].unique()
		self.user_ids_series = np.array(user_item_reset[self.user_id_colname].unique())
		self.items_ids_series = np.array(user_item_reset[self.item_id_colname].unique())

		# Create ranked items (needed by the new-user fallback and kept on
		# the instance in every case)
		self.ranked_items = rf.ranked_df(
			self.df_reviews,
			self.item_id_colname,
			self.rating_col_name,
			self.date_col_name,
		)

		# Unknown user: answer before any training. There are no ratings to
		# factorize, and the mean squared error would be 0/0 and raise
		# ZeroDivisionError.
		if _id not in self.user_ids_series:
			message = "Hey, you are new here, this is for you:\n"
			# if we don't have this user, give just top ratings back
			rec_ids = rf.popular_recommendations(
				_id,
				self.ranked_items,
				self.item_id_colname,
				rec_num,
			)
			rec_names = rf.get_item_names(
				rec_ids,
				self.df_items,
				self.item_id_colname,
				self.item_name_colname,
			)
			return rec_ids, rec_names, message

		# Ratings of the requested user only, as a user x item frame
		current_user = user_item_reset[user_item_reset[self.user_id_colname] == _id]
		current_user = current_user.groupby(
			[self.user_id_colname, self.item_id_colname]
		)[self.rating_col_name].max()
		self.current_user_item_df = current_user.unstack()
		self.user_item_mat = np.array(self.current_user_item_df)

		# Set up some useful values for later
		self.n_users = self.user_item_mat.shape[0]
		self.n_items = self.user_item_mat.shape[1]
		observed = ~np.isnan(self.user_item_mat)
		self.num_ratings = np.count_nonzero(observed)

		# Row/column labels of the matrix that is actually trained. The
		# factor matrices are indexed by THESE positions, not by positions in
		# the global user/item id lists stored above.
		trained_user_ids = np.array(self.current_user_item_df.index)
		trained_item_ids = np.array(self.current_user_item_df.columns)

		# Rated cells are fixed during training; argwhere lists them once in
		# row-major order (same order as a nested user/item loop).
		rated_cells = np.argwhere(observed)

		print('Train data with Funk Singular Value Decomposition...')
		#### FunkSVD ####

		# initialize the user and item matrices with random values
		user_mat = np.random.rand(self.n_users, self.latent_features)
		item_mat = np.random.rand(self.latent_features, self.n_items)

		print("Iterations \t\t Mean Squared Error ")

		for iteration in range(self.iters):
			sse_accum = 0

			for i, j in rated_cells:
				# error = actual rating minus the dot product of the user
				# and item latent features
				diff = (
					self.user_item_mat[i, j]
					- np.dot(user_mat[i, :], item_mat[:, j])
				)
				# Keep track of the sum of squared errors for the matrix
				sse_accum += diff**2

				# Gradient step on all latent features at once; the item
				# update deliberately reads the freshly updated user row,
				# matching a per-feature loop that updates the user entry
				# before the item entry.
				user_mat[i, :] += self.learning_rate * (2*diff*item_mat[:, j])
				item_mat[:, j] += self.learning_rate * (2*diff*user_mat[i, :])

			# No usable rating: report NaN instead of dividing by zero
			mse = sse_accum/self.num_ratings if self.num_ratings else float('nan')
			print(f"\t{iteration+1} \t\t {mse} ")

			self.mse = mse

		message = 'Glad to see you again! recommended for you:\n'
		idx = np.where(trained_user_ids == _id)[0][0]

		# predict items
		# take the dot product of that row and the V matrix
		preds = np.dot(user_mat[idx, :], item_mat)

		# pull the top items according to the prediction
		# NOTE(review): the factorization is fitted on this user's rows only,
		# so the candidates are the items this user has already rated --
		# confirm that this is the intended behavior.
		indices = preds.argsort()[-rec_num:][::-1]
		rec_ids = trained_item_ids[indices]
		rec_names = rf.get_item_names(
			rec_ids,
			self.df_items,
			self.item_id_colname,
			self.item_name_colname,
		)

		return rec_ids, rec_names, message