def alternateOptimization(opinion_matrix, opinion_matrix_I, rating_matrix, NUM_OF_FACTORS, MAX_DEPTH, File):
    """Alternate between refining user vectors and tree-derived item vectors.

    The user and item vectors are first initialised by ``MF``. Each round then
    (1) re-optimises every user vector with ``opt.cf_user``, (2) fits a fresh
    item tree with ``fitTree_I`` and reads item vectors from it, (3) refines
    every item vector with ``opt.cf_item``, and (4) stops early once the
    squared change in the predicted rating matrix falls below a tolerance.

    Args:
        opinion_matrix: Only ``shape[1]`` is read here (reported as the
            number of features).
        opinion_matrix_I: Opinion matrix handed to the item tree.
        rating_matrix: 2-D array; ``shape[0]`` is reported as the number of
            users and ``shape[1]`` as the number of items.
        NUM_OF_FACTORS: Number of latent factors passed to the trees and to
            the ``opt`` routines.
        MAX_DEPTH: Maximum depth passed to the tree constructors.
        File: Forwarded unchanged to ``MF``.

    Returns:
        Tuple ``(decTree, decTreeI, user_vectors, item_vectors)``. ``decTree``
        is constructed but never fitted inside this function; ``decTreeI`` is
        the item tree from the last round that ran.
    """
    MAX_ROUNDS = 5
    CONVERGENCE_TOLERANCE = 0.1

    # Save and print the number of users, items and features.
    NUM_USERS = rating_matrix.shape[0]
    NUM_MOVIES = rating_matrix.shape[1]
    NUM_FEATURE = opinion_matrix.shape[1]
    print("Number of Users", NUM_USERS)
    print("Number of Item", NUM_MOVIES)
    print("Number of Feature", NUM_FEATURE)
    print("Number of Latent Factors: ", NUM_OF_FACTORS)

    # Initialise both sets of vectors from matrix factorisation.
    # NOTE(review): the first argument (20) is hard-coded; if it is the factor
    # count it presumably has to equal NUM_OF_FACTORS -- confirm against MF.
    user_vectors, item_vectors = MF(20, 0.05, 0.02, 0.02, 100, File)

    print("Entering Main Loop of alternateOptimization")
    # NOTE(review): the user tree is only constructed, never fitted, yet it is
    # returned to the caller.
    decTree = dtree.Tree(dtree.Node(None, 1), NUM_OF_FACTORS, MAX_DEPTH)

    for i in range(MAX_ROUNDS):
        print("Getting the user vectors from tree")
        # Take a real copy: the rows of user_vectors are overwritten in place
        # below, so a plain alias would make the "before" snapshot identical
        # to the updated vectors and hide their change from the convergence
        # check.
        user_vectors_before = np.array(user_vectors, copy=True)

        # Personalised refinement of every user vector.
        for index in range(NUM_USERS):
            indice = np.array([index])
            user_vectors[index] = opt.cf_user(
                rating_matrix, item_vectors, user_vectors[index], indice, NUM_OF_FACTORS)

        print("Creating Tree.. for i = ", i, "for item")
        decTreeI = dtree.Tree(dtree.Node(None, 1), NUM_OF_FACTORS, MAX_DEPTH)
        decTreeI.fitTree_I(decTreeI.root, opinion_matrix_I, rating_matrix, user_vectors, NUM_OF_FACTORS)
        print("print item tree ", i)
        decTreeI.printtree(decTreeI.root)

        print("Getting the item vectors from tree")
        # item_vectors is rebound to a new object on the next line, so keeping
        # a reference to the old one is sufficient here.
        item_vectors_before = item_vectors
        item_vectors = decTreeI.getVectors_f(opinion_matrix_I, NUM_OF_FACTORS)
        for index in range(NUM_MOVIES):
            indice = np.array([index])
            item_vectors[index] = opt.cf_item(
                rating_matrix, user_vectors, item_vectors[index], indice, NUM_OF_FACTORS)

        # Convergence check: squared difference between the predictions made
        # before and after this round.
        Pred_before = np.dot(user_vectors_before, item_vectors_before.T)
        Pred = np.dot(user_vectors, item_vectors.T)
        diff = (Pred_before - Pred).flatten()
        error = np.dot(diff, diff)
        if error < CONVERGENCE_TOLERANCE:
            break

    return decTree, decTreeI, user_vectors, item_vectors
def fitTree_I(self, current_node, opinion_matrix, rating_matrix, user_vectors, K):
    """Recursively grow the item tree below ``current_node``.

    Every (feature, split point) candidate is scored with
    ``opt.cal_splitvalueI`` and the lowest-scoring one is recorded on the
    node. If the best split scores below the node's current loss
    (``opt.lossfunction_all``), three children (``like``, ``dislike``,
    ``unknown``) are created, each given a vector from ``opt.cf_item``, and
    the method recurses into every non-empty child.

    Args:
        current_node: Node to split; ``depth`` and ``vector`` are read, and
            ``feature_index``, ``split_point`` and the three child attributes
            are written.
        opinion_matrix: Indexed by row to select the entries of a partition;
            ``len(opinion_matrix[0])`` is used as the number of features.
        rating_matrix: Indexed by column (``rating_matrix[:, idx]``) with the
            same partition indices, so its columns are expected to line up
            with the rows of ``opinion_matrix``.
        user_vectors: Passed through to the ``opt`` routines.
        K: Passed through to the ``opt`` routines (the caller in this file
            supplies the number of latent factors).

    Returns:
        None. The tree is modified in place.
    """
    t1 = time.time()

    # Stop once the children would exceed the configured depth.
    if current_node.depth + 1 > self.max_depth:
        return
    print("current depth of the tree", current_node.depth)

    # Nothing to split. len(rating_matrix) is the row count, which does not
    # shrink as the tree descends (children are taken by column), so also
    # check the opinion rows before opinion_matrix[0] is indexed below.
    if len(rating_matrix) == 0 or len(opinion_matrix) == 0:
        return

    # Loss of the node before any split.
    print("Calculate error")
    error_before = opt.lossfunction_all(rating_matrix, current_node.vector, user_vectors, 0)
    print("Error Before: ", error_before)

    NUMBER_OF_BIN = 5
    num_features = len(opinion_matrix[0])

    # Candidate split points for every feature.
    feature_splitpoint_matrix = [
        self.find_split_point(opinion_matrix, feature_index, NUMBER_OF_BIN)
        for feature_index in range(num_features)
    ]

    # Score candidates as they are generated instead of first storing the
    # index partitions of all of them. Strict "<" keeps the first minimum in
    # (feature, split point) order, so ties resolve to the earliest candidate.
    print("Calculating the split criteria value")
    bestFeature = None
    best_split_point = None
    best_value = None
    for feature_index, split_points in enumerate(feature_splitpoint_matrix):
        print("feature_index", feature_index)
        for split_point in split_points:
            print("split_point", split_point)
            (indices_like, indices_dislike, indices_unknown) = split(
                opinion_matrix, feature_index, split_point)
            value = opt.cal_splitvalueI(
                rating_matrix, user_vectors, current_node.vector,
                indices_like, indices_dislike, indices_unknown, K)
            if best_value is None or value < best_value:
                best_value = value
                bestFeature = feature_index
                best_split_point = split_point

    # No feature offered a split point: treat the node as a leaf rather than
    # failing on min() of an empty sequence.
    if bestFeature is None:
        print("can't spilt")
        return

    print("bestFeature index: ", bestFeature)
    print("Split point:", best_split_point)
    t2 = time.time()
    print("Time used to create the layer: ", t2 - t1)

    # Store the chosen split on the node (done even if it is rejected below).
    current_node.feature_index = bestFeature
    current_node.split_point = best_split_point

    # Re-evaluate the winning split; the acceptance test uses this fresh
    # value in case opt.cal_splitvalueI is not deterministic.
    (indices_like, indices_dislike, indices_unknown) = split(
        opinion_matrix, bestFeature, best_split_point)
    split_value = opt.cal_splitvalueI(
        rating_matrix, user_vectors, current_node.vector,
        indices_like, indices_dislike, indices_unknown, K)

    # Only split when it lowers the loss.
    if not split_value < error_before:
        print("can't spilt")
        return

    branches = (
        ("like", indices_like),
        ("dislike", indices_dislike),
        ("unknown", indices_unknown),
    )

    # Optimise the child vectors only now that the split is accepted. An
    # empty branch keeps the parent's vector.
    child_vectors = {}
    for branch_name, branch_indices in branches:
        if len(branch_indices) > 0:
            child_vectors[branch_name] = opt.cf_item(
                rating_matrix, user_vectors, current_node.vector, branch_indices, K)
        else:
            child_vectors[branch_name] = current_node.vector

    # Attach the children and recurse into each non-empty one.
    for branch_name, branch_indices in branches:
        child = Node(current_node, current_node.depth + 1)
        child.vector = child_vectors[branch_name]
        setattr(current_node, branch_name, child)
        child_opinions = opinion_matrix[branch_indices]
        if len(child_opinions) != 0:
            self.fitTree_I(child, child_opinions, rating_matrix[:, branch_indices], user_vectors, K)