Beispiel #1
0
def alternateOptimization(opinion_matrix, opinion_matrix_I, rating_matrix,
                          NUM_OF_FACTORS, MAX_DEPTH, File):
    """Alternately refine user vectors and item vectors until they stabilise.

    Starting from the factorisation returned by ``MF``, each round
    (1) re-optimises every user vector with ``opt.cf_user``,
    (2) fits a fresh item tree with ``fitTree_I`` and reads item vectors from
    it with ``getVectors_f``, (3) re-optimises every item vector with
    ``opt.cf_item``, and (4) stops early when the squared change of the
    predicted rating matrix drops below a fixed tolerance.

    Args:
        opinion_matrix: User opinion matrix; only ``shape[1]`` is read here.
        opinion_matrix_I: Item opinion matrix used to fit the item tree and to
            look up item vectors from it.
        rating_matrix: 2-D rating matrix; ``shape`` gives (users, items).
        NUM_OF_FACTORS: Number of latent factors passed to the trees and to
            the ``opt`` solvers.
        MAX_DEPTH: Maximum depth passed to the tree constructors.
        File: Passed through unchanged to ``MF``.

    Returns:
        Tuple ``(decTree, decTreeI, user_vectors, item_vectors)``. ``decTree``
        is constructed but never fitted in this function; ``decTreeI`` is the
        item tree from the last executed round.
    """
    MAX_ROUNDS = 5
    CONVERGENCE_TOLERANCE = 0.1

    # Save and print the number of users, items and features.
    NUM_USERS = rating_matrix.shape[0]
    NUM_MOVIES = rating_matrix.shape[1]
    NUM_FEATURE = opinion_matrix.shape[1]
    print("Number of Users", NUM_USERS)
    print("Number of Item", NUM_MOVIES)
    print("Number of Feature", NUM_FEATURE)
    print("Number of Latent Factors: ", NUM_OF_FACTORS)

    # Initialise both profile matrices from matrix factorisation.
    # NOTE(review): the first argument is hard-coded to 20; if it is the
    # number of latent factors it should probably be NUM_OF_FACTORS - confirm.
    user_vectors, item_vectors = MF(20, 0.05, 0.02, 0.02, 100, File)

    print("Entering Main Loop of alternateOptimization")
    # The user tree is only constructed here; it is returned unfitted.
    decTree = dtree.Tree(dtree.Node(None, 1), NUM_OF_FACTORS, MAX_DEPTH)

    for i in range(MAX_ROUNDS):
        print("Getting the user vectors from tree")
        # Snapshot by value: the rows of user_vectors are overwritten in place
        # below, so a plain assignment would alias the updated data and hide
        # every user-side change from the convergence check.
        user_vectors_before = np.array(user_vectors)
        # Personalised term: re-optimise each user against the current items.
        for index in range(len(rating_matrix)):
            indice = np.array([index])
            user_vectors[index] = opt.cf_user(rating_matrix, item_vectors,
                                              user_vectors[index], indice,
                                              NUM_OF_FACTORS)

        print("Creating Tree.. for i = ", i, "for item")
        decTreeI = dtree.Tree(dtree.Node(None, 1), NUM_OF_FACTORS, MAX_DEPTH)
        decTreeI.fitTree_I(decTreeI.root, opinion_matrix_I, rating_matrix,
                           user_vectors, NUM_OF_FACTORS)
        print("print item tree ", i)
        decTreeI.printtree(decTreeI.root)

        print("Getting the item vectors from tree")
        # Copied as well so the snapshot cannot be affected by later in-place
        # writes, whatever object getVectors_f hands back.
        item_vectors_before = np.array(item_vectors)
        item_vectors = decTreeI.getVectors_f(opinion_matrix_I, NUM_OF_FACTORS)
        for index in range(len(rating_matrix[0])):
            indice = np.array([index])
            item_vectors[index] = opt.cf_item(rating_matrix, user_vectors,
                                              item_vectors[index], indice,
                                              NUM_OF_FACTORS)

        # Convergence check: squared difference between the predictions made
        # before and after this round.
        Pred_before = np.dot(user_vectors_before, item_vectors_before.T)
        Pred = np.dot(user_vectors, item_vectors.T)
        Error = (Pred_before - Pred).flatten()
        error = np.dot(Error, Error)
        if error < CONVERGENCE_TOLERANCE:
            break

    return decTree, decTreeI, user_vectors, item_vectors
Beispiel #2
0
    def fitTree_U(self, current_node, opinion_matrix, rating_matrix,
                  item_vectors, K):
        """Recursively grow the user tree below ``current_node``.

        Every (feature, split point) candidate is scored with
        ``opt.cal_splitvalue``. The lowest-scoring candidate is stored on the
        node, and if its score beats the node's own loss the rows are split
        into ``like`` / ``dislike`` / ``unknown`` children, each of which is
        fitted recursively on its subset of rows.

        Args:
            current_node: Node to split; ``depth`` and ``vector`` are read,
                ``feature_index``, ``split_point`` and the three child
                attributes are written.
            opinion_matrix: Opinion rows for this node; each column is a
                candidate split feature.
            rating_matrix: Rating rows for this node, indexed by the same row
                indices as ``opinion_matrix``.
            item_vectors: Item profile vectors passed to the ``opt`` routines.
            K: Passed to ``opt.cal_splitvalue`` and ``opt.cf_user``
                (presumably the number of latent factors - confirm).

        Returns:
            None. The tree is modified in place.
        """
        print("current depth of the tree", current_node.depth)
        t1 = time.time()
        # Stop when the children would exceed the depth limit or there is
        # nothing left to split.
        if current_node.depth + 1 > self.max_depth:
            return
        if len(rating_matrix) == 0:
            return

        # Loss of the node as it stands, used as the bar a split must beat.
        print("Calculate error")
        error_before = opt.lossfunction_all(rating_matrix, item_vectors,
                                            current_node.vector, 1)
        print("Error Before: ", error_before)

        number_of_bins = 5
        print("Calculating the split criteria value")

        # Single pass over all candidates, remembering only the best one.
        # Strict "<" keeps the first candidate in (feature, split point) order
        # among ties, matching a per-feature min followed by a global min.
        best_found = False
        split_value = None
        bestFeature = None
        best_split_point = None
        indices_like = indices_dislike = indices_unknown = None
        for feature_index in range(len(opinion_matrix[0])):
            print("feature_index", feature_index)
            split_points = self.find_split_point(opinion_matrix, feature_index,
                                                 number_of_bins)
            for split_point in split_points:
                print("split_point", split_point)
                (cand_like, cand_dislike,
                 cand_unknown) = split(opinion_matrix, feature_index,
                                       split_point)
                cand_value = opt.cal_splitvalue(rating_matrix, item_vectors,
                                                current_node.vector, cand_like,
                                                cand_dislike, cand_unknown, K)
                if not best_found or cand_value < split_value:
                    best_found = True
                    split_value = cand_value
                    bestFeature = feature_index
                    best_split_point = split_point
                    indices_like = cand_like
                    indices_dislike = cand_dislike
                    indices_unknown = cand_unknown

        if not best_found:
            # No feature produced a split point: leave the node as a leaf
            # instead of failing on min() of an empty sequence.
            print("can't spilt")
            return

        print("bestFeature index: ", bestFeature)
        print("Split point:", best_split_point)
        t2 = time.time()
        print("Time used to create the layer: ", t2 - t1)

        # The chosen split is recorded on the node even if it is rejected
        # below.
        current_node.feature_index = bestFeature
        current_node.split_point = best_split_point

        # The winning score and row indices are reused from the scan rather
        # than recomputed; this assumes split() and opt.cal_splitvalue() are
        # deterministic for identical arguments - TODO confirm.
        if split_value < error_before:
            branches = (
                ("like", indices_like),
                ("dislike", indices_dislike),
                ("unknown", indices_unknown),
            )
            for branch_name, branch_indices in branches:
                child = Node(current_node, current_node.depth + 1)
                # A branch without rows inherits the parent's vector.
                child.vector = current_node.vector
                if len(branch_indices) > 0:
                    # Fitted only for accepted splits; a rejected split would
                    # discard these vectors. Assumes opt.cf_user has no side
                    # effects the caller relies on - TODO confirm.
                    child.vector = opt.cf_user(rating_matrix, item_vectors,
                                               current_node.vector,
                                               branch_indices, K)
                setattr(current_node, branch_name, child)

                branch_ratings = rating_matrix[branch_indices]
                if len(branch_ratings) != 0:
                    self.fitTree_U(child, opinion_matrix[branch_indices],
                                   branch_ratings, item_vectors, K)
        else:
            print("can't spilt")