Ejemplo n.º 1
0
def alternateOptimization(opinion_matrix, opinion_matrix_I, rating_matrix,
                          NUM_OF_FACTORS, MAX_DEPTH, File,
                          max_iterations=5, tolerance=0.1):
    """Alternate between updating user vectors and refitting the item tree.

    Starting from the factors returned by ``MF``, every round

    1. replaces each user's vector with the result of ``opt.cf_user``
       computed against the current item vectors;
    2. builds a fresh item tree, fits it with ``fitTree_I`` on the updated
       user vectors, reads item vectors back with ``getVectors_f`` and
       replaces each one with the result of ``opt.cf_item``.

    The loop ends after ``max_iterations`` rounds, or earlier once the
    squared difference between the previous and the current predicted
    rating matrix (``user_vectors @ item_vectors.T``) is below
    ``tolerance``.

    Args:
        opinion_matrix: Array whose ``shape[1]`` is reported as the number
            of features. Not otherwise used here because the user tree is
            not fitted in this function.
        opinion_matrix_I: Opinion matrix handed to the item tree
            (``fitTree_I`` / ``getVectors_f``).
        rating_matrix: 2-D array; ``shape[0]`` is treated as the number of
            users and ``shape[1]`` as the number of items.
        NUM_OF_FACTORS: Value forwarded to the trees, ``opt.cf_user`` and
            ``opt.cf_item``.
        MAX_DEPTH: Depth limit forwarded to the tree constructors.
        File: Forwarded unchanged to ``MF``.
        max_iterations: Upper bound on the number of rounds (default 5,
            the previously hard-coded value).
        tolerance: Convergence threshold on the squared prediction change
            (default 0.1, the previously hard-coded value).

    Returns:
        Tuple ``(decTree, decTreeI, user_vectors, item_vectors)``.
        ``decTree`` is constructed but never fitted by this function;
        ``decTreeI`` is the item tree of the last round, or ``None`` when
        ``max_iterations`` is smaller than 1.
    """
    # Save and print the Number of Users and Movies
    NUM_USERS = rating_matrix.shape[0]
    NUM_MOVIES = rating_matrix.shape[1]
    NUM_FEATURE = opinion_matrix.shape[1]
    print("Number of Users", NUM_USERS)
    print("Number of Item", NUM_MOVIES)
    print("Number of Feature", NUM_FEATURE)
    print("Number of Latent Factors: ", NUM_OF_FACTORS)

    # Initialize both profile matrices from matrix factorization.
    # NOTE(review): the first argument (20) is hard-coded; presumably it is
    # the number of latent factors -- confirm it agrees with NUM_OF_FACTORS.
    user_vectors, item_vectors = MF(20, 0.05, 0.02, 0.02, 100, File)

    print("Entering Main Loop of alternateOptimization")
    # The user tree is only constructed; fitting it is currently disabled,
    # so it is returned exactly as built here.
    decTree = dtree.Tree(dtree.Node(None, 1), NUM_OF_FACTORS, MAX_DEPTH)
    decTreeI = None

    for i in range(max_iterations):
        print("Getting the user vectors from tree")
        # Snapshot with a real copy: user_vectors is updated in place just
        # below, so a plain assignment would alias the updated array and
        # hide the user-side change from the convergence check.
        user_vectors_before = np.copy(user_vectors)
        # adding personalized term
        for index in range(NUM_USERS):
            indice = np.array([index])
            user_vectors[index] = opt.cf_user(rating_matrix, item_vectors,
                                              user_vectors[index], indice,
                                              NUM_OF_FACTORS)

        print("Creating Tree.. for i = ", i, "for item")
        decTreeI = dtree.Tree(dtree.Node(None, 1), NUM_OF_FACTORS, MAX_DEPTH)
        decTreeI.fitTree_I(decTreeI.root, opinion_matrix_I, rating_matrix,
                           user_vectors, NUM_OF_FACTORS)
        print("print item tree ", i)
        decTreeI.printtree(decTreeI.root)
        print("Getting the item vectors from tree")
        # No copy needed: item_vectors is rebound to a new result on the
        # next line rather than modified in place.
        item_vectors_before = item_vectors
        item_vectors = decTreeI.getVectors_f(opinion_matrix_I, NUM_OF_FACTORS)
        for index in range(NUM_MOVIES):
            indice = np.array([index])
            item_vectors[index] = opt.cf_item(rating_matrix, user_vectors,
                                              item_vectors[index], indice,
                                              NUM_OF_FACTORS)

        # Convergence check: squared change of the predicted rating matrix.
        Pred_before = np.dot(user_vectors_before, item_vectors_before.T)
        Pred = np.dot(user_vectors, item_vectors.T)
        delta = (Pred_before - Pred).flatten()
        error = np.dot(delta, delta)
        if error < tolerance:
            break

    return decTree, decTreeI, user_vectors, item_vectors
Ejemplo n.º 2
0
    def fitTree_I(self, current_node, opinion_matrix, rating_matrix,
                  user_vectors, K):
        """Recursively grow the item tree below ``current_node``.

        Every (feature, split point) candidate is scored with
        ``opt.cal_splitvalueI``; the candidate with the smallest score wins
        (the first one in feature/split-point order on ties). Its feature
        index and split point are stored on ``current_node``. When the
        winning score is lower than the node's current loss
        (``opt.lossfunction_all``), three children ``like``, ``dislike``
        and ``unknown`` are attached and the method recurses into every
        child that received at least one row of ``opinion_matrix``.

        The winning candidate's score and partition are reused instead of
        calling ``split`` and ``opt.cal_splitvalueI`` a second time with the
        same arguments; this assumes ``opt.cal_splitvalueI`` is
        deterministic. Likewise the child vectors are only computed once the
        split is accepted, which assumes ``opt.cf_item`` has no side
        effects.

        Args:
            current_node: Node to split. Its ``depth`` and ``vector`` are
                read; ``feature_index``, ``split_point`` and, on an accepted
                split, ``like`` / ``dislike`` / ``unknown`` are written.
            opinion_matrix: 2-D structure; ``len(opinion_matrix[0])`` is the
                number of candidate features and rows are selected with the
                index sets returned by ``split``.
            rating_matrix: 2-D array whose columns are selected with the
                same index sets, i.e. columns line up with the rows of
                ``opinion_matrix``.
            user_vectors: Forwarded unchanged to the ``opt`` helpers and to
                the recursive calls.
            K: Forwarded unchanged to ``opt.cal_splitvalueI`` and
                ``opt.cf_item``.

        Returns:
            None. The tree is modified in place.
        """
        started = time.time()
        # Children would sit at depth + 1; stop when that exceeds the limit.
        if current_node.depth + 1 > self.max_depth:
            return
        print("current depth of the tree", current_node.depth)
        # Nothing to fit. The opinion_matrix check also protects the
        # opinion_matrix[0] lookup below from an IndexError.
        if len(rating_matrix) == 0 or len(opinion_matrix) == 0:
            return

        # Calculate the error before the split
        print("Calculate error")
        error_before = opt.lossfunction_all(rating_matrix, current_node.vector,
                                            user_vectors, 0)
        print("Error Before: ", error_before)

        NUMBER_OF_BIN = 5

        # Calculate the split criteria value
        print("Calculating the split criteria value")

        # Running best as (score, feature_index, split_point, partition).
        # The strict "<" keeps the earliest candidate on ties.
        best = None
        for feature_index in range(len(opinion_matrix[0])):
            print("feature_index", feature_index)
            split_points = self.find_split_point(opinion_matrix, feature_index,
                                                 NUMBER_OF_BIN)
            for split_point in split_points:
                print("split_point", split_point)
                # Partition the rows into like, dislike and unknown.
                partition = split(opinion_matrix, feature_index, split_point)
                indices_like, indices_dislike, indices_unknown = partition
                score = opt.cal_splitvalueI(rating_matrix, user_vectors,
                                            current_node.vector, indices_like,
                                            indices_dislike, indices_unknown,
                                            K)
                if best is None or score < best[0]:
                    best = (score, feature_index, split_point, partition)

        # No feature produced a split point: there is nothing to choose
        # from, so treat the node as a leaf instead of failing on min([]).
        if best is None:
            print("can't spilt")
            return

        split_value, bestFeature, best_split_point, partition = best
        indices_like, indices_dislike, indices_unknown = partition

        print("bestFeature index: ", bestFeature)
        print("Split point:", best_split_point)
        print("Time used to create the layer: ", time.time() - started)

        # Store the chosen feature and split point on the current node
        # (done whether or not the split is accepted below).
        current_node.feature_index = bestFeature
        current_node.split_point = best_split_point

        # Only split when it lowers the loss of the current node.
        if not split_value < error_before:
            print("can't spilt")
            return

        branches = (("like", indices_like),
                    ("dislike", indices_dislike),
                    ("unknown", indices_unknown))

        # Item profile vector for each of the three classes; an empty class
        # inherits the parent's vector.
        child_vectors = [
            opt.cf_item(rating_matrix, user_vectors, current_node.vector,
                        indices, K)
            if len(indices) > 0 else current_node.vector
            for _, indices in branches
        ]

        # Attach the children and recurse with fitTree_I into each one that
        # received at least one opinion row.
        for (branch_name, indices), vector in zip(branches, child_vectors):
            child = Node(current_node, current_node.depth + 1)
            child.vector = vector
            setattr(current_node, branch_name, child)
            child_opinions = opinion_matrix[indices]
            if len(child_opinions) != 0:
                self.fitTree_I(child, child_opinions,
                               rating_matrix[:, indices], user_vectors, K)