예제 #1
0
def bi_item_rating_pred(item_pair, train_mtx, pair_path, k, option):
    """Predict ratings from the ``k`` most similar rows of ``train_mtx``.

    Every ``(movie_id, user_id)`` entry loaded from ``pair_path`` is mapped
    through ``item_pair[movie_id]`` to a row of the similarity matrix. The
    ``k`` highest-scoring rows are used as neighbours; the looked-up row
    itself is *not* removed from that set.

    Args:
        item_pair: Indexable by movie id; yields the similarity-matrix row
            key for that movie.
        train_mtx: Rating matrix whose rows are addressed by neighbour index
            and whose columns are addressed by user id. NOTE: for options 3
            and 4 it is modified in place (all-zero rows get a 0.001 entry).
        pair_path: Forwarded to ``pred_set.pred_pair`` to load the pairs.
        k: Number of neighbours; also the divisor of the plain mean.
        option: 1 = dot similarity + mean, 2 = dot similarity + weighted
            mean, 3 = cosine similarity + mean, 4 = cosine similarity +
            weighted mean.

    Returns:
        The list of predictions, in the order of the loaded pairs. The same
        list is passed to ``pred_result.file_writer`` before returning.
    """
    queries = pred_set.pred_pair(pair_path)
    sim_mtx = []
    predictions = []
    if option in (1, 2):
        sim_mtx = movie_sim.item_dot_sim(train_mtx)
    elif option in (3, 4):
        # Rows with no non-zero entry get 0.001 in column 0 -- presumably to
        # keep the cosine normalisation away from a zero norm (confirm
        # against movie_sim.item_cos_sim).
        empty_rows = np.where(~train_mtx.any(axis=1))[0]
        train_mtx[[empty_rows], 0] = 0.001
        sim_mtx = movie_sim.item_cos_sim(train_mtx)

    plain_mean = option in (1, 3)
    weighted_mean = option in (2, 4)

    for query in queries:
        rating = 0
        movie_id = query[0]
        user_id = query[1]
        scores = sim_mtx[item_pair[movie_id]]
        # Indices of the k largest similarities, best first.
        top_k = np.argsort(scores)[::-1][:k]

        if plain_mean:
            neighbour_vals = np.take(train_mtx[:, user_id], top_k.tolist())
            # The "+ 3" presumably restores an offset removed when the
            # matrix was built -- confirm with the caller.
            rating = np.sum(neighbour_vals) / float(k) + 3
        elif weighted_mean:
            top_sims = scores[top_k]
            sim_total = np.sum(top_sims)
            if sim_total != 0:
                neighbour_vals = np.take(train_mtx[:, user_id],
                                         top_k.tolist())
                rating = np.sum(
                    np.multiply(neighbour_vals, top_sims / sim_total)) + 3
            else:
                # Similarities cancel out: no usable weights.
                rating = 3.0
        predictions.append(rating)

    # Persist the predictions.
    pred_result.file_writer(predictions)
    return predictions
def pcc_item_rating_pred(pair_path, k, option):
    """Predict ratings from item neighbours ranked on a standardised matrix.

    The matrix from ``rating_matrix.matrix_transfer(2)`` is standardised row
    by row (mean removed, divided by the norm of the centred row) and that
    standardised copy is used only to compute item similarities. The
    neighbour values that make up a prediction are read from the
    unstandardised matrix.

    Args:
        pair_path: Forwarded to ``pred_set.pred_pair``; each loaded entry is
            read as ``(movie_id, user_id)``.
        k: Number of neighbours kept after the query item is dropped; also
            the divisor of the plain mean.
        option: 1 = dot similarity + mean, 2 = dot similarity + weighted
            mean, 3 = cosine similarity + mean, 4 = cosine similarity +
            weighted mean.

    Returns:
        The list of predictions, in the order of the loaded pairs. The same
        list is passed to ``pred_result.file_writer`` before returning.
    """
    queries = pred_set.pred_pair(pair_path)
    train_mtx = rating_matrix.matrix_transfer(2)

    # Overwrite every all-zero column with a small constant.
    empty_cols = np.where(~train_mtx.any(axis=0))[0]
    train_mtx[:, [empty_cols]] = 0.001

    # Standardise on the transpose so the statistics run along axis 0, then
    # flip the result back to the original orientation.
    flipped = train_mtx.T
    flipped = flipped - np.sum(flipped, axis=0) / len(flipped)
    flipped /= np.linalg.norm(flipped, axis=0)
    pcc_mtx = flipped.T

    sim_mtx = []
    predictions = []
    if option in (1, 2):
        sim_mtx = movie_sim.item_dot_sim(pcc_mtx)
    elif option in (3, 4):
        # Same columns and same value as the assignment above.
        train_mtx[:, [empty_cols]] = 0.001
        sim_mtx = movie_sim.item_cos_sim(pcc_mtx)

    plain_mean = option in (1, 3)
    weighted_mean = option in (2, 4)

    for query in queries:
        rating = 0
        movie_id = query[0]
        user_id = query[1]
        scores = sim_mtx[movie_id]

        # Take k + 1 candidates, then drop the query item itself; if it is
        # not among them, drop the weakest candidate so k remain.
        ranked = np.argsort(scores)[::-1][:k + 1]
        drop_at = (np.where(ranked == movie_id)
                   if movie_id in ranked else len(ranked) - 1)
        neighbours = np.delete(ranked, drop_at)

        if plain_mean:
            neighbour_vals = np.take(train_mtx[:, user_id],
                                     neighbours.tolist())
            rating = np.sum(neighbour_vals) / float(k) + 3
        elif weighted_mean:
            neighbour_sims = scores[neighbours]
            sim_total = np.sum(neighbour_sims)
            if sim_total != 0:
                neighbour_vals = np.take(train_mtx[:, user_id],
                                         neighbours.tolist())
                rating = np.sum(
                    np.multiply(neighbour_vals,
                                neighbour_sims / sim_total)) + 3
            else:
                rating = 3.0
        predictions.append(rating)

    # Persist the predictions.
    pred_result.file_writer(predictions)
    return predictions
def user_rating_pred(pair_path, k, option):
    """Predict ratings from the ``k`` most similar users.

    The matrix from ``rating_matrix.matrix_transfer(2)`` is indexed as
    ``train_mtx[movie_id, user_id]``. For each requested pair the users are
    ranked by similarity to ``user_id``, the user itself is removed, and the
    neighbours' entries for ``movie_id`` are combined.

    Args:
        pair_path: Forwarded to ``pred_set.pred_pair``; each loaded entry is
            read as ``(movie_id, user_id)``.
        k: Number of neighbours; also the divisor of the plain mean.
        option: 1 = dot similarity + mean, 2 = dot similarity + weighted
            mean, 3 = cosine similarity + mean, 4 = cosine similarity +
            weighted mean.

    Returns:
        The list of predictions, in the order of the loaded pairs. The same
        list is passed to ``pred_result.file_writer`` before returning.
    """
    pair = pred_set.pred_pair(pair_path)
    train_mtx = rating_matrix.matrix_transfer(2)
    user_sim_mtx = []
    pred_list = []
    if option in (1, 2):
        user_sim_mtx = user_sim.user_dot_sim(train_mtx)
    elif option in (3, 4):
        # Put a small value in row 0 of every all-zero column -- presumably
        # so the cosine normalisation never sees a zero norm (confirm
        # against user_sim.user_cos_sim).
        user_zero_vec = np.where(~train_mtx.any(axis=0))[0]
        train_mtx[0, [user_zero_vec]] = 0.001
        user_sim_mtx = user_sim.user_cos_sim(train_mtx)

    for row in pair:
        pred_rating = 0
        movie_id = row[0]
        user_id = row[1]
        user_sim_list = user_sim_mtx[user_id]

        # Take k + 1 candidates, then drop the query user; if it is not
        # among them, drop the weakest candidate so k remain.
        # TODO: if two sim equals, small user_id comes first
        user_knn_list = np.argsort(user_sim_list)[::-1][:k + 1]
        if user_id in user_knn_list:
            position = np.where(user_knn_list == user_id)
            user_knn_list = np.delete(user_knn_list, position)
        else:
            user_knn_list = np.delete(user_knn_list, len(user_knn_list) - 1)

        if option in (1, 3):
            # The "+ 3" presumably restores an offset removed when the
            # matrix was built -- confirm with rating_matrix.matrix_transfer.
            pred_rating = np.sum(
                np.take(train_mtx[movie_id, :],
                        user_knn_list.tolist())) / float(k) + 3
        elif option in (2, 4):
            user_knn_sim = user_sim_list[user_knn_list]
            sim_total = np.sum(user_knn_sim)
            if sim_total != 0:
                weight = user_knn_sim / sim_total
                pred_rating = np.sum(
                    np.multiply(
                        np.take(train_mtx[movie_id, :],
                                user_knn_list.tolist()), weight)) + 3
            else:
                # No usable weights: fall back to the mean of the movie's
                # non-zero entries. A row with no non-zero entry used to
                # produce 0/0 = NaN here; return the 3.0 baseline instead.
                movie_ratings = train_mtx[movie_id, :]
                rated_count = np.size(np.nonzero(movie_ratings))
                if rated_count:
                    pred_rating = np.sum(movie_ratings) / rated_count + 3
                else:
                    pred_rating = 3.0

        pred_list.append(pred_rating)

    # output the result
    pred_result.file_writer(pred_list)
    return pred_list
def pcc_item_rating_pred(pair_path, k, option):
    """Item-neighbour rating prediction using standardised similarities.

    Rows of the matrix returned by ``rating_matrix.matrix_transfer(2)`` are
    mean-centred and scaled by the norm of the centred row. Similarities are
    computed on that standardised copy, while the values averaged into a
    prediction come from the unstandardised matrix.

    Args:
        pair_path: Forwarded to ``pred_set.pred_pair``; each loaded entry is
            read as ``(movie_id, user_id)``.
        k: Number of neighbours kept once the query item is removed; also
            the divisor of the plain mean.
        option: 1 = dot similarity + mean, 2 = dot similarity + weighted
            mean, 3 = cosine similarity + mean, 4 = cosine similarity +
            weighted mean.

    Returns:
        Predictions in the order of the loaded pairs; the list is also
        written out through ``pred_result.file_writer``.
    """
    requests = pred_set.pred_pair(pair_path)
    train_mtx = rating_matrix.matrix_transfer(2)

    # Fill every all-zero column with a small constant.
    blank_cols = np.where(~train_mtx.any(axis=0))[0]
    train_mtx[:, [blank_cols]] = 0.001

    # Centre and scale along axis 0 of the transpose, then transpose back.
    work = train_mtx.T
    col_means = np.sum(work, axis=0) / len(work)
    work = work - col_means
    work /= np.linalg.norm(work, axis=0)
    pcc_mtx = work.T

    similarity = []
    results = []
    if option in (1, 2):
        similarity = movie_sim.item_dot_sim(pcc_mtx)
    elif option in (3, 4):
        # Repeats the fill above (same columns, same value).
        train_mtx[:, [blank_cols]] = 0.001
        similarity = movie_sim.item_cos_sim(pcc_mtx)

    for request in requests:
        estimate = 0
        movie_id = request[0]
        user_id = request[1]
        sims = similarity[movie_id]

        # k + 1 best candidates; remove the query item, or the last
        # candidate when the query item is not among them.
        candidates = np.argsort(sims)[::-1][:k + 1]
        if movie_id in candidates:
            remove = np.where(candidates == movie_id)
        else:
            remove = len(candidates) - 1
        neighbours = np.delete(candidates, remove)

        if option in (1, 3):
            picked = np.take(train_mtx[:, user_id], neighbours.tolist())
            estimate = np.sum(picked) / float(k) + 3
        elif option in (2, 4):
            neighbour_sims = sims[neighbours]
            total = np.sum(neighbour_sims)
            if total != 0:
                picked = np.take(train_mtx[:, user_id], neighbours.tolist())
                estimate = np.sum(
                    np.multiply(picked, neighbour_sims / total)) + 3
            else:
                estimate = 3.0
        results.append(estimate)

    # Write the predictions out.
    pred_result.file_writer(results)
    return results
예제 #5
0
def item_rating_pred(pair_path, k, option):
    """Predict ratings from the ``k`` most similar items.

    The matrix from ``rating_matrix.matrix_transfer(2)`` is indexed as
    ``train_mtx[movie_id, user_id]``. For each requested pair the items are
    ranked by similarity to ``movie_id``, the item itself is removed, and
    the neighbours' entries for ``user_id`` are combined.

    Args:
        pair_path: Forwarded to ``pred_set.pred_pair``; each loaded entry is
            read as ``(movie_id, user_id)``.
        k: Number of neighbours; also the divisor of the plain mean.
        option: 1 = dot similarity + mean, 2 = dot similarity + weighted
            mean, 3 = cosine similarity + mean, 4 = cosine similarity +
            weighted mean.

    Returns:
        The list of predictions, in the order of the loaded pairs. The same
        list is passed to ``pred_result.file_writer`` before returning.
    """
    pair = pred_set.pred_pair(pair_path)
    train_mtx = rating_matrix.matrix_transfer(2)
    item_sim_mtx = []
    pred_list = []
    if option in (1, 2):
        item_sim_mtx = movie_sim.item_dot_sim(train_mtx)
    elif option in (3, 4):
        # Fill every all-zero column with a small constant before the
        # cosine similarity is computed.
        item_zero_vec = np.where(~train_mtx.any(axis=0))[0]
        train_mtx[:, [item_zero_vec]] = 0.001
        item_sim_mtx = movie_sim.item_cos_sim(train_mtx)

    for row in pair:
        pred_rating = 0
        movie_id = row[0]
        user_id = row[1]
        item_sim_list = item_sim_mtx[movie_id]

        # Take k + 1 candidates, then drop the query item; if it is not
        # among them, drop the weakest candidate so k remain.
        item_knn_list = np.argsort(item_sim_list)[::-1][:k + 1]
        if movie_id in item_knn_list:
            position = np.where(item_knn_list == movie_id)
            item_knn_list = np.delete(item_knn_list, position)
        else:
            item_knn_list = np.delete(item_knn_list, len(item_knn_list) - 1)

        if option in (1, 3):
            # The "+ 3" presumably restores an offset removed when the
            # matrix was built -- confirm with rating_matrix.matrix_transfer.
            pred_rating = np.sum(
                np.take(train_mtx[:, user_id],
                        item_knn_list.tolist())) / float(k) + 3
        elif option in (2, 4):
            item_knn_sim = item_sim_list[item_knn_list]
            sim_total = np.sum(item_knn_sim)
            if sim_total != 0:
                weight = item_knn_sim / sim_total
                pred_rating = np.sum(
                    np.multiply(
                        np.take(train_mtx[:, user_id], item_knn_list.tolist()),
                        weight)) + 3
            else:
                # No usable weights: fall back to the mean of the movie's
                # non-zero entries. A row with no non-zero entry used to
                # produce 0/0 = NaN here; return the 3.0 baseline instead.
                movie_ratings = train_mtx[movie_id, :]
                rated_count = np.size(np.nonzero(movie_ratings))
                if rated_count:
                    pred_rating = np.sum(movie_ratings) / rated_count + 3
                else:
                    pred_rating = 3.0
        pred_list.append(pred_rating)

    # output the result
    pred_result.file_writer(pred_list)
    return pred_list
def user_rating_pred(pair_path, k, option):
    """Predict ratings by combining the entries of the nearest users.

    ``rating_matrix.matrix_transfer(2)`` supplies a matrix addressed as
    ``train_mtx[movie_id, user_id]``. Users are ranked by similarity to the
    query user, the query user is excluded, and the remaining neighbours'
    entries for the query movie are averaged (plainly or weighted by
    similarity).

    Args:
        pair_path: Forwarded to ``pred_set.pred_pair``; each loaded entry is
            read as ``(movie_id, user_id)``.
        k: Number of neighbours; also the divisor of the plain mean.
        option: 1 = dot similarity + mean, 2 = dot similarity + weighted
            mean, 3 = cosine similarity + mean, 4 = cosine similarity +
            weighted mean.

    Returns:
        Predictions in the order of the loaded pairs; the list is also
        written out through ``pred_result.file_writer``.
    """
    pair = pred_set.pred_pair(pair_path)
    train_mtx = rating_matrix.matrix_transfer(2)
    user_sim_mtx = []
    pred_list = []
    if option in (1, 2):
        user_sim_mtx = user_sim.user_dot_sim(train_mtx)
    elif option in (3, 4):
        # add a bias to the all zero column vectors (row 0 only)
        user_zero_vec = np.where(~train_mtx.any(axis=0))[0]
        train_mtx[0, [user_zero_vec]] = 0.001
        user_sim_mtx = user_sim.user_cos_sim(train_mtx)

    for row in pair:
        pred_rating = 0
        movie_id = row[0]
        user_id = row[1]
        user_sim_list = user_sim_mtx[user_id]

        # k + 1 best candidates; remove the query user, or the last
        # candidate when the query user is not among them.
        # TODO: if two sim equals, small user_id comes first
        user_knn_list = np.argsort(user_sim_list)[::-1][:k + 1]
        if user_id in user_knn_list:
            position = np.where(user_knn_list == user_id)
            user_knn_list = np.delete(user_knn_list, position)
        else:
            user_knn_list = np.delete(user_knn_list, len(user_knn_list) - 1)

        if option in (1, 3):
            neighbour_vals = np.take(train_mtx[movie_id, :],
                                     user_knn_list.tolist())
            pred_rating = np.sum(neighbour_vals) / float(k) + 3
        elif option in (2, 4):
            user_knn_sim = user_sim_list[user_knn_list]
            sim_total = np.sum(user_knn_sim)
            if sim_total != 0:
                weight = user_knn_sim / sim_total
                neighbour_vals = np.take(train_mtx[movie_id, :],
                                         user_knn_list.tolist())
                pred_rating = np.sum(np.multiply(neighbour_vals, weight)) + 3
            else:
                # Similarities sum to zero, so use the mean of the movie's
                # non-zero entries. Guard the count: an all-zero row
                # previously divided 0 by 0 and emitted NaN.
                movie_ratings = train_mtx[movie_id, :]
                rated_count = np.size(np.nonzero(movie_ratings))
                if rated_count:
                    pred_rating = np.sum(movie_ratings) / rated_count + 3
                else:
                    pred_rating = 3.0

        pred_list.append(pred_rating)

    # output the result
    pred_result.file_writer(pred_list)
    return pred_list
def item_rating_pred(pair_path, k, option):
    """Predict ratings by combining the entries of the nearest items.

    ``rating_matrix.matrix_transfer(2)`` supplies a matrix addressed as
    ``train_mtx[movie_id, user_id]``. Items are ranked by similarity to the
    query movie, the query movie is excluded, and the remaining neighbours'
    entries for the query user are averaged (plainly or weighted by
    similarity).

    Args:
        pair_path: Forwarded to ``pred_set.pred_pair``; each loaded entry is
            read as ``(movie_id, user_id)``.
        k: Number of neighbours; also the divisor of the plain mean.
        option: 1 = dot similarity + mean, 2 = dot similarity + weighted
            mean, 3 = cosine similarity + mean, 4 = cosine similarity +
            weighted mean.

    Returns:
        Predictions in the order of the loaded pairs; the list is also
        written out through ``pred_result.file_writer``.
    """
    pair = pred_set.pred_pair(pair_path)
    train_mtx = rating_matrix.matrix_transfer(2)
    item_sim_mtx = []
    pred_list = []
    if option in (1, 2):
        item_sim_mtx = movie_sim.item_dot_sim(train_mtx)
    elif option in (3, 4):
        # Every all-zero column is overwritten with a small constant first.
        item_zero_vec = np.where(~train_mtx.any(axis=0))[0]
        train_mtx[:, [item_zero_vec]] = 0.001
        item_sim_mtx = movie_sim.item_cos_sim(train_mtx)

    for row in pair:
        pred_rating = 0
        movie_id = row[0]
        user_id = row[1]
        item_sim_list = item_sim_mtx[movie_id]

        # k + 1 best candidates; remove the query item, or the last
        # candidate when the query item is not among them.
        item_knn_list = np.argsort(item_sim_list)[::-1][:k + 1]
        if movie_id in item_knn_list:
            position = np.where(item_knn_list == movie_id)
            item_knn_list = np.delete(item_knn_list, position)
        else:
            item_knn_list = np.delete(item_knn_list, len(item_knn_list) - 1)

        if option in (1, 3):
            neighbour_vals = np.take(train_mtx[:, user_id],
                                     item_knn_list.tolist())
            pred_rating = np.sum(neighbour_vals) / float(k) + 3
        elif option in (2, 4):
            item_knn_sim = item_sim_list[item_knn_list]
            sim_total = np.sum(item_knn_sim)
            if sim_total != 0:
                weight = item_knn_sim / sim_total
                neighbour_vals = np.take(train_mtx[:, user_id],
                                         item_knn_list.tolist())
                pred_rating = np.sum(np.multiply(neighbour_vals, weight)) + 3
            else:
                # Similarities sum to zero, so use the mean of the movie's
                # non-zero entries. Guard the count: an all-zero row
                # previously divided 0 by 0 and emitted NaN.
                movie_ratings = train_mtx[movie_id, :]
                rated_count = np.size(np.nonzero(movie_ratings))
                if rated_count:
                    pred_rating = np.sum(movie_ratings) / rated_count + 3
                else:
                    pred_rating = 3.0
        pred_list.append(pred_rating)

    # output the result
    pred_result.file_writer(pred_list)
    return pred_list
def pcc_user_rating_pred(pair_path, k, option):
    """Predict ratings from user neighbours ranked on a standardised matrix.

    Columns of the matrix from ``rating_matrix.matrix_transfer(2)`` are
    mean-centred and scaled, and user similarities are computed on that
    copy. ``option`` only chooses the similarity function; the prediction is
    always the plain mean of the neighbours' unstandardised entries.

    Args:
        pair_path: Forwarded to ``pred_set.pred_pair``; each loaded entry is
            read as ``(movie_id, user_id)``.
        k: Number of neighbours kept after the query user is dropped; also
            the divisor of the mean.
        option: 1 or 2 = dot similarity, 3 or 4 = cosine similarity.

    Returns:
        The list of predictions, in the order of the loaded pairs. The same
        list is passed to ``pred_result.file_writer`` before returning.
    """
    queries = pred_set.pred_pair(pair_path)
    train_mtx = rating_matrix.matrix_transfer(2)

    # Overwrite every all-zero column with a small constant.
    empty_cols = np.where(~train_mtx.any(axis=0))[0]
    train_mtx[:, [empty_cols]] = 0.001

    # Centre each column, then scale it.
    # NOTE(review): the divisor is the norm of the *uncentred* matrix, not
    # of the centred one -- confirm this is intended.
    col_means = np.sum(train_mtx, axis=0) / len(train_mtx)
    pcc_mtx = train_mtx - col_means
    pcc_mtx /= np.linalg.norm(train_mtx, axis=0)

    sim_mtx = []
    predictions = []
    if option in (1, 2):
        sim_mtx = user_sim.user_dot_sim(pcc_mtx)
    elif option in (3, 4):
        sim_mtx = user_sim.user_cos_sim(pcc_mtx)

    for query in queries:
        movie_id = query[0]
        user_id = query[1]
        scores = sim_mtx[user_id]

        # Take k + 1 candidates, then drop the query user; if it is not
        # among them, drop the weakest candidate so k remain.
        ranked = np.argsort(scores)[::-1][:k + 1]
        drop_at = (np.where(ranked == user_id)
                   if user_id in ranked else len(ranked) - 1)
        neighbours = np.delete(ranked, drop_at)

        neighbour_vals = np.take(train_mtx[movie_id, :], neighbours.tolist())
        predictions.append(np.sum(neighbour_vals) / float(k) + 3)

    # Persist the predictions.
    pred_result.file_writer(predictions)
    return predictions
예제 #9
0
def bi_user_rating_pred(user_pair, train_mtx, pair_path, k, option):
    """Predict ratings from the ``k`` most similar columns of ``train_mtx``.

    Every ``(movie_id, user_id)`` entry loaded from ``pair_path`` is mapped
    through ``user_pair[user_id]`` to a row of the user similarity matrix.
    The ``k`` highest-scoring entries are used as neighbours; the looked-up
    entry itself is *not* removed from that set.

    Args:
        user_pair: Indexable by user id; yields the similarity-matrix row
            key for that user.
        train_mtx: Rating matrix addressed as ``train_mtx[movie_id, idx]``
            where ``idx`` is a neighbour index. It is not modified here.
        pair_path: Forwarded to ``pred_set.pred_pair`` to load the pairs.
        k: Number of neighbours; also the divisor of the plain mean.
        option: 1 = dot similarity + mean, 2 = dot similarity + weighted
            mean, 3 = cosine similarity + mean, 4 = cosine similarity +
            weighted mean.

    Returns:
        The list of predictions, in the order of the loaded pairs. The same
        list is passed to ``pred_result.file_writer`` before returning.
    """
    queries = pred_set.pred_pair(pair_path)
    sim_mtx = []
    predictions = []
    if option in (1, 2):
        sim_mtx = user_sim.user_dot_sim(train_mtx)
    elif option in (3, 4):
        sim_mtx = user_sim.user_cos_sim(train_mtx)

    plain_mean = option in (1, 3)
    weighted_mean = option in (2, 4)

    for query in queries:
        rating = 0
        movie_id = query[0]
        user_id = query[1]
        scores = sim_mtx[user_pair[user_id]]
        # Indices of the k largest similarities, best first.
        top_k = np.argsort(scores)[::-1][:k]

        if plain_mean:
            neighbour_vals = np.take(train_mtx[movie_id, :], top_k.tolist())
            rating = np.sum(neighbour_vals) / float(k) + 3
        elif weighted_mean:
            top_sims = scores[top_k]
            sim_total = np.sum(top_sims)
            if sim_total != 0:
                neighbour_vals = np.take(train_mtx[movie_id, :],
                                         top_k.tolist())
                rating = np.sum(
                    np.multiply(neighbour_vals, top_sims / sim_total)) + 3
            else:
                # Similarities cancel out: no usable weights.
                rating = 3.0

        predictions.append(rating)

    # Persist the predictions.
    pred_result.file_writer(predictions)
    return predictions
def pcc_user_rating_pred(pair_path, k, option):
    """User-neighbour rating prediction using standardised similarities.

    The matrix from ``rating_matrix.matrix_transfer(2)`` has its columns
    mean-centred and scaled; similarities between users are computed on the
    result. Whatever ``option`` is, the prediction itself is the plain mean
    of the neighbours' entries in the unstandardised matrix.

    Args:
        pair_path: Forwarded to ``pred_set.pred_pair``; each loaded entry is
            read as ``(movie_id, user_id)``.
        k: Number of neighbours kept once the query user is removed; also
            the divisor of the mean.
        option: 1 or 2 = dot similarity, 3 or 4 = cosine similarity.

    Returns:
        Predictions in the order of the loaded pairs; the list is also
        written out through ``pred_result.file_writer``.
    """
    requests = pred_set.pred_pair(pair_path)
    train_mtx = rating_matrix.matrix_transfer(2)

    # Fill every all-zero column with a small constant.
    blank_cols = np.where(~train_mtx.any(axis=0))[0]
    train_mtx[:, [blank_cols]] = 0.001

    # Subtract the column means, then divide by the column norms of the
    # original (uncentred) matrix.
    # NOTE(review): scaling by the uncentred norm looks unusual -- confirm.
    pcc_mtx = train_mtx - np.sum(train_mtx, axis=0) / len(train_mtx)
    scale = np.linalg.norm(train_mtx, axis=0)
    pcc_mtx /= scale

    similarity = []
    results = []
    if option in (1, 2):
        similarity = user_sim.user_dot_sim(pcc_mtx)
    elif option in (3, 4):
        similarity = user_sim.user_cos_sim(pcc_mtx)

    for request in requests:
        movie_id = request[0]
        user_id = request[1]
        sims = similarity[user_id]

        # k + 1 best candidates; remove the query user, or the last
        # candidate when the query user is not among them.
        candidates = np.argsort(sims)[::-1][:k + 1]
        if user_id in candidates:
            remove = np.where(candidates == user_id)
        else:
            remove = len(candidates) - 1
        neighbours = np.delete(candidates, remove)

        picked = np.take(train_mtx[movie_id, :], neighbours.tolist())
        estimate = np.sum(picked) / float(k) + 3
        results.append(estimate)

    # Write the predictions out.
    pred_result.file_writer(results)
    return results