Exemple #1
0
def test_matrix_factorization():
    print('mf test.............')
    since = time.time()

    # 潜在因子数
    K = 2
    # m x n のレーティング行列
    R = np.array([[5, 3, 0, 1], [4, 0, 0, 1], [1, 1, 0, 5], [1, 0, 0, 4],
                  [0, 1, 5, 4]])
    m, n = R.shape
    # m x K のユーザ行列P
    P = np.random.rand(m, K)
    # n x K のアイテム行列Q
    Q = np.random.rand(n, K)

    P_new, Q_new = mf.run(R,
                          P,
                          Q,
                          K,
                          steps=5000,
                          alpha=0.0002,
                          beta=0.2,
                          threshold=0.001)
    evaluate(P, Q, R, P_new, Q_new)
    assert False
Exemple #2
0
def test_movielens(load_dataset):
    print('mf test.............')

    # Load dataset
    df_rating = load_dataset
    # print(df_rating.head())

    m = max(np.unique(df_rating.userId))
    n = max(np.unique(df_rating.movieId))
    # m = len(np.unique(df_rating.userId))
    # n = len(np.unique(df_rating.movieId))

    print('m x n: {} x {}'.format(m, n))

    R = np.zeros([m, n])

    print(type(df_rating.userId[0]))
    print(type(df_rating.movieId[0]))
    print(R[df_rating.userId[0]][df_rating.movieId[0]])
    print(np.unique(df_rating.rating))
    print(sum(df_rating.rating.isnull()))

    for i, s in df_rating.iterrows():
        if s.rating == 0: continue

        u_id = int(s.userId)
        m_id = int(s.movieId)

        R[u_id - 1][m_id - 1] = s.rating

    print(R)

    # 潜在因子数
    K = 2
    # m x n のレーティング行列
    m, n = df_rating[['userId', 'movieId']].shape
    print(m, n)
    R = np.array([[5, 3, 0, 1], [4, 0, 0, 1], [1, 1, 0, 5], [1, 0, 0, 4],
                  [0, 1, 5, 4]])
    m, n = R.shape
    # m x K のユーザ行列P
    P = np.random.rand(m, K)
    # n x K のアイテム行列Q
    Q = np.random.rand(n, K)

    P_new, Q_new = mf.run(R,
                          P,
                          Q,
                          K,
                          steps=5000,
                          alpha=0.0002,
                          beta=0.2,
                          threshold=0.001)
    evaluate(P, Q, R, P_new, Q_new)

    assert False