Beispiel #1
0
def test_predict():
    (I, J, K) = (5, 3, 2)
    R = numpy.array(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]],
        dtype=float)
    M = numpy.ones((I, J))
    K = 3

    U = numpy.array([[125., 126.], [126., 126.], [126., 126.], [126., 126.],
                     [126., 126.]])
    V = numpy.array([[84., 84.], [84., 84.], [84., 84.]])

    M_test = numpy.array([[0, 0, 1], [0, 1, 0], [0, 0, 0], [1, 1, 0],
                          [0, 0,
                           0]])  #R->3,5,10,11, R_pred->21084,21168,21168,21168
    MSE = (444408561. + 447872569. + 447660964. + 447618649) / 4.
    R2 = 1. - (444408561. + 447872569. + 447660964. + 447618649) / (
        4.25**2 + 2.25**2 + 2.75**2 + 3.75**2)  #mean=7.25
    Rp = 357. / (
        math.sqrt(44.75) * math.sqrt(5292.)
    )  #mean=7.25,var=44.75, mean_pred=21147,var_pred=5292, corr=(-4.25*-63 + -2.25*21 + 2.75*21 + 3.75*21)

    nmf = NMF(R, M, K)
    nmf.U = U
    nmf.V = V
    performances = nmf.predict(M_test)

    assert performances['MSE'] == MSE
    assert performances['R^2'] == R2
    assert performances['Rp'] == Rp
Beispiel #2
0
def test_run():
    # Data generated from W = [[1,2],[3,4]], H = [[4,3],[2,1]]
    R = [[8, 5], [20, 13]]
    M = [[1, 1], [1, 0]]
    K = 2

    U = numpy.array([[10, 9], [8, 7]], dtype='f')  #2x2
    V = numpy.array([[6, 4], [5, 3]], dtype='f')  #2x2

    nmf = NMF(R, M, K)

    # Check we get an Exception if W, H are undefined
    with pytest.raises(AssertionError) as error:
        nmf.run(0)
    assert str(
        error.value
    ) == "U and V have not been initialised - please run NMF.initialise() first."

    # Then check for 1 iteration whether the updates work - heck just the first entry of U
    nmf.U = U
    nmf.V = V
    nmf.run(1)

    U_00 = 10 * (6 * 8 / 96.0 + 5 * 5 / 77.0) / (5.0 + 6.0)  #0.74970484061
    assert abs(U_00 - nmf.U[0][0]) < 0.000001
Beispiel #3
0
def test_compute_statistics():
    R = numpy.array([[1,2],[3,4]],dtype=float)
    M = numpy.array([[1,1],[0,1]])
    (I,J,K) = 2, 2, 3
    
    nmf = NMF(R,M,K)
    
    R_pred = numpy.array([[500,550],[1220,1342]],dtype=float)
    M_pred = numpy.array([[0,0],[1,1]])
    
    MSE_pred = (1217**2 + 1338**2) / 2.0
    R2_pred = 1. - (1217**2+1338**2)/(0.5**2+0.5**2) #mean=3.5
    Rp_pred = 61. / ( math.sqrt(.5) * math.sqrt(7442.) ) #mean=3.5,var=0.5,mean_pred=1281,var_pred=7442,cov=61
    
    assert MSE_pred == nmf.compute_MSE(M_pred,R,R_pred)
    assert R2_pred == nmf.compute_R2(M_pred,R,R_pred)
    assert Rp_pred == nmf.compute_Rp(M_pred,R,R_pred)
Beispiel #4
0
def test_compute_I_div():
    R = [[1, 2, 0, 4], [5, 0, 7, 0]]
    M = [[1, 1, 0, 1], [1, 0, 1, 0]]
    K = 2

    U = numpy.array([[1, 2], [3, 4]], dtype='f')  #2x2
    V = numpy.array([[5, 7, 9, 11], [6, 8, 10, 12]], dtype='f').T  #4x2
    #R_pred = [[17,23,29,35],[39,53,67,81]]

    expected_I_div = sum([
        1.0 * math.log(1.0 / 17.0) - 1.0 + 17.0,
        2.0 * math.log(2.0 / 23.0) - 2.0 + 23.0,
        4.0 * math.log(4.0 / 35.0) - 4.0 + 35.0,
        5.0 * math.log(5.0 / 39.0) - 5.0 + 39.0,
        7.0 * math.log(7.0 / 67.0) - 7.0 + 67.0
    ])

    nmf = NMF(R, M, K)
    nmf.U = U
    nmf.V = V

    I_div = nmf.compute_I_div()
    assert abs(I_div - expected_I_div) < 0.0000001
Beispiel #5
0
def test_init():
    # Test getting an exception when R and M are different sizes, and when R is not a 2D array
    R1 = numpy.ones(3)
    M = numpy.ones((2, 3))
    K = 0
    with pytest.raises(AssertionError) as error:
        NMF(R1, M, K)
    assert str(
        error.value
    ) == "Input matrix R is not a two-dimensional array, but instead 1-dimensional."

    R2 = numpy.ones((4, 3, 2))
    with pytest.raises(AssertionError) as error:
        NMF(R2, M, K)
    assert str(
        error.value
    ) == "Input matrix R is not a two-dimensional array, but instead 3-dimensional."

    R3 = numpy.ones((3, 2))
    with pytest.raises(AssertionError) as error:
        NMF(R3, M, K)
    assert str(
        error.value
    ) == "Input matrix R is not of the same size as the indicator matrix M: (3, 2) and (2, 3) respectively."

    # Test getting an exception if a row or column is entirely unknown
    R = numpy.ones((2, 3))
    M1 = [[1, 1, 1], [0, 0, 0]]
    M2 = [[1, 1, 0], [1, 0, 0]]

    with pytest.raises(AssertionError) as error:
        NMF(R, M1, K)
    assert str(error.value) == "Fully unobserved row in R, row 1."
    with pytest.raises(AssertionError) as error:
        NMF(R, M2, K)
    assert str(error.value) == "Fully unobserved column in R, column 2."

    # Test whether we made a copy of R with 1's at unknown values
    I, J = 2, 4
    R = [[1, 2, 0, 4], [5, 0, 7, 0]]
    M = [[1, 1, 0, 1], [1, 0, 1, 0]]
    K = 2
    R_excl_unknown = [[1, 2, 1, 4], [5, 1, 7, 1]]

    nmf = NMF(R, M, K)
    assert numpy.array_equal(R, nmf.R)
    assert numpy.array_equal(M, nmf.M)
    assert nmf.I == I
    assert nmf.J == J
    assert nmf.K == K
    assert numpy.array_equal(R_excl_unknown, nmf.R_excl_unknown)
Beispiel #6
0
def test_initialisation():
    I, J = 2, 3
    R = numpy.ones((I, J))
    M = numpy.ones((I, J))
    K = 4

    # Init ones
    init_UV = 'ones'
    nmf = NMF(R, M, K)
    nmf.initialise(init_UV)

    assert numpy.array_equal(numpy.ones((2, 4)), nmf.U)
    assert numpy.array_equal(numpy.ones((3, 4)), nmf.V)

    # Init random
    init_UV = 'random'
    nmf = NMF(R, M, K)
    nmf.initialise(init_UV)

    for (i, k) in itertools.product(range(0, I), range(0, K)):
        assert nmf.U[i, k] > 0 and nmf.U[i, k] < 1
    for (j, k) in itertools.product(range(0, J), range(0, K)):
        assert nmf.V[j, k] > 0 and nmf.V[j, k] < 1
Beispiel #7
0
def test_update():
    I, J = 2, 4
    R = [[1, 2, 0, 4], [5, 0, 7, 0]]
    M = [[1, 1, 0, 1], [1, 0, 1, 0]]
    K = 2

    U = numpy.array([[1, 2], [3, 4]], dtype='f')  #2x2
    V = numpy.array([[5, 6], [7, 8], [9, 10], [11, 12]], dtype='f')  #4x2

    new_U = [[
        1 * (1 * 5 / 17.0 + 2 * 7 / 23.0 + 4 * 11 / 35.0) / float(5 + 7 + 11),
        2 * (1 * 6 / 17.0 + 2 * 8 / 23.0 + 4 * 12 / 35.0) / float(6 + 8 + 12)
    ],
             [
                 3 * (5 * 5 / 39.0 + 7 * 9 / 67.0) / float(5 + 9),
                 4 * (5 * 6 / 39.0 + 7 * 10 / 67.0) / float(6 + 10)
             ]]

    new_V = [[
        5 * (1 * 1 / 17.0 + 3 * 5 / 39.0) / float(1 + 3),
        6 * (2 * 1 / 17.0 + 4 * 5 / 39.0) / float(2 + 4)
    ], [7 * (1 * 2 / 23.0) / float(1), 8 * (2 * 2 / 23.0) / float(2)],
             [9 * (3 * 7 / 67.0) / float(3), 10 * (4 * 7 / 67.0) / float(4)],
             [11 * (1 * 4 / 35.0) / float(1), 12 * (2 * 4 / 35.0) / float(2)]]

    nmf = NMF(R, M, K)

    def reset():
        nmf.U = numpy.copy(U)
        nmf.V = numpy.copy(V)

    for k in range(0, K):
        reset()
        nmf.update_U(k)

        for i in range(0, I):
            assert abs(new_U[i][k] - nmf.U[i, k]) < 0.00001

    for k in range(0, K):
        reset()
        nmf.update_V(k)
        for j in range(0, J):
            assert abs(new_V[j][k] - nmf.V[j, k]) < 0.00001

    # Also if I = J
    I, J, K = 2, 2, 3
    R = [[1, 2], [3, 4]]
    M = [[1, 1], [0, 1]]

    U = numpy.array([[1, 2, 3], [4, 5, 6]], dtype='f')  #2x3
    V = numpy.array([[7, 8, 9], [10, 11, 12]], dtype='f')  #2x3
    R_pred = numpy.array([[50, 68], [122, 167]], dtype='f')  #2x2

    nmf = NMF(R, M, K)

    def reset_2():
        nmf.U = numpy.copy(U)
        nmf.V = numpy.copy(V)

    for k in range(0, K):
        reset_2()
        nmf.update_U(k)
        for i in range(0, I):
            new_Uik = U[i][k] * sum( [V[j][k] * R[i][j] / R_pred[i,j] for j in range(0,J) if M[i][j] ]) \
                              / sum( [V[j][k] for j in range(0,J) if M[i][j] ])
            assert abs(new_Uik - nmf.U[i, k]) < 0.00001

    for k in range(0, K):
        reset_2()
        nmf.update_V(k)
        for j in range(0, J):
            new_Vjk = V[j][k] * sum( [U[i][k] * R[i][j] / R_pred[i,j] for i in range(0,I) if M[i][j] ]) \
                              / sum( [U[i][k] for i in range(0,I) if M[i][j] ])
            assert abs(new_Vjk - nmf.V[j, k]) < 0.00001
Beispiel #8
0
        check_empty_rows_columns(M, fraction)

# We now run the VB algorithm on each of the M's for each fraction.
all_performances = {metric: [] for metric in metrics}
average_performances = {metric: []
                        for metric in metrics}  # averaged over repeats
for (fraction, Ms, Ms_test) in zip(fractions_unknown, all_Ms, all_Ms_test):
    print "Trying fraction %s." % fraction

    # Run the algorithm <repeats> times and store all the performances
    for metric in metrics:
        all_performances[metric].append([])
    for (repeat, M, M_test) in zip(range(0, repeats), Ms, Ms_test):
        print "Repeat %s of fraction %s." % (repeat + 1, fraction)

        nmf = NMF(R, M, K)
        nmf.initialise(init_UV, expo_prior)
        nmf.run(iterations)

        # Measure the performances
        performances = nmf.predict(M_test)
        for metric in metrics:
            # Add this metric's performance to the list of <repeat> performances for this fraction
            all_performances[metric][-1].append(performances[metric])

    # Compute the average across attempts
    for metric in metrics:
        average_performances[metric].append(
            sum(all_performances[metric][-1]) / repeats)

Beispiel #9
0
# Load in data
R = numpy.loadtxt(input_folder + "R.txt")
M = numpy.ones((I, J))

# Run the VB algorithm, <repeats> times
times_repeats = []
performances_repeats = []
for i in range(0, repeats):
    # Set all the seeds
    numpy.random.seed(0)
    random.seed(0)
    scipy.random.seed(0)

    # Run the classifier
    nmf = NMF(R, M, K)
    nmf.initialise(init_UV, expo_prior)
    nmf.run(iterations)

    # Extract the performances and timestamps across all iterations
    times_repeats.append(nmf.all_times)
    performances_repeats.append(nmf.all_performances)

# Check whether seed worked: all performances should be the same
assert all([numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats]), \
    "Seed went wrong - performances not the same across repeats!"

# Print out the performances, and the average times
all_times_average = list(numpy.average(times_repeats, axis=0))
all_performances = performances_repeats[0]
print "np_all_times_average = %s" % all_times_average