Esempio n. 1
0
def test_run():
    ###### Test updating F, G, S in that order
    # Test whether a single iteration gives the correct behaviour, updating F, G, S in that order
    R = numpy.array([[1,2],[3,4]],dtype='f')
    M = numpy.array([[1,1],[0,1]])
    K = 3
    L = 1
    
    F = numpy.array([[1,2,3],[4,5,6]],dtype='f')
    S = numpy.array([[7],[8],[9]],dtype='f')
    G = numpy.array([[10],[11]],dtype='f')
    #FSG = numpy.array([[500,550],[1220,1342]],dtype='f')
    #FS = numpy.array([[50],[122]],dtype='f')
    #SG = numpy.array([[70,77],[80,88],[90,99]],dtype='f')
    
    nmtf = NMTF(R,M,K,L)
    
    # Check we get an Exception if W, H are undefined
    with pytest.raises(AssertionError) as error:
        nmtf.run(0)
    assert str(error.value) == "F, S and G have not been initialised - please run NMTF.initialise() first."       
    
    nmtf.F = numpy.copy(F)
    nmtf.S = numpy.copy(S)
    nmtf.G = numpy.copy(G) 
    
    nmtf.run(1)
Esempio n. 2
0
def test_compute_statistics():
    R = numpy.array([[1,2],[3,4]],dtype=float)
    M = numpy.array([[1,1],[0,1]])
    (I,J,K,L) = 2, 2, 3, 4
    
    nmtf = NMTF(R,M,K,L)
    
    R_pred = numpy.array([[500,550],[1220,1342]],dtype=float)
    M_pred = numpy.array([[0,0],[1,1]])
    
    MSE_pred = (1217**2 + 1338**2) / 2.0
    R2_pred = 1. - (1217**2+1338**2)/(0.5**2+0.5**2) #mean=3.5
    Rp_pred = 61. / ( math.sqrt(.5) * math.sqrt(7442.) ) #mean=3.5,var=0.5,mean_pred=1281,var_pred=7442,cov=61
    
    assert MSE_pred == nmtf.compute_MSE(M_pred,R,R_pred)
    assert R2_pred == nmtf.compute_R2(M_pred,R,R_pred)
    assert Rp_pred == nmtf.compute_Rp(M_pred,R,R_pred)
Esempio n. 3
0
def test_predict():
    R = numpy.array([[1, 2], [3, 4]], dtype=float)
    M = numpy.array([[1, 1], [0, 1]])
    (I, J, K, L) = (2, 2, 3, 1)

    F = numpy.array([[1, 2, 3], [4, 5, 6]])
    S = numpy.array([[7], [8], [9]])
    G = numpy.array([[10], [11]])
    #R_predicted = numpy.array([[500,550],[1220,1342]],dtype=float)

    nmtf = NMTF(R, M, K, L)
    nmtf.F = F
    nmtf.S = S
    nmtf.G = G
    performances = nmtf.predict(M)

    MSE = (499 * 499 + 548 * 548 + 1338 * 1338) / 3.0
    R2 = 1 - (499**2 + 548**2 + 1338**2) / (
        (4.0 / 3.0)**2 + (1.0 / 3.0)**2 + (5.0 / 3.0)**2)  #mean=7.0/3.0
    #mean_real=7.0/3.0,mean_pred=2392.0/3.0 -> diff_real=[-4.0/3.0,-1.0/3.0,5.0/3.0],diff_pred=[-892.0/3.0,-742.0/3.0,1634.0/3.0]
    Rp = ((-4.0 / 3.0 * -892.0 / 3.0) + (-1.0 / 3.0 * -742.0 / 3.0) +
          (5.0 / 3.0 * 1634.0 / 3.0)) / (math.sqrt((-4.0 / 3.0)**2 +
                                                   (-1.0 / 3.0)**2 +
                                                   (5.0 / 3.0)**2) *
                                         math.sqrt((-892.0 / 3.0)**2 +
                                                   (-742.0 / 3.0)**2 +
                                                   (1634.0 / 3.0)**2))

    assert performances['MSE'] == MSE
    assert abs(performances['R^2'] - R2) < 0.000000001
    assert abs(performances['Rp'] - Rp) < 0.000000001
Esempio n. 4
0
def test_init():
    # Test getting an exception when R and M are different sizes, and when R is not a 2D array
    R1 = numpy.ones(3)
    M = numpy.ones((2, 3))
    K = 0
    L = 0
    with pytest.raises(AssertionError) as error:
        NMTF(R1, M, K, L)
    assert str(
        error.value
    ) == "Input matrix R is not a two-dimensional array, but instead 1-dimensional."

    R2 = numpy.ones((4, 3, 2))
    with pytest.raises(AssertionError) as error:
        NMTF(R2, M, K, L)
    assert str(
        error.value
    ) == "Input matrix R is not a two-dimensional array, but instead 3-dimensional."

    R3 = numpy.ones((3, 2))
    with pytest.raises(AssertionError) as error:
        NMTF(R3, M, K, L)
    assert str(
        error.value
    ) == "Input matrix R is not of the same size as the indicator matrix M: (3, 2) and (2, 3) respectively."

    # Test getting an exception if a row or column is entirely unknown
    R = numpy.ones((2, 3))
    M1 = [[1, 1, 1], [0, 0, 0]]
    M2 = [[1, 1, 0], [1, 0, 0]]

    with pytest.raises(AssertionError) as error:
        NMTF(R, M1, K, L)
    assert str(error.value) == "Fully unobserved row in R, row 1."
    with pytest.raises(AssertionError) as error:
        NMTF(R, M2, K, L)
    assert str(error.value) == "Fully unobserved column in R, column 2."

    # Test whether we made a copy of R with 1's at unknown values
    I, J = 2, 4
    R = [[1, 2, 0, 4], [5, 0, 7, 0]]
    M = [[1, 1, 0, 1], [1, 0, 1, 0]]
    K = 2
    L = 3
    R_excl_unknown = [[1, 2, 1, 4], [5, 1, 7, 1]]

    nmtf = NMTF(R, M, K, L)
    assert numpy.array_equal(R, nmtf.R)
    assert numpy.array_equal(M, nmtf.M)
    assert nmtf.I == I
    assert nmtf.J == J
    assert nmtf.K == K
    assert nmtf.L == L
    assert numpy.array_equal(R_excl_unknown, nmtf.R_excl_unknown)
Esempio n. 5
0
def test_initialisation():
    I,J = 2,3
    R = numpy.ones((I,J))
    M = numpy.ones((I,J))
    K = 4
    L = 5
    
    # Init FG ones, S random
    init_FG = 'ones'
    init_S = 'random'
    nmtf = NMTF(R,M,K,L)
    nmtf.initialise(init_S,init_FG)
    
    assert numpy.array_equal(numpy.ones((I,K)),nmtf.F)
    assert numpy.array_equal(numpy.ones((J,L)),nmtf.G)
    for (k,l) in itertools.product(range(0,K),range(0,L)):
        assert nmtf.S[k,l] > 0 and nmtf.S[k,l] < 1
    
    # Init FG random, S ones
    init_FG = 'random'
    init_S = 'ones'
    nmtf = NMTF(R,M,K,L)
    nmtf.initialise(init_S,init_FG)
    
    assert numpy.array_equal(numpy.ones((K,L)),nmtf.S)
    for (i,k) in itertools.product(range(0,I),range(0,K)):
        assert nmtf.F[i,k] > 0 and nmtf.F[i,k] < 1
    for (j,l) in itertools.product(range(0,J),range(0,L)):
        assert nmtf.G[j,k] > 0 and nmtf.G[j,k] < 1    
        
    # Init FG kmeans, S exponential
    init_FG = 'kmeans'
    init_S = 'exponential'
    nmtf = NMTF(R,M,K,L)
    nmtf.initialise(init_S,init_FG)
    
    for (i,k) in itertools.product(range(0,I),range(0,K)):
        assert nmtf.F[i,k] == 0.2 or nmtf.F[i,k] == 1.2
    for (j,l) in itertools.product(range(0,J),range(0,L)):
        assert nmtf.G[j,k] == 0.2 or nmtf.G[j,k] == 1.2   
    for (k,l) in itertools.product(range(0,K),range(0,L)):
        assert nmtf.S[k,l] > 0
Esempio n. 6
0
def test_compute_I_div():
    R = numpy.array([[1,2],[3,4]],dtype=float)
    M = numpy.array([[1,1],[0,1]])
    (I,J,K,L) = (2,2,3,1)
    
    F = numpy.array([[1,2,3],[4,5,6]])
    S = numpy.array([[7],[8],[9]])
    G = numpy.array([[10],[11]])
    #R_predicted = numpy.array([[500,550],[1220,1342]],dtype=float)    
    
    nmtf = NMTF(R,M,K,L)
    nmtf.F = F
    nmtf.S = S
    nmtf.G = G
    
    expected_I_div = sum([
        1.0*math.log(1.0/500.0) - 1.0 + 500.0,
        2.0*math.log(2.0/550.0) - 2.0 + 550.0,
        4.0*math.log(4.0/1342.0) - 4.0 + 1342.0
    ])
    
    nmtf = NMTF(R,M,K,L)
    nmtf.F = F
    nmtf.S = S
    nmtf.G = G
    
    I_div = nmtf.compute_I_div()    
    assert I_div == expected_I_div
Esempio n. 7
0
def test_updates():
    R = numpy.array([[1,2],[3,4]],dtype='f')
    M = numpy.array([[1,1],[0,1]])
    I,J,K,L = 2,2,3,1
    
    F = numpy.array([[1,2,3],[4,5,6]],dtype='f')
    S = numpy.array([[7],[8],[9]],dtype='f')
    G = numpy.array([[10],[11]],dtype='f')
    FSG = numpy.array([[500,550],[1220,1342]],dtype='f')
    FS = numpy.array([[50],[122]],dtype='f')
    SG = numpy.array([[70,77],[80,88],[90,99]],dtype='f')
    
    F_updated = [
        [
            F[0][0] * ( R[0][0]*SG[0][0]/FSG[0][0] + R[0][1]*SG[0][1]/FSG[0][1] ) / ( SG[0][0]+SG[0][1] ),
            F[0][1] * ( R[0][0]*SG[1][0]/FSG[0][0] + R[0][1]*SG[1][1]/FSG[0][1] ) / ( SG[1][0]+SG[1][1] ),
            F[0][2] * ( R[0][0]*SG[2][0]/FSG[0][0] + R[0][1]*SG[2][1]/FSG[0][1] ) / ( SG[2][0]+SG[2][1] )
        ],    
        [
            F[1][0] * R[1][1]/FSG[1][1],
            F[1][1] * R[1][1]/FSG[1][1],
            F[1][2] * R[1][1]/FSG[1][1]
        ]
    ]
    G_updated = [
        [
            G[0][0] * R[0][0]/FSG[0][0]       
        ],
        [
            G[1][0] * ( R[0][1]*FS[0][0]/FSG[0][1] + R[1][1]*FS[1][0]/FSG[1][1] ) / ( FS[0][0]+FS[1][0] )
        ]
    ]
    S_updated = [
        [
            S[0][0] * ( R[0][0]*F[0][0]*G[0][0]/FSG[0][0] + R[0][1]*F[0][0]*G[1][0]/FSG[0][1] + R[1][1]*F[1][0]*G[1][0]/FSG[1][1] ) / ( F[0][0]*G[0][0]+F[0][0]*G[1][0]+F[1][0]*G[1][0] ) 
        ],
        [
            S[1][0] * ( R[0][0]*F[0][1]*G[0][0]/FSG[0][0] + R[0][1]*F[0][1]*G[1][0]/FSG[0][1] + R[1][1]*F[1][1]*G[1][0]/FSG[1][1] ) / ( F[0][1]*G[0][0]+F[0][1]*G[1][0]+F[1][1]*G[1][0] )  
        ],
        [
            S[2][0] * ( R[0][0]*F[0][2]*G[0][0]/FSG[0][0] + R[0][1]*F[0][2]*G[1][0]/FSG[0][1] + R[1][1]*F[1][2]*G[1][0]/FSG[1][1] ) / ( F[0][2]*G[0][0]+F[0][2]*G[1][0]+F[1][2]*G[1][0] )  
        ]
    ]    
    
    nmtf = NMTF(R,M,K,L)
    def reset():
        nmtf.F = numpy.copy(F)
        nmtf.S = numpy.copy(S)
        nmtf.G = numpy.copy(G)
    
    print F[0][0], ( R[0][0]*SG[0][0]/FSG[0][0] + R[0][1]*SG[0][1]/FSG[0][1] ), ( SG[0][0]+SG[0][1] ) 
    print F[1][0], R[1][1]/FSG[1][1]*SG[0][1], SG[0][1]
    
    # Test F
    for k in range(0,K):
        reset()
        nmtf.update_F(k)
        for i in range(0,I):
            print i,k
            assert abs(F_updated[i][k] - nmtf.F[i,k]) < 0.000000001 
            
    # Test G
    for l in range(0,L):
        reset()
        nmtf.update_G(l)
        for j in range(0,J):
            assert abs(G_updated[j][l] - nmtf.G[j,l]) < 0.000000001 
    
    # Test S
    def test_S(k,l):
        reset()
        nmtf.update_S(k,l)
        assert abs(S_updated[k][l] - nmtf.S[k,l]) < 0.00000001
    test_S(0,0)
    test_S(1,0)
    test_S(2,0)
Esempio n. 8
0
# Load in data
R = numpy.loadtxt(input_folder + "R.txt")
M = numpy.ones((I, J))

# Run the VB algorithm, <repeats> times
times_repeats = []
performances_repeats = []
for i in range(0, repeats):
    # Set all the seeds
    numpy.random.seed(3)
    random.seed(4)
    scipy.random.seed(5)

    # Run the classifier
    nmtf = NMTF(R, M, K, L)
    nmtf.initialise(init_S, init_FG, expo_prior)
    nmtf.run(iterations)

    # Extract the performances and timestamps across all iterations
    times_repeats.append(nmtf.all_times)
    performances_repeats.append(nmtf.all_performances)

# Check whether seed worked: all performances should be the same
assert all([numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats]), \
    "Seed went wrong - performances not the same across repeats!"

# Print out the performances, and the average times
all_times_average = list(numpy.average(times_repeats, axis=0))
all_performances = performances_repeats[0]
print "np_all_times_average = %s" % all_times_average
Esempio n. 9
0
# We now run the VB algorithm on each of the M's for each fraction.
all_performances = {metric: [] for metric in metrics}
average_performances = {metric: []
                        for metric in metrics}  # averaged over repeats
for (fraction, Ms, Ms_test) in zip(fractions_unknown, all_Ms, all_Ms_test):
    print "Trying fraction %s." % fraction

    # Run the algorithm <repeats> times and store all the performances
    for metric in metrics:
        all_performances[metric].append([])
    for (repeat, M, M_test) in zip(range(0, repeats), Ms, Ms_test):
        print "Repeat %s of fraction %s." % (repeat + 1, fraction)

        # Run the VB algorithm
        nmtf = NMTF(R, M, K, L)
        nmtf.initialise(init_S, init_FG)
        nmtf.run(iterations)

        # Measure the performances
        performances = nmtf.predict(M_test)
        for metric in metrics:
            # Add this metric's performance to the list of <repeat> performances for this fraction
            all_performances[metric][-1].append(performances[metric])

    # Compute the average across attempts
    for metric in metrics:
        average_performances[metric].append(
            sum(all_performances[metric][-1]) / repeats)