예제 #1
0
def test_initialise():
    I,J,K,L = 5,3,2,4
    R = numpy.ones((I,J))
    M = numpy.ones((I,J))
    
    lambdaF = 2*numpy.ones((I,K))
    lambdaS = 3*numpy.ones((K,L))
    lambdaG = 4*numpy.ones((J,L))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    
    # First do a random initialisation - we can then only check whether values are correctly initialised
    init_S = 'random'
    init_FG = 'random'
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    
    assert NMTF.tau >= 0.0
    for i,k in itertools.product(xrange(0,I),xrange(0,K)):
        assert NMTF.F[i,k] >= 0.0
    for k,l in itertools.product(xrange(0,K),xrange(0,L)):
        assert NMTF.S[k,l] >= 0.0
    for j,l in itertools.product(xrange(0,J),xrange(0,L)):
        assert NMTF.G[j,l] >= 0.0
        
    # Initialisation of S using random draws from prior
    init_S, init_FG = 'random', 'exp'
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    
    for i,k in itertools.product(xrange(0,I),xrange(0,K)):
        assert NMTF.F[i,k] == 1./lambdaF[i,k]
    for k,l in itertools.product(xrange(0,K),xrange(0,L)):
        assert NMTF.S[k,l] != 1./lambdaS[k,l] # test whether we overwrote the expectation
    for j,l in itertools.product(xrange(0,J),xrange(0,L)):
        assert NMTF.G[j,l] == 1./lambdaG[j,l]
    
    # Initialisation of F and G using random draws from prior
    init_S, init_FG = 'exp', 'random'
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    
    for i,k in itertools.product(xrange(0,I),xrange(0,K)):
        assert NMTF.F[i,k] != 1./lambdaF[i,k] # test whether we overwrote the expectation
    for k,l in itertools.product(xrange(0,K),xrange(0,L)):
        assert NMTF.S[k,l] == 1./lambdaS[k,l]
    for j,l in itertools.product(xrange(0,J),xrange(0,L)):
        assert NMTF.G[j,l] != 1./lambdaG[j,l] # test whether we overwrote the expectation
        
    # Initialisation of F and G using Kmeans
    init_S, init_FG = 'exp', 'kmeans'
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    
    for i,k in itertools.product(xrange(0,I),xrange(0,K)):
        assert NMTF.F[i,k] == 0.2 or NMTF.F[i,k] == 1.2
    for j,l in itertools.product(xrange(0,J),xrange(0,L)):
        assert NMTF.G[j,l] == 0.2 or NMTF.G[j,l] == 1.2
    for k,l in itertools.product(xrange(0,K),xrange(0,L)):
        assert NMTF.S[k,l] == 1./lambdaS[k,l]
예제 #2
0
def test_log_likelihood():
    R = numpy.array([[1,2],[3,4]],dtype=float)
    M = numpy.array([[1,1],[0,1]])
    I, J, K, L = 2, 2, 3, 4
    lambdaF = 2*numpy.ones((I,K))
    lambdaS = 3*numpy.ones((K,L))
    lambdaG = 4*numpy.ones((J,L))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.F = numpy.ones((I,K))
    NMTF.S = 2*numpy.ones((K,L))
    NMTF.G = 3*numpy.ones((J,L))
    NMTF.tau = 3.
    # expU*expV.T = [[72.]]
    
    log_likelihood = 3./2.*(math.log(3)-math.log(2*math.pi)) - 3./2. * (71**2 + 70**2 + 68**2)
    AIC = -2*log_likelihood + 2*(2*3+3*4+2*4)
    BIC = -2*log_likelihood + (2*3+3*4+2*4)*math.log(3)
    MSE = (71**2+70**2+68**2)/3.
    
    assert log_likelihood == NMTF.quality('loglikelihood')
    assert AIC == NMTF.quality('AIC')
    assert BIC == NMTF.quality('BIC')
    assert MSE == NMTF.quality('MSE')
    with pytest.raises(AssertionError) as error:
        NMTF.quality('FAIL')
    assert str(error.value) == "Unrecognised metric for model quality: FAIL."
예제 #3
0
def test_predict():
    (I,J,K,L) = (5,3,2,4)
    F = numpy.array([[125.,126.],[126.,126.],[126.,126.],[126.,126.],[126.,126.]])
    S = numpy.array([[84.,84.,84.,84.],[84.,84.,84.,84.]])
    G = numpy.array([[42.,42.,42.,42.],[42.,42.,42.,42.],[42.,42.,42.,42.]])
    taus = [m**2 for m in range(1,10+1)]
    #R_pred = numpy.array([[ 3542112.,  3542112.,  3542112.],[ 3556224.,  3556224.,  3556224.],[ 3556224.,  3556224.,  3556224.],[ 3556224.,  3556224.,  3556224.],[ 3556224.,  3556224.,  3556224.]])
     
    R = numpy.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15]],dtype=float)
    M = numpy.ones((I,J))
    lambdaF = 2*numpy.ones((I,K))
    lambdaS = 3*numpy.ones((K,L))
    lambdaG = 5*numpy.ones((J,L))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    
    M_test = numpy.array([[0,0,1],[0,1,0],[0,0,0],[1,1,0],[0,0,0]]) #R->3,5,10,11, R_pred->3542112,3556224,3556224,3556224
    MSE = ((3.-3542112.)**2 + (5.-3556224.)**2 + (10.-3556224.)**2 + (11.-3556224.)**2) / 4.
    R2 = 1. - ((3.-3542112.)**2 + (5.-3556224.)**2 + (10.-3556224.)**2 + (11.-3556224.)**2) / (4.25**2+2.25**2+2.75**2+3.75**2) #mean=7.25
    Rp = 357. / ( math.sqrt(44.75) * math.sqrt(5292.) ) #mean=7.25,var=44.75, mean_pred=3552696,var_pred=5292, corr=(-4.25*-63 + -2.25*21 + 2.75*21 + 3.75*21)
    
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.F = F
    NMTF.S = S
    NMTF.G = G
    NMTF.all_tau = taus
    performances = NMTF.predict(M_test)
    
    assert performances['MSE'] == MSE
    assert performances['R^2'] == R2
    assert performances['Rp'] == Rp
예제 #4
0
def test_compute_statistics():
    R = numpy.array([[1, 2], [3, 4]], dtype=float)
    M = numpy.array([[1, 1], [0, 1]])
    I, J, K, L = 2, 2, 3, 4
    lambdaF = 2 * numpy.ones((I, K))
    lambdaS = 3 * numpy.ones((K, L))
    lambdaG = 4 * numpy.ones((J, L))
    alpha, beta = 3, 1
    priors = {
        'alpha': alpha,
        'beta': beta,
        'lambdaF': lambdaF,
        'lambdaS': lambdaS,
        'lambdaG': lambdaG
    }

    NMTF = nmtf_icm(R, M, K, L, priors)

    R_pred = numpy.array([[500, 550], [1220, 1342]], dtype=float)
    M_pred = numpy.array([[0, 0], [1, 1]])

    MSE_pred = (1217**2 + 1338**2) / 2.0
    R2_pred = 1. - (1217**2 + 1338**2) / (0.5**2 + 0.5**2)  #mean=3.5
    Rp_pred = 61. / (math.sqrt(.5) * math.sqrt(7442.)
                     )  #mean=3.5,var=0.5,mean_pred=1281,var_pred=7442,cov=61

    assert MSE_pred == NMTF.compute_MSE(M_pred, R, R_pred)
    assert R2_pred == NMTF.compute_R2(M_pred, R, R_pred)
    assert Rp_pred == NMTF.compute_Rp(M_pred, R, R_pred)
예제 #5
0
def test_run():
    I, J, K, L = 10, 5, 3, 2
    R = numpy.ones((I, J))
    M = numpy.ones((I, J))
    M[0, 0], M[2, 2], M[3, 1] = 0, 0, 0

    lambdaF = 2 * numpy.ones((I, K))
    lambdaS = 3 * numpy.ones((K, L))
    lambdaG = 4 * numpy.ones((J, L))
    alpha, beta = 3, 1
    priors = {
        'alpha': alpha,
        'beta': beta,
        'lambdaF': lambdaF,
        'lambdaS': lambdaS,
        'lambdaG': lambdaG
    }
    init = 'exp'  #F=1/2,S=1/3,G=1/4

    iterations = 15

    NMTF = nmtf_icm(R, M, K, L, priors)
    NMTF.initialise(init)
    NMTF.run(iterations)

    assert NMTF.all_tau.shape == (iterations, )
    assert NMTF.all_tau[1] != alpha / float(beta)
예제 #6
0
def test_beta_s():
    NMTF = nmtf_icm(R, M, K, L, priors)
    NMTF.initialise(init_S, init_FG)
    NMTF.tau = 3.
    beta_s = beta + .5 * (12 * (
        11. / 15.)**2)  #F*S = [[1/6+1/6=1/3,..]], F*S*G^T = [[1/15*4=4/15,..]]
    assert abs(NMTF.beta_s() - beta_s) < 0.00000000000001
예제 #7
0
def test_tauG():
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    NMTF.tau = 3.
    # F*S = [[1/3]], (F*S)^2 = [[1/9]], sum_i F*S = [[4/9]]
    tauG = 3.*numpy.array([[4./9.,4./9.,4./9.,4./9.],[4./9.,4./9.,4./9.,4./9.],[4./9.,4./9.,4./9.,4./9.]])
    for j,l in itertools.product(xrange(0,J),xrange(0,L)):
        assert NMTF.tauG(l)[j] == tauG[j,l]
예제 #8
0
def test_tauS():
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    NMTF.tau = 3.
    # F outer G = [[1/10]], (F outer G)^2 = [[1/100]], sum (F outer G)^2 = [[12/100]]
    tauS = 3.*numpy.array([[3./25.,3./25.,3./25.,3./25.],[3./25.,3./25.,3./25.,3./25.]])
    for k,l in itertools.product(xrange(0,K),xrange(0,L)):
        assert abs(NMTF.tauS(k,l) - tauS[k,l]) < 0.000000000000001
예제 #9
0
def test_tauF():
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    NMTF.tau = 3.
    # S*G.T = [[4/15]], (S*G.T)^2 = [[16/225]], sum_j S*G.T = [[32/225,32/225],[48/225,48/225],[32/225,32/225],[32/225,32/225],[48/225,48/225]]
    tauF = 3.*numpy.array([[32./225.,32./225.],[48./225.,48./225.],[32./225.,32./225.],[32./225.,32./225.],[48./225.,48./225.]])
    for i,k in itertools.product(xrange(0,I),xrange(0,K)):
        assert abs(NMTF.tauF(k)[i] - tauF[i,k]) < 0.000000000000001
예제 #10
0
def test_muG():
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    NMTF.tau = 3.
    tauG = 3.*numpy.array([[4./9.,4./9.,4./9.,4./9.],[4./9.,4./9.,4./9.,4./9.],[4./9.,4./9.,4./9.,4./9.]])
    # Rij - Fi*S*Gj + Gjl*(Fi*Sl)) = 11/15 + 1/5 * 1/3 = 12/15 = 4/5
    # (Rij - Fi*S*Gj + Gjl*(Fi*Sl)) * (Fi*Sl) = 4/5 * 1/3 = 4/15
    muG = 1./tauG * ( 3. * numpy.array([[4.*4./15.,4.*4./15.,4.*4./15.,4.*4./15.],[4.*4./15.,4.*4./15.,4.*4./15.,4.*4./15.],[4.*4./15.,4.*4./15.,4.*4./15.,4.*4./15.]]) - lambdaG )
    for j,l in itertools.product(xrange(0,J),xrange(0,L)):
        assert abs(NMTF.muG(tauG[:,l],l)[j] - muG[j,l]) < 0.000000000000001
예제 #11
0
def test_muS():
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    NMTF.tau = 3.
    tauS = 3.*numpy.array([[3./25.,3./25.,3./25.,3./25.],[3./25.,3./25.,3./25.,3./25.]])
    # Rij - Fi*S*Gj + Fik*Skl*Gjk = 11/15 + 1/2*1/3*1/5 = 23/30
    # (Rij - Fi*S*Gj + Fik*Skl*Gjk) * Fik*Gjk = 23/30 * 1/10 = 23/300
    muS = 1./tauS * ( 3. * numpy.array([[12*23./300.,12*23./300.,12*23./300.,12*23./300.],[12*23./300.,12*23./300.,12*23./300.,12*23./300.]]) - lambdaS )
    for k,l in itertools.product(xrange(0,K),xrange(0,L)):
        assert abs(NMTF.muS(tauS[k,l],k,l) - muS[k,l]) < 0.000000000000001
예제 #12
0
def test_muF():
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    NMTF.tau = 3.
    tauF = 3.*numpy.array([[32./225.,32./225.],[48./225.,48./225.],[32./225.,32./225.],[32./225.,32./225.],[48./225.,48./225.]])
    # Rij - Fi*S*Gj + Fik(Sk*Gj) = 11/15 + 1/2 * 4/15 = 13/15
    # (Rij - Fi*S*Gj + Fik(Sk*Gj)) * (Sk*Gj) = 13/15 * 4/15 = 52/225
    muF = 1./tauF * ( 3. * numpy.array([[2*(52./225.),2*(52./225.)],[3*(52./225.),3*(52./225.)],[2*(52./225.),2*(52./225.)],[2*(52./225.),2*(52./225.)],[3*(52./225.),3*(52./225.)]]) - lambdaF )
    for i,k in itertools.product(xrange(0,I),xrange(0,K)):
        assert abs(NMTF.muF(tauF[:,k],k)[i] - muF[i,k]) < 0.000000000000001
예제 #13
0
# We now run the VB algorithm on each of the M's for each fraction.
all_performances = {metric: [] for metric in metrics}
average_performances = {metric: []
                        for metric in metrics}  # averaged over repeats
for (fraction, Ms, Ms_test) in zip(fractions_unknown, all_Ms, all_Ms_test):
    print "Trying fraction %s." % fraction

    # Run the algorithm <repeats> times and store all the performances
    for metric in metrics:
        all_performances[metric].append([])
    for (repeat, M, M_test) in zip(range(0, repeats), Ms, Ms_test):
        print "Repeat %s of fraction %s." % (repeat + 1, fraction)

        # Run the VB algorithm
        NMTF = nmtf_icm(R, M, K, L, priors)
        NMTF.initialise(init_S, init_FG)
        NMTF.run(iterations, minimum_TN=minimum_TN)

        # Measure the performances
        performances = NMTF.predict(M_test)
        for metric in metrics:
            # Add this metric's performance to the list of <repeat> performances for this fraction
            all_performances[metric][-1].append(performances[metric])

    # Compute the average across attempts
    for metric in metrics:
        average_performances[metric].append(
            sum(all_performances[metric][-1]) / repeats)

예제 #14
0
def test_alpha_s():
    NMTF = nmtf_icm(R,M,K,L,priors)
    NMTF.initialise(init_S,init_FG)
    NMTF.tau = 3.
    alpha_s = alpha + 6.
    assert NMTF.alpha_s() == alpha_s
예제 #15
0
def test_init():
    # Test getting an exception when R and M are different sizes, and when R is not a 2D array.
    R1 = numpy.ones(3)
    M = numpy.ones((2,3))
    I,J,K,L = 5,3,1,2
    lambdaF = numpy.ones((I,K))
    lambdaS = numpy.ones((K,L))
    lambdaG = numpy.ones((J,L))
    alpha, beta = 3, 1    
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    
    with pytest.raises(AssertionError) as error:
        nmtf_icm(R1,M,K,L,priors)
    assert str(error.value) == "Input matrix R is not a two-dimensional array, but instead 1-dimensional."
    
    R2 = numpy.ones((4,3,2))
    with pytest.raises(AssertionError) as error:
        nmtf_icm(R2,M,K,L,priors)
    assert str(error.value) == "Input matrix R is not a two-dimensional array, but instead 3-dimensional."
    
    R3 = numpy.ones((3,2))
    with pytest.raises(AssertionError) as error:
        nmtf_icm(R3,M,K,L,priors)
    assert str(error.value) == "Input matrix R is not of the same size as the indicator matrix M: (3, 2) and (2, 3) respectively."
    
    # Similarly for lambdaF, lambdaS, lambdaG
    I,J,K,L = 2,3,1,2
    R4 = numpy.ones((2,3))
    lambdaF = numpy.ones((2+1,1))
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    with pytest.raises(AssertionError) as error:
        nmtf_icm(R4,M,K,L,priors)
    assert str(error.value) == "Prior matrix lambdaF has the wrong shape: (3, 1) instead of (2, 1)."
    
    lambdaF = numpy.ones((2,1))
    lambdaS = numpy.ones((1+1,2+1))
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    with pytest.raises(AssertionError) as error:
        nmtf_icm(R4,M,K,L,priors)
    assert str(error.value) == "Prior matrix lambdaS has the wrong shape: (2, 3) instead of (1, 2)."
    
    lambdaS = numpy.ones((1,2))
    lambdaG = numpy.ones((3,2+1))
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    with pytest.raises(AssertionError) as error:
        nmtf_icm(R4,M,K,L,priors)
    assert str(error.value) == "Prior matrix lambdaG has the wrong shape: (3, 3) instead of (3, 2)."
    
    # Test getting an exception if a row or column is entirely unknown
    lambdaF = numpy.ones((I,K))
    lambdaS = numpy.ones((K,L))
    lambdaG = numpy.ones((J,L))
    M1 = [[1,1,1],[0,0,0]]
    M2 = [[1,1,0],[1,0,0]]
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    
    with pytest.raises(AssertionError) as error:
        nmtf_icm(R4,M1,K,L,priors)
    assert str(error.value) == "Fully unobserved row in R, row 1."
    with pytest.raises(AssertionError) as error:
        nmtf_icm(R4,M2,K,L,priors)
    assert str(error.value) == "Fully unobserved column in R, column 2."
    
    # Finally, a successful case
    I,J,K,L = 3,2,2,2
    R5 = 2*numpy.ones((I,J))
    lambdaF = numpy.ones((I,K))
    lambdaS = numpy.ones((K,L))
    lambdaG = numpy.ones((J,L))
    M = numpy.ones((I,J))
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    NMTF = nmtf_icm(R5,M,K,L,priors)
    
    assert numpy.array_equal(NMTF.R,R5)
    assert numpy.array_equal(NMTF.M,M)
    assert NMTF.I == I
    assert NMTF.J == J
    assert NMTF.K == K
    assert NMTF.L == L
    assert NMTF.size_Omega == I*J
    assert NMTF.alpha == alpha
    assert NMTF.beta == beta
    assert numpy.array_equal(NMTF.lambdaF,lambdaF)
    assert numpy.array_equal(NMTF.lambdaS,lambdaS)
    assert numpy.array_equal(NMTF.lambdaG,lambdaG)
    
    # Test when lambdaF S G are integers
    I,J,K,L = 3,2,2,2
    R5 = 2*numpy.ones((I,J))
    lambdaF = 3
    lambdaS = 4
    lambdaG = 5
    M = numpy.ones((I,J))
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
    NMTF = nmtf_icm(R5,M,K,L,priors)
    
    assert numpy.array_equal(NMTF.R,R5)
    assert numpy.array_equal(NMTF.M,M)
    assert NMTF.I == I
    assert NMTF.J == J
    assert NMTF.K == K
    assert NMTF.L == L
    assert NMTF.size_Omega == I*J
    assert NMTF.alpha == alpha
    assert NMTF.beta == beta
    assert numpy.array_equal(NMTF.lambdaF,3*numpy.ones((I,K)))
    assert numpy.array_equal(NMTF.lambdaS,4*numpy.ones((K,L)))
    assert numpy.array_equal(NMTF.lambdaG,5*numpy.ones((J,L)))