def test_initialise():
    """Test initialise(): with 'exp' init the variational parameters should be
    set to their prior-based values, and predefined tauU/tauV should be used
    verbatim when passed in.
    """
    I,J,K = 5,3,2
    R = numpy.ones((I,J))
    M = numpy.ones((I,J))
    lambdaU = 2*numpy.ones((I,K))
    lambdaV = 3*numpy.ones((J,K))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }

    # Initialisation with expectation
    init = 'exp'
    BNMF = bnmf_vb_optimised(R,M,K,priors)
    BNMF.initialise(init)

    # alpha_s = alpha + |Omega|/2 with |Omega| = I*J = 15 observed entries.
    assert BNMF.alpha_s == alpha + 15./2.
    #assert BNMF.alpha_s == alpha
    assert BNMF.beta_s == beta + BNMF.exp_square_diff()/2.
    #assert BNMF.beta_s == beta
    # With 'exp' init, muU = 1/lambdaU (exponential mean) and tauU = 1.
    for i,k in itertools.product(xrange(0,I),xrange(0,K)):
        assert BNMF.tauU[i,k] == 1.
        assert BNMF.muU[i,k] == 1./lambdaU[i,k]
    for j,k in itertools.product(xrange(0,J),xrange(0,K)):
        assert BNMF.tauV[j,k] == 1.
        assert BNMF.muV[j,k] == 1./lambdaV[j,k]
    assert BNMF.exptau == (alpha + 15./2.) / (beta + BNMF.exp_square_diff()/2.)
    #assert BNMF.exptau == alpha / beta
    # Truncated-normal means: mu + phi(-mu*sqrt(tau)) / ((1-Phi(-mu*sqrt(tau))) * sqrt(tau)),
    # with the pdf/cdf values precomputed below (mu=0.5 for U, mu=1/3 for V, tau=1).
    for i,k in itertools.product(xrange(0,I),xrange(0,K)):
        assert abs(BNMF.expU[i,k] - (0.5 + 0.352065 / (1-0.3085))) < 0.0001
    for j,k in itertools.product(xrange(0,J),xrange(0,K)):
        assert abs(BNMF.expV[j,k] - (1./3. + 0.377383 / (1-0.3694))) < 0.0001

    # Initialise tauU, tauV using predefined values
    tauUV = { 'tauU' : 2*numpy.ones((I,K)), 'tauV' : 3*numpy.ones((J,K)) }
    init = 'exp'
    BNMF = bnmf_vb_optimised(R,M,K,priors)
    BNMF.initialise(init,tauUV)
    for i,k in itertools.product(xrange(0,I),xrange(0,K)):
        assert BNMF.tauU[i,k] == 2.
    for j,k in itertools.product(xrange(0,J),xrange(0,K)):
        assert BNMF.tauV[j,k] == 3.
def test_log_likelihood():
    """Check the loglikelihood, AIC, BIC and MSE quality metrics against
    hand-computed values (expU*expV.T is all 6's; 3 observed entries)."""
    n_rows, n_cols, n_factors = 2, 2, 3
    R = numpy.array([[1,2],[3,4]],dtype=float)
    M = numpy.array([[1,1],[0,1]])
    lambdaU = 2*numpy.ones((n_rows,n_factors))
    lambdaV = 3*numpy.ones((n_cols,n_factors))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }

    model = bnmf_vb_optimised(R,M,n_factors,priors)
    model.expU = numpy.ones((n_rows,n_factors))
    model.expV = 2*numpy.ones((n_cols,n_factors))
    model.explogtau = 5.
    model.exptau = 3.

    # Squared residuals on the 3 observed entries: (1-6)^2, (2-6)^2, (4-6)^2.
    log_likelihood = 3./2.*(5.-math.log(2*math.pi)) - 3./2. * (5**2 + 4**2 + 2**2)
    AIC = -2*log_likelihood + 2*(2*3+2*3)
    BIC = -2*log_likelihood + (2*3+2*3)*math.log(3)
    MSE = (5**2+4**2+2**2)/3.

    assert model.quality('loglikelihood') == log_likelihood
    assert model.quality('AIC') == AIC
    assert model.quality('BIC') == BIC
    assert model.quality('MSE') == MSE

    # An unknown metric name should raise with a descriptive message.
    with pytest.raises(AssertionError) as error:
        model.quality('FAIL')
    assert str(error.value) == "Unrecognised metric for model quality: FAIL."
def test_predict():
    """Check predict() performance metrics (MSE, R^2, Rp) on a held-out mask."""
    I, J = 5, 3
    K = 3
    R = numpy.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15]],dtype=float)
    M = numpy.ones((I,J))
    lambdaU = 2*numpy.ones((I,K))
    lambdaV = 3*numpy.ones((J,K))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }

    # NOTE(review): expU/expV have only 2 columns although K=3; predict
    # presumably uses just expU.dot(expV.T) so this works — confirm.
    expU = numpy.array([[125.,126.],[126.,126.],[126.,126.],[126.,126.],[126.,126.]])
    expV = numpy.array([[84.,84.],[84.,84.],[84.,84.]])
    M_test = numpy.array([[0,0,1],[0,1,0],[0,0,0],[1,1,0],[0,0,0]]) #R->3,5,10,11, P_pred->21084,21168,21168,21168

    expected_MSE = (444408561. + 447872569. + 447660964. + 447618649) / 4.
    expected_R2 = 1. - (444408561. + 447872569. + 447660964. + 447618649) / (4.25**2+2.25**2+2.75**2+3.75**2) #mean=7.25
    expected_Rp = 357. / ( math.sqrt(44.75) * math.sqrt(5292.) ) #mean=7.25,var=44.75, mean_pred=21147,var_pred=5292, corr=(-4.25*-63 + -2.25*21 + 2.75*21 + 3.75*21)

    model = bnmf_vb_optimised(R,M,K,priors)
    model.expU = expU
    model.expV = expV
    performances = model.predict(M_test)

    assert performances['MSE'] == expected_MSE
    assert performances['R^2'] == expected_R2
    assert performances['Rp'] == expected_Rp
def test_run():
    """Run two VB iterations; every parameter should have moved off its
    initial value and remained finite."""
    I,J,K = 10,5,2
    R = numpy.ones((I,J))
    M = numpy.ones((I,J))
    M[0,0], M[2,2], M[3,1] = 0, 0, 0
    R[0,1], R[0,2] = 2., 3.
    lambdaU = 2*numpy.ones((I,K))
    lambdaV = 3*numpy.ones((J,K))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
    iterations = 2

    model = bnmf_vb_optimised(R,M,K,priors)
    model.initialise()
    model.run(iterations)

    # U-side parameters: moved away from init values, and finite.
    for i in xrange(0,I):
        for k in xrange(0,K):
            assert model.muU[i,k] != 1./lambdaU[i,k]
            assert model.tauU[i,k] != 1.
            assert model.expU[i,k] != numpy.inf and not math.isnan(model.expU[i,k])
            assert model.tauU[i,k] != numpy.inf and not math.isnan(model.tauU[i,k])
    # V-side parameters likewise.
    for j in xrange(0,J):
        for k in xrange(0,K):
            assert model.muV[j,k] != 1./lambdaV[j,k]
            assert model.tauV[j,k] != 1.
            assert model.expV[j,k] != numpy.inf and not math.isnan(model.expV[j,k])
            assert model.tauV[j,k] != numpy.inf and not math.isnan(model.tauV[j,k])
    # Noise posterior parameters.
    assert model.alpha_s != alpha
    assert model.beta_s != beta
    assert model.exptau != numpy.inf and not math.isnan(model.exptau)
    assert model.explogtau != numpy.inf and not math.isnan(model.explogtau)
def test_update_exp_V():
    """After update_exp_V, expV/varV must equal the truncated-normal mean and
    variance for muV = 1/3 and tauV = 4 (uses module-level R, M, K, priors)."""
    for factor in range(K):
        model = bnmf_vb_optimised(R,M,K,priors)
        model.initialise()
        model.tauV = 4*numpy.ones((J,K))  # so muV = [[1./3.]], tauV = [[4.]]
        model.update_exp_V(factor)
        # -mu*sqrt(tau) = -2./3.; lambda(-2/3) = 0.319448 / (1-0.2525) = 0.4273551839464883
        for col in range(J):
            assert abs(model.expV[col,factor] - (1./3. + 1./2. * 0.4273551839464883)) < 0.00001
            assert abs(model.varV[col,factor] - 1./4.*(1. - 0.4675359092102624)) < 0.00001
def test_update_exp_U():
    """After update_exp_U, expU/varU must equal the truncated-normal mean and
    variance for muU = 0.5 and tauU = 4 (uses module-level R, M, K, priors)."""
    for factor in range(K):
        model = bnmf_vb_optimised(R,M,K,priors)
        model.initialise()
        model.tauU = 4*numpy.ones((I,K))  # so muU = [[0.5]], tauU = [[4.]]
        model.update_exp_U(factor)
        # -mu*sqrt(tau) = -1.; lambda(1) = 0.241971 / (1-0.1587) = 0.2876155949126352;
        # gamma = 0.37033832534958433
        for row in range(I):
            assert abs(model.expU[row,factor] - (0.5 + 1./2. * 0.2876155949126352)) < 0.00001
            assert abs(model.varU[row,factor] - 1./4.*(1.-0.37033832534958433)) < 0.00001
def test_update_tau():
    """update_tau should set alpha_s = alpha + |Omega|/2 and
    beta_s = beta + exp_square_diff/2 (uses module-level fixtures)."""
    model = bnmf_vb_optimised(R,M,K,priors)
    model.expU = 1./lambdaU               # all entries 1/2
    model.expV = 1./lambdaV               # all entries 1/3
    model.varU = numpy.ones((I,K))*2
    model.varV = numpy.ones((J,K))*3
    model.update_tau()
    assert model.alpha_s == alpha + 12./2.
    assert model.beta_s == beta + 172.66666666666666/2.
def test_exp_square_diff():
    """exp_square_diff = E_q[ sum_Omega (R_ij - U_i.V_j)^2 ], hand-computed
    for uniform expU=1/2, expV=1/3, varU=2, varV=3 over 12 observed entries."""
    model = bnmf_vb_optimised(R,M,K,priors)
    model.expU = 1./lambdaU               # [[1./2.]]
    model.expV = 1./lambdaV               # [[1./3.]]
    model.varU = numpy.ones((I,K))*2      # [[2.]]
    model.varV = numpy.ones((J,K))*3      # [[3.]]
    # (varU+expU^2) = 2.25, (varV+expV^2) = 3+1/9;
    # 12.*(4./9.) + 12.*(2*(2.25*(3.+1./9.)-0.25/9.)) = 172.66666666666666
    assert model.exp_square_diff() == 172.66666666666666
def test_elbo():
    """Check the evidence lower bound against a fully hand-computed value.

    All variational parameters are set to arbitrary constants so every term
    of the ELBO can be written out explicitly below.
    """
    I,J,K = 5,3,2
    R = numpy.ones((I,J))
    M = numpy.ones((I,J))
    M[0,0], M[2,2], M[3,1] = 0, 0, 0 # size Omega = 12
    lambdaU = 2*numpy.ones((I,K))
    lambdaV = 3*numpy.ones((J,K))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }

    expU = 5*numpy.ones((I,K))
    expV = 6*numpy.ones((J,K))
    varU = 11*numpy.ones((I,K))
    varV = 12*numpy.ones((J,K))
    exptau = 8.
    explogtau = 9.
    muU = 14*numpy.ones((I,K))
    muV = 15*numpy.ones((J,K))
    tauU = numpy.ones((I,K))/100.
    tauV = numpy.ones((J,K))/101.
    alpha_s = 20.
    beta_s = 21.

    # expU * expV = [[60]]
    # (R - expU*expV)^2 = 12*59^2 = 41772
    # Var[U*V] = 12*K*((11+5^2)*(12+6^2)-5^2*6^2) = 12*2*828 = 19872
    # -muU*sqrt(tauU) = -14*math.sqrt(1./100.) = -1.4
    # -muV*sqrt(tauV) = -15*math.sqrt(1./101.) = -1.4925557853149838
    # cdf(-1.4) = 0.080756659233771066
    # cdf(-1.4925557853149838) = 0.067776752211548219
    # Terms, line by line: data likelihood; p(U) and p(V) exponential priors;
    # p(tau) minus q(tau) Gamma terms; q(U) and q(V) truncated-normal entropies.
    ELBO = 12./2.*(explogtau - math.log(2*math.pi)) - 8./2.*(41772+19872) \
        + 5*2*(math.log(2.) - 2.*5.) + 3*2*(math.log(3.) - 3.*6.) \
        + 3.*numpy.log(1.) - numpy.log(math.gamma(3.)) + 2.*9. - 1.*8. \
        - 20.*numpy.log(21.) + numpy.log(math.gamma(20.)) - 19.*9. + 21.*8. \
        - 0.5*5*2*math.log(1./100.) + 0.5*5*2*math.log(2*math.pi) + 5*2*math.log(1.-0.080756659233771066) \
        + 0.5*5*2*1./100.*(11.+81.) \
        - 0.5*3*2*math.log(1./101.) + 0.5*3*2*math.log(2*math.pi) + 3*2*math.log(1.-0.067776752211548219) \
        + 0.5*3*2*1./101.*(12.+81.)

    BNMF = bnmf_vb_optimised(R,M,K,priors)
    BNMF.expU = expU
    BNMF.expV = expV
    BNMF.varU = varU
    BNMF.varV = varV
    BNMF.exptau = exptau
    BNMF.explogtau = explogtau
    BNMF.muU = muU
    BNMF.muV = muV
    BNMF.tauU = tauU
    BNMF.tauV = tauV
    BNMF.alpha_s = alpha_s
    BNMF.beta_s = beta_s

    assert BNMF.elbo() == ELBO
def test_update_V():
    """Test the update of muV and tauV for each factor k (uses module-level
    R, M, K, I, J, priors, lambdaU, lambdaV fixtures)."""
    for k in range(0,K):
        BNMF = bnmf_vb_optimised(R,M,K,priors)
        BNMF.muV = numpy.zeros((J,K))
        BNMF.tauV = numpy.zeros((J,K))
        BNMF.expU = 1./lambdaU #[[1./2.]]
        BNMF.expV = 1./lambdaV #[[1./3.]]
        BNMF.varU = numpy.ones((I,K))*2 #[[2.]]
        BNMF.varV = numpy.ones((J,K))*3 #[[3.]]
        BNMF.exptau = 3.
        BNMF.update_V(k)
        for j in range(0,J):
            # tauV[j,k] = exptau * sum_i M[i,j] * (expU[i,k]^2 + varU[i,k])
            assert BNMF.tauV[j,k] == 3. * (M[:,j] * ( BNMF.expU[:,k]*BNMF.expU[:,k] + BNMF.varU[:,k] )).sum()
            # muV[j,k] = (1/tauV[j,k]) * (-lambdaV[j,k] + exptau * sum_i M[i,j]
            #            * (R[i,j] - prediction + expU[i,k]*expV[j,k]) * expU[i,k]);
            # the -3. term is the lambdaV prior value.
            assert BNMF.muV[j,k] == (1./(3. * (M[:,j] * ( BNMF.expU[:,k]*BNMF.expU[:,k] + BNMF.varU[:,k] )).sum())) * \
                ( -3. + BNMF.exptau * (M[:,j]*( (BNMF.R[:,j] - numpy.dot(BNMF.expU,BNMF.expV[j]) + BNMF.expU[:,k]*BNMF.expV[j,k])*BNMF.expU[:,k] )).sum() )
def test_update_U():
    """Test the update of muU and tauU for each factor k (uses module-level
    R, M, K, I, J, priors, lambdaU, lambdaV fixtures)."""
    for k in range(0,K):
        BNMF = bnmf_vb_optimised(R,M,K,priors)
        BNMF.muU = numpy.zeros((I,K))
        BNMF.tauU = numpy.zeros((I,K))
        BNMF.expU = 1./lambdaU #[[1./2.]]
        BNMF.expV = 1./lambdaV #[[1./3.]]
        BNMF.varU = numpy.ones((I,K))*2 #[[2.]]
        BNMF.varV = numpy.ones((J,K))*3 #[[3.]]
        BNMF.exptau = 3.
        BNMF.update_U(k)
        for i in range(0,I):
            # tauU[i,k] = exptau * sum_j M[i,j] * (expV[j,k]^2 + varV[j,k])
            assert BNMF.tauU[i,k] == 3. * (M[i] * ( BNMF.expV[:,k]*BNMF.expV[:,k] + BNMF.varV[:,k] )).sum()
            # muU[i,k] = (1/tauU[i,k]) * (-lambdaU[i,k] + exptau * sum_j M[i,j]
            #            * (R[i,j] - prediction + expU[i,k]*expV[j,k]) * expV[j,k]);
            # the -2. term is the lambdaU prior value.
            assert BNMF.muU[i,k] == (1./(3. * (M[i] * ( BNMF.expV[:,k]*BNMF.expV[:,k] + BNMF.varV[:,k] )).sum())) * \
                ( -2. + BNMF.exptau * (M[i]*( (BNMF.R[i] - numpy.dot(BNMF.expU[i],BNMF.expV.T) + BNMF.expU[i,k]*BNMF.expV[:,k])*BNMF.expV[:,k] )).sum() )
def test_compute_statistics():
    """Check compute_MSE, compute_R2 and compute_Rp against values computed
    by hand for the two predicted entries in the bottom row."""
    I, J, K = 2, 2, 3
    R = numpy.array([[1,2],[3,4]],dtype=float)
    M = numpy.array([[1,1],[0,1]])
    lambdaU = 2*numpy.ones((I,K))
    lambdaV = 3*numpy.ones((J,K))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
    model = bnmf_vb_optimised(R,M,K,priors)

    R_pred = numpy.array([[500,550],[1220,1342]],dtype=float)
    M_pred = numpy.array([[0,0],[1,1]])

    # Errors on the two masked-in entries: 1220-3 = 1217 and 1342-4 = 1338.
    expected_MSE = (1217**2 + 1338**2) / 2.0
    expected_R2 = 1. - (1217**2+1338**2)/(0.5**2+0.5**2) #mean=3.5
    expected_Rp = 61. / ( math.sqrt(.5) * math.sqrt(7442.) ) #mean=3.5,var=0.5,mean_pred=1281,var_pred=7442,cov=61

    assert model.compute_MSE(M_pred,R,R_pred) == expected_MSE
    assert model.compute_R2(M_pred,R,R_pred) == expected_R2
    assert model.compute_Rp(M_pred,R,R_pred) == expected_Rp
# NOTE(review): this is a fragment of a Python 2 experiment script (print
# statement below); J, K, alpha, beta, lambdaU, standardised, repeats,
# init_UV, iterations and load_gdsc are defined earlier in the file — confirm.
lambdaV = numpy.ones((J,K))/10.
priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }

# Load in data
(_,R,M,_,_,_,_) = load_gdsc(standardised=standardised)

# Run the VB algorithm, <repeats> times
times_repeats = []
performances_repeats = []
for i in range(0,repeats):
    # Set all the seeds so each repeat is deterministic and identical
    numpy.random.seed(0)

    # Run the classifier
    BNMF = bnmf_vb_optimised(R,M,K,priors)
    BNMF.initialise(init_UV)
    BNMF.run(iterations)

    # Extract the performances and timestamps across all iterations
    times_repeats.append(BNMF.all_times)
    performances_repeats.append(BNMF.all_performances)

# Check whether seed worked: all performances should be the same
assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \
    "Seed went wrong - performances not the same across repeats!"

# Print out the performances, and the average times (per-iteration average
# over the repeats; performances are identical across repeats so take the first)
vb_all_times_average = list(numpy.average(times_repeats, axis=0))
vb_all_performances = performances_repeats[0]
print "vb_all_times_average = %s" % vb_all_times_average
def test_update_exp_tau():
    """After initialise(), exptau and explogtau should match the Gamma
    posterior with alpha_s = 3+12/2 and beta_s = 1+35.4113198623/2."""
    model = bnmf_vb_optimised(R,M,K,priors)
    model.initialise()
    tol = 0.000000000001
    assert abs(model.exptau - (3+12./2.)/(1+35.4113198623/2.)) < tol
    # E[log tau] = psi(alpha_s) - log(beta_s); psi(9) = 2.1406414779556.
    assert abs(model.explogtau - (2.1406414779556 - math.log(1+35.4113198623/2.))) < tol
def test_init():
    """Test the constructor: shape validation errors for R, M, lambdaU and
    lambdaV, fully-unobserved row/column detection, and two successful cases
    (matrix priors and scalar priors)."""
    # Test getting an exception when R and M are different sizes, and when R is not a 2D array.
    R1 = numpy.ones(3)
    M = numpy.ones((2,3))
    I,J,K = 5,3,1
    lambdaU = numpy.ones((I,K))
    lambdaV = numpy.ones((J,K))
    alpha, beta = 3, 1
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }

    with pytest.raises(AssertionError) as error:
        bnmf_vb_optimised(R1,M,K,priors)
    assert str(error.value) == "Input matrix R is not a two-dimensional array, but instead 1-dimensional."

    R2 = numpy.ones((4,3,2))
    with pytest.raises(AssertionError) as error:
        bnmf_vb_optimised(R2,M,K,priors)
    assert str(error.value) == "Input matrix R is not a two-dimensional array, but instead 3-dimensional."

    R3 = numpy.ones((3,2))
    with pytest.raises(AssertionError) as error:
        bnmf_vb_optimised(R3,M,K,priors)
    assert str(error.value) == "Input matrix R is not of the same size as the indicator matrix M: (3, 2) and (2, 3) respectively."

    # Similarly for lambdaU, lambdaV
    R4 = numpy.ones((2,3))
    lambdaU = numpy.ones((2+1,1))
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
    with pytest.raises(AssertionError) as error:
        bnmf_vb_optimised(R4,M,K,priors)
    assert str(error.value) == "Prior matrix lambdaU has the wrong shape: (3, 1) instead of (2, 1)."

    lambdaU = numpy.ones((2,1))
    lambdaV = numpy.ones((3+1,1))
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
    with pytest.raises(AssertionError) as error:
        bnmf_vb_optimised(R4,M,K,priors)
    assert str(error.value) == "Prior matrix lambdaV has the wrong shape: (4, 1) instead of (3, 1)."

    # Test getting an exception if a row or column is entirely unknown
    lambdaU = numpy.ones((2,1))
    lambdaV = numpy.ones((3,1))
    M1 = [[1,1,1],[0,0,0]]
    M2 = [[1,1,0],[1,0,0]]
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
    with pytest.raises(AssertionError) as error:
        bnmf_vb_optimised(R4,M1,K,priors)
    assert str(error.value) == "Fully unobserved row in R, row 1."
    with pytest.raises(AssertionError) as error:
        bnmf_vb_optimised(R4,M2,K,priors)
    assert str(error.value) == "Fully unobserved column in R, column 2."

    # Finally, a successful case
    I,J,K = 3,2,2
    R5 = 2*numpy.ones((I,J))
    lambdaU = numpy.ones((I,K))
    lambdaV = numpy.ones((J,K))
    M = numpy.ones((I,J))
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
    BNMF = bnmf_vb_optimised(R5,M,K,priors)
    assert numpy.array_equal(BNMF.R,R5)
    assert numpy.array_equal(BNMF.M,M)
    assert BNMF.I == I
    assert BNMF.J == J
    assert BNMF.K == K
    assert BNMF.size_Omega == I*J
    assert BNMF.alpha == alpha
    assert BNMF.beta == beta
    assert numpy.array_equal(BNMF.lambdaU,lambdaU)
    assert numpy.array_equal(BNMF.lambdaV,lambdaV)

    # And when lambdaU and lambdaV are integers
    # (scalars should be expanded to full (I,K)/(J,K) matrices)
    I,J,K = 3,2,2
    R5 = 2*numpy.ones((I,J))
    lambdaU = 3.
    lambdaV = 4.
    M = numpy.ones((I,J))
    priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
    BNMF = bnmf_vb_optimised(R5,M,K,priors)
    assert numpy.array_equal(BNMF.R,R5)
    assert numpy.array_equal(BNMF.M,M)
    assert BNMF.I == I
    assert BNMF.J == J
    assert BNMF.K == K
    assert BNMF.size_Omega == I*J
    assert BNMF.alpha == alpha
    assert BNMF.beta == beta
    assert numpy.array_equal(BNMF.lambdaU,lambdaU*numpy.ones((I,K)))
    assert numpy.array_equal(BNMF.lambdaV,lambdaV*numpy.ones((J,K)))