def run_all_settings(all_K_alpha): fout = open('results_varying_K_hmf_no_ARD_ge_gm_to_pm_std.txt', 'w') all_average_performances = [] for K in all_K: ''' Compute the folds ''' n = len(X1) n_folds = 10 shuffle, random_state = True, None folds = KFold(n=n, n_folds=n_folds, shuffle=shuffle, random_state=random_state) ''' Run HMF to predict Y from X ''' all_MSE, all_R2, all_Rp = numpy.zeros(n_folds), numpy.zeros( n_folds), numpy.zeros(n_folds) for i, (train_index, test_index) in enumerate(folds): print "Training fold %s for HMF-MTF." % (i + 1) ''' Split into train and test ''' M_X1, M_X2, M_Y_train = numpy.ones(X1.shape), numpy.ones( X2.shape), numpy.ones(Y.shape) M_Y_train[test_index] = 0. M_Y_test = 1. - M_Y_train R = [(X1, M_X1, 'samples', 'genes', alpha[0]), (X2, M_X2, 'samples', 'genes', alpha[1]), (Y, M_Y_train, 'samples', 'genes', alpha[2])] ''' Train and predict ''' HMF = HMF_Gibbs(R, C, D, K, settings, hyperparameters) HMF.initialise(init) HMF.run(iterations) ''' Compute the performances ''' performances = HMF.predict_Rn(n=2, M_pred=M_Y_test, burn_in=burn_in, thinning=thinning) all_MSE[i], all_R2[i], all_Rp[i] = performances[ 'MSE'], performances['R^2'], performances['Rp'] print "MSE: %s. R^2: %s. Rp: %s." % ( performances['MSE'], performances['R^2'], performances['Rp']) print "Average MSE: %s +- %s. \nAverage R^2: %s +- %s. \nAverage Rp: %s +- %s." % \ (all_MSE.mean(),all_MSE.std(),all_R2.mean(),all_R2.std(),all_Rp.mean(),all_Rp.std()) fout.write('Tried MF on PM -> GE, with K = %s, alphan = %s.\n' % (K, alpha)) fout.write('Average MSE: %s +- %s. \nAverage R^2: %s +- %s. \nAverage Rp: %s +- %s.\n' % \ (all_MSE.mean(),all_MSE.std(),all_R2.mean(),all_R2.std(),all_Rp.mean(),all_Rp.std())) fout.write('All MSE: %s. \nAll R^2: %s. \nAll Rp: %s.\n\n' % (list(all_MSE), list(all_R2), list(all_Rp))) fout.flush() all_average_performances.append(all_MSE.mean()) ''' Print for plotting. 
''' print "all_K = %s \nall_average_performances = %s" % ( all_K, all_average_performances)
def test_compute_statistics():
    ''' Verify compute_MSE, compute_R2 and compute_Rp against hand-computed values. '''
    entities = ['entity0', 'entity1']
    K = {entities[0]: 3, entities[1]: 4}
    dataset = numpy.array([[1, 2], [3, 4]], dtype=float)
    observed = numpy.array([[1, 1], [0, 1]])
    R = [(dataset, observed, entities[0], entities[1], 1.)]
    HMF = HMF_Gibbs(R, [], [], K, {}, {})

    predictions = numpy.array([[500, 550], [1220, 1342]], dtype=float)
    mask_pred = numpy.array([[0, 0], [1, 1]])

    # Predicted entries are (1,0) and (1,1): errors 1220-3=1217 and 1342-4=1338.
    expected_MSE = (1217 ** 2 + 1338 ** 2) / 2.0
    # True values 3 and 4 have mean 3.5, so SS_tot = 0.5^2 + 0.5^2.
    expected_R2 = 1. - (1217 ** 2 + 1338 ** 2) / (0.5 ** 2 + 0.5 ** 2)
    # mean=3.5, var=0.5, mean_pred=1281, var_pred=7442, cov=61.
    expected_Rp = 61. / (math.sqrt(.5) * math.sqrt(7442.))

    assert HMF.compute_MSE(mask_pred, dataset, predictions) == expected_MSE
    assert HMF.compute_R2(mask_pred, dataset, predictions) == expected_R2
    assert HMF.compute_Rp(mask_pred, dataset, predictions) == expected_Rp
# Sparsity experiment: for each fraction of unknown entries, train HMF <repeats>
# times on the corresponding train/test mask pairs and collect the performance
# metrics per repeat. Relies on globals defined earlier in this script:
# fractions_unknown, all_Ms_train_test, metrics, all_performances,
# average_performances, repeats, R_ctrp, R_gdsc, R_ccle_ic, R_ccle_ec, M_gdsc,
# M_ccle_ic, M_ccle_ec, alpha_l, K, settings, hyperparameters, init,
# iterations, burn_in, thinning.
# NOTE(review): this fragment is cut off mid-statement at the end.
for (fraction, Ms_train_test) in zip(fractions_unknown, all_Ms_train_test):
    print "Trying fraction %s." % fraction
    # Run the algorithm <repeats> times and store all the performances
    for metric in metrics:
        all_performances[metric].append([])
    for repeat, (M_train, M_test) in zip(range(0, repeats), Ms_train_test):
        print "Repeat %s of fraction %s." % (repeat + 1, fraction)
        # Only the first dataset (CTRP) uses the partially-hidden training
        # mask; the other three are fully observed side datasets.
        D = [(R_ctrp, M_train, 'Cell_lines', alpha_l[0]),
             (R_gdsc, M_gdsc, 'Cell_lines', alpha_l[1]),
             (R_ccle_ic, M_ccle_ic, 'Cell_lines', alpha_l[2]),
             (R_ccle_ec, M_ccle_ec, 'Cell_lines', alpha_l[3])]
        R, C = [], []
        HMF = HMF_Gibbs(R, C, D, K, settings, hyperparameters)
        HMF.initialise(init)
        HMF.run(iterations)
        # Measure the performances
        performances = HMF.predict_Dl(l=0, M_pred=M_test, burn_in=burn_in, thinning=thinning)
        for metric in metrics:
            # Add this metric's performance to the list of <repeat> performances for this fraction
            all_performances[metric][-1].append(performances[metric])
    # Compute the average across attempts
    for metric in metrics:
        average_performances[metric].append(
# Cross-validation on the GDSC dataset: hide each test fold of M_gdsc, train
# HMF-MTF with the other three drug sensitivity datasets fully observed, and
# record (row, column, real, predicted) tuples for every test entry.
# Relies on globals from earlier in this script: mask, I, J, no_folds, M_gdsc,
# R_gdsc, R_ctrp, R_ccle_ic, R_ccle_ec, M_ctrp, M_ccle_ic, M_ccle_ec, alpha,
# C, D, K, settings, hyperparameters, init, iterations, n, burn_in, thinning,
# i_j_real_pred, file_performance.
folds_test = mask.compute_folds_attempts(I=I, J=J, no_folds=no_folds, attempts=1000, M=M_gdsc)
folds_training = mask.compute_Ms(folds_test)
for i, (train, test) in enumerate(zip(folds_training, folds_test)):
    print "Fold %s." % (i + 1)

    ''' Predict values. '''
    R = [(R_gdsc, train, 'Cell_lines', 'Drugs', alpha[0]),
         (R_ctrp, M_ctrp, 'Cell_lines', 'Drugs', alpha[1]),
         (R_ccle_ic, M_ccle_ic, 'Cell_lines', 'Drugs', alpha[2]),
         (R_ccle_ec, M_ccle_ec, 'Cell_lines', 'Drugs', alpha[3])]
    HMF = HMF_Gibbs(R=R, C=C, D=D, K=K, settings=settings, hyperparameters=hyperparameters)
    HMF.initialise(init=init)
    HMF.run(iterations=iterations)
    # NOTE(review): `n` (index of the dataset to predict) comes from outside
    # this fragment — presumably 0 for GDSC; confirm.
    R_pred = HMF.return_Rn(n=n, burn_in=burn_in, thinning=thinning)

    ''' Add predictions to list. '''
    # NOTE(review): the comprehension and inner loop rebind `i`, shadowing the
    # fold index above — harmless here because the fold index is only used
    # before this point, but worth renaming.
    indices_test = [(i, j) for (i, j) in itertools.product(range(I), range(J)) if test[i, j]]
    for i, j in indices_test:
        i_j_real_pred.append((i, j, R_gdsc[i, j], R_pred[i, j]))

''' Store the performances. '''
# assumes storing happens once, after all folds — TODO confirm against original layout
with open(file_performance, 'w') as fout:
    fout.write('%s' % i_j_real_pred)
# Initialisation-convergence experiment on the GDSC IC50 dataset: run HMF-MTF
# n_repeats times for each initialisation setting and store the average MSE
# convergence curve per init.
# NOTE(review): the leading `}, ]` closes a list of init dicts (values_init)
# whose start lies before this fragment.
    },
]
R, C = [], []
main_dataset = 'D'
D, C = [], []
R = [(R_gdsc, M_gdsc, 'Cell_lines', 'Drugs', alpha_n[0]),
     (R_ctrp, M_ctrp, 'Cell_lines', 'Drugs', alpha_n[1]),
     (R_ccle_ic, M_ccle_ic, 'Cell_lines', 'Drugs', alpha_n[2]),
     (R_ccle_ec, M_ccle_ec, 'Cell_lines', 'Drugs', alpha_n[3])]
index_main = 0 # GDSC
file_performances = './results/hmf_data_mtf_gdsc_ic.txt'

''' Run the methods with different inits and measure the convergence. '''
all_init_performances = []
for init in values_init:
    all_performances = []
    for n in range(n_repeats):
        print "Repeat %s of initialisation experiment, with init %s." % (n, init)
        HMF = HMF_Gibbs(R, C, D, K, settings, hyperparameters)
        HMF.initialise(init)
        HMF.run(iterations)
        # Per-iteration MSE trace of the main (GDSC) dataset for this repeat.
        all_performances.append(HMF.all_performances_Rn['MSE'][index_main])
    # Average the traces elementwise across the repeats.
    average_performances = list(numpy.mean(all_performances, axis=0))
    all_init_performances.append(average_performances)

''' Store performances in file. '''
with open(file_performances, 'w') as fout:
    fout.write("%s" % all_init_performances)
# Bicluster analysis on the methylation data: run HMF-MTF on the fully
# observed GE, PM and GM datasets and store the expectations of the factor
# matrices. Relies on globals from earlier in this script: settings,
# hyperparameters, init, iterations, burn_in, thinning, project_location,
# filter_driver_genes_std, HMF_Gibbs.
# NOTE(review): this fragment is cut off mid-call at the end.
E = ['genes', 'samples']
K = {'genes': 20, 'samples': 20}
alpha_n = [1., 1., 1.] # GE, PM, GM

''' Load in data '''
R_ge, R_pm, R_gm, genes, samples = filter_driver_genes_std()
M_ge, M_pm, M_gm = numpy.ones(R_ge.shape), numpy.ones(R_pm.shape), numpy.ones(R_gm.shape)
R = [
    (R_ge, M_ge, 'genes', 'samples', alpha_n[0]),
    (R_pm, M_pm, 'genes', 'samples', alpha_n[1]),
    # NOTE(review): GM reuses alpha_n[1]; given the "GE, PM, GM" ordering of
    # alpha_n above, this looks like it should be alpha_n[2] — confirm. With
    # all weights equal to 1. the result is the same either way.
    (R_gm, M_gm, 'genes', 'samples', alpha_n[1]),
]
C, D = [], []

''' Run the Gibbs sampler '''
HMF = HMF_Gibbs(R, C, D, K, settings, hyperparameters)
HMF.initialise(init)
HMF.run(iterations)

''' Store the mean of the matrices. '''
folder = project_location + 'HMF/methylation/bicluster_analysis/matrices/'
# NOTE(review): variable names E_drugs/E_cell_lines look copied from the drug
# sensitivity script; here they hold 'genes' and 'samples'.
E_drugs, E_cell_lines = 'genes', 'samples'
n_ge, n_pm, n_gm = 0, 1, 2
exp_F_genes = HMF.approx_expectation_Ft(E=E_drugs, burn_in=burn_in, thinning=thinning)
exp_F_samples = HMF.approx_expectation_Ft(E=E_cell_lines, burn_in=burn_in, thinning=thinning)
exp_S_ge = HMF.approx_expectation_Sn(n=n_ge,
# Drug sensitivity bicluster analysis: run HMF-MTF on the four fully observed
# drug sensitivity datasets (GDSC, CTRP, CCLE IC50, CCLE EC50).
# NOTE(review): the leading dict entries close an `init` dict whose start lies
# before this fragment; the trailing `"""` opens a disabled (string) block of
# factor-extraction code that is not closed within this fragment.
    'lambdat': 'exp', 'lambdaS': 'exp', 'tau': 'exp',
}
alpha_n = [1., 1., 1., 1.] # GDSC, CTRP, CCLE IC, CCLE EC
alpha_m = []
K = {'Cell_lines': 5, 'Drugs': 5}
C, D = [], []
R = [(R_gdsc, M_gdsc, 'Cell_lines', 'Drugs', alpha_n[0]),
     (R_ctrp, M_ctrp, 'Cell_lines', 'Drugs', alpha_n[1]),
     (R_ccle_ic, M_ccle_ic, 'Cell_lines', 'Drugs', alpha_n[2]),
     (R_ccle_ec, M_ccle_ec, 'Cell_lines', 'Drugs', alpha_n[3])]

''' Run the model. '''
HMF = HMF_Gibbs(R, C, D, K, settings, hyperparameters)
HMF.initialise(init)
HMF.run(iterations)

""" ''' Extract all factor matrices (F, S^n), and store in files - only store the burned-in and thinned out draws. ''' E_drugs, E_cell_lines = 'Drugs', 'Cell_lines' n_gdsc, n_ctrp, n_ccle_ic, n_ccle_ec = 0, 1, 2, 3 folder = project_location+'HMF/drug_sensitivity/bicluster_analysis/matrices/' thinned_F_drugs = numpy.array(HMF.iterations_all_Ft[E_drugs])[indices_thinning] thinned_F_cell_lines = numpy.array(HMF.iterations_all_Ft[E_cell_lines])[indices_thinning] thinned_S_gdsc = numpy.array(HMF.iterations_all_Sn[n_gdsc])[indices_thinning] thinned_S_ctrp = numpy.array(HMF.iterations_all_Sn[n_ctrp])[indices_thinning] thinned_S_ccle_ic = numpy.array(HMF.iterations_all_Sn[n_ccle_ic])[indices_thinning] thinned_S_ccle_ec = numpy.array(HMF.iterations_all_Sn[n_ccle_ec])[indices_thinning]
def test_log_likelihood():
    ''' Test quality('loglikelihood'/'AIC'/'BIC') plus no_datapoints() and
        no_parameters(), using hand-constructed chains of Gibbs draws so the
        thinned-out posterior expectations are known in closed form. Also
        check that an unknown metric name raises an AssertionError. '''
    iterations = 10
    burn_in = 2
    thinning = 3 # so index 2,5,8 -> m=3,m=6,m=9
    E = ['entity0','entity1']
    I = {E[0]:5, E[1]:3}
    K = {E[0]:2, E[1]:4}
    J = [6]
    # Draw m (1-based) of every chain is a constant matrix scaled by m**2, so
    # the mean over the thinned draws (m=3,6,9) is easy to compute by hand.
    iterations_all_Ft = {
        E[0] : [numpy.ones((I[E[0]],K[E[0]])) * 3*m**2 for m in range(1,10+1)],
        E[1] : [numpy.ones((I[E[1]],K[E[1]])) * 1*m**2 for m in range(1,10+1)]
    }
    iterations_all_lambdat = {
        E[0] : [numpy.ones(K[E[0]]) * 3*m**2 for m in range(1,10+1)],
        E[1] : [numpy.ones(K[E[1]]) * 1*m**2 for m in range(1,10+1)]
    }
    iterations_all_Ft['entity0'][2][0,0] = 24 #instead of 27 - to ensure we do not get 0 variance in our predictions
    iterations_all_Sn = [[numpy.ones((K[E[0]],K[E[1]])) * 2*m**2 for m in range(1,10+1)]]
    iterations_all_taun = [[m**2 for m in range(1,10+1)]]
    iterations_all_Sm = [[numpy.ones((K[E[1]],K[E[1]])) * 2*m**2 * 2 for m in range(1,10+1)]]
    iterations_all_taum = [[m**2*2 for m in range(1,10+1)]]
    iterations_all_Gl = [[numpy.ones((J[0],K[E[0]])) * 2*m**2 * 3 for m in range(1,10+1)]]
    iterations_all_taul = [[m**2*3 for m in range(1,10+1)]]
    R0 = numpy.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15]],dtype=float)
    C0 = numpy.array([[1,2,3],[4,5,6],[7,8,9]],dtype=float)
    D0 = numpy.array([[1,2,3,4,5,6],[7,8,9,10,11,12],[13,14,15,16,17,18],[19,20,21,22,23,24],[25,26,27,28,29,30]],dtype=float)
    M0 = numpy.array([[0,0,1],[0,1,0],[0,0,0],[1,1,0],[0,0,0]]) #R->3,5,10,11, R_pred->3542112,3556224,3556224,3556224
    M1 = numpy.array([[0,0,1],[0,1,0],[1,1,0]]) #C->3,7,8, C_pred->4741632,4741632,4741632 - entry 5 gets set to 0 since it is the diagonal
    M2 = numpy.array([[0,0,1,0,0,1],[0,1,0,0,0,0],[1,1,0,0,0,0],[0,0,0,0,0,0],[1,0,0,0,0,0]]) #D->3,6,8,13,14,25, D_pred->63252,63252,63504,63504,63504,63504
    R, C, D = [(R0,M0,E[0],E[1],1.)], [(C0,M1,E[1],1.)], [(D0,M2,E[0],1.)]
    # Hand-derived expectations kept for reference:
    #expected_exp_F0 = numpy.array([[125.,126.],[126.,126.],[126.,126.],[126.,126.],[126.,126.]])
    #expected_exp_F1 = numpy.array([[(9.+36.+81.)*(1./3.) for k in range(0,4)] for i in range(0,3)])
    #expected_exp_Sn = numpy.array([[(9.+36.+81.)*(2./3.) for l in range(0,4)] for k in range(0,2)])
    #expected_exp_taun = (9.+36.+81.)/3.
    #R_pred = numpy.array([[ 3542112., 3542112., 3542112.],[ 3556224., 3556224., 3556224.],[ 3556224., 3556224., 3556224.],[ 3556224., 3556224., 3556224.],[ 3556224., 3556224., 3556224.]])
    #expected_exp_Sm = numpy.array([[(18.+72.+162.)*(2./3.) for l in range(0,4)] for k in range(0,4)])
    #expected_exp_taum = (18.+72.+162.)/3.
    #C_pred = array([[4741632.,4741632.,4741632.],[4741632.,4741632.,4741632.],[4741632.,4741632.,4741632.]])
    #expected_exp_Gl = numpy.array([[(27.+108.+243.)*(2./3.) for k in range(0,2)] for j in range(0,6)])
    #expected_exp_taul = (27.+108.+243.)/3.
    #D_pred = array([[63252.,63252.,63252.,63252.,63252.,63252.],[63504.,63504.,63504.,63504.,63504.,63504.],[63504.,63504.,63504.,63504.,63504.,63504.],[63504.,63504.,63504.,63504.,63504.,63504.],[63504.,63504.,63504.,63504.,63504.,63504.]])
    MSE_R = ((3.-3542112.)**2 + (5.-3556224.)**2 + (10.-3556224.)**2 + (11.-3556224.)**2) / 4.
    MSE_C = ((3.-4741632.)**2 + (7.-4741632.)**2 + (8.-4741632.)**2) / 3.
    MSE_D = ((3.-63252.)**2 + (6.-63252.)**2 + (8.-63504.)**2 + (13.-63504.)**2 + (14.-63504.)**2 + (25.-63504.)**2) / 6.
    HMF = HMF_Gibbs(R,C,D,K,{},{})
    # Inject the precomputed chains directly, bypassing initialise()/run().
    HMF.iterations = iterations
    HMF.iterations_all_Ft = iterations_all_Ft
    HMF.iterations_all_lambdat = iterations_all_lambdat
    HMF.iterations_all_Sn = iterations_all_Sn
    HMF.iterations_all_taun = iterations_all_taun
    HMF.iterations_all_Sm = iterations_all_Sm
    HMF.iterations_all_taum = iterations_all_taum
    HMF.iterations_all_Gl = iterations_all_Gl
    HMF.iterations_all_taul = iterations_all_taul
    # Expected log likelihood: per dataset, |Omega|/2 * (log tau - log 2pi)
    # - tau/2 * SSE, with expected taus 42 (R), 84 (C), 126 (D).
    log_likelihood = 4./2. * (math.log(42.) - math.log(2*math.pi)) - 42./2.*(MSE_R*4.) + \
        3./2. * (math.log(84.) - math.log(2*math.pi)) - 84./2.*(MSE_C*3.) + \
        6./2. * (math.log(126.) - math.log(2*math.pi)) - 126./2.*(MSE_D*6.)
    no_parameters = (5*2+4*3+2*4+4*4+2*6+2+4+3)
    no_datapoints = 4+3+6
    AIC = -2*log_likelihood + 2*no_parameters #F0,F1,Sn0,Sm0,G,lambda0,lambda1,tau
    BIC = -2*log_likelihood + no_parameters*math.log(no_datapoints)
    assert HMF.no_datapoints() == no_datapoints
    assert HMF.no_parameters() == no_parameters
    # Values are huge (squared errors ~1e13), so a tolerance of 1. is tight
    # in relative terms.
    assert abs(log_likelihood - HMF.quality('loglikelihood',burn_in,thinning)) <= 1.
    assert abs(AIC - HMF.quality('AIC',burn_in,thinning)) <= 1.
    assert abs(BIC - HMF.quality('BIC',burn_in,thinning)) <= 1.
    with pytest.raises(AssertionError) as error:
        HMF.quality('FAIL',burn_in,thinning)
    assert str(error.value) == "Unrecognised metric for model quality: FAIL."
def test_predict():
    ''' Test predict_Rn, predict_Cm and predict_Dl (MSE, R^2, Rp) using
        hand-constructed chains of Gibbs draws so the predictions over the
        thinned draws (m=3,6,9) are known in closed form. '''
    iterations = 10
    burn_in = 2
    thinning = 3 # so index 2,5,8 -> m=3,m=6,m=9
    E = ['entity0','entity1']
    I = {E[0]:5, E[1]:3}
    K = {E[0]:2, E[1]:4}
    J = [6]
    # Draw m (1-based) of every chain is a constant matrix scaled by m**2.
    iterations_all_Ft = {
        E[0] : [numpy.ones((I[E[0]],K[E[0]])) * 3*m**2 for m in range(1,10+1)],
        E[1] : [numpy.ones((I[E[1]],K[E[1]])) * 1*m**2 for m in range(1,10+1)]
    }
    iterations_all_lambdat = {
        E[0] : [numpy.ones(K[E[0]]) * 3*m**2 for m in range(1,10+1)],
        E[1] : [numpy.ones(K[E[1]]) * 1*m**2 for m in range(1,10+1)]
    }
    iterations_all_Ft['entity0'][2][0,0] = 24 #instead of 27 - to ensure we do not get 0 variance in our predictions
    iterations_all_Sn = [[numpy.ones((K[E[0]],K[E[1]])) * 2*m**2 for m in range(1,10+1)]]
    iterations_all_taun = [[m**2 for m in range(1,10+1)]]
    iterations_all_Sm = [[numpy.ones((K[E[1]],K[E[1]])) * 2*m**2 * 2 for m in range(1,10+1)]]
    iterations_all_taum = [[m**2*2 for m in range(1,10+1)]]
    iterations_all_Gl = [[numpy.ones((J[0],K[E[0]])) * 2*m**2 * 3 for m in range(1,10+1)]]
    iterations_all_taul = [[m**2*3 for m in range(1,10+1)]]
    R0 = numpy.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15]],dtype=float)
    C0 = numpy.array([[1,2,3],[4,5,6],[7,8,9]],dtype=float)
    D0 = numpy.array([[1,2,3,4,5,6],[7,8,9,10,11,12],[13,14,15,16,17,18],[19,20,21,22,23,24],[25,26,27,28,29,30]],dtype=float)
    # Training masks are fully observed; the test masks below select the
    # entries to be predicted.
    M0, M1, M2 = numpy.ones((5,3)), numpy.ones((3,3)), numpy.ones((5,6))
    R, C, D = [(R0,M0,E[0],E[1],1.)], [(C0,M1,E[1],1.)], [(D0,M2,E[0],1.)]
    alphatau, betatau = 1., 2.
    alpha0, beta0 = 6., 7.
    lambdaF, lambdaG = 3., 8.
    lambdaSn, lambdaSm = 4., 5.
    priors = { 'alpha0':alpha0, 'beta0':beta0, 'alphatau':alphatau, 'betatau':betatau, 'lambdaF':lambdaF, 'lambdaG':lambdaG, 'lambdaSn':lambdaSn, 'lambdaSm':lambdaSm }
    settings = { 'priorF' : 'exponential', 'priorG' : 'normal', 'priorSn' : 'normal', 'priorSm' : 'normal', 'ARD' : True, 'orderF' : 'columns', 'orderG' : 'rows', 'orderSn' : 'rows', 'orderSm' : 'rows' }
    # Hand-derived expectations kept for reference:
    #expected_exp_F0 = numpy.array([[125.,126.],[126.,126.],[126.,126.],[126.,126.],[126.,126.]])
    #expected_exp_F1 = numpy.array([[(9.+36.+81.)*(1./3.) for k in range(0,4)] for i in range(0,3)])
    #expected_exp_Sn = numpy.array([[(9.+36.+81.)*(2./3.) for l in range(0,4)] for k in range(0,2)])
    #expected_exp_taun = (9.+36.+81.)/3.
    #R_pred = numpy.array([[ 3542112., 3542112., 3542112.],[ 3556224., 3556224., 3556224.],[ 3556224., 3556224., 3556224.],[ 3556224., 3556224., 3556224.],[ 3556224., 3556224., 3556224.]])
    #expected_exp_Sm = numpy.array([[(18.+72.+162.)*(2./3.) for l in range(0,4)] for k in range(0,4)])
    #expected_exp_taum = (18.+72.+162.)/3.
    #C_pred = array([[4741632.,4741632.,4741632.],[4741632.,4741632.,4741632.],[4741632.,4741632.,4741632.]])
    #expected_exp_Gl = numpy.array([[(27.+108.+243.)*(2./3.) for k in range(0,2)] for j in range(0,6)])
    #expected_exp_taul = (27.+108.+243.)/3.
    #D_pred = array([[63252.,63252.,63252.,63252.,63252.,63252.],[63504.,63504.,63504.,63504.,63504.,63504.],[63504.,63504.,63504.,63504.,63504.,63504.],[63504.,63504.,63504.,63504.,63504.,63504.],[63504.,63504.,63504.,63504.,63504.,63504.]])
    M_test_R = numpy.array([[0,0,1],[0,1,0],[0,0,0],[1,1,0],[0,0,0]]) #R->3,5,10,11, R_pred->3542112,3556224,3556224,3556224
    MSE_R = ((3.-3542112.)**2 + (5.-3556224.)**2 + (10.-3556224.)**2 + (11.-3556224.)**2) / 4.
    R2_R = 1. - ((3.-3542112.)**2 + (5.-3556224.)**2 + (10.-3556224.)**2 + (11.-3556224.)**2) / (4.25**2+2.25**2+2.75**2+3.75**2) #mean=7.25
    Rp_R = 357. / ( math.sqrt(44.75) * math.sqrt(5292.) ) #mean=7.25,var=44.75, mean_pred=3552696,var_pred=5292, corr=(-4.25*-63 + -2.25*21 + 2.75*21 + 3.75*21)
    M_test_C = numpy.array([[0,0,1],[0,1,0],[1,1,0]]) #C->3,5,7,8, C_pred->4741632,4741632,4741632,4741632
    MSE_C = ((3.-4741632.)**2 + (5.-4741632.)**2 + (7.-4741632.)**2 + (8.-4741632.)**2) / 4.
    R2_C = 1. - ((3.-4741632.)**2 + (5.-4741632.)**2 + (7.-4741632.)**2 + (8.-4741632.)**2) / (2.75**2+0.75**2+1.25**2+2.25**2) #mean=5.75
    M_test_D = numpy.array([[0,0,1,0,0,1],[0,1,0,0,0,0],[1,1,0,0,0,0],[0,0,0,0,0,0],[0,0,0,0,0,0]]) #D->3,6,8,13,14, D_pred->63252,63252,63504,63504,63504
    MSE_D = ((3.-63252.)**2 + (6.-63252.)**2 + (8.-63504.)**2 + (13.-63504.)**2 + (14.-63504.)**2) / 5.
    R2_D = 1. - ((3.-63252.)**2 + (6.-63252.)**2 + (8.-63504.)**2 + (13.-63504.)**2 + (14.-63504.)**2) / (5.8**2+2.8**2+0.8**2+4.2**2+5.2**2) #mean=8.8
    Rp_D = 0.84265143679484211
    HMF = HMF_Gibbs(R,C,D,K,settings,priors)
    # Inject the precomputed chains directly, bypassing initialise()/run().
    HMF.iterations = iterations
    HMF.iterations_all_Ft = iterations_all_Ft
    HMF.iterations_all_lambdat = iterations_all_lambdat
    HMF.iterations_all_Sn = iterations_all_Sn
    HMF.iterations_all_taun = iterations_all_taun
    HMF.iterations_all_Sm = iterations_all_Sm
    HMF.iterations_all_taum = iterations_all_taum
    HMF.iterations_all_Gl = iterations_all_Gl
    HMF.iterations_all_taul = iterations_all_taul
    performances_R = HMF.predict_Rn(0,M_test_R,burn_in,thinning)
    performances_C = HMF.predict_Cm(0,M_test_C,burn_in,thinning)
    performances_D = HMF.predict_Dl(0,M_test_D,burn_in,thinning)
    assert performances_R['MSE'] == MSE_R
    assert performances_R['R^2'] == R2_R
    assert performances_R['Rp'] == Rp_R
    assert performances_C['MSE'] == MSE_C
    assert performances_C['R^2'] == R2_C
    # All C predictions are identical, so the prediction variance is 0 and the
    # Pearson correlation is undefined (NaN).
    assert numpy.isnan(performances_C['Rp'])
    assert performances_D['MSE'] == MSE_D
    assert performances_D['R^2'] == R2_D
    assert abs(performances_D['Rp'] - Rp_D) < 0.00000000001
def test_approx_expectation():
    ''' Test the approx_expectation_* methods (Ft, lambdat, Sn, lambdan, taun,
        Sm, lambdam, taum, Gl, taul): with burn_in=2 and thinning=3 they must
        average exactly the draws at indices 2, 5 and 8 (m=3,6,9). '''
    iterations = 10
    burn_in = 2
    thinning = 3 # so index 2,5,8 -> m=3,m=6,m=9
    E = ['entity0','entity1']
    I = {E[0]:5, E[1]:3}
    K = {E[0]:2, E[1]:4}
    J = [6]
    # Draw m (1-based) of every chain is a constant matrix scaled by m**2, so
    # each expectation equals (9+36+81)/3 times the chain's scale factor.
    iterations_all_Ft = {
        E[0] : [numpy.ones((I[E[0]],K[E[0]])) * 3*m**2 for m in range(1,10+1)],
        E[1] : [numpy.ones((I[E[1]],K[E[1]])) * 1*m**2 for m in range(1,10+1)]
    }
    iterations_all_lambdat = {
        E[0] : [numpy.ones(K[E[0]]) * 3*m**2 for m in range(1,10+1)],
        E[1] : [numpy.ones(K[E[1]]) * 1*m**2 for m in range(1,10+1)]
    }
    iterations_all_Sn = [[numpy.ones((K[E[0]],K[E[1]])) * 2*m**2 for m in range(1,10+1)]]
    iterations_all_lambdan = [[numpy.ones((K[E[0]],K[E[1]])) * 2*m**2 for m in range(1,10+1)]]
    iterations_all_taun = [[m**2 for m in range(1,10+1)]]
    iterations_all_Sm = [[numpy.ones((K[E[1]],K[E[1]])) * 2*m**2 * 2 for m in range(1,10+1)]]
    iterations_all_lambdam = [[numpy.ones((K[E[1]],K[E[1]])) * 2*m**2 * 2 for m in range(1,10+1)]]
    iterations_all_taum = [[m**2*2 for m in range(1,10+1)]]
    iterations_all_Gl = [[numpy.ones((J[0],K[E[1]])) * 2*m**2 * 3 for m in range(1,10+1)]]
    iterations_all_taul = [[m**2*3 for m in range(1,10+1)]]
    expected_exp_F0 = numpy.array([[9.+36.+81. for k in range(0,2)] for i in range(0,5)])
    expected_exp_F1 = numpy.array([[(9.+36.+81.)*(1./3.) for k in range(0,4)] for i in range(0,3)])
    expected_exp_lambda0 = numpy.array([9.+36.+81. for k in range(0,2)])
    expected_exp_lambda1 = numpy.array([(9.+36.+81.)*(1./3.) for k in range(0,4)])
    expected_exp_Sn = numpy.array([[(9.+36.+81.)*(2./3.) for l in range(0,4)] for k in range(0,2)])
    expected_exp_lambdan = numpy.array([[(9.+36.+81.)*(2./3.) for l in range(0,4)] for k in range(0,2)])
    expected_exp_taun = (9.+36.+81.)/3.
    expected_exp_Sm = numpy.array([[(18.+72.+162.)*(2./3.) for l in range(0,4)] for k in range(0,4)])
    expected_exp_lambdam = numpy.array([[(18.+72.+162.)*(2./3.) for l in range(0,4)] for k in range(0,4)])
    expected_exp_taum = (18.+72.+162.)/3.
    expected_exp_Gl = numpy.array([[(27.+108.+243.)*(2./3.) for k in range(0,4)] for j in range(0,6)])
    expected_exp_taul = (27.+108.+243.)/3.
    R0, M0 = numpy.ones((I[E[0]],I[E[1]])), numpy.ones((I[E[0]],I[E[1]]))
    C0, M1 = numpy.ones((I[E[1]],I[E[1]])), numpy.ones((I[E[1]],I[E[1]]))
    D0, M2 = numpy.ones((I[E[1]],J[0])), numpy.ones((I[E[1]],J[0]))
    R, C, D = [(R0,M0,E[0],E[1],1.)], [(C0,M1,E[1],1.)], [(D0,M2,E[1],1.)]
    alphatau, betatau = 1., 2.
    alpha0, beta0 = 6., 7.
    lambdaF, lambdaG = 3., 8.
    lambdaSn, lambdaSm = 4., 5.
    priors = { 'alpha0':alpha0, 'beta0':beta0, 'alphatau':alphatau, 'betatau':betatau, 'lambdaF':lambdaF, 'lambdaG':lambdaG, 'lambdaSn':lambdaSn, 'lambdaSm':lambdaSm }
    settings = { 'priorF' : 'exponential', 'priorG' : 'normal', 'priorSn' : 'normal', 'priorSm' : 'normal', 'orderF' : 'columns', 'orderG' : 'rows', 'orderSn' : 'rows', 'orderSm' : 'rows', 'ARD' : True, 'element_sparsity': True }
    HMF = HMF_Gibbs(R,C,D,K,settings,priors)
    # Inject the precomputed chains directly, bypassing initialise()/run().
    HMF.iterations = iterations
    HMF.iterations_all_Ft = iterations_all_Ft
    HMF.iterations_all_lambdat = iterations_all_lambdat
    HMF.iterations_all_Sn = iterations_all_Sn
    HMF.iterations_all_lambdan = iterations_all_lambdan
    HMF.iterations_all_taun = iterations_all_taun
    HMF.iterations_all_Sm = iterations_all_Sm
    HMF.iterations_all_lambdam = iterations_all_lambdam
    HMF.iterations_all_taum = iterations_all_taum
    HMF.iterations_all_Gl = iterations_all_Gl
    HMF.iterations_all_taul = iterations_all_taul
    exp_F0 = HMF.approx_expectation_Ft(E[0],burn_in,thinning)
    exp_F1 = HMF.approx_expectation_Ft(E[1],burn_in,thinning)
    exp_lambda0 = HMF.approx_expectation_lambdat(E[0],burn_in,thinning)
    exp_lambda1 = HMF.approx_expectation_lambdat(E[1],burn_in,thinning)
    exp_Sn = HMF.approx_expectation_Sn(0,burn_in,thinning)
    exp_lambdan = HMF.approx_expectation_lambdan(0,burn_in,thinning)
    exp_taun = HMF.approx_expectation_taun(0,burn_in,thinning)
    exp_Sm = HMF.approx_expectation_Sm(0,burn_in,thinning)
    exp_lambdam = HMF.approx_expectation_lambdam(0,burn_in,thinning)
    exp_taum = HMF.approx_expectation_taum(0,burn_in,thinning)
    exp_Gl = HMF.approx_expectation_Gl(0,burn_in,thinning)
    exp_taul = HMF.approx_expectation_taul(0,burn_in,thinning)
    assert numpy.array_equal(expected_exp_F0,exp_F0)
    assert numpy.array_equal(expected_exp_F1,exp_F1)
    assert numpy.array_equal(expected_exp_lambda0,exp_lambda0)
    assert numpy.array_equal(expected_exp_lambda1,exp_lambda1)
    assert numpy.array_equal(expected_exp_Sn,exp_Sn)
    assert numpy.array_equal(expected_exp_lambdan,exp_lambdan)
    assert expected_exp_taun == exp_taun
    assert numpy.array_equal(expected_exp_Sm,exp_Sm)
    assert numpy.array_equal(expected_exp_lambdam,exp_lambdam)
    assert expected_exp_taum == exp_taum
    assert numpy.array_equal(expected_exp_Gl,exp_Gl)
    assert expected_exp_taul == exp_taul
def test_run():
    ''' Test run(): every chain must store one draw per iteration, and the
        sampled values must change from one iteration to the next. '''
    ''' Settings '''
    E0, E1, E2 = 'entity0','entity1',1337
    I0, I1, I2 = 10,9,8
    K0, K1, K2 = 3,2,1
    J0 = 4
    N, M, L, T = 3, 2, 1, 3
    R0 = numpy.ones((I0,I1)) # relates E0, E1
    R1 = numpy.ones((I0,I1)) # relates E0, E1
    R2 = numpy.ones((I1,I2)) # relates E1, E2
    C0 = numpy.ones((I0,I0)) # relates E0
    C1 = numpy.ones((I2,I2)) # relates E2
    D0 = numpy.ones((I2,J0)) # relates E2
    Mn0 = numpy.ones((I0,I1))
    Mn1 = numpy.ones((I0,I1))
    Mn2 = numpy.ones((I1,I2))
    Mm0 = numpy.ones((I0,I0))
    Mm1 = numpy.ones((I2,I2))
    Ml0 = numpy.ones((I2,J0))
    alphan = [11.,12.,13.]
    alpham = [14.,15.]
    alphal = [16.]
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]
    D = [(D0,Ml0,E2,alphal[0])]
    E = [E0,E1,E2]
    K = {E0:K0,E1:K1,E2:K2}
    I = {E0:I0,E1:I1,E2:I2}
    J = [J0]
    E_per_Rn = [(E0,E1),(E0,E1),(E1,E2)]
    E_per_Cm = [E0,E2]
    E_per_Dl = [E2]
    alphatau, betatau = 1., 2.
    alpha0, beta0 = 6., 7.
    lambdaF, lambdaG = 3., 8.
    lambdaSn, lambdaSm = 4., 5.
    priors = { 'alpha0':alpha0, 'beta0':beta0, 'alphatau':alphatau, 'betatau':betatau, 'lambdaF':lambdaF, 'lambdaG':lambdaG, 'lambdaSn':lambdaSn, 'lambdaSm':lambdaSm }
    settings = { 'priorF' : 'exponential', 'priorG' : 'normal', 'priorSn' : 'normal', 'priorSm' : 'normal', 'orderF' : 'columns', 'orderG' : 'rows', 'orderSn' : 'rows', 'orderSm' : 'rows', 'ARD' : True, 'element_sparsity': True }
    init = { 'F': 'kmeans', 'G': 'least', 'Sn': 'least', 'Sm': 'least', 'lambdat': 'random', 'lambdaS': 'random', 'tau': 'random' }
    iterations = 10

    HMF = HMF_Gibbs(R,C,D,K,settings,priors)
    HMF.initialise(init)
    HMF.run(iterations)

    ''' Do size checks '''
    for entity in E:
        assert len(HMF.iterations_all_Ft[entity]) == iterations
        assert len(HMF.iterations_all_lambdat[entity]) == iterations
    for n in range(0,N):
        assert len(HMF.iterations_all_lambdan[n]) == iterations
        assert len(HMF.iterations_all_Sn[n]) == iterations
        assert len(HMF.iterations_all_taun[n]) == iterations
    for m in range(0,M):
        assert len(HMF.iterations_all_lambdam[m]) == iterations
        assert len(HMF.iterations_all_Sm[m]) == iterations
        assert len(HMF.iterations_all_taum[m]) == iterations
    for l in range(0,L):
        assert len(HMF.iterations_all_Gl[l]) == iterations
        assert len(HMF.iterations_all_taul[l]) == iterations

    ''' Check whether values change each iteration '''
    for iteration in range(1,iterations):
        for entity in E:
            for k in range(0,K[entity]):
                assert HMF.iterations_all_lambdat[entity][iteration][k] != HMF.iterations_all_lambdat[entity][iteration-1][k]
            for i,k in itertools.product(xrange(0,I[entity]),xrange(0,K[entity])):
                assert HMF.iterations_all_Ft[entity][iteration][i,k] != HMF.iterations_all_Ft[entity][iteration-1][i,k]
        for n in range(0,N):
            E0,E1 = E_per_Rn[n]
            for k,l in itertools.product(xrange(0,K[E0]),xrange(0,K[E1])):
                assert HMF.iterations_all_lambdan[n][iteration][k,l] != HMF.iterations_all_lambdan[n][iteration-1][k,l]
                assert HMF.iterations_all_Sn[n][iteration][k,l] != HMF.iterations_all_Sn[n][iteration-1][k,l]
            assert HMF.iterations_all_taun[n][iteration] != HMF.iterations_all_taun[n][iteration-1]
        for m in range(0,M):
            E0 = E_per_Cm[m]
            for k,l in itertools.product(xrange(0,K[E0]),xrange(0,K[E0])):
                assert HMF.iterations_all_lambdam[m][iteration][k,l] != HMF.iterations_all_lambdam[m][iteration-1][k,l]
                assert HMF.iterations_all_Sm[m][iteration][k,l] != HMF.iterations_all_Sm[m][iteration-1][k,l]
            assert HMF.iterations_all_taum[m][iteration] != HMF.iterations_all_taum[m][iteration-1]
        # BUG FIX: the original looped `for l in range(0,l)`, which is always
        # empty (l was 0 after the earlier loop), so the Gl/taul change-checks
        # never ran; it also compared against the non-existent attribute
        # iterations_all_Dl instead of iterations_all_Gl.
        for l in range(0,L):
            E0 = E_per_Dl[l]
            for j,k in itertools.product(xrange(0,J[l]),xrange(0,K[E0])):
                assert HMF.iterations_all_Gl[l][iteration][j,k] != HMF.iterations_all_Gl[l][iteration-1][j,k]
            assert HMF.iterations_all_taul[l][iteration] != HMF.iterations_all_taul[l][iteration-1]
def test_initialise():
    ''' Test initialise() for four combinations of priors (Exp/Normal), ARD,
        element-wise sparsity, and init methods (exp, random, kmeans, least),
        checking the initial values of all Ft, lambdat, Sn, lambdan, taun,
        Sm, lambdam, taum, Gl and taul variables. '''
    E0, E1, E2 = 'entity0','entity1',1337
    I0, I1, I2 = 10,9,8
    K0, K1, K2 = 3,2,1
    J0 = 4
    N, M, L, T = 3, 2, 1, 3
    R0 = numpy.ones((I0,I1)) # relates E0, E1
    R1 = numpy.ones((I0,I1)) # relates E0, E1
    R2 = numpy.ones((I1,I2)) # relates E1, E2
    C0 = numpy.ones((I0,I0)) # relates E0
    C1 = numpy.ones((I2,I2)) # relates E2
    D0 = numpy.ones((I2,J0)) # relates E2
    Mn0 = numpy.ones((I0,I1))
    Mn1 = numpy.ones((I0,I1))
    Mn2 = numpy.ones((I1,I2))
    Mm0 = numpy.ones((I0,I0))
    Mm1 = numpy.ones((I2,I2))
    Ml0 = numpy.ones((I2,J0))
    #size_Omegan = [I0*I1,I0*I1,I1*I2]
    #size_Omegam = [I0*(I0-1),I2*(I2-1)]
    #size_Omegal = [I2*J0]
    alphan = [11.,12.,13.]
    alpham = [14.,15.]
    alphal = [16.]
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]
    D = [(D0,Ml0,E2,alphal[0])]
    E = [E0,E1,E2]
    K = {E0:K0,E1:K1,E2:K2}
    I = {E0:I0,E1:I1,E2:I2}
    J = [J0]
    #U1t = {'entity0':[0,1], 'entity1':[2], 1337:[] }
    #U2t = {'entity0':[], 'entity1':[0,1], 1337:[2] }
    #Vt = {'entity0':[0], 'entity1':[], 1337:[1] }
    #Wt = {'entity0':[], 'entity1':[], 1337:[0]}
    E_per_Rn = [(E0,E1),(E0,E1),(E1,E2)]
    E_per_Cm = [E0,E2]
    E_per_Dl = [E2]
    alphatau, betatau = 1., 2.
    alpha0, beta0 = 6., 7.
    alphaS, betaS = 9., 10.
    lambdaF, lambdaG = 3., 8.
    lambdaSn, lambdaSm = 4., 5.
    priors = { 'alpha0':alpha0, 'beta0':beta0, 'alphaS':alphaS, 'betaS':betaS, 'alphatau':alphatau, 'betatau':betatau, 'lambdaF':lambdaF, 'lambdaG':lambdaG, 'lambdaSn':lambdaSn, 'lambdaSm':lambdaSm }
    """ We need to test the following cases: - F ~ Exp or ~ N - G ~ Exp or ~ N - S ~ Exp or ~ N - ARD or no ARD - F init random, exp, kmeans - G init random, exp, least - S init random, exp, least - lambdat init random, exp - tau init random, exp """

    ''' F Exp, G Exp, S Exp, ARD, no element-wise sparsity. F exp, G exp, S exp, lambdat exp, tau exp. '''
    settings = { 'priorF' : 'exponential', 'priorG' : 'exponential', 'priorSn' : 'exponential', 'priorSm' : 'exponential', 'orderF' : 'rows', 'orderG' : 'columns', 'orderSn' : 'individual', 'orderSm' : 'individual', 'ARD' : True, 'element_sparsity': True }
    init = { 'F' : 'exp', 'G' : 'exp', 'Sn' : 'exp', 'Sm' : 'exp', 'lambdat' : 'exp', 'lambdaS': 'exp', 'tau' : 'exp'}
    HMF = HMF_Gibbs(R,C,D,K,settings,priors)
    HMF.initialise(init)
    # 'exp' init: lambdat = prior mean alpha0/beta0, F = expectation 1/lambdat.
    for E1 in E:
        for k in range(0,K[E1]):
            assert HMF.all_lambdat[E1][k] == alpha0 / float(beta0)
        for i,k in itertools.product(xrange(0,I[E1]),xrange(0,K[E1])):
            assert HMF.all_Ft[E1][i,k] == 1./HMF.all_lambdat[E1][k]
    expected_all_taun = [0.015369654419961557,0.015367151516936775,0.2442062783472021]
    for n in range(0,N):
        E1,E2 = E_per_Rn[n]
        for k,l in itertools.product(xrange(0,K[E1]),xrange(0,K[E2])):
            expected_lambdan_kl = alphaS / float(betaS)
            assert HMF.all_lambdan[n][k,l] == expected_lambdan_kl
            assert HMF.all_Sn[n][k,l] == 1./expected_lambdan_kl
        assert abs(HMF.all_taun[n] - expected_all_taun[n]) < 0.0000000001
    expected_all_taum = [0.0062975762814580696,3.7505835292008993]
    for m in range(0,M):
        E1 = E_per_Cm[m]
        for k,l in itertools.product(xrange(0,K[E1]),xrange(0,K[E1])):
            expected_lambdam_kl = alphaS / float(betaS)
            assert HMF.all_lambdam[m][k,l] == expected_lambdam_kl
            assert HMF.all_Sm[m][k,l] == 1./expected_lambdam_kl
        assert abs(HMF.all_taum[m] - expected_all_taum[m]) < 0.000000001
    expected_all_taul = [7.2634333565945441]
    for l in range(0,L):
        E1 = E_per_Dl[l]
        for j,k in itertools.product(xrange(0,J[l]),xrange(0,K[E1])):
            assert HMF.all_Gl[l][j,k] == 1./HMF.all_lambdat[E1][k]
        assert abs(HMF.all_taul[l] - expected_all_taul[l]) < 0.00000001

    ''' F Exp, G Exp, S N, no ARD, element-wise sparsity. F random, G exp, Sn exp, Sm random, tau random. '''
    settings = { 'priorF' : 'exponential', 'priorG' : 'exponential', 'priorSn' : 'normal', 'priorSm' : 'normal', 'orderF' : 'columns', 'orderG' : 'rows', 'orderSn' : 'rows', 'orderSm' : 'rows', 'ARD' : False, 'element_sparsity' : False }
    init = { 'F' : 'random', 'G' : 'exp', 'Sn' : 'exp', 'Sm' : 'random', 'lambdaS': 'exp', 'tau' : 'random' }
    HMF = HMF_Gibbs(R,C,D,K,settings,priors)
    HMF.initialise(init)
    # 'random' F draws should (almost surely) differ from the 'exp' value.
    for E1 in E:
        for i,k in itertools.product(xrange(0,I[E1]),xrange(0,K[E1])):
            assert HMF.all_Ft[E1][i,k] != 1./lambdaF
    for n in range(0,N):
        E1,E2 = E_per_Rn[n]
        for k,l in itertools.product(xrange(0,K[E1]),xrange(0,K[E2])):
            # Normal prior with 'exp' init gives the small constant 0.01.
            assert HMF.all_Sn[n][k,l] == 0.01
        assert HMF.all_taun[n] >= 0.
    for m in range(0,M):
        E1 = E_per_Cm[m]
        for k,l in itertools.product(xrange(0,K[E1]),xrange(0,K[E1])):
            assert HMF.all_Sm[m][k,l] != 0.
        assert HMF.all_taum[m] >= 0.
    for l in range(0,L):
        E1 = E_per_Dl[l]
        for j,k in itertools.product(xrange(0,J[l]),xrange(0,K[E1])):
            assert HMF.all_Gl[l][j,k] == 1./lambdaG
        assert HMF.all_taul[l] >= 0.

    ''' F N, G N, Sn Exp, Sm N, ARD, no element-wise sparsity. F kmeans, G exp, S random, lambdat random, tau random. '''
    settings = { 'priorF' : 'normal', 'priorG' : 'normal', 'priorSn' : 'exponential', 'priorSm' : 'normal', 'ARD' : True, 'orderF' : 'columns', 'orderG' : 'rows', 'orderSn' : 'rows', 'orderSm' : 'rows' }
    init = { 'F' : 'kmeans', 'G' : 'exp', 'Sn' : 'random', 'Sm' : 'random', 'lambdat' : 'random', 'tau' : 'random' }
    HMF = HMF_Gibbs(R,C,D,K,settings,priors)
    HMF.initialise(init)
    for E1 in E:
        for k in range(0,K[E1]):
            assert HMF.all_lambdat[E1][k] >= 0.
        # 'kmeans' init produces cluster indicators 0/1 plus 0.2 offset.
        for i,k in itertools.product(xrange(0,I[E1]),xrange(0,K[E1])):
            assert HMF.all_Ft[E1][i,k] == 0.2 or HMF.all_Ft[E1][i,k] == 1.2
    for n in range(0,N):
        E1,E2 = E_per_Rn[n]
        for k,l in itertools.product(xrange(0,K[E1]),xrange(0,K[E2])):
            assert HMF.all_Sn[n][k,l] >= 0.
        assert HMF.all_taun[n] >= 0.
    expected_all_taum = [0.47612886531245974,1.7230629295737439]
    for m in range(0,M):
        E1 = E_per_Cm[m]
        for k,l in itertools.product(xrange(0,K[E1]),xrange(0,K[E1])):
            assert HMF.all_Sm[m][k,l] != 0.
        assert HMF.all_taum[m] >= 0.
    expected_all_taul = [4.1601208459214458]
    for l in range(0,L):
        E1 = E_per_Dl[l]
        for j,k in itertools.product(xrange(0,J[l]),xrange(0,K[E1])):
            # Normal prior with 'exp' init gives the small constant 0.01.
            assert HMF.all_Gl[l][j,k] == 0.01
        assert HMF.all_taul[l] >= 0.

    ''' F Exp, G N, S N, no ARD, no element-wise sparsity. F kmeans, G least, S least, lambdat random, tau random. '''
    settings = { 'priorF' : 'exponential', 'priorG' : 'normal', 'priorSn' : 'normal', 'priorSm' : 'normal', 'orderF' : 'columns', 'orderG' : 'rows', 'orderSn' : 'rows', 'orderSm' : 'rows', 'ARD' : False, 'element_sparsity' : False }
    init = { 'F': 'kmeans', 'G': 'least', 'Sn': 'least', 'Sm': 'least', 'lambdat': 'random', 'lambdaS': 'exp', 'tau': 'random' }
    HMF = HMF_Gibbs(R,C,D,K,settings,priors)
    HMF.initialise(init)
    for E1 in E:
        for i,k in itertools.product(xrange(0,I[E1]),xrange(0,K[E1])):
            assert HMF.all_Ft[E1][i,k] == 0.2 or HMF.all_Ft[E1][i,k] == 1.2
    for n in range(0,N):
        E1,E2 = E_per_Rn[n]
        for k,l in itertools.product(xrange(0,K[E1]),xrange(0,K[E2])):
            assert HMF.all_Sn[n][k,l] != 0.
        assert HMF.all_taun[n] >= 0.
    expected_all_taum = [0.47612886531245974,1.7230629295737439]
    for m in range(0,M):
        E1 = E_per_Cm[m]
        for k,l in itertools.product(xrange(0,K[E1]),xrange(0,K[E1])):
            assert HMF.all_Sm[m][k,l] != 0.
        assert HMF.all_taum[m] >= 0.
    expected_all_taul = [4.1601208459214458]
    for l in range(0,L):
        E1 = E_per_Dl[l]
        for j,k in itertools.product(xrange(0,J[l]),xrange(0,K[E1])):
            # 'least' init: least-squares solution, not the prior expectation.
            assert HMF.all_Gl[l][j,k] != 1./lambdaG
        assert HMF.all_taul[l] >= 0.
def test_init():
    """ We need to test the following cases:
    1. Dataset R relates same two entity types
    2. Rn and Cm are not 2-dimensional matrices
    3. Rn and Mn are of different sizes
    4. Cm and Mm are of different sizes
    5. Cm is not a square matrix
    6. R1 and R2 both relate E but have different no. of entities
    7. R and C both relate E but have different no. of entities
    8. An entity has no observed datapoints at all
    9. K does not have an entry for each entity
    10. Finally, we need to test whether all variables are correctly initialised """

    # --- Shared fixture: three entity types (note E2 is deliberately a
    # non-string key, 1337), three R datasets, two C (similarity) datasets,
    # and one D (feature) dataset. ---
    E0, E1, E2 = 'entity0','entity1',1337
    I0, I1, I2 = 10,9,8
    K0, K1, K2 = 3,2,1
    J0 = 4
    N, M, L, T = 3, 2, 1, 3

    R0 = numpy.ones((I0,I1)) # relates E0, E1
    R1 = numpy.ones((I0,I1)) # relates E0, E1
    R2 = numpy.ones((I1,I2)) # relates E1, E2
    C0 = numpy.ones((I0,I0)) # relates E0
    C1 = numpy.ones((I2,I2)) # relates E2
    D0 = numpy.ones((I2,J0)) # relates E2
    Mn0 = numpy.ones((I0,I1))
    Mn1 = numpy.ones((I0,I1))
    Mn2 = numpy.ones((I1,I2))
    Mm0 = numpy.ones((I0,I0))
    Mm1 = numpy.ones((I2,I2))
    Ml0 = numpy.ones((I2,J0))

    # Expected observed-entry counts per dataset; for the similarity
    # matrices C the diagonal is excluded, hence I*(I-1).
    size_Omegan = [I0*I1,I0*I1,I1*I2]
    size_Omegam = [I0*(I0-1),I2*(I2-1)]
    size_Omegal = [I2*J0]

    alphan = [11.,12.,13.]
    alpham = [14.,15.]
    alphal = [16.]

    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]
    D = [(D0,Ml0,E2,alphal[0])]
    E = [E0,E1,E2]
    K = {E0:K0,E1:K1,E2:K2}
    I = {E0:I0,E1:I1,E2:I2}
    J = [J0]

    # Expected bookkeeping: which dataset indices use each entity type as
    # rows (U1t), columns (U2t), in C (Vt), and in D (Wt).
    U1t = {'entity0':[0,1], 'entity1':[2], 1337:[] }
    U2t = {'entity0':[], 'entity1':[0,1], 1337:[2] }
    Vt = {'entity0':[0], 'entity1':[], 1337:[1] }
    Wt = {'entity0':[], 'entity1':[], 1337:[0]}
    E_per_Rn = [(E0,E1),(E0,E1),(E1,E2)]
    E_per_Cm = [E0,E2]
    E_per_Dl = [E2]

    alphatau, betatau = 1., 2.
    alpha0, beta0 = 6., 7.
    lambdaF, lambdaG = 3., 8.
    lambdaSn, lambdaSm = 4., 5.
    priors = { 'alpha0':alpha0, 'beta0':beta0, 'alphatau':alphatau, 'betatau':betatau, 'lambdaF':lambdaF, 'lambdaG':lambdaG, 'lambdaSn':lambdaSn, 'lambdaSm':lambdaSm }
    settings = { 'priorF' : 'normal', 'priorG' : 'exponential', 'priorSn' : 'normal', 'priorSm' : 'exponential', 'orderF' : 'rows', 'orderG' : 'columns', 'orderSn' : 'individual', 'orderSm' : 'rows', 'ARD' : True, 'element_sparsity' : True, }

    # Each negative case below corrupts one input, checks the exact
    # AssertionError message, then restores the input for the next case.

    ''' 1. Dataset R relates same two entity types '''
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E1,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "Gave same entity type for R1: entity1."

    ''' 2. Rn and Cm are not 2-dimensional matrices '''
    R1 = numpy.ones(I0)
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "R1 is not 2-dimensional, but instead 1-dimensional."
    R1 = numpy.ones((I0,I1))
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    C0 = numpy.ones((I0,I1,I2))
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "C0 is not 2-dimensional, but instead 3-dimensional."
    C0 = numpy.ones((I0,I0))
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]

    ''' 3. Rn and Mn are of different sizes '''
    R2 = numpy.ones((I1,I2))
    Mn2 = numpy.ones((I0,I1))  # mask deliberately has the wrong shape
    # NOTE(review): third tuple passes alphan[1], not alphan[2] — looks like
    # a copy-paste slip; harmless here since construction fails, but confirm.
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[1])]
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "Different shapes for R2 and M2: (9, 8) and (10, 9)."
    R2 = numpy.ones((I1,I2))
    Mn2 = numpy.ones((I1,I2))
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]

    ''' 4. Cm and Mm are of different sizes '''
    C1 = numpy.ones((I2,I2))
    Mm1 = numpy.ones((I1,I1))  # mask deliberately has the wrong shape
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "Different shapes for C1 and M1: (8, 8) and (9, 9)."
    C1 = numpy.ones((I2,I2))
    Mm1 = numpy.ones((I2,I2))
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]

    ''' 5. Cm is not a square matrix '''
    C0 = numpy.ones((I1,I2))
    Mm0 = numpy.ones((I1,I2))
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "C0 is not a square matrix: (9, 8)."
    C0 = numpy.ones((I0,I0))
    Mm0 = numpy.ones((I0,I0))
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]

    ''' 6. R1 and R2 both relate E but have different no. of entities '''
    R2 = numpy.ones((I1+1,I2))  # 10 rows for entity1, vs 9 columns in R0/R1
    Mn2 = numpy.ones((I1+1,I2))
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "Different number of rows (10) in R2 for entity type entity1 than before (9)!"
    R2 = numpy.ones((I1,I2))
    Mn2 = numpy.ones((I1,I2))
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]

    ''' 7. R and C both relate E but have different no. of entities '''
    R2 = numpy.ones((I1,I2+1))  # 9 entities for 1337 here, vs 8 in C1
    Mn2 = numpy.ones((I1,I2+1))
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "Different number of rows (8) in C1 for entity type 1337 than before (9)!"
    R2 = numpy.ones((I1,I2))
    Mn2 = numpy.ones((I1,I2))
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]

    ''' 8. An entity has no observed datapoints at all '''
    # Hide every observation of entity1 index 1 across all three R datasets.
    Mn0[:,1] = numpy.zeros(I0)
    Mn1[:,1] = numpy.zeros(I0)
    Mn2[1,:] = numpy.zeros(I2)
    ''' Concurrently also test not getting an error for entity0 '''
    # entity0 index 2 loses its R observations but keeps some in C0 (only
    # row 2 of Mm0 is zeroed), so it must NOT trigger the error.
    Mn0[2,:] = numpy.zeros(I1)
    Mn1[2,:] = numpy.zeros(I1)
    Mm0[2,:] = numpy.zeros(I0)
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    # NOTE(review): C1's alpha is alpham[0] here, unlike alpham[1] elsewhere —
    # presumably a typo; benign because construction fails, but confirm.
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[0])]
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "No observed datapoints in any dataset for entity 1 of type entity1."
    Mn0 = numpy.ones((I0,I1))
    Mn1 = numpy.ones((I0,I1))
    Mn2 = numpy.ones((I1,I2))
    Mm0 = numpy.ones((I0,I0))
    R = [(R0,Mn0,E0,E1,alphan[0]),(R1,Mn1,E0,E1,alphan[1]),(R2,Mn2,E1,E2,alphan[2])]
    C = [(C0,Mm0,E0,alpham[0]),(C1,Mm1,E2,alpham[1])]

    ''' 9. K does not have an entry for each entity '''
    K = {E0:K0,E2:K2}
    with pytest.raises(AssertionError) as error:
        HMF_Gibbs(R,C,D,K,settings,priors)
    assert str(error.value) == "Did not get an entry for entity entity1 in K = {1337: 1, 'entity0': 3}."
    K = {E0:K0,E1:K1,E2:K2}

    ''' 10. Finally, we need to test whether all variables are correctly initialised '''
    # All inputs are valid again; construction must succeed and every field
    # of the HMF_Gibbs object must match the expectations built above.
    HMF = HMF_Gibbs(R,C,D,K,settings,priors)
    assert numpy.array_equal(HMF.all_E,E)
    for R,Rtrue in zip(HMF.all_Rn,[R0,R1,R2]):
        assert numpy.array_equal(R,Rtrue)
    for Mn,Mntrue in zip(HMF.all_Mn,[Mn0,Mn1,Mn2]):
        assert numpy.array_equal(Mn,Mntrue)
    for C,Ctrue in zip(HMF.all_Cm,[C0,C1]):
        assert numpy.array_equal(C,Ctrue)
    for Mm,Mmtrue in zip(HMF.all_Mm,[Mm0,Mm1]):
        assert numpy.array_equal(Mm,Mmtrue)
    for Dl,Dltrue in zip(HMF.all_Dl,[D0]):
        assert numpy.array_equal(Dl,Dltrue)
    assert HMF.size_Omegan == size_Omegan
    assert HMF.size_Omegam == size_Omegam
    assert HMF.size_Omegal == size_Omegal
    assert HMF.E_per_Rn == E_per_Rn
    assert HMF.E_per_Cm == E_per_Cm
    assert HMF.E_per_Dl == E_per_Dl
    assert HMF.all_alphan == alphan
    assert HMF.all_alpham == alpham
    assert HMF.all_alphal == alphal
    assert numpy.array_equal(HMF.K,K)
    assert HMF.I == I
    assert HMF.J == J
    assert HMF.N == N
    assert HMF.M == M
    assert HMF.L == L
    assert HMF.T == T
    assert HMF.U1t == U1t
    assert HMF.U2t == U2t
    assert HMF.Vt == Vt
    assert HMF.Wt == Wt
    # Factor/parameter containers start empty; they are filled by initialise().
    assert HMF.all_Ft == { 'entity0':[], 'entity1':[], 1337:[] }
    assert HMF.all_Sn == []
    assert HMF.all_Sm == []
    assert HMF.all_Gl == []
    assert HMF.all_taun == []
    assert HMF.all_taum == []
    assert HMF.all_taul == []
    assert HMF.alpha0 == alpha0
    assert HMF.beta0 == beta0
    assert HMF.alphatau == alphatau
    assert HMF.betatau == betatau
    assert HMF.lambdaF == lambdaF
    assert HMF.lambdaG == lambdaG
    assert HMF.lambdaSn == lambdaSn
    assert HMF.lambdaSm == lambdaSm
    # Settings were: { 'priorF' : 'normal', 'priorG' : 'exponential', 'priorSn' : 'normal', 'priorSm' : 'exponential',
    #                  'orderF' : 'rows', 'orderG' : 'columns', 'orderSn' : 'individual', 'orderSm' : 'rows', 'ARD' : True }
    # Per-dataset settings are expanded to one entry per R/C/D dataset.
    assert HMF.prior_F == 'normal'
    assert HMF.prior_G == ['exponential']
    assert HMF.prior_Sn == ['normal','normal','normal']
    assert HMF.prior_Sm == ['exponential','exponential']
    assert HMF.order_F == 'rows'
    assert HMF.order_G == ['columns']
    assert HMF.order_Sn == ['individual','individual','individual']
    assert HMF.order_Sm == ['rows','rows']
    assert HMF.ARD == True
    assert HMF.element_sparsity == True
    # Derived flags: rows_* follows from order_* ('rows'/'individual'),
    # nonnegative_* follows from prior_* ('exponential' => nonnegative).
    assert HMF.rows_F == True
    assert HMF.rows_G == [False]
    assert HMF.rows_Sn == [False,False,False]
    assert HMF.rows_Sm == [True,True]
    assert HMF.nonnegative_F == False
    assert HMF.nonnegative_G == [True]
    assert HMF.nonnegative_Sn == [False,False,False]
    assert HMF.nonnegative_Sm == [True,True]