def run_all_settings(all_K_alpha):
    """Cross-validate HMF for each value of K and log the performances.

    For every K in the module-level ``all_K``, a 10-fold cross-validation is
    run in which the held-out rows of ``Y`` are masked, an ``HMF_Gibbs`` model
    is trained on ``X1``, ``X2`` and the masked ``Y``, and the predictions for
    the held-out entries are scored (MSE, R^2, Rp). Per-K summaries are
    printed and appended to the results text file; the list of average MSEs
    is printed at the end for plotting.

    NOTE(review): the ``all_K_alpha`` argument is never read. The loop
    iterates the module-level ``all_K`` and uses the module-level ``alpha``;
    confirm whether this argument was meant to supply those values.

    Relies on module-level names defined outside this function: ``all_K``,
    ``alpha``, ``X1``, ``X2``, ``Y``, ``C``, ``D``, ``settings``,
    ``hyperparameters``, ``init``, ``iterations``, ``burn_in``, ``thinning``.

    Returns nothing.
    """
    n_folds = 10
    all_average_performances = []

    # Use a context manager so the results file is always closed, even when
    # one of the runs raises part-way through.
    with open('results_varying_K_hmf_no_ARD_ge_gm_to_pm_std.txt', 'w') as fout:
        for K in all_K:
            # Compute the folds
            n = len(X1)
            shuffle, random_state = True, None
            folds = KFold(n=n, n_folds=n_folds, shuffle=shuffle,
                          random_state=random_state)

            # Run HMF to predict Y from X
            all_MSE = numpy.zeros(n_folds)
            all_R2 = numpy.zeros(n_folds)
            all_Rp = numpy.zeros(n_folds)
            for i, (train_index, test_index) in enumerate(folds):
                print("Training fold %s for HMF-MTF." % (i + 1))

                # Split into train and test: X1 and X2 are fully observed,
                # the test rows of Y are masked out for training.
                M_X1 = numpy.ones(X1.shape)
                M_X2 = numpy.ones(X2.shape)
                M_Y_train = numpy.ones(Y.shape)
                M_Y_train[test_index] = 0.
                M_Y_test = 1. - M_Y_train

                R = [
                    (X1, M_X1, 'samples', 'genes', alpha[0]),
                    (X2, M_X2, 'samples', 'genes', alpha[1]),
                    (Y, M_Y_train, 'samples', 'genes', alpha[2]),
                ]

                # Train and predict
                HMF = HMF_Gibbs(R, C, D, K, settings, hyperparameters)
                HMF.initialise(init)
                HMF.run(iterations)

                # Compute the performances on the held-out entries of Y,
                # which is the dataset at index 2 of R.
                performances = HMF.predict_Rn(
                    n=2, M_pred=M_Y_test, burn_in=burn_in, thinning=thinning)
                all_MSE[i] = performances['MSE']
                all_R2[i] = performances['R^2']
                all_Rp[i] = performances['Rp']
                print("MSE: %s. R^2: %s. Rp: %s." % (
                    performances['MSE'], performances['R^2'],
                    performances['Rp']))

            summary = (all_MSE.mean(), all_MSE.std(),
                       all_R2.mean(), all_R2.std(),
                       all_Rp.mean(), all_Rp.std())
            print("Average MSE: %s +- %s. \nAverage R^2: %s +- %s. \nAverage Rp: %s +- %s." % summary)

            fout.write('Tried MF on PM -> GE, with K = %s, alphan = %s.\n' % (K, alpha))
            fout.write('Average MSE: %s +- %s. \nAverage R^2: %s +- %s. \nAverage Rp: %s +- %s.\n' % summary)
            fout.write('All MSE: %s. \nAll R^2: %s. \nAll Rp: %s.\n\n' % (
                list(all_MSE), list(all_R2), list(all_Rp)))
            # Flush after every K so partial results survive an interrupted run.
            fout.flush()

            all_average_performances.append(all_MSE.mean())

    # Print for plotting.
    print("all_K = %s \nall_average_performances = %s" % (
        all_K, all_average_performances))
# NOTE(review): `fraction`, `metrics`, `repeats`, `Ms_train_test` and the
# datasets/masks used below are defined outside this view; presumably this
# section runs once per fraction -- confirm against the enclosing code.
print("Trying fraction %s." % fraction)

# Start a fresh list of per-repeat scores for this fraction, one per metric.
for metric in metrics:
    all_performances[metric].append([])

# Run the algorithm <repeats> times and store all the performances.
for repeat, (M_train, M_test) in zip(range(0, repeats), Ms_train_test):
    print("Repeat %s of fraction %s." % (repeat + 1, fraction))

    # No R or C datasets are passed; all four datasets go in as D. Only the
    # first one uses this repeat's training mask.
    R, C = [], []
    D = [
        (R_ctrp, M_train, 'Cell_lines', alpha_l[0]),
        (R_gdsc, M_gdsc, 'Cell_lines', alpha_l[1]),
        (R_ccle_ic, M_ccle_ic, 'Cell_lines', alpha_l[2]),
        (R_ccle_ec, M_ccle_ec, 'Cell_lines', alpha_l[3]),
    ]

    HMF = HMF_Gibbs(R, C, D, K, settings, hyperparameters)
    HMF.initialise(init)
    HMF.run(iterations)

    # Measure the performances on the test mask of the first D dataset.
    performances = HMF.predict_Dl(
        l=0, M_pred=M_test, burn_in=burn_in, thinning=thinning)

    # Add each metric's performance to the list of <repeat> performances
    # for this fraction.
    for metric in metrics:
        all_performances[metric][-1].append(performances[metric])

# Compute the average across attempts.
for metric in metrics:
    average_performances[metric].append(
        sum(all_performances[metric][-1]) / repeats)
def test_run():
    """Check that HMF_Gibbs.run() stores every iteration and that draws change.

    Builds a small model with three entity types, three R datasets, two C
    datasets and one D dataset (all-ones data and masks), runs 10 Gibbs
    iterations, and then verifies that:

    * every ``iterations_all_*`` container holds one entry per iteration;
    * every stored value differs from its value in the previous iteration.
    """
    # Settings
    E0, E1, E2 = 'entity0', 'entity1', 1337
    I0, I1, I2 = 10, 9, 8
    K0, K1, K2 = 3, 2, 1
    J0 = 4
    N, M, L = 3, 2, 1

    R0 = numpy.ones((I0, I1))  # relates E0, E1
    R1 = numpy.ones((I0, I1))  # relates E0, E1
    R2 = numpy.ones((I1, I2))  # relates E1, E2
    C0 = numpy.ones((I0, I0))  # relates E0
    C1 = numpy.ones((I2, I2))  # relates E2
    D0 = numpy.ones((I2, J0))  # relates E2

    Mn0 = numpy.ones((I0, I1))
    Mn1 = numpy.ones((I0, I1))
    Mn2 = numpy.ones((I1, I2))
    Mm0 = numpy.ones((I0, I0))
    Mm1 = numpy.ones((I2, I2))
    Ml0 = numpy.ones((I2, J0))

    alphan = [11., 12., 13.]
    alpham = [14., 15.]
    alphal = [16.]

    R = [(R0, Mn0, E0, E1, alphan[0]),
         (R1, Mn1, E0, E1, alphan[1]),
         (R2, Mn2, E1, E2, alphan[2])]
    C = [(C0, Mm0, E0, alpham[0]),
         (C1, Mm1, E2, alpham[1])]
    D = [(D0, Ml0, E2, alphal[0])]

    E = [E0, E1, E2]
    K = {E0: K0, E1: K1, E2: K2}
    I = {E0: I0, E1: I1, E2: I2}
    J = [J0]

    # Entity type(s) behind each dataset, in the same order as R, C and D.
    E_per_Rn = [(E0, E1), (E0, E1), (E1, E2)]
    E_per_Cm = [E0, E2]
    E_per_Dl = [E2]

    alphatau, betatau = 1., 2.
    alpha0, beta0 = 6., 7.
    lambdaF, lambdaG = 3., 8.
    lambdaSn, lambdaSm = 4., 5.
    priors = {
        'alpha0': alpha0, 'beta0': beta0,
        'alphatau': alphatau, 'betatau': betatau,
        'lambdaF': lambdaF, 'lambdaG': lambdaG,
        'lambdaSn': lambdaSn, 'lambdaSm': lambdaSm,
    }
    settings = {
        'priorF': 'exponential', 'priorG': 'normal',
        'priorSn': 'normal', 'priorSm': 'normal',
        'orderF': 'columns', 'orderG': 'rows',
        'orderSn': 'rows', 'orderSm': 'rows',
        'ARD': True, 'element_sparsity': True,
    }
    init = {
        'F': 'kmeans', 'G': 'least', 'Sn': 'least', 'Sm': 'least',
        'lambdat': 'random', 'lambdaS': 'random', 'tau': 'random',
    }
    iterations = 10

    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)
    HMF.run(iterations)

    # Do size checks: one stored draw per iteration for every parameter.
    for entity in E:
        assert len(HMF.iterations_all_Ft[entity]) == iterations
        assert len(HMF.iterations_all_lambdat[entity]) == iterations
    for n in range(0, N):
        assert len(HMF.iterations_all_lambdan[n]) == iterations
        assert len(HMF.iterations_all_Sn[n]) == iterations
        assert len(HMF.iterations_all_taun[n]) == iterations
    for m in range(0, M):
        assert len(HMF.iterations_all_lambdam[m]) == iterations
        assert len(HMF.iterations_all_Sm[m]) == iterations
        assert len(HMF.iterations_all_taum[m]) == iterations
    for l in range(0, L):
        assert len(HMF.iterations_all_Gl[l]) == iterations
        assert len(HMF.iterations_all_taul[l]) == iterations

    # Check whether values change each iteration.
    for iteration in range(1, iterations):
        previous = iteration - 1

        for entity in E:
            for k in range(0, K[entity]):
                assert HMF.iterations_all_lambdat[entity][iteration][k] != \
                    HMF.iterations_all_lambdat[entity][previous][k]
            for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
                assert HMF.iterations_all_Ft[entity][iteration][i, k] != \
                    HMF.iterations_all_Ft[entity][previous][i, k]

        for n in range(0, N):
            E_row, E_col = E_per_Rn[n]
            for k, kk in itertools.product(range(0, K[E_row]), range(0, K[E_col])):
                assert HMF.iterations_all_lambdan[n][iteration][k, kk] != \
                    HMF.iterations_all_lambdan[n][previous][k, kk]
                assert HMF.iterations_all_Sn[n][iteration][k, kk] != \
                    HMF.iterations_all_Sn[n][previous][k, kk]
            assert HMF.iterations_all_taun[n][iteration] != \
                HMF.iterations_all_taun[n][previous]

        for m in range(0, M):
            entity = E_per_Cm[m]
            for k, kk in itertools.product(range(0, K[entity]), range(0, K[entity])):
                assert HMF.iterations_all_lambdam[m][iteration][k, kk] != \
                    HMF.iterations_all_lambdam[m][previous][k, kk]
                assert HMF.iterations_all_Sm[m][iteration][k, kk] != \
                    HMF.iterations_all_Sm[m][previous][k, kk]
            assert HMF.iterations_all_taum[m][iteration] != \
                HMF.iterations_all_taum[m][previous]

        # Iterate over all L datasets in D. The loop bound must be the count
        # L, not a leftover lowercase loop index, or these checks are skipped.
        for l in range(0, L):
            entity = E_per_Dl[l]
            for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
                # Compare G with G of the previous iteration.
                assert HMF.iterations_all_Gl[l][iteration][j, k] != \
                    HMF.iterations_all_Gl[l][previous][j, k]
            assert HMF.iterations_all_taul[l][iteration] != \
                HMF.iterations_all_taul[l][previous]
def test_initialise():
    """Check HMF_Gibbs.initialise() for four combinations of priors and inits.

    The same small model (three entity types, three R datasets, two C
    datasets, one D dataset, all-ones data and masks) is initialised under
    different ``settings`` / ``init`` dictionaries, and the resulting
    ``all_Ft``, ``all_lambdat``, ``all_Sn``, ``all_lambdan``, ``all_Sm``,
    ``all_lambdam``, ``all_Gl`` and ``all_tau*`` values are checked.

    Between them the cases are meant to cover:
    - F ~ Exp or ~ N
    - G ~ Exp or ~ N
    - S ~ Exp or ~ N
    - ARD or no ARD
    - F init random, exp, kmeans
    - G init random, exp, least
    - S init random, exp, least
    - lambdat init random, exp
    - tau init random, exp
    """
    E0, E1, E2 = 'entity0', 'entity1', 1337
    I0, I1, I2 = 10, 9, 8
    K0, K1, K2 = 3, 2, 1
    J0 = 4
    N, M, L = 3, 2, 1

    R0 = numpy.ones((I0, I1))  # relates E0, E1
    R1 = numpy.ones((I0, I1))  # relates E0, E1
    R2 = numpy.ones((I1, I2))  # relates E1, E2
    C0 = numpy.ones((I0, I0))  # relates E0
    C1 = numpy.ones((I2, I2))  # relates E2
    D0 = numpy.ones((I2, J0))  # relates E2

    Mn0 = numpy.ones((I0, I1))
    Mn1 = numpy.ones((I0, I1))
    Mn2 = numpy.ones((I1, I2))
    Mm0 = numpy.ones((I0, I0))
    Mm1 = numpy.ones((I2, I2))
    Ml0 = numpy.ones((I2, J0))

    alphan = [11., 12., 13.]
    alpham = [14., 15.]
    alphal = [16.]

    R = [(R0, Mn0, E0, E1, alphan[0]),
         (R1, Mn1, E0, E1, alphan[1]),
         (R2, Mn2, E1, E2, alphan[2])]
    C = [(C0, Mm0, E0, alpham[0]),
         (C1, Mm1, E2, alpham[1])]
    D = [(D0, Ml0, E2, alphal[0])]

    E = [E0, E1, E2]
    K = {E0: K0, E1: K1, E2: K2}
    I = {E0: I0, E1: I1, E2: I2}
    J = [J0]

    # Entity type(s) behind each dataset, in the same order as R, C and D.
    E_per_Rn = [(E0, E1), (E0, E1), (E1, E2)]
    E_per_Cm = [E0, E2]
    E_per_Dl = [E2]

    alphatau, betatau = 1., 2.
    alpha0, beta0 = 6., 7.
    alphaS, betaS = 9., 10.
    lambdaF, lambdaG = 3., 8.
    lambdaSn, lambdaSm = 4., 5.
    priors = {
        'alpha0': alpha0, 'beta0': beta0,
        'alphaS': alphaS, 'betaS': betaS,
        'alphatau': alphatau, 'betatau': betatau,
        'lambdaF': lambdaF, 'lambdaG': lambdaG,
        'lambdaSn': lambdaSn, 'lambdaSm': lambdaSm,
    }

    # ------------------------------------------------------------------
    # Case 1: F Exp, G Exp, S Exp, ARD, element-wise sparsity.
    # Init: F exp, G exp, S exp, lambdat exp, lambdaS exp, tau exp.
    # ------------------------------------------------------------------
    settings = {
        'priorF': 'exponential', 'priorG': 'exponential',
        'priorSn': 'exponential', 'priorSm': 'exponential',
        'orderF': 'rows', 'orderG': 'columns',
        'orderSn': 'individual', 'orderSm': 'individual',
        'ARD': True, 'element_sparsity': True,
    }
    init = {
        'F': 'exp', 'G': 'exp', 'Sn': 'exp', 'Sm': 'exp',
        'lambdat': 'exp', 'lambdaS': 'exp', 'tau': 'exp',
    }
    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)

    for entity in E:
        for k in range(0, K[entity]):
            assert HMF.all_lambdat[entity][k] == alpha0 / float(beta0)
        for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
            assert HMF.all_Ft[entity][i, k] == 1. / HMF.all_lambdat[entity][k]

    expected_all_taun = [0.015369654419961557, 0.015367151516936775, 0.2442062783472021]
    for n in range(0, N):
        E_row, E_col = E_per_Rn[n]
        expected_lambdan_kl = alphaS / float(betaS)
        for k, kk in itertools.product(range(0, K[E_row]), range(0, K[E_col])):
            assert HMF.all_lambdan[n][k, kk] == expected_lambdan_kl
            assert HMF.all_Sn[n][k, kk] == 1. / expected_lambdan_kl
        assert abs(HMF.all_taun[n] - expected_all_taun[n]) < 0.0000000001

    expected_all_taum = [0.0062975762814580696, 3.7505835292008993]
    for m in range(0, M):
        entity = E_per_Cm[m]
        expected_lambdam_kl = alphaS / float(betaS)
        for k, kk in itertools.product(range(0, K[entity]), range(0, K[entity])):
            assert HMF.all_lambdam[m][k, kk] == expected_lambdam_kl
            assert HMF.all_Sm[m][k, kk] == 1. / expected_lambdam_kl
        assert abs(HMF.all_taum[m] - expected_all_taum[m]) < 0.000000001

    expected_all_taul = [7.2634333565945441]
    for l in range(0, L):
        entity = E_per_Dl[l]
        for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
            assert HMF.all_Gl[l][j, k] == 1. / HMF.all_lambdat[entity][k]
        assert abs(HMF.all_taul[l] - expected_all_taul[l]) < 0.00000001

    # ------------------------------------------------------------------
    # Case 2: F Exp, G Exp, S N, no ARD, no element-wise sparsity.
    # Init: F random, G exp, Sn exp, Sm random, lambdaS exp, tau random.
    # ------------------------------------------------------------------
    settings = {
        'priorF': 'exponential', 'priorG': 'exponential',
        'priorSn': 'normal', 'priorSm': 'normal',
        'orderF': 'columns', 'orderG': 'rows',
        'orderSn': 'rows', 'orderSm': 'rows',
        'ARD': False, 'element_sparsity': False,
    }
    init = {
        'F': 'random', 'G': 'exp', 'Sn': 'exp', 'Sm': 'random',
        'lambdaS': 'exp', 'tau': 'random',
    }
    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)

    for entity in E:
        for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
            assert HMF.all_Ft[entity][i, k] != 1. / lambdaF

    for n in range(0, N):
        E_row, E_col = E_per_Rn[n]
        for k, kk in itertools.product(range(0, K[E_row]), range(0, K[E_col])):
            assert HMF.all_Sn[n][k, kk] == 0.01
        assert HMF.all_taun[n] >= 0.

    for m in range(0, M):
        entity = E_per_Cm[m]
        for k, kk in itertools.product(range(0, K[entity]), range(0, K[entity])):
            assert HMF.all_Sm[m][k, kk] != 0.
        assert HMF.all_taum[m] >= 0.

    for l in range(0, L):
        entity = E_per_Dl[l]
        for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
            assert HMF.all_Gl[l][j, k] == 1. / lambdaG
        assert HMF.all_taul[l] >= 0.

    # ------------------------------------------------------------------
    # Case 3: F N, G N, Sn Exp, Sm N, ARD; 'element_sparsity' and the
    # 'lambdaS' init are left unset.
    # Init: F kmeans, G exp, S random, lambdat random, tau random.
    # ------------------------------------------------------------------
    settings = {
        'priorF': 'normal', 'priorG': 'normal',
        'priorSn': 'exponential', 'priorSm': 'normal',
        'ARD': True,
        'orderF': 'columns', 'orderG': 'rows',
        'orderSn': 'rows', 'orderSm': 'rows',
    }
    init = {
        'F': 'kmeans', 'G': 'exp', 'Sn': 'random', 'Sm': 'random',
        'lambdat': 'random', 'tau': 'random',
    }
    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)

    for entity in E:
        for k in range(0, K[entity]):
            assert HMF.all_lambdat[entity][k] >= 0.
        for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
            assert HMF.all_Ft[entity][i, k] == 0.2 or HMF.all_Ft[entity][i, k] == 1.2

    for n in range(0, N):
        E_row, E_col = E_per_Rn[n]
        for k, kk in itertools.product(range(0, K[E_row]), range(0, K[E_col])):
            assert HMF.all_Sn[n][k, kk] >= 0.
        assert HMF.all_taun[n] >= 0.

    for m in range(0, M):
        entity = E_per_Cm[m]
        for k, kk in itertools.product(range(0, K[entity]), range(0, K[entity])):
            assert HMF.all_Sm[m][k, kk] != 0.
        assert HMF.all_taum[m] >= 0.

    for l in range(0, L):
        entity = E_per_Dl[l]
        for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
            assert HMF.all_Gl[l][j, k] == 0.01
        assert HMF.all_taul[l] >= 0.

    # ------------------------------------------------------------------
    # Case 4: F Exp, G N, S N, no ARD, no element-wise sparsity.
    # Init: F kmeans, G least, S least, lambdat random, lambdaS exp,
    # tau random.
    # ------------------------------------------------------------------
    settings = {
        'priorF': 'exponential', 'priorG': 'normal',
        'priorSn': 'normal', 'priorSm': 'normal',
        'orderF': 'columns', 'orderG': 'rows',
        'orderSn': 'rows', 'orderSm': 'rows',
        'ARD': False, 'element_sparsity': False,
    }
    init = {
        'F': 'kmeans', 'G': 'least', 'Sn': 'least', 'Sm': 'least',
        'lambdat': 'random', 'lambdaS': 'exp', 'tau': 'random',
    }
    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)

    for entity in E:
        for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
            assert HMF.all_Ft[entity][i, k] == 0.2 or HMF.all_Ft[entity][i, k] == 1.2

    for n in range(0, N):
        E_row, E_col = E_per_Rn[n]
        for k, kk in itertools.product(range(0, K[E_row]), range(0, K[E_col])):
            assert HMF.all_Sn[n][k, kk] != 0.
        assert HMF.all_taun[n] >= 0.

    for m in range(0, M):
        entity = E_per_Cm[m]
        for k, kk in itertools.product(range(0, K[entity]), range(0, K[entity])):
            assert HMF.all_Sm[m][k, kk] != 0.
        assert HMF.all_taum[m] >= 0.

    for l in range(0, L):
        entity = E_per_Dl[l]
        for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
            assert HMF.all_Gl[l][j, k] != 1. / lambdaG
        assert HMF.all_taul[l] >= 0.