예제 #1
0
def run_all_settings(all_K_alpha):
    """Cross-validate HMF for every K in ``all_K`` and log the performances.

    For each K, 10 shuffled folds are drawn over the rows of X1. In every fold
    the held-out rows of Y are masked out, an HMF_Gibbs model is trained on
    X1, X2 and the masked Y, and the held-out entries of Y (dataset index 2 in
    R) are predicted. The per-fold MSE, R^2 and Rp, plus their mean and
    standard deviation, are printed and written to
    'results_varying_K_hmf_no_ARD_ge_gm_to_pm_std.txt'.

    NOTE(review): the ``all_K_alpha`` argument is never read. The body uses
    ``all_K`` and ``alpha`` (as well as X1, X2, Y, C, D, settings,
    hyperparameters, init, iterations, burn_in and thinning) from an enclosing
    scope - confirm whether the argument was meant to supply K and alpha.

    Returns:
        None. Results are reported via stdout and the results file.
    """
    n_folds = 10
    shuffle, random_state = True, None
    all_average_performances = []

    # The context manager guarantees the results file is closed, also when a
    # training run raises halfway through the sweep.
    with open('results_varying_K_hmf_no_ARD_ge_gm_to_pm_std.txt', 'w') as fout:
        for K in all_K:
            # Compute the folds (redrawn for every K).
            folds = KFold(n=len(X1),
                          n_folds=n_folds,
                          shuffle=shuffle,
                          random_state=random_state)

            # Run HMF to predict Y from X
            all_MSE = numpy.zeros(n_folds)
            all_R2 = numpy.zeros(n_folds)
            all_Rp = numpy.zeros(n_folds)
            for i, (train_index, test_index) in enumerate(folds):
                print("Training fold %s for HMF-MTF." % (i + 1))

                # Split into train and test: X1 and X2 are fully observed,
                # the test rows of Y are hidden during training.
                M_X1, M_X2 = numpy.ones(X1.shape), numpy.ones(X2.shape)
                M_Y_train = numpy.ones(Y.shape)
                M_Y_train[test_index] = 0.
                M_Y_test = 1. - M_Y_train

                R = [(X1, M_X1, 'samples', 'genes', alpha[0]),
                     (X2, M_X2, 'samples', 'genes', alpha[1]),
                     (Y, M_Y_train, 'samples', 'genes', alpha[2])]

                # Train and predict
                HMF = HMF_Gibbs(R, C, D, K, settings, hyperparameters)
                HMF.initialise(init)
                HMF.run(iterations)

                # Compute the performances on the held-out entries of Y
                performances = HMF.predict_Rn(n=2,
                                              M_pred=M_Y_test,
                                              burn_in=burn_in,
                                              thinning=thinning)

                all_MSE[i] = performances['MSE']
                all_R2[i] = performances['R^2']
                all_Rp[i] = performances['Rp']
                print("MSE: %s. R^2: %s. Rp: %s." % (
                    performances['MSE'], performances['R^2'], performances['Rp']))

            # Format the summary once; it goes to both stdout and the file.
            summary = "Average MSE: %s +- %s. \nAverage R^2: %s +- %s. \nAverage Rp:  %s +- %s." % \
                (all_MSE.mean(), all_MSE.std(), all_R2.mean(), all_R2.std(), all_Rp.mean(), all_Rp.std())
            print(summary)

            fout.write('Tried MF on PM -> GE, with K = %s, alphan = %s.\n' %
                       (K, alpha))
            fout.write(summary + '\n')
            fout.write('All MSE: %s. \nAll R^2: %s. \nAll Rp: %s.\n\n' %
                       (list(all_MSE), list(all_R2), list(all_Rp)))
            # Flush after every K so partial results survive an interrupted run.
            fout.flush()

            all_average_performances.append(all_MSE.mean())

    # Print for plotting.
    print("all_K = %s \nall_average_performances = %s" % (
        all_K, all_average_performances))
예제 #2
0
    # NOTE(review): this is a fragment; the enclosing definition/loop that
    # binds `fraction`, `metrics`, `repeats`, `Ms_train_test`, the datasets and
    # the result dictionaries is not visible here.
    print "Trying fraction %s." % fraction

    # Run the algorithm <repeats> times and store all the performances
    for metric in metrics:
        # Open a fresh list for this fraction; the repeats below append to it.
        all_performances[metric].append([])
    # zip() stops at the shorter input, so at most <repeats> splits are used.
    for repeat, (M_train, M_test) in zip(range(0, repeats), Ms_train_test):
        print "Repeat %s of fraction %s." % (repeat + 1, fraction)

        # Only the first dataset gets the training mask of this split; the
        # other three use their own masks.
        D = [(R_ctrp, M_train, 'Cell_lines', alpha_l[0]),
             (R_gdsc, M_gdsc, 'Cell_lines', alpha_l[1]),
             (R_ccle_ic, M_ccle_ic, 'Cell_lines', alpha_l[2]),
             (R_ccle_ec, M_ccle_ec, 'Cell_lines', alpha_l[3])]
        # No R or C datasets are passed to the model, only the D list above.
        R, C = [], []

        HMF = HMF_Gibbs(R, C, D, K, settings, hyperparameters)
        HMF.initialise(init)
        HMF.run(iterations)

        # Measure the performances
        # l=0 selects the first entry of D (R_ctrp), scored on the M_test mask.
        performances = HMF.predict_Dl(l=0,
                                      M_pred=M_test,
                                      burn_in=burn_in,
                                      thinning=thinning)
        for metric in metrics:
            # Add this metric's performance to the list of <repeat> performances for this fraction
            all_performances[metric][-1].append(performances[metric])

    # Compute the average across attempts
    # NOTE(review): the divisor is `repeats`, not the number of collected
    # values; they differ if Ms_train_test yields fewer than `repeats` splits.
    for metric in metrics:
        average_performances[metric].append(
            sum(all_performances[metric][-1]) / repeats)
예제 #3
0
def test_run():
    """Run the HMF Gibbs sampler for a few iterations and check its history.

    Builds three R datasets, two C datasets and one D dataset of all ones over
    three entity types, runs HMF_Gibbs for `iterations` iterations, and then
    asserts that:
      * every per-iteration history list has exactly `iterations` entries;
      * every stored value differs from the value in the previous iteration.
    """
    # Settings
    E0, E1, E2 = 'entity0', 'entity1', 1337
    I0, I1, I2 = 10, 9, 8
    K0, K1, K2 = 3, 2, 1
    J0 = 4
    N, M, L = 3, 2, 1

    R0 = numpy.ones((I0, I1))  # relates E0, E1
    R1 = numpy.ones((I0, I1))  # relates E0, E1
    R2 = numpy.ones((I1, I2))  # relates E1, E2
    C0 = numpy.ones((I0, I0))  # relates E0
    C1 = numpy.ones((I2, I2))  # relates E2
    D0 = numpy.ones((I2, J0))  # relates E2

    Mn0 = numpy.ones((I0, I1))
    Mn1 = numpy.ones((I0, I1))
    Mn2 = numpy.ones((I1, I2))
    Mm0 = numpy.ones((I0, I0))
    Mm1 = numpy.ones((I2, I2))
    Ml0 = numpy.ones((I2, J0))

    alphan = [11., 12., 13.]
    alpham = [14., 15.]
    alphal = [16.]

    R = [(R0, Mn0, E0, E1, alphan[0]), (R1, Mn1, E0, E1, alphan[1]), (R2, Mn2, E1, E2, alphan[2])]
    C = [(C0, Mm0, E0, alpham[0]), (C1, Mm1, E2, alpham[1])]
    D = [(D0, Ml0, E2, alphal[0])]
    E = [E0, E1, E2]
    K = {E0: K0, E1: K1, E2: K2}
    I = {E0: I0, E1: I1, E2: I2}
    J = [J0]

    # Entity types involved in each dataset, in the same order as R, C and D.
    E_per_Rn = [(E0, E1), (E0, E1), (E1, E2)]
    E_per_Cm = [E0, E2]
    E_per_Dl = [E2]

    alphatau, betatau = 1., 2.
    alpha0, beta0 = 6., 7.
    lambdaF, lambdaG = 3., 8.
    lambdaSn, lambdaSm = 4., 5.
    priors = {'alpha0': alpha0, 'beta0': beta0, 'alphatau': alphatau, 'betatau': betatau,
              'lambdaF': lambdaF, 'lambdaG': lambdaG, 'lambdaSn': lambdaSn, 'lambdaSm': lambdaSm}
    settings = {'priorF': 'exponential', 'priorG': 'normal', 'priorSn': 'normal', 'priorSm': 'normal',
                'orderF': 'columns', 'orderG': 'rows', 'orderSn': 'rows', 'orderSm': 'rows',
                'ARD': True, 'element_sparsity': True}
    init = {'F': 'kmeans', 'G': 'least', 'Sn': 'least', 'Sm': 'least',
            'lambdat': 'random', 'lambdaS': 'random', 'tau': 'random'}
    iterations = 10

    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)
    HMF.run(iterations)

    # Do size checks. Loop variables get their own names so the entity
    # constants E0/E1/E2 defined above are never overwritten.
    for entity in E:
        assert len(HMF.iterations_all_Ft[entity]) == iterations
        assert len(HMF.iterations_all_lambdat[entity]) == iterations
    for n in range(0, N):
        assert len(HMF.iterations_all_lambdan[n]) == iterations
        assert len(HMF.iterations_all_Sn[n]) == iterations
        assert len(HMF.iterations_all_taun[n]) == iterations
    for m in range(0, M):
        assert len(HMF.iterations_all_lambdam[m]) == iterations
        assert len(HMF.iterations_all_Sm[m]) == iterations
        assert len(HMF.iterations_all_taum[m]) == iterations
    for l in range(0, L):
        assert len(HMF.iterations_all_Gl[l]) == iterations
        assert len(HMF.iterations_all_taul[l]) == iterations

    # Check whether values change each iteration
    for iteration in range(1, iterations):
        for entity in E:
            for k in range(0, K[entity]):
                assert HMF.iterations_all_lambdat[entity][iteration][k] != HMF.iterations_all_lambdat[entity][iteration-1][k]
            for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
                assert HMF.iterations_all_Ft[entity][iteration][i, k] != HMF.iterations_all_Ft[entity][iteration-1][i, k]
        for n in range(0, N):
            row_entity, col_entity = E_per_Rn[n]
            for k1, k2 in itertools.product(range(0, K[row_entity]), range(0, K[col_entity])):
                assert HMF.iterations_all_lambdan[n][iteration][k1, k2] != HMF.iterations_all_lambdan[n][iteration-1][k1, k2]
                assert HMF.iterations_all_Sn[n][iteration][k1, k2] != HMF.iterations_all_Sn[n][iteration-1][k1, k2]
            assert HMF.iterations_all_taun[n][iteration] != HMF.iterations_all_taun[n][iteration-1]
        for m in range(0, M):
            entity = E_per_Cm[m]
            for k1, k2 in itertools.product(range(0, K[entity]), range(0, K[entity])):
                assert HMF.iterations_all_lambdam[m][iteration][k1, k2] != HMF.iterations_all_lambdam[m][iteration-1][k1, k2]
                assert HMF.iterations_all_Sm[m][iteration][k1, k2] != HMF.iterations_all_Sm[m][iteration-1][k1, k2]
            assert HMF.iterations_all_taum[m][iteration] != HMF.iterations_all_taum[m][iteration-1]
        # Iterate over all L D-datasets (the count, not a leftover loop index),
        # and compare G with its own previous value in the Gl history.
        for l in range(0, L):
            entity = E_per_Dl[l]
            for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
                assert HMF.iterations_all_Gl[l][iteration][j, k] != HMF.iterations_all_Gl[l][iteration-1][j, k]
            assert HMF.iterations_all_taul[l][iteration] != HMF.iterations_all_taul[l][iteration-1]
예제 #4
0
def test_initialise():
    """Check HMF_Gibbs.initialise() for four combinations of priors and inits.

    The same all-ones datasets (three R, two C, one D over three entity types)
    are used throughout. For each combination a fresh HMF_Gibbs model is
    built and initialised, and the resulting F, S, G, lambda and tau values
    are asserted: exact values where the initialisation is deterministic
    ('exp'), and only sign / non-equality checks where it is random.

    The combinations are chosen to cover:
      - F ~ Exp or ~ N
      - G ~ Exp or ~ N
      - S ~ Exp or ~ N
      - ARD or no ARD
      - F init random, exp, kmeans
      - G init random, exp, least
      - S init random, exp, least
      - lambdat init random, exp
      - tau init random, exp
    """
    E0, E1, E2 = 'entity0', 'entity1', 1337
    I0, I1, I2 = 10, 9, 8
    K0, K1, K2 = 3, 2, 1
    J0 = 4
    N, M, L = 3, 2, 1

    R0 = numpy.ones((I0, I1))  # relates E0, E1
    R1 = numpy.ones((I0, I1))  # relates E0, E1
    R2 = numpy.ones((I1, I2))  # relates E1, E2
    C0 = numpy.ones((I0, I0))  # relates E0
    C1 = numpy.ones((I2, I2))  # relates E2
    D0 = numpy.ones((I2, J0))  # relates E2

    Mn0 = numpy.ones((I0, I1))
    Mn1 = numpy.ones((I0, I1))
    Mn2 = numpy.ones((I1, I2))
    Mm0 = numpy.ones((I0, I0))
    Mm1 = numpy.ones((I2, I2))
    Ml0 = numpy.ones((I2, J0))

    alphan = [11., 12., 13.]
    alpham = [14., 15.]
    alphal = [16.]

    R = [(R0, Mn0, E0, E1, alphan[0]), (R1, Mn1, E0, E1, alphan[1]), (R2, Mn2, E1, E2, alphan[2])]
    C = [(C0, Mm0, E0, alpham[0]), (C1, Mm1, E2, alpham[1])]
    D = [(D0, Ml0, E2, alphal[0])]
    E = [E0, E1, E2]
    K = {E0: K0, E1: K1, E2: K2}
    I = {E0: I0, E1: I1, E2: I2}
    J = [J0]

    # Entity types involved in each dataset, in the same order as R, C and D.
    E_per_Rn = [(E0, E1), (E0, E1), (E1, E2)]
    E_per_Cm = [E0, E2]
    E_per_Dl = [E2]

    alphatau, betatau = 1., 2.
    alpha0, beta0 = 6., 7.
    alphaS, betaS = 9., 10.
    lambdaF, lambdaG = 3., 8.
    lambdaSn, lambdaSm = 4., 5.
    priors = {'alpha0': alpha0, 'beta0': beta0, 'alphaS': alphaS, 'betaS': betaS,
              'alphatau': alphatau, 'betatau': betatau,
              'lambdaF': lambdaF, 'lambdaG': lambdaG, 'lambdaSn': lambdaSn, 'lambdaSm': lambdaSm}

    # Loop variables below use their own names (entity, row_entity, ...) so
    # the entity constants E0/E1/E2 are never rebound.

    # Case 1: F Exp, G Exp, S Exp, ARD, element-wise sparsity.
    # Init: F exp, G exp, S exp, lambdat exp, lambdaS exp, tau exp.
    settings = {'priorF': 'exponential', 'priorG': 'exponential', 'priorSn': 'exponential', 'priorSm': 'exponential',
                'orderF': 'rows', 'orderG': 'columns', 'orderSn': 'individual', 'orderSm': 'individual',
                'ARD': True, 'element_sparsity': True}
    init = {'F': 'exp', 'G': 'exp', 'Sn': 'exp', 'Sm': 'exp', 'lambdat': 'exp', 'lambdaS': 'exp', 'tau': 'exp'}
    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)

    for entity in E:
        for k in range(0, K[entity]):
            assert HMF.all_lambdat[entity][k] == alpha0 / float(beta0)
        for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
            assert HMF.all_Ft[entity][i, k] == 1. / HMF.all_lambdat[entity][k]

    expected_lambdaS = alphaS / float(betaS)

    expected_all_taun = [0.015369654419961557, 0.015367151516936775, 0.2442062783472021]
    for n in range(0, N):
        row_entity, col_entity = E_per_Rn[n]
        for k1, k2 in itertools.product(range(0, K[row_entity]), range(0, K[col_entity])):
            assert HMF.all_lambdan[n][k1, k2] == expected_lambdaS
            assert HMF.all_Sn[n][k1, k2] == 1. / expected_lambdaS
        assert abs(HMF.all_taun[n] - expected_all_taun[n]) < 0.0000000001

    expected_all_taum = [0.0062975762814580696, 3.7505835292008993]
    for m in range(0, M):
        entity = E_per_Cm[m]
        for k1, k2 in itertools.product(range(0, K[entity]), range(0, K[entity])):
            assert HMF.all_lambdam[m][k1, k2] == expected_lambdaS
            assert HMF.all_Sm[m][k1, k2] == 1. / expected_lambdaS
        assert abs(HMF.all_taum[m] - expected_all_taum[m]) < 0.000000001

    expected_all_taul = [7.2634333565945441]
    for l in range(0, L):
        entity = E_per_Dl[l]
        for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
            assert HMF.all_Gl[l][j, k] == 1. / HMF.all_lambdat[entity][k]
        assert abs(HMF.all_taul[l] - expected_all_taul[l]) < 0.00000001

    # Case 2: F Exp, G Exp, S N, no ARD, no element-wise sparsity.
    # Init: F random, G exp, Sn exp, Sm random, lambdaS exp, tau random.
    settings = {'priorF': 'exponential', 'priorG': 'exponential', 'priorSn': 'normal', 'priorSm': 'normal',
                'orderF': 'columns', 'orderG': 'rows', 'orderSn': 'rows', 'orderSm': 'rows',
                'ARD': False, 'element_sparsity': False}
    init = {'F': 'random', 'G': 'exp', 'Sn': 'exp', 'Sm': 'random', 'lambdaS': 'exp', 'tau': 'random'}
    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)

    for entity in E:
        for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
            assert HMF.all_Ft[entity][i, k] != 1. / lambdaF

    for n in range(0, N):
        row_entity, col_entity = E_per_Rn[n]
        for k1, k2 in itertools.product(range(0, K[row_entity]), range(0, K[col_entity])):
            assert HMF.all_Sn[n][k1, k2] == 0.01
        assert HMF.all_taun[n] >= 0.

    for m in range(0, M):
        entity = E_per_Cm[m]
        for k1, k2 in itertools.product(range(0, K[entity]), range(0, K[entity])):
            assert HMF.all_Sm[m][k1, k2] != 0.
        assert HMF.all_taum[m] >= 0.

    for l in range(0, L):
        entity = E_per_Dl[l]
        for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
            assert HMF.all_Gl[l][j, k] == 1. / lambdaG
        assert HMF.all_taul[l] >= 0.

    # Case 3: F N, G N, Sn Exp, Sm N, ARD; 'element_sparsity' is left unset.
    # Init: F kmeans, G exp, S random, lambdat random, tau random.
    # tau is random here, so only its sign is checked.
    settings = {'priorF': 'normal', 'priorG': 'normal', 'priorSn': 'exponential', 'priorSm': 'normal',
                'ARD': True, 'orderF': 'columns', 'orderG': 'rows', 'orderSn': 'rows', 'orderSm': 'rows'}
    init = {'F': 'kmeans', 'G': 'exp', 'Sn': 'random', 'Sm': 'random', 'lambdat': 'random', 'tau': 'random'}
    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)

    for entity in E:
        for k in range(0, K[entity]):
            assert HMF.all_lambdat[entity][k] >= 0.
        for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
            assert HMF.all_Ft[entity][i, k] == 0.2 or HMF.all_Ft[entity][i, k] == 1.2

    for n in range(0, N):
        row_entity, col_entity = E_per_Rn[n]
        for k1, k2 in itertools.product(range(0, K[row_entity]), range(0, K[col_entity])):
            assert HMF.all_Sn[n][k1, k2] >= 0.
        assert HMF.all_taun[n] >= 0.

    for m in range(0, M):
        entity = E_per_Cm[m]
        for k1, k2 in itertools.product(range(0, K[entity]), range(0, K[entity])):
            assert HMF.all_Sm[m][k1, k2] != 0.
        assert HMF.all_taum[m] >= 0.

    for l in range(0, L):
        entity = E_per_Dl[l]
        for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
            assert HMF.all_Gl[l][j, k] == 0.01
        assert HMF.all_taul[l] >= 0.

    # Case 4: F Exp, G N, S N, no ARD, no element-wise sparsity.
    # Init: F kmeans, G least, S least, lambdat random, lambdaS exp, tau random.
    settings = {'priorF': 'exponential', 'priorG': 'normal', 'priorSn': 'normal', 'priorSm': 'normal',
                'orderF': 'columns', 'orderG': 'rows', 'orderSn': 'rows', 'orderSm': 'rows',
                'ARD': False, 'element_sparsity': False}
    init = {'F': 'kmeans', 'G': 'least', 'Sn': 'least', 'Sm': 'least',
            'lambdat': 'random', 'lambdaS': 'exp', 'tau': 'random'}
    HMF = HMF_Gibbs(R, C, D, K, settings, priors)
    HMF.initialise(init)

    for entity in E:
        for i, k in itertools.product(range(0, I[entity]), range(0, K[entity])):
            assert HMF.all_Ft[entity][i, k] == 0.2 or HMF.all_Ft[entity][i, k] == 1.2

    for n in range(0, N):
        row_entity, col_entity = E_per_Rn[n]
        for k1, k2 in itertools.product(range(0, K[row_entity]), range(0, K[col_entity])):
            assert HMF.all_Sn[n][k1, k2] != 0.
        assert HMF.all_taun[n] >= 0.

    for m in range(0, M):
        entity = E_per_Cm[m]
        for k1, k2 in itertools.product(range(0, K[entity]), range(0, K[entity])):
            assert HMF.all_Sm[m][k1, k2] != 0.
        assert HMF.all_taum[m] >= 0.

    for l in range(0, L):
        entity = E_per_Dl[l]
        for j, k in itertools.product(range(0, J[l]), range(0, K[entity])):
            assert HMF.all_Gl[l][j, k] != 1. / lambdaG
        assert HMF.all_taul[l] >= 0.