Example #1
0
def test_search():
    """Smoke test: GridSearch.search() completes without raising.

    No result is inspected; the test passes as long as construction and a
    one-iteration search over every candidate (K, L) finish normally.
    """
    n_rows, n_cols = 10, 9
    K_candidates = [1, 2, 4, 5]
    L_candidates = [5, 4, 3]

    # Data matrix of twos with one entry changed to 1, and an all-ones M.
    R_data = 2 * numpy.ones((n_rows, n_cols))
    R_data[0, 0] = 1
    M_ones = numpy.ones((n_rows, n_cols))

    prior_values = dict(alpha=3, beta=4, lambdaF=5, lambdaS=6, lambdaG=7)
    init_FG, init_S = 'exp', 'random'
    n_iterations = 1

    # Note the positional order expected by the constructor: initS before initFG.
    gridsearch = GridSearch(
        classifier,
        K_candidates,
        L_candidates,
        R_data,
        M_ones,
        prior_values,
        init_S,
        init_FG,
        n_iterations,
    )
    gridsearch.search()
Example #2
0
def test_init():
    """Check that the GridSearch constructor stores every argument it is given.

    Also checks that `all_performances` holds, for each of 'BIC', 'AIC' and
    'loglikelihood', an array with one row per candidate K and one column per
    candidate L.
    """
    n_rows, n_cols = 10, 9
    K_candidates = [1, 2, 4, 5]
    L_candidates = [5, 4, 3]
    R_data = numpy.full((n_rows, n_cols), 2.0)
    M_ones = numpy.ones((n_rows, n_cols))
    prior_values = dict(alpha=3, beta=4, lambdaF=5, lambdaS=6, lambdaG=7)
    init_FG, init_S = 'exp', 'random'
    n_iterations = 11

    # Note the positional order expected by the constructor: initS before initFG.
    gridsearch = GridSearch(
        classifier,
        K_candidates,
        L_candidates,
        R_data,
        M_ones,
        prior_values,
        init_S,
        init_FG,
        n_iterations,
    )

    # Dimensions are read off the shape of the data matrix.
    assert gridsearch.I == n_rows
    assert gridsearch.J == n_cols

    # Array-like arguments are compared element-wise.
    assert numpy.array_equal(gridsearch.values_K, K_candidates)
    assert numpy.array_equal(gridsearch.values_L, L_candidates)
    assert numpy.array_equal(gridsearch.R, R_data)
    assert numpy.array_equal(gridsearch.M, M_ones)

    # Plain settings are compared directly.
    assert gridsearch.priors == prior_values
    assert gridsearch.iterations == n_iterations
    assert gridsearch.initS == init_S
    assert gridsearch.initFG == init_FG

    # One cell per (K, L) combination: 4 candidate K values by 3 candidate L values.
    grid_shape = (len(K_candidates), len(L_candidates))
    for metric in ('BIC', 'AIC', 'loglikelihood'):
        assert gridsearch.all_performances[metric].shape == grid_shape
Example #3
0
def test_best_value():
    """Check best_value() against hand-written performance grids.

    `all_performances` is overwritten with small 4x3 tables (rows follow the
    candidate K values, columns the candidate L values). For every metric the
    expected (K, L) pair is the one sitting at the smallest entry of its table.
    An unknown metric name must trigger an AssertionError with a fixed message.
    """
    n_rows, n_cols = 10, 9
    K_candidates = [1, 2, 4, 5]
    L_candidates = [5, 4, 3]
    R_data = numpy.full((n_rows, n_cols), 2.0)
    M_ones = numpy.ones((n_rows, n_cols))
    prior_values = dict(alpha=3, beta=4, lambdaF=5, lambdaS=6, lambdaG=7)
    init_FG, init_S = 'exp', 'random'
    n_iterations = 11

    # Note the positional order expected by the constructor: initS before initFG.
    gridsearch = GridSearch(
        classifier,
        K_candidates,
        L_candidates,
        R_data,
        M_ones,
        prior_values,
        init_S,
        init_FG,
        n_iterations,
    )

    # Replace the performance tables with known values so the optimum is fixed.
    gridsearch.all_performances = {
        'BIC': [
            [10, 9, 8],       # minimum 8 at K=1, L=3
            [11, 12, 13],
            [17, 16, 15],
            [13, 13, 13],
        ],
        'AIC': [
            [8, 8, 8],
            [7, 7, 7],
            [10, 11, 15],
            [6, 5, 6],        # minimum 5 at K=5, L=4
        ],
        'loglikelihood': [
            [10, 12, 13],
            [17, 18, 29],
            [5, 4, 3],
            [3, 2, 1],        # minimum 1 at K=5, L=3
        ],
    }

    expected_best = (
        ('BIC', (1, 3)),
        ('AIC', (5, 4)),
        ('loglikelihood', (5, 3)),
    )
    for metric, best_pair in expected_best:
        assert gridsearch.best_value(metric) == best_pair

    # NOTE(review): the failure path is exercised through all_values(), not
    # best_value() - confirm that this is the intended method for this test.
    with pytest.raises(AssertionError) as excinfo:
        gridsearch.all_values('FAIL')
    assert str(excinfo.value) == "Unrecognised metric name: FAIL."
# Initialisation setting handed to GridSearch below as its initS argument.
initS = 'random'

# Generate data: generate_dataset returns a 6-tuple; only its last element is
# kept, as R. The other five values are discarded.
(_, _, _, _, _, R) = generate_dataset(I, J, true_K, true_L, lambdaF, lambdaS,
                                      lambdaG, tau)
# NOTE(review): M is presumably a mask over the entries of R with roughly
# fraction_unknown of them hidden - confirm against try_generate_M.
M = try_generate_M(I, J, fraction_unknown, attempts_M)

# Run the grid search. The priors lambdaF,S,G need to be a single value each
# (recall K,L is unknown), so the [0, 0] entry of every lambda array is used.
priors = {
    'alpha': alpha,
    'beta': beta,
    'lambdaF': lambdaF[0, 0],
    'lambdaS': lambdaS[0, 0],
    'lambdaG': lambdaG[0, 0]
}
grid_search = GridSearch(classifier, values_K, values_L, R, M, priors, initS,
                         initFG, iterations, restarts)
grid_search.search(burn_in, thinning)

# Plot the performances of all four metrics
for metric in ['loglikelihood', 'BIC', 'AIC', 'MSE']:
    # Make three flat lists of coordinates X,Y,Z (K,L,metric).
    # list_values_K repeats each K len(values_L) times in a row, while
    # list_values_L cycles through values_L once per K.
    # NOTE(review): this pairing matches `values` only if all_values(metric)
    # is laid out with one row per K and one column per L - confirm.
    values = numpy.array(grid_search.all_values(metric)).flatten()
    list_values_K = numpy.array([values_K for l in range(0, len(values_L))
                                 ]).T.flatten()
    list_values_L = numpy.array([values_L
                                 for k in range(0, len(values_K))]).flatten()

    # Set up a regular grid of interpolation points: 100 evenly spaced values
    # along each axis between the smallest and largest K (resp. L), expanded
    # into 2-D coordinate arrays by meshgrid.
    Ki, Li = (numpy.linspace(min(list_values_K), max(list_values_K), 100),
              numpy.linspace(min(list_values_L), max(list_values_L), 100))
    Ki, Li = numpy.meshgrid(Ki, Li)
Example #5
0
# Generate data: generate_dataset returns a 6-tuple; only its last element is
# kept, as R. The other five values are discarded.
(_, _, _, _, _, R) = generate_dataset(I, J, true_K, true_L, lambdaF, lambdaS,
                                      lambdaG, tau)
# M is all ones here; the try_generate_M alternative is left disabled below.
M = numpy.ones((I, J))
#M = try_generate_M(I,J,fraction_unknown,attempts_M)

# Run the grid search. The priors lambdaF,S,G need to be a single value each
# (recall K,L is unknown), so the [0, 0] entry of every lambda array is used,
# here divided by 10.
priors = {
    'alpha': alpha,
    'beta': beta,
    'lambdaF': lambdaF[0, 0] / 10,
    'lambdaS': lambdaS[0, 0] / 10,
    'lambdaG': lambdaG[0, 0] / 10
}
grid_search = GridSearch(classifier, values_K, values_L, R, M, priors, initS,
                         initFG, iterations, restarts)
grid_search.search()

# Plot the performances of all four metrics
metrics = ['loglikelihood', 'BIC', 'AIC', 'MSE']
for metric in metrics:
    # Make three flat lists of coordinates X,Y,Z (K,L,metric).
    # list_values_K repeats each K len(values_L) times in a row, while
    # list_values_L cycles through values_L once per K.
    # NOTE(review): this pairing matches `values` only if all_values(metric)
    # is laid out with one row per K and one column per L - confirm.
    values = numpy.array(grid_search.all_values(metric)).flatten()
    list_values_K = numpy.array([values_K for l in range(0, len(values_L))
                                 ]).T.flatten()
    list_values_L = numpy.array([values_L
                                 for k in range(0, len(values_K))]).flatten()

    # Set up a regular grid of interpolation points: 100 evenly spaced values
    # along each axis between the smallest and largest K (resp. L).
    Ki, Li = (numpy.linspace(min(list_values_K), max(list_values_K), 100),
              numpy.linspace(min(list_values_L), max(list_values_L), 100))
# All-ones lambda arrays passed to generate_dataset, with shapes
# (I, true_K), (true_K, true_L) and (J, true_L).
lambdaF = numpy.ones((I,true_K))
lambdaS = numpy.ones((true_K,true_L))
lambdaG = numpy.ones((J,true_L))

# Model and initialisation settings handed to GridSearch below.
classifier = bnmtf_vb_optimised
initFG = 'kmeans'
initS = 'random'

# Generate data: generate_dataset returns a 6-tuple; only its last element is
# kept, as R. The other five values are discarded.
(_,_,_,_,_,R) = generate_dataset(I,J,true_K,true_L,lambdaF,lambdaS,lambdaG,tau)
# M is all ones here; the try_generate_M alternative is left disabled below.
M = numpy.ones((I,J))
#M = try_generate_M(I,J,fraction_unknown,attempts_M)

# Run the grid search. The priors lambdaF,S,G need to be a single value each
# (recall K,L is unknown), so the [0,0] entry of every lambda array is used,
# here divided by 10 (i.e. 0.1, since the arrays above are all ones).
priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF[0,0]/10, 'lambdaS':lambdaS[0,0]/10, 'lambdaG':lambdaG[0,0]/10 }
grid_search = GridSearch(classifier,values_K,values_L,R,M,priors,initS,initFG,iterations,restarts)
grid_search.search()

# Plot the performances of all four metrics
metrics = ['loglikelihood', 'BIC', 'AIC','MSE']
for metric in metrics:
    # Make three flat lists of coordinates X,Y,Z (K,L,metric).
    # list_values_K repeats each K len(values_L) times in a row, while
    # list_values_L cycles through values_L once per K.
    # NOTE(review): this pairing matches `values` only if all_values(metric)
    # is laid out with one row per K and one column per L - confirm.
    values = numpy.array(grid_search.all_values(metric)).flatten()
    list_values_K = numpy.array([values_K for l in range(0,len(values_L))]).T.flatten()
    list_values_L = numpy.array([values_L for k in range(0,len(values_K))]).flatten()
    
    # Set up a regular grid of interpolation points: 100 evenly spaced values
    # along each axis between the smallest and largest K (resp. L), expanded
    # into 2-D coordinate arrays by meshgrid.
    Ki, Li = (numpy.linspace(min(list_values_K), max(list_values_K), 100), 
              numpy.linspace(min(list_values_L), max(list_values_L), 100))
    Ki, Li = numpy.meshgrid(Ki, Li)
    
# tau is the ratio of the alpha and beta values; it is passed to
# generate_dataset as its last argument.
tau = alpha / beta
# All-ones lambda arrays passed to generate_dataset, with shapes
# (I, true_K), (true_K, true_L) and (J, true_L).
lambdaF = numpy.ones((I,true_K))
lambdaS = numpy.ones((true_K,true_L))
lambdaG = numpy.ones((J,true_L))

# Model and initialisation settings handed to GridSearch below.
classifier = bnmtf_gibbs_optimised
initFG = 'kmeans'
initS = 'random'

# Generate data: generate_dataset returns a 6-tuple; only its last element is
# kept, as R. The other five values are discarded.
(_,_,_,_,_,R) = generate_dataset(I,J,true_K,true_L,lambdaF,lambdaS,lambdaG,tau)
# NOTE(review): M is presumably a mask over the entries of R with roughly
# fraction_unknown of them hidden - confirm against try_generate_M.
M = try_generate_M(I,J,fraction_unknown,attempts_M)

# Run the grid search. The priors lambdaF,S,G need to be a single value each
# (recall K,L is unknown), so the [0,0] entry of every lambda array is used
# (1.0 here, since the arrays above are all ones).
priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF[0,0], 'lambdaS':lambdaS[0,0], 'lambdaG':lambdaG[0,0] }
grid_search = GridSearch(classifier,values_K,values_L,R,M,priors,initS,initFG,iterations,restarts)
grid_search.search(burn_in,thinning)

# Plot the performances of all four metrics
for metric in ['loglikelihood', 'BIC', 'AIC','MSE']:
    # Make three flat lists of coordinates X,Y,Z (K,L,metric).
    # list_values_K repeats each K len(values_L) times in a row, while
    # list_values_L cycles through values_L once per K.
    # NOTE(review): this pairing matches `values` only if all_values(metric)
    # is laid out with one row per K and one column per L - confirm.
    values = numpy.array(grid_search.all_values(metric)).flatten()
    list_values_K = numpy.array([values_K for l in range(0,len(values_L))]).T.flatten()
    list_values_L = numpy.array([values_L for k in range(0,len(values_K))]).flatten()
    
    # Set up a regular grid of interpolation points: 100 evenly spaced values
    # along each axis between the smallest and largest K (resp. L), expanded
    # into 2-D coordinate arrays by meshgrid.
    Ki, Li = (numpy.linspace(min(list_values_K), max(list_values_K), 100), 
              numpy.linspace(min(list_values_L), max(list_values_L), 100))
    Ki, Li = numpy.meshgrid(Ki, Li)
    
    # Interpolate