def test_search():
    """Smoke test: a one-iteration grid search must finish without raising."""
    n_rows, n_cols = 10, 9
    candidate_Ks = [1, 2, 4, 5]
    candidate_Ls = [5, 4, 3]

    # Data matrix filled with twos except for one entry; mask is all ones.
    data = numpy.ones((n_rows, n_cols)) * 2
    data[0, 0] = 1
    mask = numpy.ones((n_rows, n_cols))

    hyperparameters = {
        'alpha': 3,
        'beta': 4,
        'lambdaF': 5,
        'lambdaS': 6,
        'lambdaG': 7,
    }
    init_S, init_FG = 'random', 'exp'
    n_iterations = 1

    searcher = GridSearch(classifier, candidate_Ks, candidate_Ls, data, mask,
                          hyperparameters, init_S, init_FG, n_iterations)
    # No assertions: the test passes as long as search() does not raise.
    searcher.search()
def test_init():
    """Check that GridSearch stores its constructor arguments and exposes one
    performance grid per metric, sized by the numbers of K and L candidates."""
    n_rows, n_cols = 10, 9
    candidate_Ks = [1, 2, 4, 5]
    candidate_Ls = [5, 4, 3]
    data = numpy.ones((n_rows, n_cols)) * 2
    mask = numpy.ones((n_rows, n_cols))
    hyperparameters = {
        'alpha': 3,
        'beta': 4,
        'lambdaF': 5,
        'lambdaS': 6,
        'lambdaG': 7,
    }
    init_S, init_FG = 'random', 'exp'
    n_iterations = 11

    searcher = GridSearch(classifier, candidate_Ks, candidate_Ls, data, mask,
                          hyperparameters, init_S, init_FG, n_iterations)

    # Dimensions are exposed as I and J on the instance.
    assert searcher.I == n_rows
    assert searcher.J == n_cols

    # Candidate lists and matrices are compared element-wise.
    assert numpy.array_equal(searcher.values_K, candidate_Ks)
    assert numpy.array_equal(searcher.values_L, candidate_Ls)
    assert numpy.array_equal(searcher.R, data)
    assert numpy.array_equal(searcher.M, mask)

    # Remaining settings are stored as given.
    assert searcher.priors == hyperparameters
    assert searcher.iterations == n_iterations
    assert searcher.initS == init_S
    assert searcher.initFG == init_FG

    # Four K candidates by three L candidates -> a (4, 3) grid per metric.
    expected_grid = (len(candidate_Ks), len(candidate_Ls))
    for metric in ('BIC', 'AIC', 'loglikelihood'):
        assert searcher.all_performances[metric].shape == expected_grid
def test_best_value():
    """Check best_value() against hand-written performance tables, and check
    the assertion message produced for an unknown metric name."""
    n_rows, n_cols = 10, 9
    candidate_Ks = [1, 2, 4, 5]
    candidate_Ls = [5, 4, 3]
    data = numpy.ones((n_rows, n_cols)) * 2
    mask = numpy.ones((n_rows, n_cols))
    hyperparameters = {
        'alpha': 3,
        'beta': 4,
        'lambdaF': 5,
        'lambdaS': 6,
        'lambdaG': 7,
    }
    init_S, init_FG = 'random', 'exp'
    n_iterations = 11

    searcher = GridSearch(classifier, candidate_Ks, candidate_Ls, data, mask,
                          hyperparameters, init_S, init_FG, n_iterations)

    # Overwrite the performance tables directly; rows follow candidate_Ks and
    # columns follow candidate_Ls.
    searcher.all_performances = {
        'BIC': [[10, 9, 8], [11, 12, 13], [17, 16, 15], [13, 13, 13]],
        'AIC': [[8, 8, 8], [7, 7, 7], [10, 11, 15], [6, 5, 6]],
        'loglikelihood': [[10, 12, 13], [17, 18, 29], [5, 4, 3], [3, 2, 1]],
    }

    # Each expected (K, L) pair is the grid position of the smallest entry in
    # the corresponding table above.
    expected_best = [
        ('BIC', (1, 3)),
        ('AIC', (5, 4)),
        ('loglikelihood', (5, 3)),
    ]
    for metric, best in expected_best:
        assert searcher.best_value(metric) == best

    # NOTE(review): the failure path exercises all_values(), not best_value(),
    # even though this test targets best_value() -- possibly a copy-paste
    # slip; confirm which method was intended before changing it.
    with pytest.raises(AssertionError) as error:
        searcher.all_values('FAIL')
    assert str(error.value) == "Unrecognised metric name: FAIL."
initS = 'random'

# Generate the dataset (only the last returned value, R, is kept) and build
# the mask M via try_generate_M.
_, _, _, _, _, R = generate_dataset(
    I, J, true_K, true_L, lambdaF, lambdaS, lambdaG, tau)
M = try_generate_M(I, J, fraction_unknown, attempts_M)

# Run the grid search. Each lambda prior is passed as a single scalar (the
# [0, 0] entry of the generating matrix) because K and L are unknown here.
priors = {
    'alpha': alpha,
    'beta': beta,
    'lambdaF': lambdaF[0, 0],
    'lambdaS': lambdaS[0, 0],
    'lambdaG': lambdaG[0, 0],
}
grid_search = GridSearch(classifier, values_K, values_L, R, M, priors,
                         initS, initFG, iterations, restarts)
grid_search.search(burn_in, thinning)

# Plot the performance for each of the four listed metrics.
for metric in ('loglikelihood', 'BIC', 'AIC', 'MSE'):
    # Three flat, parallel arrays: K value, L value, metric value.
    # Assumes all_values(metric) is laid out as len(values_K) rows by
    # len(values_L) columns -- TODO confirm.
    values = numpy.array(grid_search.all_values(metric)).flatten()
    list_values_K = numpy.array([values_K] * len(values_L)).T.flatten()
    list_values_L = numpy.array([values_L] * len(values_K)).flatten()

    # Regular 100 x 100 grid of interpolation points spanning the K and L
    # ranges that were searched.
    Ki = numpy.linspace(min(list_values_K), max(list_values_K), 100)
    Li = numpy.linspace(min(list_values_L), max(list_values_L), 100)
    Ki, Li = numpy.meshgrid(Ki, Li)
# Generate the dataset; only the last returned value, R, is kept.
_, _, _, _, _, R = generate_dataset(
    I, J, true_K, true_L, lambdaF, lambdaS, lambdaG, tau)
# M is all ones here; the try_generate_M(I, J, fraction_unknown, attempts_M)
# alternative is disabled.
M = numpy.ones((I, J))

# Run the grid search. Each lambda prior is passed as a single scalar (the
# [0, 0] entry of the generating matrix, divided by 10) because K and L are
# unknown here.
priors = {
    'alpha': alpha,
    'beta': beta,
    'lambdaF': lambdaF[0, 0] / 10,
    'lambdaS': lambdaS[0, 0] / 10,
    'lambdaG': lambdaG[0, 0] / 10,
}
grid_search = GridSearch(classifier, values_K, values_L, R, M, priors,
                         initS, initFG, iterations, restarts)
grid_search.search()

# Plot the performance for each of the four listed metrics.
metrics = ['loglikelihood', 'BIC', 'AIC', 'MSE']
for metric in metrics:
    # Three flat, parallel arrays: K value, L value, metric value.
    # Assumes all_values(metric) is laid out as len(values_K) rows by
    # len(values_L) columns -- TODO confirm.
    values = numpy.array(grid_search.all_values(metric)).flatten()
    list_values_K = numpy.array([values_K] * len(values_L)).T.flatten()
    list_values_L = numpy.array([values_L] * len(values_K)).flatten()

    # 100 evenly spaced interpolation coordinates along each of K and L.
    Ki = numpy.linspace(min(list_values_K), max(list_values_K), 100)
    Li = numpy.linspace(min(list_values_L), max(list_values_L), 100)
# All-ones lambda matrices used to generate the data.
lambdaF = numpy.ones((I, true_K))
lambdaS = numpy.ones((true_K, true_L))
lambdaG = numpy.ones((J, true_L))

classifier = bnmtf_vb_optimised
initFG = 'kmeans'
initS = 'random'

# Generate the dataset; only the last returned value, R, is kept.
_, _, _, _, _, R = generate_dataset(
    I, J, true_K, true_L, lambdaF, lambdaS, lambdaG, tau)
# M is all ones here; the try_generate_M(I, J, fraction_unknown, attempts_M)
# alternative is disabled.
M = numpy.ones((I, J))

# Run the grid search. Each lambda prior is passed as a single scalar (the
# [0, 0] entry of the generating matrix, divided by 10) because K and L are
# unknown here.
priors = {
    'alpha': alpha,
    'beta': beta,
    'lambdaF': lambdaF[0, 0] / 10,
    'lambdaS': lambdaS[0, 0] / 10,
    'lambdaG': lambdaG[0, 0] / 10,
}
grid_search = GridSearch(classifier, values_K, values_L, R, M, priors,
                         initS, initFG, iterations, restarts)
grid_search.search()

# Plot the performance for each of the four listed metrics.
metrics = ['loglikelihood', 'BIC', 'AIC', 'MSE']
for metric in metrics:
    # Three flat, parallel arrays: K value, L value, metric value.
    # Assumes all_values(metric) is laid out as len(values_K) rows by
    # len(values_L) columns -- TODO confirm.
    values = numpy.array(grid_search.all_values(metric)).flatten()
    list_values_K = numpy.array([values_K] * len(values_L)).T.flatten()
    list_values_L = numpy.array([values_L] * len(values_K)).flatten()

    # Regular 100 x 100 grid of interpolation points spanning the K and L
    # ranges that were searched.
    Ki = numpy.linspace(min(list_values_K), max(list_values_K), 100)
    Li = numpy.linspace(min(list_values_L), max(list_values_L), 100)
    Ki, Li = numpy.meshgrid(Ki, Li)
tau = alpha / beta

# All-ones lambda matrices used to generate the data.
lambdaF = numpy.ones((I, true_K))
lambdaS = numpy.ones((true_K, true_L))
lambdaG = numpy.ones((J, true_L))

classifier = bnmtf_gibbs_optimised
initFG = 'kmeans'
initS = 'random'

# Generate the dataset (only the last returned value, R, is kept) and build
# the mask M via try_generate_M.
_, _, _, _, _, R = generate_dataset(
    I, J, true_K, true_L, lambdaF, lambdaS, lambdaG, tau)
M = try_generate_M(I, J, fraction_unknown, attempts_M)

# Run the grid search. Each lambda prior is passed as a single scalar (the
# [0, 0] entry of the generating matrix) because K and L are unknown here.
priors = {
    'alpha': alpha,
    'beta': beta,
    'lambdaF': lambdaF[0, 0],
    'lambdaS': lambdaS[0, 0],
    'lambdaG': lambdaG[0, 0],
}
grid_search = GridSearch(classifier, values_K, values_L, R, M, priors,
                         initS, initFG, iterations, restarts)
grid_search.search(burn_in, thinning)

# Plot the performance for each of the four listed metrics.
for metric in ('loglikelihood', 'BIC', 'AIC', 'MSE'):
    # Three flat, parallel arrays: K value, L value, metric value.
    # Assumes all_values(metric) is laid out as len(values_K) rows by
    # len(values_L) columns -- TODO confirm.
    values = numpy.array(grid_search.all_values(metric)).flatten()
    list_values_K = numpy.array([values_K] * len(values_L)).T.flatten()
    list_values_L = numpy.array([values_L] * len(values_K)).flatten()

    # Regular 100 x 100 grid of interpolation points spanning the K and L
    # ranges that were searched.
    Ki = numpy.linspace(min(list_values_K), max(list_values_K), 100)
    Li = numpy.linspace(min(list_values_L), max(list_values_L), 100)
    Ki, Li = numpy.meshgrid(Ki, Li)

    # Interpolate