# Experiment settings.
init_FG = 'kmeans'
minimum_TN = 0.1
metrics = ['MSE', 'R^2', 'Rp']

# Load in data
R = numpy.loadtxt(input_folder + "R.txt")

# Seed all of the methods the same
numpy.random.seed(3)

# Generate matrices M - one list of M's for each fraction
M_attempts = 100
all_Ms = [
    [try_generate_M(I, J, fraction, M_attempts) for r in range(repeats)]
    for fraction in fractions_unknown
]
all_Ms_test = [[calc_inverse_M(M) for M in Ms] for Ms in all_Ms]


# Make sure each M has no empty rows or columns
def check_empty_rows_columns(M, fraction):
    """Assert that the mask matrix M has no fully unobserved row or column.

    M is expected to be a 2D numpy array of 0/1 observations; fraction is
    only used in the failure message. Raises AssertionError on violation.
    """
    row_totals = M.sum(axis=1)
    column_totals = M.sum(axis=0)
    for idx, total in enumerate(row_totals):
        assert total != 0, "Fully unobserved row in M, row %s. Fraction %s." % (
            idx, fraction)
    for idx, total in enumerate(column_totals):
        assert total != 0, "Fully unobserved column in M, column %s. Fraction %s." % (
            idx, fraction)
# Experiment settings for the grid search over (K, L).
fraction_unknown = 0.1
attempts_M = 100

# Gamma prior on the noise precision tau; tau is initialised at its prior mean.
alpha, beta = 100., 1. #1., 1.
tau = alpha / beta
# Flat (all-ones) exponential prior rate matrices for F, S, G.
lambdaF = numpy.ones((I,true_K))
lambdaS = numpy.ones((true_K,true_L))
lambdaG = numpy.ones((J,true_L))

classifier = bnmtf_gibbs_optimised
initFG = 'kmeans'
initS = 'random'

# Generate data
(_,_,_,_,_,R) = generate_dataset(I,J,true_K,true_L,lambdaF,lambdaS,lambdaG,tau)
M = try_generate_M(I,J,fraction_unknown,attempts_M)

# Run the line search. The priors lambdaF,S,G need to be a single value (recall K,L is unknown)
priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF[0,0], 'lambdaS':lambdaS[0,0], 'lambdaG':lambdaG[0,0] }
grid_search = GridSearch(classifier,values_K,values_L,R,M,priors,initS,initFG,iterations,restarts)
grid_search.search(burn_in,thinning)

# Plot the performances of all three metrics
for metric in ['loglikelihood', 'BIC', 'AIC','MSE']:
    # Make three lists of indices X,Y,Z (K,L,metric)
    values = numpy.array(grid_search.all_values(metric)).flatten()
    # Repeat the K values once per L value (and vice versa) so each (K, L)
    # grid point lines up with the corresponding flattened metric value.
    list_values_K = numpy.array([values_K for l in range(0,len(values_L))]).T.flatten()
    list_values_L = numpy.array([values_L for k in range(0,len(values_K))]).flatten()

    # Set up a regular grid of interpolation points
    # NOTE(review): this statement continues beyond the visible chunk —
    # presumably the matching linspace over L completes the tuple; confirm downstream.
    Ki, Li = (numpy.linspace(min(list_values_K), max(list_values_K), 100,
}   # closes a settings dict opened in the preceding (not visible) chunk
init_S = 'random'
init_FG = 'kmeans'
minimum_TN = 0.1
metrics = ['MSE', 'R^2', 'Rp']

# Load in data
R_true = numpy.loadtxt(input_folder + "R_true.txt")

# For each noise ratio, generate mask matrices for each attempt
M_attempts = 100
all_Ms = [[
    try_generate_M(I, J, fraction_unknown, M_attempts)
    for r in range(0, repeats)
] for noise in noise_ratios]
all_Ms_test = [[calc_inverse_M(M) for M in Ms] for Ms in all_Ms]

# Make sure each M has no empty rows or columns
def check_empty_rows_columns(M, fraction):
    """Assert that the mask matrix M has no fully unobserved row or column.

    fraction is only interpolated into the failure message; raises
    AssertionError when a zero row/column sum is found.
    """
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i, c in enumerate(sums_rows):
        assert c != 0, "Fully unobserved row in M, row %s. Fraction %s." % (
            i, fraction)
    for j, c in enumerate(sums_columns):
        assert c != 0, "Fully unobserved column in M, column %s. Fraction %s." % (
            j, fraction)