Example #1
priors = {
    'alpha': alpha,
    'beta': beta,
    'lambdaF': lambdaF,
    'lambdaS': lambdaS,
    'lambdaG': lambdaG
}

init_S = 'random'
init_FG = 'kmeans'

metrics = ['MSE', 'R^2', 'Rp']

# Load in data
R_true = numpy.loadtxt(input_folder + "R_true.txt")

# For each noise ratio, generate mask matrices for each attempt
M_attempts = 100
all_Ms = [[
    try_generate_M(I, J, fraction_unknown, M_attempts)
    for r in range(0, repeats)
] for noise in noise_ratios]
all_Ms_test = [[calc_inverse_M(M) for M in Ms] for Ms in all_Ms]


# Make sure each M has no empty rows or columns
def check_empty_rows_columns(M, fraction):
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i, c in enumerate(sums_rows):
        assert c != 0, "Fully unobserved row in M, row %s. Fraction %s." % (
            i, fraction)
    for j, c in enumerate(sums_columns):
        assert c != 0, "Fully unobserved column in M, column %s. Fraction %s." % (
            j, fraction)
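These excerpts call try_generate_M and calc_inverse_M, whose definitions live elsewhere in the project. A minimal sketch of the behaviour the surrounding code assumes; the retry logic here is inferred, not the project's actual implementation:

import numpy

def try_generate_M(I, J, fraction_unknown, attempts):
    # Inferred sketch: sample a binary mask whose entries are unknown (0)
    # with probability fraction_unknown, retrying until the training mask
    # has no fully unobserved row or column.
    for attempt in range(attempts):
        M = (numpy.random.random((I, J)) >= fraction_unknown).astype(int)
        if M.sum(axis=0).min() > 0 and M.sum(axis=1).min() > 0:
            return M
    raise Exception("Could not generate M without empty rows or columns.")

def calc_inverse_M(M):
    # The test mask is the complement of the training mask.
    return 1 - M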
Example #2
init_FG = 'kmeans'

minimum_TN = 0.1

metrics = ['MSE', 'R^2', 'Rp']

# Load in data
R = numpy.loadtxt(input_folder+"R.txt")

# Seed all of the methods the same
numpy.random.seed(3)

# Generate matrices M - one list of M's for each fraction
M_attempts = 100
all_Ms = [ 
    [try_generate_M(I,J,fraction,M_attempts) for r in range(0,repeats)]
    for fraction in fractions_unknown
]
all_Ms_test = [ [calc_inverse_M(M) for M in Ms] for Ms in all_Ms ]

# Make sure each M has no empty rows or columns
def check_empty_rows_columns(M,fraction):
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i,c in enumerate(sums_rows):
        assert c != 0, "Fully unobserved row in M, row %s. Fraction %s." % (i,fraction)
    for j,c in enumerate(sums_columns):
        assert c != 0, "Fully unobserved column in M, column %s. Fraction %s." % (j,fraction)
        
for Ms,fraction in zip(all_Ms,fractions_unknown):
    for M in Ms:
        check_empty_rows_columns(M,fraction)
Example #3
alpha, beta = 100., 1. #1., 1.
tau = alpha / beta
lambdaF = numpy.ones((I,true_K))
lambdaS = numpy.ones((true_K,true_L))
lambdaG = numpy.ones((J,true_L))

classifier = bnmtf_gibbs_optimised
initFG = 'kmeans'
initS = 'random'

search_metric = 'AIC'

# Generate data
(_,_,_,_,_,R) = generate_dataset(I,J,true_K,true_L,lambdaF,lambdaS,lambdaG,tau)
M = try_generate_M(I,J,fraction_unknown,attempts_M)
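# Sketch (not the project's code): generate_dataset presumably draws F, S, G
# from exponential priors and adds Gaussian noise with precision tau to
# R_true = F S G^T; the six-value return ordering is an assumption here.
def generate_dataset_sketch(I, J, K, L, lambdaF, lambdaS, lambdaG, tau):
    F = numpy.random.exponential(scale=1./lambdaF)   # I x K factor matrix
    S = numpy.random.exponential(scale=1./lambdaS)   # K x L core matrix
    G = numpy.random.exponential(scale=1./lambdaG)   # J x L factor matrix
    R_true = F.dot(S).dot(G.T)                       # noise-free I x J data
    R = R_true + numpy.random.normal(scale=numpy.sqrt(1./tau), size=(I, J))
    return (F, S, G, tau, R_true, R)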

# Run the greedy search. The priors lambdaF, lambdaS and lambdaG need to be single values (recall K and L are unknown)
priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF[0,0], 'lambdaS':lambdaS[0,0], 'lambdaG':lambdaG[0,0] }
greedy_search = GreedySearch(classifier,values_K,values_L,R,M,priors,initS,initFG,iterations,restarts)
greedy_search.search(search_metric,burn_in,thinning)

# Plot the performances of all three metrics
for metric in ['loglikelihood', 'BIC', 'AIC', 'MSE']:
    # Unpack the (K, L, metric value) triples into three parallel lists
    KLvalues = numpy.array(greedy_search.all_values(metric))
    (list_values_K,list_values_L,values) = zip(*KLvalues)
    
    # Set up a regular grid of interpolation points
    Ki, Li = (numpy.linspace(min(list_values_K), max(list_values_K), 100), 
              numpy.linspace(min(list_values_L), max(list_values_L), 100))
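    # --- Sketch of a plausible completion (not from the original script):
    # interpolate the scattered (K, L, value) points onto the regular grid
    # with scipy and draw a heatmap; scipy/matplotlib are assumed available.
    from scipy.interpolate import griddata
    import matplotlib.pyplot as plt
    Ki_grid, Li_grid = numpy.meshgrid(Ki, Li)
    values_grid = griddata((numpy.array(list_values_K), numpy.array(list_values_L)),
                           numpy.array(values), (Ki_grid, Li_grid), method='linear')
    plt.figure()
    plt.pcolormesh(Ki_grid, Li_grid, values_grid)
    plt.xlabel('K')
    plt.ylabel('L')
    plt.title(metric)
    plt.colorbar()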
Example #4
init_S = 'random'
init_FG = 'kmeans'

metrics = ['MSE', 'R^2', 'Rp']

# Load in data
R = numpy.loadtxt(input_folder + "R.txt")

# Seed all of the methods the same
numpy.random.seed(3)

# Generate matrices M - one list of M's for each fraction
M_attempts = 100
all_Ms = [[
    try_generate_M(I, J, fraction, M_attempts) for r in range(0, repeats)
] for fraction in fractions_unknown]
all_Ms_test = [[calc_inverse_M(M) for M in Ms] for Ms in all_Ms]


# Make sure each M has no empty rows or columns
def check_empty_rows_columns(M, fraction):
    sums_columns = M.sum(axis=0)
    sums_rows = M.sum(axis=1)
    for i, c in enumerate(sums_rows):
        assert c != 0, "Fully unobserved row in M, row %s. Fraction %s." % (
            i, fraction)
    for j, c in enumerate(sums_columns):
        assert c != 0, "Fully unobserved column in M, column %s. Fraction %s." % (
            j, fraction)
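Each excerpt declares metrics = ['MSE', 'R^2', 'Rp'], but the evaluation code falls outside what is shown. A minimal sketch of how these three measures are conventionally computed on the held-out entries; evaluate_predictions is a hypothetical helper, not part of the project:

import numpy

def evaluate_predictions(R, R_pred, M_test):
    # Score predictions on the test (held-out) entries only.
    mask = M_test == 1
    x, y = R[mask], R_pred[mask]
    MSE = ((x - y) ** 2).mean()
    R2 = 1. - ((x - y) ** 2).sum() / ((x - x.mean()) ** 2).sum()
    Rp = numpy.corrcoef(x, y)[0, 1]  # Pearson correlation coefficient
    return {'MSE': MSE, 'R^2': R2, 'Rp': Rp}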
Example #5
attempts_M = 100

alpha, beta = 100., 1.  #1., 1.
tau = alpha / beta
lambdaF = numpy.ones((I, true_K))
lambdaS = numpy.ones((true_K, true_L))
lambdaG = numpy.ones((J, true_L))

classifier = bnmtf_gibbs_optimised
initFG = 'kmeans'
initS = 'random'

# Generate data
(_, _, _, _, _, R) = generate_dataset(I, J, true_K, true_L, lambdaF, lambdaS,
                                      lambdaG, tau)
M = try_generate_M(I, J, fraction_unknown, attempts_M)

# Run the grid search. The priors lambdaF, lambdaS and lambdaG need to be single values (recall K and L are unknown)
priors = {
    'alpha': alpha,
    'beta': beta,
    'lambdaF': lambdaF[0, 0],
    'lambdaS': lambdaS[0, 0],
    'lambdaG': lambdaG[0, 0]
}
grid_search = GridSearch(classifier, values_K, values_L, R, M, priors, initS,
                         initFG, iterations, restarts)
grid_search.search(burn_in, thinning)
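# Added sketch (not in the original script): pick the (K, L) pair minimising
# the criterion, assuming GridSearch exposes the same all_values(metric)
# accessor used with GreedySearch above; the real API may differ.
KLvalues_BIC = numpy.array(grid_search.all_values('BIC'))
best_K, best_L, best_BIC = min(KLvalues_BIC.tolist(), key=lambda t: t[2])
print("Best K = %s, L = %s (BIC = %s)." % (int(best_K), int(best_L), best_BIC))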

# Plot the performances of all three metrics
for metric in ['loglikelihood', 'BIC', 'AIC', 'MSE']: