Example #1
def test_init():
    I, J = 10, 9
    values_K = [1, 2, 4, 5]
    values_L = [5, 4, 3]
    R = 2 * numpy.ones((I, J))
    M = numpy.ones((I, J))
    priors = {'alpha': 3, 'beta': 4, 'lambdaF': 5, 'lambdaS': 6, 'lambdaG': 7}
    initFG = 'exp'
    initS = 'random'
    iterations = 11

    greedysearch = GreedySearch(classifier, values_K, values_L, R, M, priors,
                                initS, initFG, iterations)
    assert greedysearch.I == I
    assert greedysearch.J == J
    assert numpy.array_equal(greedysearch.values_K, values_K)
    assert numpy.array_equal(greedysearch.values_L, values_L)
    assert numpy.array_equal(greedysearch.R, R)
    assert numpy.array_equal(greedysearch.M, M)
    assert greedysearch.priors == priors
    assert greedysearch.iterations == iterations
    assert greedysearch.initS == initS
    assert greedysearch.initFG == initFG
    assert greedysearch.all_performances['BIC'] == []
    assert greedysearch.all_performances['AIC'] == []
    assert greedysearch.all_performances['loglikelihood'] == []
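The assertions above pin down what the constructor is expected to store; below is a minimal __init__ sketch consistent with them (an assumption for illustration, not the library's code; the restarts parameter and its default are taken from the later examples):

import numpy

class GreedySearchSketch(object):
    def __init__(self, classifier, values_K, values_L, R, M, priors,
                 initS, initFG, iterations, restarts=1):
        # Store the search configuration exactly as the test reads it back
        self.classifier = classifier
        self.values_K, self.values_L = values_K, values_L
        self.R, self.M = numpy.array(R, dtype=float), numpy.array(M)
        (self.I, self.J) = self.R.shape
        self.priors = priors
        self.initS, self.initFG = initS, initFG
        self.iterations, self.restarts = iterations, restarts
        # One list of (K, L, value) tuples per quality metric, filled in by search()
        self.all_performances = {'BIC': [], 'AIC': [], 'loglikelihood': []}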
Example #2
def test_search():
    # Check that no exceptions are raised.
    I, J = 10, 9
    values_K = [1, 2, 4, 5]
    values_L = [5, 4, 3]
    R = 2 * numpy.ones((I, J))
    R[0, 0] = 1
    M = numpy.ones((I, J))
    priors = {'alpha': 3, 'beta': 4, 'lambdaF': 5, 'lambdaS': 6, 'lambdaG': 7}
    initFG = 'exp'
    initS = 'exp'
    iterations = 1
    search_metric = 'BIC'

    numpy.random.seed(0)
    random.seed(0)
    greedysearch = GreedySearch(classifier, values_K, values_L, R, M, priors,
                                initS, initFG, iterations)
    greedysearch.search(search_metric)

    with pytest.raises(AssertionError) as error:
        greedysearch.all_values('FAIL')
    assert str(error.value) == "Unrecognised metric name: FAIL."

    # We go from: (1,5) -> (1,4) -> (1,3), and try 6 locations
    assert len(greedysearch.all_values('BIC')) == 6
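The comment above relies on how the greedy walk moves through the (K, L) grid; here is a minimal sketch of that stepping logic (an assumption for illustration, not the library's implementation; lower metric values are treated as better):

def greedy_path_sketch(values_K, values_L, evaluate):
    # evaluate(K, L) returns the quality metric for one trained model
    cache = {}
    def value(k, l):
        if (k, l) not in cache:
            cache[(k, l)] = evaluate(values_K[k], values_L[l])
        return cache[(k, l)]

    k, l = 0, 0                        # start at the smallest (K, L)
    while True:
        candidates = [(k, l)]          # current point plus its two neighbours
        if k + 1 < len(values_K):
            candidates.append((k + 1, l))
        if l + 1 < len(values_L):
            candidates.append((k, l + 1))
        best = min(candidates, key=lambda kl: value(*kl))
        if best == (k, l):
            break                      # neither neighbour improves: stop
        k, l = best
    return cache                       # every (K, L) location evaluated on the path

If the metric improves along (1,5) -> (1,4) -> (1,3) for values_K = [1, 2, 4, 5] and values_L = [5, 4, 3], this walk evaluates exactly the six locations counted in the assertion above.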
Example #3
def test_best_value():
    I, J = 10, 9
    values_K = [1, 2, 4, 5]
    values_L = [5, 4, 3]
    R = 2 * numpy.ones((I, J))
    M = numpy.ones((I, J))
    priors = {'alpha': 3, 'beta': 4, 'lambdaF': 5, 'lambdaS': 6, 'lambdaG': 7}
    initFG = 'exp'
    initS = 'random'
    iterations = 11

    greedysearch = GreedySearch(classifier, values_K, values_L, R, M, priors,
                                initS, initFG, iterations)
    greedysearch.all_performances = {
        'BIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 30.), (2, 4, 5.),
                (5, 3, 20.)],
        'AIC': [(1, 2, 10.), (2, 2, 20.), (2, 3, 4.), (2, 4, 25.),
                (5, 3, 20.)],
        'loglikelihood': [(1, 2, 10.), (2, 2, 8.), (2, 3, 30.), (2, 4, 40.),
                          (5, 3, 20.)]
    }
    assert greedysearch.best_value('BIC') == (2, 4)
    assert greedysearch.best_value('AIC') == (2, 3)
    assert greedysearch.best_value('loglikelihood') == (2, 2)
    with pytest.raises(AssertionError) as error:
        greedysearch.all_values('FAIL')
    assert str(error.value) == "Unrecognised metric name: FAIL."
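A helper consistent with the three best_value assertions above would look like the sketch below (not the library's code; note that for every metric the test expects the (K, L) pair with the smallest stored value):

def best_value_sketch(all_performances, metric):
    # Raise the same assertion message the test checks for an unknown metric name
    assert metric in all_performances, "Unrecognised metric name: %s." % metric
    # Pick the (K, L) pair whose stored metric value is smallest
    K, L, _ = min(all_performances[metric], key=lambda entry: entry[2])
    return (K, L)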
    def run(self, burn_in=None, thinning=None, minimum_TN=None):
        folds_test = mask.compute_folds(self.I, self.J, self.folds, self.M)
        folds_training = mask.compute_Ms(folds_test)

        for i, (train, test) in enumerate(zip(folds_training, folds_test)):
            print "Fold %s." % (i + 1)

            # Run the greedy grid search
            greedy_search = GreedySearch(
                classifier=self.classifier,
                values_K=self.values_K,
                values_L=self.values_L,
                R=self.R,
                M=self.M,
                priors=self.priors,
                initS=self.init_S,
                initFG=self.init_FG,
                iterations=self.iterations,
                restarts=self.restarts,
            )
            greedy_search.search(self.quality_metric, burn_in=burn_in, thinning=thinning, minimum_TN=minimum_TN)

            # Store the model fits, and find the best one according to the metric
            all_performances = greedy_search.all_values(metric=self.quality_metric)
            self.fout.write(
                "All model fits for fold %s, metric %s: %s.\n" % (i + 1, self.quality_metric, all_performances)
            )
            self.fout.flush()

            best_KL = greedy_search.best_value(metric=self.quality_metric)
            self.fout.write("Best K,L for fold %s: %s.\n" % (i + 1, best_KL))

            # Train a model with this K and L and measure performance on the test set
            performance = self.run_model(
                train, test, best_KL[0], best_KL[1], burn_in=burn_in, thinning=thinning, minimum_TN=minimum_TN
            )
            self.fout.write("Performance: %s.\n\n" % performance)
            self.fout.flush()
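run() treats folds_test and folds_training as paired binary masks over the observed entries of M; the snippet below illustrates that pairing (an assumption about the mask format for illustration, not the mask module's implementation; the sizes are made up):

import numpy
import random

I, J, no_folds = 10, 9, 5
M = numpy.ones((I, J))                              # observed entries
observed = [(i, j) for i in range(I) for j in range(J) if M[i, j]]
random.shuffle(observed)

folds_test, folds_training = [], []
for f in range(no_folds):
    test_mask = numpy.zeros((I, J))
    for (i, j) in observed[f::no_folds]:            # every no_folds-th entry goes to this fold
        test_mask[i, j] = 1
    folds_test.append(test_mask)
    folds_training.append(M - test_mask)            # training mask = observed minus test

Each (train, test) pair then plays the role of the masks zipped together at the top of run().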
classifier = bnmtf_vb_optimised

search_metric = "AIC"

# Load in data
(_, X_min, M, _, _, _, _) = load_Sanger(standardised=standardised)

folds_test = compute_folds(I, J, no_folds, M)
folds_training = compute_Ms(folds_test)
(M_train, M_test) = (folds_training[0], folds_test[0])

# Run the greedy search
priors = {"alpha": alpha, "beta": beta, "lambdaF": lambdaF, "lambdaS": lambdaS, "lambdaG": lambdaG}
greedy_search = GreedySearch(
    classifier, values_K, values_L, X_min, M, priors, initS, initFG, iterations, restarts=restarts
)
greedy_search.search(search_metric)

# Plot the performances of all metrics
metrics = ["loglikelihood", "BIC", "AIC", "MSE"]
for metric in metrics:
    # Make three lists of indices X,Y,Z (K,L,metric)
    KLvalues = numpy.array(greedy_search.all_values(metric))
    (list_values_K, list_values_L, values) = zip(*KLvalues)

    # Set up a regular grid of interpolation points
    Ki, Li = (
        numpy.linspace(min(list_values_K), max(list_values_K), 100),
        numpy.linspace(min(list_values_L), max(list_values_L), 100),
    )
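The listing stops at the interpolation grid; a minimal sketch of how the per-metric surface plot might be completed is given below (matplotlib is an assumption here; the Rbf interpolation mirrors the call used in the last example further down):

import matplotlib.pyplot as plt
import scipy.interpolate

for metric in metrics:
    KLvalues = numpy.array(greedy_search.all_values(metric))
    (list_values_K, list_values_L, values) = zip(*KLvalues)

    # Regular grid of (K, L) interpolation points
    Ki, Li = numpy.meshgrid(numpy.linspace(min(list_values_K), max(list_values_K), 100),
                            numpy.linspace(min(list_values_L), max(list_values_L), 100))

    # Interpolate the metric values onto the grid and draw the surface
    rbf = scipy.interpolate.Rbf(list_values_K, list_values_L, values, function='linear')
    plt.figure()
    plt.contourf(Ki, Li, rbf(Ki, Li), 20)
    plt.colorbar()
    plt.xlabel('K')
    plt.ylabel('L')
    plt.title(metric)
plt.show()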
lambdaS = numpy.ones((true_K, true_L))
lambdaG = numpy.ones((J, true_L))

classifier = bnmtf_gibbs_optimised
initFG = 'kmeans'
initS = 'random'

search_metric = 'AIC'

# Generate data
(_, _, _, _, _, R) = generate_dataset(I, J, true_K, true_L, lambdaF, lambdaS, lambdaG, tau)
M = try_generate_M(I, J, fraction_unknown, attempts_M)

# Run the greedy search. The priors lambdaF, lambdaS and lambdaG need to be single values (recall that the true K and L are unknown)
priors = {'alpha': alpha, 'beta': beta, 'lambdaF': lambdaF[0, 0], 'lambdaS': lambdaS[0, 0], 'lambdaG': lambdaG[0, 0]}
greedy_search = GreedySearch(classifier, values_K, values_L, R, M, priors, initS, initFG, iterations, restarts)
greedy_search.search(search_metric, burn_in, thinning)

# Plot the performances of all three metrics
for metric in ['loglikelihood', 'BIC', 'AIC', 'MSE']:
    # Make three lists of indices X,Y,Z (K,L,metric)
    KLvalues = numpy.array(greedy_search.all_values(metric))
    (list_values_K, list_values_L, values) = zip(*KLvalues)
    
    # Set up a regular grid of interpolation points
    Ki, Li = (numpy.linspace(min(list_values_K), max(list_values_K), 100), 
              numpy.linspace(min(list_values_L), max(list_values_L), 100))
    Ki, Li = numpy.meshgrid(Ki, Li)
    
    # Interpolate
    rbf = scipy.interpolate.Rbf(list_values_K, list_values_L, values, function='linear')
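Once the search has run, the selected dimensionality can be read back the same way test_best_value exercises it above, e.g. (a usage note, assuming the same API):

best_K, best_L = greedy_search.best_value(search_metric)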