Exemple #1
0
 def test_hand_made_data_set(self):
     """Test HDPM with uncertainty on small hand-crafted data set.

     Seeds numpy's RNG with 37, wraps three hand-written genes in a
     ``U.Data`` (with 12 as its second argument), builds a
     ``U.VariationalDistribution`` from it (with 5 as its second argument),
     runs ``test_log_likelihood_per_update`` on it and finally logs a
     ``Statistics`` summary.
     """
     numpy.random.seed(37)
     num_f, num_k = 12, 5

     # Each gene is a list of (index, values) pairs; every index used here
     # is below num_f.  NOTE(review): presumably the values are per-site
     # probabilities -- confirm against the U.Data documentation.
     first_gene = [(0, [0.1, 0.2, 0.99])]
     second_gene = [(3, [0.9, 0.9])]
     # A fresh list is built for every entry so that no two entries share
     # the same underlying object.
     second_gene += [(idx, [0.2, 0.99]) for idx in (2, 4, 5)]
     third_gene = [
         (0, [0.1, 0.2, 0.99]),
         (2, [0.1, 0.2]),
         (10, [0.1, 0.2, 0.99]),
         (11, [0.1, 0.2]),
         (9, [0.1, 0.2]),
     ]
     hand_made_genes = [first_gene, second_gene, third_gene]

     settings = U.get_default_options()
     hand_made_data = U.Data(hand_made_genes, num_f, settings)
     variational_dist = U.VariationalDistribution(hand_made_data, num_k)
     test_log_likelihood_per_update(variational_dist)
     Statistics(variational_dist).log()
Exemple #2
0
 def test_sampled_data(self):
     """Test HDPM with uncertainty on sampled data sets of different sizes.

     For every ``(F, K, G, average_n_g)`` scenario the numpy RNG is seeded
     with the scenario's 1-based position, ``a_tau`` and ``a_omega`` on the
     default options are set to ``numpy.ones(F) / F``, a data set is sampled
     through ``U.sample_rho`` / ``U.sample`` / ``U.genes_from_sites``, and
     ``test_log_likelihood_per_update`` is run on the resulting
     ``U.VariationalDistribution``.
     """
     # Columns: F, K, G, average_n_g.
     scenarios = (
         ( 2, 10,  20,   5),
         ( 2,  4,  20,  10),
         ( 2, 10,  20,  10),
         ( 2, 10, 220,  50),
         (12, 80, 100,  50),
         (80,  6, 200, 200),
     )
     # Counting from 1 gives each scenario its own seed: 1, 2, 3, ...
     for seed, scenario in enumerate(scenarios, 1):
         F, K, G, average_n_g = scenario
         numpy.random.seed(seed)
         logging.debug(
             'Testing sampled data with F=%d; K=%d; G=%d, average n_g=%d',
             F, K, G, average_n_g)

         settings = U.get_default_options()
         settings.a_tau = numpy.ones(F) / F
         settings.a_omega = numpy.ones(F) / F

         rho = U.sample_rho(G, average_n_g=average_n_g)
         drawn = U.sample(settings, rho, K, F)
         sampled_genes = U.genes_from_sites(drawn.sites, rho)

         sampled_data = U.Data(sampled_genes, F, settings)
         variational_dist = U.VariationalDistribution(sampled_data, K)
         test_log_likelihood_per_update(variational_dist)
Exemple #3
0
# Script: sample a synthetic data set, build a variational distribution for
# it, write a summary and set up the values used to track log likelihood.

# Reload the uncertainty package and its summarise submodule before pulling
# names from them (presumably so that source edits are picked up in an
# interactive session -- confirm).
# NOTE(review): the first reload uses the name `infpy` before any import that
# is visible here, so `infpy` (and `numpy`, `logging`) must already have been
# imported earlier in the file.
reload(infpy.dp.hdpm.uncertainty)
import infpy.dp.hdpm.uncertainty as U
import infpy.dp.hdpm.uncertainty.summarise
reload(infpy.dp.hdpm.uncertainty.summarise)
from infpy.dp.hdpm.uncertainty.summarise import Statistics, InferenceHistory, Summariser
from infpy.convergence_test import LlConvergenceTest

# DEBUG level so that the logging.debug() call below is emitted.
logging.basicConfig(level=logging.DEBUG)

# Size parameters handed to the sampling and model-construction calls below.
F, K, G, average_n_g = (12, 80, 100, 50)

# Fix numpy's RNG seed before sampling (assumes the U.sample* helpers draw
# from numpy.random -- TODO confirm).
numpy.random.seed(2)
logging.debug(
    'Testing sampled data with F=%d; K=%d; G=%d, average n_g=%d', F, K, G, average_n_g)

# Start from the default options and override a_tau / a_omega with
# length-F vectors of ones.
options = U.get_default_options()
options.a_tau = numpy.ones(F)
options.a_omega = numpy.ones(F)

# Sample rho for G, then a sample for K and F under the options above.
rho = U.sample_rho(G, average_n_g=average_n_g)
sample = U.sample(options, rho, K, F)

# Convert the sampled sites into genes, wrap them as Data and build the
# variational distribution from that.
genes = U.genes_from_sites(sample.sites, rho)
data = U.Data(genes, F, options)
dist = U.VariationalDistribution(data, K)

# NOTE(review): 'output/sampled/summary' is presumably an output location for
# the summary -- confirm what Summariser does with it.
Summariser(dist, 'output/sampled/summary').summarise_all()

# Record the initial log likelihood and a tolerance scaled by data.N.
history = InferenceHistory(dist)
LL = dist.log_likelihood()
LL_tolerance = 1e-8 * data.N