Example #1
0
def test_entropy_bernoulli_bivariate__ci_():
    """Joint entropy of a bivariate Bernoulli: exact vs logpdf vs MI.

    Compares three estimates of H(X,Y): (i) the exact closed form,
    (ii) -sum p log p computed from the ensemble's logpdf over the four
    joint outcomes, and (iii) mutual_information([0,1], [0,1]), which
    equals the joint entropy.  The numeric checks only run with the
    --integration flag; otherwise this is a smoke test.
    """
    # Set the test parameters.
    integration = pytest.config.getoption('--integration')
    n_data = 250 if integration else 10
    n_step = 20 if integration else 1
    n_samp = 1000 if integration else 10
    prng = get_prng(10)
    # Generate a bivariate Bernoulli dataset: X ~ PX, Y | X=i ~ PY[i].
    PX = [.3, .7]
    PY = [[.2, .8], [.6, .4]]
    TX = prng.choice([0, 1], p=PX, size=n_data)
    TY = np.zeros(shape=len(TX))
    TY[TX == 0] = prng.choice([0, 1], p=PY[0], size=len(TX[TX == 0]))
    TY[TX == 1] = prng.choice([0, 1], p=PY[1], size=len(TX[TX == 1]))
    # NOTE(review): T stacks (TY, TX), so output 0 observes Y and output 1
    # observes X; harmless here because only the joint entropy is tested,
    # which is symmetric under swapping the variables.
    T = np.column_stack((TY, TX))
    # Create ensemble; Cd forces both outputs into one view.
    ensemble = CrossCatEnsemble(
        outputs=[0, 1],
        inputs=[],
        distributions=[('bernoulli', None)] * 2,
        Cd=[[0, 1]],
        chains=64,
        rng=prng,
    )
    # Observe data.
    for rowid, (x, y) in enumerate(T):
        ensemble.observe(rowid, {0: x, 1: y})
    # Run inference.
    program = make_custom_program(N=n_step)
    ensemble.transition(program, multiprocess=1)
    # Exact computation: H = -sum_{x,y} p(x,y) log p(x,y).
    entropy_exact = (-PX[0] * PY[0][0] * np.log(PX[0] * PY[0][0]) -
                     PX[0] * PY[0][1] * np.log(PX[0] * PY[0][1]) -
                     PX[1] * PY[1][0] * np.log(PX[1] * PY[1][0]) -
                     PX[1] * PY[1][1] * np.log(PX[1] * PY[1][1]))
    # logpdf computation over the four joint outcomes.
    logps = ensemble.logpdf_bulk(None, [
        {0: 0, 1: 0},
        {0: 0, 1: 1},
        {0: 1, 1: 0},
        {0: 1, 1: 1},
    ])
    entropy_logpdf = [-np.sum(np.exp(logp) * logp) for logp in logps]
    # Mutual information computation: MI(Q;Q) = H(Q).
    # (A redundant re-assignment of n_samp, equivalent to the one at the
    # top of the test, was removed here.)
    entropy_mi_estimates = ensemble.mutual_information([0, 1], [0, 1],
                                                       N=n_samp)
    entropy_mi = [estimate.mean for estimate in entropy_mi_estimates]
    # Punt CLT analysis and go for a small tolerance.
    assert not integration or np.allclose(
        entropy_exact, entropy_logpdf, atol=.1)
    assert not integration or np.allclose(entropy_exact, entropy_mi, atol=.1)
    assert not integration or np.allclose(entropy_logpdf, entropy_mi, atol=.1)
Example #2
0
 def __init__(self,
              outputs,
              inputs,
              hypers=None,
              params=None,
              distargs=None,
              rng=None):
     """Initialize the distribution CGPM.

     Maintains running sufficient statistics (N, sum_x, sum of log x!)
     together with hyperparameters a and b.  NOTE(review): the statistic
     and hyperparameter names suggest a Poisson likelihood with a
     Gamma(a, b) prior on the rate -- confirm against the enclosing
     class, which is not visible in this chunk.
     """
     # Populate default kwargs.
     hypers = hypers or dict()
     params = params or dict()
     distargs = distargs or dict()
     # From constructor.
     self.outputs = list(outputs)
     self.inputs = list(inputs)
     self.params = params
     self.rng = rng or get_prng(1)
     # Internal attributes.
     self.data = OrderedDict()        # rowid -> observed value
     self.N = 0                       # number of observations
     self.sum_x = 0                   # running sum of observed values
     self.sum_log_fact_x = 0          # running sum of log(x!)
     self.a = hypers.get('a', 1.)     # hyperparameter a (must be > 0)
     self.b = hypers.get('b', 1.)     # hyperparameter b (must be > 0)
     assert self.a > 0
     assert self.b > 0
Example #3
0
 def __init__(self,
              outputs,
              inputs,
              hypers=None,
              params=None,
              distargs=None,
              rng=None):
     """Initialize the distribution CGPM.

     Maintains running sufficient statistics (N, sum_x, sum_x_sq)
     together with hyperparameters (m, r, s, nu).  NOTE(review): the
     names suggest a Normal likelihood under a normal-inverse-gamma
     style prior -- confirm against the enclosing class, which is not
     visible in this chunk.
     """
     # Populate default kwargs.
     hypers = hypers or dict()
     params = params or dict()
     distargs = distargs or dict()
     # From constructor.
     self.outputs = list(outputs)
     self.inputs = list(inputs)
     self.params = params
     self.rng = rng or get_prng(1)
     # Internal attributes.
     self.data = OrderedDict()        # rowid -> observed value
     self.N = 0                       # number of observations
     self.sum_x = 0                   # running sum of x
     self.sum_x_sq = 0                # running sum of x**2
     self.m = hypers.get('m', 0.)     # hyperparameter m
     self.r = hypers.get('r', 1.)     # hyperparameter r (must be > 0)
     self.s = hypers.get('s', 1.)     # hyperparameter s (must be > 0)
     self.nu = hypers.get('nu', 1.)   # hyperparameter nu (must be > 0)
     assert self.s > 0.
     assert self.r > 0.
     assert self.nu > 0.
 def __init__(self,
              outputs,
              inputs,
              distributions,
              chains=1,
              Cd=None,
              Ci=None,
              rng=None):
     """Initialize an ensemble of `chains` independent CrossCat states.

     outputs       -- variable indices modeled by the ensemble.
     inputs        -- must be empty; exogenous inputs are unsupported.
     distributions -- one (name, distargs) pair per output.
     chains        -- number of independent states in the ensemble.
     Cd, Ci        -- dependence / independence constraints on outputs.
     rng           -- pseudo-random generator; defaults to get_prng(1).
     """
     # Assertions.
     assert len(outputs) == len(distributions)
     # Accept any empty sequence: the previous `inputs == []` rejected an
     # empty tuple, although callers in the test suite pass `inputs=()`.
     assert not inputs
     # From constructor.
     self.outputs = outputs
     self.inputs = inputs
     self.chains = chains
     self.distributions = distributions
     self.Cd = tuple(Cd or [])
     self.Ci = tuple(Ci or [])
     self.rng = rng or get_prng(1)
     # Derived attributes.
     self.chains_list = range(chains)
     # One independent seed per chain, drawn deterministically from rng.
     seeds = self.rng.randint(0, 2**32 - 1, size=chains)
     # List comprehension instead of py2 map(); identical result in
     # Python 2 and also a list (not an iterator) under Python 3.
     self.cgpms = [make_random_crosscat(
                       (outputs, distributions, self.Cd, self.Ci, seed))
                   for seed in seeds]
def test_mutation_hypers_crp():
    """Setting the row-divide CRP concentration is reflected exactly."""
    prng = get_prng(2)
    synthesizer = get_crosscat_synthesizer(prng)
    crp_cgpm = synthesizer.crosscat.cgpms[0].cgpm_row_divide
    for alpha in np.linspace(0.01, 10, 1):
        synthesizer.set_hypers_row_divide(0, {'alpha': alpha})
        assert crp_cgpm.get_hypers()['alpha'] == alpha
def test_independence_inference_merge():
    """Lovecat should merge the dependent column pairs into two views."""
    # Get lovecat to merge dependent columns into one view.
    prng = get_prng(582)
    data = get_independence_inference_data(prng)
    # Hack: seed the view partition as
    #   {0:0, 1:0, 2:1, 3:1, 4:2, 5:2, 6:3, 7:3}
    # by constructing with Cd/Ci constraints, then dropping Ci.
    Cd = ((0, 1), (2, 3), (4, 5), (6, 7))
    Ci = ((0, 2), (0, 4), (0, 6), (2, 4), (2, 6), (4, 6))
    ensemble = CrossCatEnsemble(
        outputs=range(8),
        inputs=[],
        distributions=[('normal', None)] * 8,
        chains=1,
        Cd=Cd,
        Ci=Ci,
        rng=prng)
    ensemble.Ci = ()
    incorporate_data(ensemble, data)
    ensemble.transition(ensemble.make_default_inference_program(N=100))
    crosscat = ensemble.cgpms[0]
    # Map each column to the index of the view that owns it.
    view_of = {
        c: i
        for i, cgpm in enumerate(crosscat.cgpms)
        for c in cgpm.outputs
    }
    # Columns 0-3 end up in one view, columns 4-7 in the other.
    for column in (0, 1, 2, 3):
        assert view_of[column] == view_of[0]
    for column in (4, 5, 6, 7):
        assert view_of[column] == view_of[4]
    assert len(crosscat.cgpms) == 2
def test_convert_cgpm_to_cgpm2():
    """Converting a cgpm State to cgpm2 preserves the learned view:
    the row partition, the component hyperparameters, and the CRP
    concentration.
    """
    prng = get_prng(2)
    # Three well-separated normal clusters of 20 points each.
    data = np.concatenate((
        prng.normal(loc=0, scale=2, size=20),
        prng.normal(loc=30, scale=1, size=20),
        prng.normal(loc=-30, scale=1, size=20),
    ))
    state = State(X=np.reshape(data, (len(data), 1)),
                  outputs=[0],
                  cctypes=['normal'],
                  rng=prng)
    view_cgpm1 = state.views[0]
    view_cgpm1.transition(N=5)
    # Convert
    product = convert_cgpm_state_to_cgpm2(state, prng)
    view_cgpm2 = product.cgpms[0]
    # Verify row assignments: build the partition (list of blocks of
    # rowids) induced by each rowid->cluster map, so the comparison is
    # independent of the arbitrary cluster labels.
    assignments0 = view_cgpm1.Zr()
    partition0 = [[r for r, z in assignments0.iteritems() if z == u]
                  for u in set(assignments0.values())]
    assignments1 = view_cgpm2.cgpm_row_divide.data
    partition1 = [[r for r, z in assignments1.iteritems() if z == u]
                  for u in set(assignments1.values())]
    # Sorting blocks by their minimum rowid makes the order canonical.
    partition0_sorted = sorted(partition0, key=min)
    partition1_sorted = sorted(partition1, key=min)
    assert partition0_sorted == partition1_sorted
    # Verify hyperparameters.
    hypers0 = view_cgpm1.dims[0].hypers
    hypers1 = view_cgpm2.cgpm_components_array.cgpm_base.cgpms[0].get_hypers()
    assert hypers0 == hypers1
    # Verify CRP alpha.
    alpha0 = view_cgpm1.crp.hypers
    alpha1 = view_cgpm2.cgpm_row_divide.get_hypers()
    assert alpha0 == alpha1
def test_transition_hypers_basic():
    """Hyperparameter transitions change the hypers across sweeps and
    improve the mixture's log score on the observed data.

    NOTE(review): both assertions are stochastic properties of the
    sampler and hold for this particular seed.
    """
    prng = get_prng(2)
    component0 = Product([
        Poisson([0], [], hypers={'m': 100}, rng=prng),
        Normal([1], [], hypers={'m': 100}, rng=prng)
    ],
                         rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(cgpm_row_divide=cgpm_row_divide,
                                          cgpm_components_base=component0,
                                          rng=prng)
    # Make normal observations.
    infinite_mixture.observe(0, {1: 100})
    infinite_mixture.observe(1, {1: 300})
    infinite_mixture.observe(2, {1: -300})
    # Fetch log score.
    log_score0 = infinite_mixture.logpdf_score()
    # Run inference.
    normal_cgpms = get_cgpms_by_output_index(infinite_mixture, 1)
    grids_normal = transition_hyper_grids(normal_cgpms, 30)
    # Two transition sweeps; the resulting hyper settings should differ
    # between the sweeps (i.e. the sampler actually moves).
    hypers_normal = [
        transition_hypers(normal_cgpms, grids_normal, prng) for _i in xrange(2)
    ]
    assert not all(hypers == hypers_normal[0] for hypers in hypers_normal)
    log_score1 = infinite_mixture.logpdf_score()
    assert log_score0 < log_score1
Example #9
0
def test_entropy_bernoulli_univariate__ci_():
    """Entropy of a biased coin: exact vs logpdf vs MI-based estimates."""
    integration = pytest.config.getoption('--integration')
    n_data = 250 if integration else 10
    n_step = 10 if integration else 1
    n_samp = 1000 if integration else 10
    prng = get_prng(10)
    # Generate a biased Bernoulli dataset with P(1) = .7.
    dataset = prng.choice([0, 1], p=[.3, .7], size=n_data)
    # Create ensemble.
    ensemble = CrossCatEnsemble(
        outputs=[0],
        inputs=[],
        distributions=[('bernoulli', None)],
        chains=16,
        rng=prng,
    )
    # Observe data.
    for rowid, value in enumerate(dataset):
        ensemble.observe(rowid, {0: value})
    # Run inference.
    ensemble.transition(make_custom_program(N=n_step), multiprocess=1)
    # Exact entropy: -sum_x p(x) log p(x).
    entropy_exact = -(.3 * np.log(.3) + .7 * np.log(.7))
    # Entropy from the ensemble logpdf on both outcomes.
    logps = ensemble.logpdf_bulk(None, [{0: 0}, {0: 1}])
    entropy_logpdf = [-np.sum(np.exp(logp) * logp) for logp in logps]
    # Entropy via mutual information, since MI(X;X) = H(X).
    estimates = ensemble.mutual_information([0], [0], N=n_samp)
    entropy_mi = [estimate.mean for estimate in estimates]
    # Punt CLT analysis and go for 1 decimal place.
    assert not integration or np.allclose(
        entropy_exact, entropy_logpdf, atol=.1)
    assert not integration or np.allclose(entropy_exact, entropy_mi, atol=.1)
    assert not integration or np.allclose(entropy_logpdf, entropy_mi, atol=.05)
def test_mutation_hypers_component():
    """Setting a distribution hyperparameter propagates to every cgpm."""
    prng = get_prng(2)
    synthesizer = get_crosscat_synthesizer(prng)
    normal_cgpms = get_distribution_cgpms(synthesizer.crosscat, [0])[0]
    for mean in np.linspace(0.01, 10, 1):
        synthesizer.set_hypers_distribution(0, {'m': mean})
        for cgpm in normal_cgpms:
            assert cgpm.get_hypers()['m'] == mean
def test_dependencies_zero_based():
    """Outputs need not be zero-based: construction must not crash."""
    prng = get_prng(2)
    CrossCatEnsemble(
        outputs=(1, 2),
        inputs=(),
        Ci=[(1, 2)],
        distributions=[('normal', None)] * 2,
        chains=5,
        rng=prng)
def test_flexible_mixture_three_component__ci_():
    """Run the shared mixture test on a CRP-based flexible mixture."""
    prng = get_prng(2)
    mixture = FlexibleRowMixture(
        cgpm_row_divide=CRP([1], [], rng=prng),
        cgpm_components_base=Normal([0], [], rng=prng),
        rng=prng)
    integration = pytest.config.getoption('--integration')
    run_mixture_test(mixture, integration, prng)
def test_mutation_set_view_assignment():
    """Moving columns between views updates the view count."""
    prng = get_prng(2)
    synthesizer = get_crosscat_synthesizer(prng)
    # Column 0 into its own singleton view: now two views.
    synthesizer.set_view_assignment(0, None)
    assert len(synthesizer.crosscat.cgpms) == 2
    # Column 1 joins column 0's view: back to one view.
    synthesizer.set_view_assignment(1, 0)
    assert len(synthesizer.crosscat.cgpms) == 1
def test_crosscat_two_component_nominal__ci_():
    """CrossCat on two clusters plus a 4-level nominal: the views should
    merge and samples should land in the correct clusters/quadrants.
    """
    prng = get_prng(10)
    integration = pytest.config.getoption('--integration')
    # Build CGPM with adversarial initialization.
    crosscat = Product([
        FlexibleRowMixture(
            cgpm_row_divide=CRP([-1], [], rng=prng),
            cgpm_components_base=Product([
                Normal([0], [], rng=prng),
            ], rng=prng),
            rng=prng),
        FlexibleRowMixture(
            cgpm_row_divide=CRP([-2], [], rng=prng),
            cgpm_components_base=Product([
                Normal([1], [], rng=prng),
                Categorical([50], [], distargs={'k':4}, rng=prng),
            ], rng=prng),
            rng=prng),
    ], rng=prng,)
    # Fetch data and add a nominal variable (4 levels, 15 rows each).
    data_xy = make_bivariate_two_clusters(prng)
    data_z = np.zeros(len(data_xy))
    data_z[:15] = 0
    data_z[15:30] = 1
    data_z[30:45] = 2
    data_z[45:60] = 3
    data = np.column_stack((data_xy, data_z))
    # Observe.
    for rowid, row in enumerate(data):
        crosscat.observe(rowid, {0: row[0], 1: row[1], 50: row[2]})
    # Run inference.
    synthesizer = GibbsCrossCat(crosscat)
    synthesizer.transition(N=(50 if integration else 1), progress=False)
    synthesizer.transition(N=(100 if integration else 1),
            kernels=['hypers_distributions','hypers_row_divide'],
            progress=False)

    # Assert views are merged into one.
    assert not integration or len(synthesizer.crosscat.cgpms) == 1
    crp_output = synthesizer.crosscat.cgpms[0].cgpm_row_divide.outputs[0]

    # BUG FIX: the sample checks below were written as bare expressions
    # (`not integration or check_sampled_data(...)`) whose results were
    # discarded; they are now explicit `if integration:` guards so the
    # checker actually runs (and fails the test) under --integration.
    # Check joint samples for all nominals.
    samples = synthesizer.crosscat.simulate(None, [crp_output, 0, 1, 50], N=250)
    if integration:
        check_sampled_data(samples, [0, 7], 3, 110)
    # Check joint samples for nominals [0, 2].
    samples_a = [s for s in samples if s[50] in [0, 2]]
    if integration:
        check_sampled_data(samples_a, [0, 7], 3, 45)
    # Check joint samples for nominals [1, 3].
    samples_b = [s for s in samples if s[50] in [1, 3]]
    if integration:
        check_sampled_data(samples_b, [0, 7], 3, 45)

    # Check conditional samples in correct quadrants.
    means = {0: 0, 1: 0, 2: 7, 3: 7}
    for z in [0, 1, 2, 3]:
        samples = synthesizer.crosscat.simulate(None, [0, 1], {50: z}, N=100)
        if integration:
            check_sampled_data(samples, [means[z]], 3, 90)
Example #15
0
 def __init__(self, cgpms, indexer, rng=None):
     """Initialize a switch-style CGPM over component cgpms.

     cgpms   -- the component cgpms.  NOTE(review): outputs/inputs are
                taken from cgpms[0], so all components are presumably
                required to share them -- confirm against callers.
     indexer -- input variable prepended to the component inputs; its
                value presumably selects the active component.
     rng     -- pseudo-random generator; defaults to get_prng(1).
     """
     # From constructor.
     self.cgpms = cgpms
     self.rng = rng or get_prng(1)
     # Derived attributes.
     self.outputs = self.cgpms[0].outputs
     self.inputs = [indexer] + self.cgpms[0].inputs
     self.indexer = indexer
     # Internal attributes.
     self.rowid_to_index = {}   # rowid -> component index (filled later)
Example #16
0
def test_logpdf_basic():
    """logpdf is 0 for missing (nan) observed cells, negative otherwise."""
    prng = get_prng(2)
    crosscat = populate_crosscat(get_crosscat(prng), prng)
    for _rowid, row in get_dataset(crosscat, 0):
        logp = crosscat.logpdf(None, row)
        first_value = list(row.values())[0]
        if np.isnan(first_value):
            assert np.allclose(logp, 0)
        else:
            assert logp < 0
def test_dependencies_no_cpp():
    """View-assignment transitions honor Ci without the cpp backend."""
    prng = get_prng(2)
    ensemble = CrossCatEnsemble(
        outputs=(0, 1),
        inputs=[],
        Ci=[(0, 1)],
        distributions=[('normal', None)] * 2,
        chains=5,
        rng=prng)
    ensemble.observe(0, {0: 0, 1: 1})
    synthesizer = GibbsCrossCat(ensemble.cgpms[0], Ci=ensemble.Ci)
    synthesizer.transition_view_assignments()
Example #18
0
 def __init__(self, cgpms, rng=None):
     """Initialize a Product CGPM over component cgpms.

     Nested Product cgpms are flattened into a single level; outputs and
     inputs are the concatenation of the children's.
     """
     # Assertions.
     validate_cgpms_product(cgpms)
     # From constructor.
     self.cgpms = flatten_cgpms(cgpms, Product)
     self.rng = rng or get_prng(1)
     # Derived attributes.
     self.outputs = lchain(*[cgpm.outputs for cgpm in self.cgpms])
     self.inputs = lchain(*[cgpm.inputs for cgpm in self.cgpms])
     # Map each output variable to the index of the cgpm that owns it.
     self.output_to_index = {output:i for i, cgpm in enumerate(self.cgpms)
         for output in cgpm.outputs}
Example #19
0
def make_random_crosscat(args):
    """Build a random CrossCat state (a Product of views).

    args -- a single tuple (outputs, distributions, Cd, Ci, seed); the
    packed-tuple calling convention is kept for map()-style callers.
    (Rewritten from a Python-2 tuple-unpacking parameter signature,
    which PEP 3113 removed, so the function also parses on Python 3.)
    """
    outputs, distributions, Cd, Ci, seed = args
    rng = get_prng(seed)
    # CRP concentration for the random column partition.
    alpha = rng.gamma(2, 1)
    N = len(outputs)
    partition = make_random_partition(N, alpha, Cd, Ci, rng)
    # One random view per block of the column partition.
    views = [
        make_random_view(outputs=[outputs[i] for i in block],
                         distributions=[distributions[i] for i in block],
                         rng=rng) for block in partition
    ]
    crosscat = Product(cgpms=views, rng=rng)
    return crosscat
def test_simple_product():
    """A three-column Product simulates all outputs and scores them."""
    prng = get_prng(2)
    columns = [
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
        Categorical([2], [], distargs={'k':4}, rng=prng),
    ]
    product = Product(columns, prng)
    assert product.outputs == [0, 1, 2]
    assert product.inputs == []
    # Targets may be requested in any order.
    sample = product.simulate(None, [1, 2, 0])
    assert set(sample.keys()) == set([1, 2, 0])
    logp = product.logpdf(None, sample)
    assert logp < 0
def test_finite_mixture_three_component__ci_():
    """Run the shared mixture test on a fixed 3-component mixture."""
    prng = get_prng(2)
    mixture = FiniteRowMixture(
        cgpm_row_divide=Categorical([1], [], distargs={'k': 3}, rng=prng),
        cgpm_components=[Normal([0], [], rng=prng) for _i in range(3)],
        rng=prng,
    )
    integration = pytest.config.getoption('--integration')
    run_mixture_test(mixture, integration, prng)
def test_crosscat_two_component_no_view__ci_():
    """Pure-Python kernels: row assignments, then hypers, every step."""
    prng = get_prng(10)
    integration = pytest.config.getoption('--integration')
    crosscat = get_crosscat(prng)
    def func_inference(model):
        synthesizer = GibbsCrossCat(model)
        n_step = 500 if integration else 1
        for _step in xrange(n_step):
            synthesizer.transition_row_assignments()
            synthesizer.transition_hypers_distributions()
            synthesizer.transition_hypers_row_divide()
        return synthesizer
    run_crosscat_test(crosscat, func_inference, integration, prng)
def test_crosscat_two_component_cpp__ci_():
    """Structure inference via the cpp backend, then hyper transitions."""
    prng = get_prng(10)
    integration = pytest.config.getoption('--integration')
    crosscat = get_crosscat(prng)
    def func_inference(crosscat):
        # The original constructed GibbsCrossCat twice and discarded the
        # first instance; the redundant construction has been removed.
        synthesizer = GibbsCrossCat(crosscat)
        n_step = 1000 if integration else 1
        synthesizer.transition_structure_cpp(N=n_step)
        synthesizer.transition_hypers_distributions()
        synthesizer.transition_hypers_row_divide()
        return synthesizer
    run_crosscat_test(crosscat, func_inference, integration, prng)
def test_simple_product_as_chain():
    """End-to-end checks of a FlexibleRowMixture whose component base is
    a Chain: logpdf identities, observe/unobserve, error cases, and
    metadata round-tripping.
    """
    prng = get_prng(2)
    component0 = Chain([
        Poisson([0], [], hypers={'a': 10, 'b': 1}, rng=prng),
        Normal([1], [], hypers={'m':100}, rng=prng)
        ],
        rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component0,
        rng=prng)
    # With no observations, the CRP supports only cluster 0.
    assert infinite_mixture.cgpm_row_divide.support() == [0]
    # Test logpdf identities.
    lp0 = infinite_mixture.logpdf(None, {0:1})
    assert lp0 < 0
    # Conditioning on the only possible cluster (2:0) changes nothing.
    lp1 = infinite_mixture.logpdf(None, {0:1, 2:0})
    assert np.allclose(lp0, lp1)
    # Cluster 1 does not exist yet, so its joint density is zero.
    lp2 = infinite_mixture.logpdf(None, {0:1, 2:1})
    assert lp2 == -float('inf')
    # Add an observation.
    infinite_mixture.observe(0, {1:100})
    lp0 = infinite_mixture.logpdf(None, {1:100, 2:0}, constraints={0:1})
    lp1 = infinite_mixture.logpdf(None, {1:100, 2:1}, constraints={0:1})
    lp2 = infinite_mixture.logpdf(None, {1:100, 2:2}, constraints={0:1})
    # Existing cluster 0 beats fresh cluster 1; cluster 2 is impossible.
    assert lp1 < lp0
    assert lp2 == float('-inf')
    # Remove observation.
    observation = infinite_mixture.unobserve(0)
    assert observation == ({1:100, 2:0}, {})
    # Remove observation again (must raise: rowid no longer observed).
    with pytest.raises(Exception):
        infinite_mixture.unobserve(0)
    # Add more observations.
    infinite_mixture.observe(0, {1:100})
    infinite_mixture.observe(1, {1:300})
    infinite_mixture.observe(2, {0:2})
    # Constrained cluster has zero density.
    # NOTE(review): the two raises-blocks below are byte-identical; the
    # second was presumably meant to exercise a different constraint --
    # confirm against the upstream test intent.
    with pytest.raises(ValueError):
        infinite_mixture.logpdf(None, {0:1}, constraints={2:10})
    with pytest.raises(ValueError):
        infinite_mixture.logpdf(None, {0:1}, constraints={2:10})
    # Convert to/from metadata and assert unobserves return correct data.
    metadata = infinite_mixture.to_metadata()
    infinite_mixture2 = FlexibleRowMixture.from_metadata(metadata, prng)
    assert infinite_mixture2.unobserve(0) == \
        ({1:100, 2: infinite_mixture.cgpm_row_divide.data[0]}, {})
    assert infinite_mixture2.unobserve(1) == \
        ({1:300, 2: infinite_mixture.cgpm_row_divide.data[1]}, {})
    assert infinite_mixture2.unobserve(2) == \
        ({0:2, 2: infinite_mixture.cgpm_row_divide.data[2]}, {})
Example #25
0
 def __init__(self, cgpms, accuracy=None, rng=None):
     """Initialize a Chain CGPM that composes cgpms via their dependency
     structure.

     cgpms    -- component cgpms; nested Chains are flattened.
     accuracy -- defaults to 1.  NOTE(review): its exact semantics are
                 defined by the simulate/logpdf implementation, which is
                 not visible here.
     rng      -- pseudo-random generator; defaults to get_prng(1).
     """
     # Validate inputs.
     cgpms_valid = validate_cgpms(cgpms)
     # From constructor
     self.cgpms = flatten_cgpms(cgpms_valid, Chain)
     self.accuracy = accuracy or 1
     self.rng = rng if rng else get_prng(1)
     # Derived attributes.
     self.outputs = lchain(*[cgpm.outputs for cgpm in self.cgpms])
     self.inputs = lchain(*[cgpm.inputs for cgpm in self.cgpms])
     # Dependency structure used to topologically order the chain.
     self.v_to_c = retrieve_variable_to_cgpm(self.cgpms)
     self.adjacency = retrieve_adjacency_list(self.cgpms, self.v_to_c)
     self.extraneous = retrieve_extraneous_inputs(self.cgpms, self.v_to_c)
     self.topo = topological_sort(self.adjacency)
Example #26
0
def test_crosscat_add_remove():
    """add_cgpm/remove_cgpm keep the flattened output list in order."""
    prng = get_prng(2)
    crosscat = get_crosscat(prng)
    extra_mixture = FlexibleRowMixture(
        cgpm_row_divide=CRP([-4], [], rng=prng),
        cgpm_components_base=Product([
            Categorical([6], [], distargs={'k':4}, rng=prng),
        ], rng=prng),
        rng=prng)
    crosscat = add_cgpm(crosscat, extra_mixture)
    assert crosscat.outputs == [-1, 0, 1, -2, 2, 3, 4, -3, 5, -4, 6]
    # Removing by a view's CRP output (-1) drops that entire view.
    crosscat = remove_cgpm(crosscat, -1)
    assert crosscat.outputs == [-2, 2, 3, 4, -3, 5, -4, 6]
    # Removing by a column output (5) also drops its view.
    crosscat = remove_cgpm(crosscat, 5)
    assert crosscat.outputs == [-2, 2, 3, 4, -4, 6]
def test_custom_independence(Ci):
    """Ci constraints hold after initialization and after inference."""
    prng = get_prng(1)
    # Ten identical columns: maximally dependent data.
    base = prng.normal(size=(10, 1))
    T = np.repeat(base, 10, axis=1)
    ensemble = CrossCatEnsemble(
        outputs=range(10),
        inputs=[],
        distributions=[('normal', None)] * 10,
        chains=5,
        Ci=Ci,
        rng=prng)
    incorporate_data(ensemble, T)
    for crosscat in ensemble.cgpms:
        validate_crosscat_dependencies(crosscat, (), Ci)
    ensemble.transition(ensemble.make_default_inference_program(N=10))
    for crosscat in ensemble.cgpms:
        validate_crosscat_dependencies(crosscat, (), Ci)
Example #28
0
def test_transition_crp_mixture():
    """Gibbs inference on a CRP mixture recovers three well-separated
    normal clusters: within each true cluster, more than 95% of rows
    share the modal component assignment.
    """
    prng = get_prng(2)
    # 60 observations: three clusters of 20 at means 0, 30, -30.
    data = np.concatenate((
        prng.normal(loc=0, scale=2, size=20),
        prng.normal(loc=30, scale=1, size=20),
        prng.normal(loc=-30, scale=1, size=20),
    ))
    infinite_mixture = FlexibleRowMixture(cgpm_row_divide=CRP([1], [],
                                                              rng=prng),
                                          cgpm_components_base=Normal(
                                              [0], [], rng=prng),
                                          rng=prng)
    for rowid, value in enumerate(data):
        infinite_mixture.observe(rowid, {0: value})
    # Hyperparameter grids for the Normal (output 0) and CRP (output 1).
    cgpms = {
        0: get_cgpms_by_output_index(infinite_mixture, 0),
        1: get_cgpms_by_output_index(infinite_mixture, 1),
    }
    grids = {
        0: transition_hyper_grids(cgpms[0], 30),
        1: transition_hyper_grids(cgpms[1], 30),
    }
    # 50 sweeps: shuffled row reassignments, then hyper transitions.
    for _step in xrange(50):
        rowids = prng.permutation(range(len(data)))
        for rowid in rowids:
            transition_rows(infinite_mixture, rowid, prng)
        for output in infinite_mixture.outputs:
            transition_hypers(cgpms[output], grids[output], prng)
    # NOTE(review): simulate(r, [1]) at an observed rowid is taken to
    # return that row's component assignment -- confirm.
    # Also note `00` below is a Python-2 octal-zero literal (== 0);
    # it is a syntax error under Python 3.
    rowids = range(60)
    assignments0 = [
        infinite_mixture.simulate(r, [1])[1] for r in rowids[00:20]
    ]
    assignments1 = [
        infinite_mixture.simulate(r, [1])[1] for r in rowids[20:40]
    ]
    assignments2 = [
        infinite_mixture.simulate(r, [1])[1] for r in rowids[40:60]
    ]
    # Modal assignment within each true cluster.
    mode0 = Counter(assignments0).most_common(1)[0][0]
    mode1 = Counter(assignments1).most_common(1)[0][0]
    mode2 = Counter(assignments2).most_common(1)[0][0]
    assert sum(a == mode0
               for a in assignments0) > int(0.95 * len(assignments0))
    assert sum(a == mode1
               for a in assignments1) > int(0.95 * len(assignments1))
    assert sum(a == mode2
               for a in assignments2) > int(0.95 * len(assignments2))
def test_transition_rows_fixed_mixture():
    """Gibbs row transitions move rows to their obvious components."""
    prng = get_prng(2)
    # Three well-separated bivariate components.
    comp_a = Product([
        Normal([0], [], hypers={'m': 1000}, rng=prng),
        Normal([1], [], hypers={'m': 0}, rng=prng),
    ], rng=prng)
    comp_b = Product([
        Normal([0], [], hypers={'m': -1000}, rng=prng),
        Normal([1], [], hypers={'m': 1000}, rng=prng),
    ], rng=prng)
    comp_c = Product([
        Normal([0], [], hypers={'m': 0}, rng=prng),
        Normal([1], [], hypers={'m': -100}, rng=prng),
    ], rng=prng)
    assignment_cgpm = Categorical([2], [], distargs={'k': 3}, rng=prng)
    finite_mixture = FiniteRowMixture(
        cgpm_row_divide=assignment_cgpm,
        cgpm_components=[comp_a, comp_b, comp_c],
        rng=prng)
    # Two rows near each component, all initially assigned to component 0.
    observations = [
        {0: 1000, 1: 0, 2: 0},
        {0: 990, 1: -10, 2: 0},
        {0: -1000, 1: 1000, 2: 0},
        {0: -990, 1: 990, 2: 0},
        {0: 0, 1: -1000, 2: 0},
        {0: 10, 1: -990, 2: 0},
    ]
    for rowid, observation in enumerate(observations):
        finite_mixture.observe(rowid, observation)
    # Confirm all rows start in component 0.
    for rowid in range(6):
        assert finite_mixture.simulate(rowid, [2]) == {2: 0}
    # Run transitions
    for _i in xrange(10):
        for rowid in range(6):
            transition_rows(finite_mixture, rowid, prng)
    # Confirm each pair of rows landed in its nearest component.
    for rowid, component in [(0, 0), (1, 0), (2, 1), (3, 1), (4, 2), (5, 2)]:
        assert finite_mixture.simulate(rowid, [2]) == {2: component}
def test_mutation_set_rowid_component():
    """set_rowid_component creates and reuses singleton clusters."""
    prng = get_prng(2)
    synthesizer = get_crosscat_synthesizer(prng)
    crp = synthesizer.crosscat.cgpms[0].cgpm_row_divide
    # Moving row 0 to a singleton component yields a cluster of size 1.
    synthesizer.set_rowid_component(0, 0, None)
    singleton = crp.data[0]
    assert crp.counts[singleton] == 1
    # Repeating the move keeps the same singleton cluster, still size 1.
    synthesizer.set_rowid_component(0, 0, None)
    assert crp.data[0] == singleton
    assert crp.counts[crp.data[0]] == 1
    # Moving row 10 into row 0's component grows it to size 2.
    synthesizer.set_rowid_component(0, 10, 0)
    assert crp.data[10] == singleton
    assert crp.counts[singleton] == 2