def test_crosscat_two_component_nominal__ci_():
    """Fit GibbsCrossCat to two numeric columns plus a 4-valued nominal column.

    The model starts with two views: one holds variable 0, the other holds
    variable 1 together with the categorical variable 50.  All statistical
    checks are gated on the ``--integration`` option; without it a single
    step of each transition is run and only the simulate calls are exercised.
    """
    prng = get_prng(10)
    integration = pytest.config.getoption('--integration')

    # Build CGPM with adversarial initialization.
    view_first = FlexibleRowMixture(
        cgpm_row_divide=CRP([-1], [], rng=prng),
        cgpm_components_base=Product([
            Normal([0], [], rng=prng),
        ], rng=prng),
        rng=prng)
    view_second = FlexibleRowMixture(
        cgpm_row_divide=CRP([-2], [], rng=prng),
        cgpm_components_base=Product([
            Normal([1], [], rng=prng),
            Categorical([50], [], distargs={'k': 4}, rng=prng),
        ], rng=prng),
        rng=prng)
    crosscat = Product([view_first, view_second], rng=prng)

    # Fetch data and add a nominal variable: rows 0-59 are labelled 0..3 in
    # consecutive blocks of 15 rows.
    data_xy = make_bivariate_two_clusters(prng)
    data_z = np.zeros(len(data_xy))
    for label in range(4):
        data_z[15 * label:15 * (label + 1)] = label
    data = np.column_stack((data_xy, data_z))

    # Observe.
    for rowid, row in enumerate(data):
        crosscat.observe(rowid, {0: row[0], 1: row[1], 50: row[2]})

    # Run inference: all kernels first, then hyperparameter kernels only.
    n_steps_all = 50 if integration else 1
    n_steps_hypers = 100 if integration else 1
    synthesizer = GibbsCrossCat(crosscat)
    synthesizer.transition(N=n_steps_all, progress=False)
    synthesizer.transition(
        N=n_steps_hypers,
        kernels=['hypers_distributions', 'hypers_row_divide'],
        progress=False)

    # Assert views are merged into one.
    if integration:
        assert len(synthesizer.crosscat.cgpms) == 1
    crp_output = synthesizer.crosscat.cgpms[0].cgpm_row_divide.outputs[0]

    # Check joint samples for all nominals.
    samples = synthesizer.crosscat.simulate(
        None, [crp_output, 0, 1, 50], N=250)
    if integration:
        check_sampled_data(samples, [0, 7], 3, 110)

    # Check joint samples for nominals [0, 2].
    samples_a = [s for s in samples if s[50] in [0, 2]]
    if integration:
        check_sampled_data(samples_a, [0, 7], 3, 45)

    # Check joint samples for nominals [1, 3].
    samples_b = [s for s in samples if s[50] in [1, 3]]
    if integration:
        check_sampled_data(samples_b, [0, 7], 3, 45)

    # Check conditional samples in correct quadrants: each nominal value is
    # paired with the single center passed to check_sampled_data.
    for z, center in [(0, 0), (1, 0), (2, 7), (3, 7)]:
        samples = synthesizer.crosscat.simulate(None, [0, 1], {50: z}, N=100)
        if integration:
            check_sampled_data(samples, [center], 3, 90)
def test_transition_hypers_basic():
    """Check hyperparameter transitions on the Normal components of a mixture.

    Three observations of output 1 are incorporated, the hyperparameters of
    the CGPMs responsible for output 1 are transitioned twice, and the test
    asserts that (a) the two transitions did not return identical results and
    (b) the mixture's ``logpdf_score`` increased.
    """
    prng = get_prng(2)
    component0 = Product([
        Poisson([0], [], hypers={'m': 100}, rng=prng),
        Normal([1], [], hypers={'m': 100}, rng=prng),
    ], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component0,
        rng=prng)

    # Make normal observations.
    for rowid, value in enumerate([100, 300, -300]):
        infinite_mixture.observe(rowid, {1: value})

    # Fetch log score before inference.
    log_score0 = infinite_mixture.logpdf_score()

    # Run inference.  `range` replaces the Python-2-only `xrange` so the test
    # also runs on Python 3; for two iterations the behavior is the same.
    normal_cgpms = get_cgpms_by_output_index(infinite_mixture, 1)
    grids_normal = transition_hyper_grids(normal_cgpms, 30)
    hypers_normal = [
        transition_hypers(normal_cgpms, grids_normal, prng)
        for _i in range(2)
    ]

    # The two transitions must not have produced identical hyperparameters.
    assert not all(hypers == hypers_normal[0] for hypers in hypers_normal)

    # Fetch log score after inference and compare.
    log_score1 = infinite_mixture.logpdf_score()
    assert log_score0 < log_score1
def test_flexible_mixture_three_component__ci_():
    """Hand a CRP-divided mixture of Normals on output 0 to run_mixture_test.

    The ``--integration`` option value is forwarded unchanged to the shared
    test driver.
    """
    prng = get_prng(2)
    row_divide = CRP([1], [], rng=prng)
    components_base = Normal([0], [], rng=prng)
    flexible_mixture = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    integration = pytest.config.getoption('--integration')
    run_mixture_test(flexible_mixture, integration, prng)
def get_crosscat(prng):
    """Return a Product holding one view over Normal outputs 0 and 1.

    The view is a FlexibleRowMixture whose row-divide CGPM is a CRP on
    output 2.  Every constructed object receives ``prng`` as its ``rng``.
    """
    row_divide = CRP([2], [], rng=prng)
    components_base = Product([
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
    ], rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    return Product(cgpms=[view], rng=prng)
def make_random_view(outputs, distributions, rng):
    """Build a FlexibleRowMixture view with a randomly numbered CRP output.

    Args:
        outputs: output indexes, paired positionally with ``distributions``.
        distributions: distribution specifications, each passed together with
            its output to ``make_random_primitive``.
        rng: random source; must provide ``randint`` and is passed to every
            constructed CGPM.

    Returns:
        The constructed FlexibleRowMixture.
    """
    # The row-divide output index is drawn at random below 2**32 - 1.
    crp_output = rng.randint(2**32 - 1)
    cgpm_row_divide = CRP([crp_output], [], rng=rng)

    # One primitive per (output, distribution) pair, in the given order.
    primitives = []
    for output, distribution in zip(outputs, distributions):
        primitives.append(make_random_primitive(output, distribution, rng))

    components_base = Product(primitives, rng=rng)
    return FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=components_base,
        rng=rng)
def get_crosscat_synthesizer(prng):
    """Return a GibbsCrossCat over a one-view model loaded with test data.

    The model is a Product with one FlexibleRowMixture view (CRP on output 2,
    Normal outputs 0 and 1).  Each row from ``make_bivariate_two_clusters`` is
    observed with its first two entries as outputs 0 and 1.
    """
    row_divide = CRP([2], [], rng=prng)
    components_base = Product([
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
    ], rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    crosscat = Product(cgpms=[view], rng=prng)

    # Incorporate every data row, using its position as the row id.
    data = make_bivariate_two_clusters(prng)
    for rowid, row in enumerate(data):
        observation = {0: row[0], 1: row[1]}
        crosscat.observe(rowid, observation)

    return GibbsCrossCat(crosscat)
def get_crosscat(prng):
    """Return a three-view Product used by the add/remove tests.

    Views (each a FlexibleRowMixture with its own CRP row divider):
      * CRP output -1: Normal outputs 0 and 1.
      * CRP output -2: Poisson output 2, Normal outputs 3 and 4.
      * CRP output -3: Categorical output 5 with ``k=4``.
    """
    # View over the two Normal outputs.
    view0 = FlexibleRowMixture(
        cgpm_row_divide=CRP([-1], [], rng=prng),
        cgpm_components_base=Product([
            Normal([0], [], rng=prng),
            Normal([1], [], rng=prng),
        ], rng=prng),
        rng=prng)

    # View mixing a Poisson output with two Normal outputs.
    view1 = FlexibleRowMixture(
        cgpm_row_divide=CRP([-2], [], rng=prng),
        cgpm_components_base=Product([
            Poisson([2], [], rng=prng),
            Normal([3], [], rng=prng),
            Normal([4], [], rng=prng),
        ], rng=prng),
        rng=prng)

    # View over a single Categorical output.
    view2 = FlexibleRowMixture(
        cgpm_row_divide=CRP([-3], [], rng=prng),
        cgpm_components_base=Product([
            Categorical([5], [], distargs={'k': 4}, rng=prng),
        ], rng=prng),
        rng=prng)

    views = [view0, view1, view2]
    return Product(views, rng=prng)
def test_product_mixture_walk():
    """Check get_cgpms_by_output_index on a FlexibleRowMixture.

    Before any observation only the base component CGPMs and the row divider
    are returned.  After one observation of output 0, each component output
    returns two CGPMs, with the base CGPM last.
    """
    prng = get_prng(2)
    component_base = Product([
        Poisson([0], [], hypers={'a': 10, 'b': 1}, rng=prng),
        Normal([1], [], hypers={'m': 100}, rng=prng),
    ], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component_base,
        rng=prng)
    base_poisson, base_normal = component_base.cgpms[0], component_base.cgpms[1]

    # Only the base CGPMs in the flexible mixture.
    found_poisson = get_cgpms_by_output_index(infinite_mixture, 0)
    found_normal = get_cgpms_by_output_index(infinite_mixture, 1)
    found_crp = get_cgpms_by_output_index(infinite_mixture, 2)
    assert found_poisson == [base_poisson]
    assert found_normal == [base_normal]
    assert found_crp == [cgpm_row_divide]

    infinite_mixture.observe(0, {0: 1})

    # New CGPMs in the flexible CGPM after observing.
    found_poisson = get_cgpms_by_output_index(infinite_mixture, 0)
    found_normal = get_cgpms_by_output_index(infinite_mixture, 1)
    assert len(found_poisson) == 2
    assert len(found_normal) == 2
    # The base CGPMs come last in each returned list.
    assert [found_poisson[-1]] == [base_poisson]
    assert [found_normal[-1]] == [base_normal]
    # Only the Poisson output was observed, so only it has a datum.
    assert found_poisson[0].N == 1
    assert found_normal[0].N == 0

    # The row divider recorded row 0 with value 0.
    found_crp = get_cgpms_by_output_index(infinite_mixture, 2)
    assert len(found_crp) == 1
    row_divider = found_crp[0]
    assert row_divider.N == 1
    assert row_divider.data[0] == 0

    # Misc. errors, no such output.
    with pytest.raises(Exception):
        get_cgpms_by_output_index(infinite_mixture, -1)
def test_add_remove():
    """Check add_cgpm/remove_cgpm on a FlexibleRowMixture.

    After every step the ``outputs`` of the new mixture and of all earlier
    mixtures are asserted, so the test also verifies that earlier mixtures
    keep their outputs unchanged.
    """
    prng = get_prng(2)
    mixture0 = FlexibleRowMixture(
        cgpm_row_divide=CRP([2], [], rng=prng),
        cgpm_components_base=Product([
            Normal([0], [], rng=prng),
            Normal([1], [], rng=prng),
        ], rng=prng),
        rng=prng)
    rows = [[0, .9], [.5, 1], [-.5, 1.2]]
    for rowid, (value0, value1) in enumerate(rows):
        mixture0.observe(rowid, {0: value0, 1: value1})

    # (mixture, expected outputs) for every mixture created so far.
    history = []

    def record_and_check(mixture, expected_outputs):
        # Register the new mixture, then re-verify all mixtures seen so far.
        history.append((mixture, expected_outputs))
        for seen, outputs in history:
            assert seen.outputs == outputs

    # The starting mixture is first checked together with mixture1.
    history.append((mixture0, [2, 0, 1]))

    mixture1 = remove_cgpm(mixture0, 0)
    record_and_check(mixture1, [2, 1])

    mixture2 = add_cgpm(mixture1, Normal([0], [], rng=prng))
    record_and_check(mixture2, [2, 1, 0])

    mixture3 = remove_cgpm(mixture2, 1)
    record_and_check(mixture3, [2, 0])

    mixture4 = remove_cgpm(mixture3, 0)
    record_and_check(mixture4, [2])

    with pytest.raises(Exception):
        # Cannot remove the cgpm_row_divide for a mixture.
        remove_cgpm(mixture2, 2)
def test_crosscat_add_remove():
    """Check add_cgpm/remove_cgpm on the Product returned by get_crosscat.

    A fourth view (CRP output -4, Categorical output 6) is added, then
    outputs -1 and 5 are removed in turn.  The expected ``outputs`` list is
    asserted after each step.
    """
    prng = get_prng(2)
    crosscat = get_crosscat(prng)

    # Add a new categorical view.
    extra_view = FlexibleRowMixture(
        cgpm_row_divide=CRP([-4], [], rng=prng),
        cgpm_components_base=Product([
            Categorical([6], [], distargs={'k': 4}, rng=prng),
        ], rng=prng),
        rng=prng)
    crosscat = add_cgpm(crosscat, extra_view)
    assert crosscat.outputs == [-1, 0, 1, -2, 2, 3, 4, -3, 5, -4, 6]

    # Removing output -1 also drops outputs 0 and 1 from the list.
    crosscat = remove_cgpm(crosscat, -1)
    assert crosscat.outputs == [-2, 2, 3, 4, -3, 5, -4, 6]

    # Removing output 5 also drops output -3 from the list.
    crosscat = remove_cgpm(crosscat, 5)
    assert crosscat.outputs == [-2, 2, 3, 4, -4, 6]
def test_crosscat_three_component_cpp__ci_():
    """Run run_crosscat_test using the ``transition_structure_cpp`` kernel.

    The inference callback runs 1000 structure steps when ``--integration``
    is set and 1 otherwise, followed by one distribution-hyperparameter
    transition and one row-divide-hyperparameter transition.
    """
    prng = get_prng(12)
    integration = pytest.config.getoption('--integration')

    row_divide = CRP([1], [], rng=prng)
    components_base = Product(cgpms=[Normal([0], [], rng=prng)], rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    crosscat = Product(cgpms=[view], rng=prng)

    def func_inference(crosscat):
        # Wrap the given crosscat, run the kernels, and return the synthesizer.
        synthesizer = GibbsCrossCat(crosscat)
        if integration:
            synthesizer.transition_structure_cpp(N=1000)
        else:
            synthesizer.transition_structure_cpp(N=1)
        synthesizer.transition_hypers_distributions()
        synthesizer.transition_hypers_row_divide()
        return synthesizer

    run_crosscat_test(crosscat, func_inference, integration, prng)
def test_transition_crp_mixture():
    """Check that row/hyperparameter transitions separate three clusters.

    The data are 60 draws: 20 from Normal(0, 2), 20 from Normal(30, 1) and
    20 from Normal(-30, 1), observed in that order as output 0.  After 50
    sweeps of row transitions (in random row order) and hyperparameter
    transitions, the cluster assignment (output 1) of every row is simulated
    and each block of 20 rows must agree on a single assignment.
    """
    prng = get_prng(2)
    data = np.concatenate((
        prng.normal(loc=0, scale=2, size=20),
        prng.normal(loc=30, scale=1, size=20),
        prng.normal(loc=-30, scale=1, size=20),
    ))
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=CRP([1], [], rng=prng),
        cgpm_components_base=Normal([0], [], rng=prng),
        rng=prng)
    for rowid, value in enumerate(data):
        infinite_mixture.observe(rowid, {0: value})

    # CGPMs and hyperparameter grids, keyed by output index.
    cgpms = {
        0: get_cgpms_by_output_index(infinite_mixture, 0),
        1: get_cgpms_by_output_index(infinite_mixture, 1),
    }
    grids = {
        0: transition_hyper_grids(cgpms[0], 30),
        1: transition_hyper_grids(cgpms[1], 30),
    }

    # `range` replaces the Python-2-only `xrange` so the test also runs on
    # Python 3; iteration behavior is the same.
    for _step in range(50):
        rowids = prng.permutation(range(len(data)))
        for rowid in rowids:
            transition_rows(infinite_mixture, rowid, prng)
        for output in infinite_mixture.outputs:
            transition_hypers(cgpms[output], grids[output], prng)

    # Simulate the cluster assignment of every row, block by block in the
    # order the data were generated.
    rowids = range(60)
    blocks = [rowids[0:20], rowids[20:40], rowids[40:60]]
    assignments = [
        [infinite_mixture.simulate(r, [1])[1] for r in block]
        for block in blocks
    ]

    # Within each block, more than int(0.95 * 20) == 19 rows (i.e. all 20)
    # must share the block's most common assignment.
    for block_assignments in assignments:
        mode = Counter(block_assignments).most_common(1)[0][0]
        n_agree = sum(a == mode for a in block_assignments)
        assert n_agree > int(0.95 * len(block_assignments))
def test_simple_product_as_chain():
    """Exercise a FlexibleRowMixture whose component base is a Chain.

    Covers logpdf identities on the empty mixture, logpdf with constraints
    after one observation, unobserve (including a repeated unobserve that
    must raise), an invalid cluster constraint, and a to_metadata /
    from_metadata round trip checked through unobserve.
    """
    prng = get_prng(2)
    component0 = Chain([
        Poisson([0], [], hypers={'a': 10, 'b': 1}, rng=prng),
        Normal([1], [], hypers={'m': 100}, rng=prng),
    ], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component0,
        rng=prng)
    assert infinite_mixture.cgpm_row_divide.support() == [0]

    # Test logpdf identities.
    lp_marginal = infinite_mixture.logpdf(None, {0: 1})
    assert lp_marginal < 0
    lp_cluster0 = infinite_mixture.logpdf(None, {0: 1, 2: 0})
    assert np.allclose(lp_marginal, lp_cluster0)
    lp_cluster1 = infinite_mixture.logpdf(None, {0: 1, 2: 1})
    assert lp_cluster1 == -float('inf')

    # Add an observation.
    infinite_mixture.observe(0, {1: 100})
    lps = [
        infinite_mixture.logpdf(None, {1: 100, 2: cluster}, constraints={0: 1})
        for cluster in (0, 1, 2)
    ]
    assert lps[1] < lps[0]
    assert lps[2] == float('-inf')

    # Remove observation.
    observation = infinite_mixture.unobserve(0)
    assert observation == ({1: 100, 2: 0}, {})

    # Remove observation again.
    with pytest.raises(Exception):
        infinite_mixture.unobserve(0)

    # Add more observations.
    observed = [{1: 100}, {1: 300}, {0: 2}]
    for rowid, values in enumerate(observed):
        infinite_mixture.observe(rowid, values)

    # Constrained cluster has zero density.  The same query is issued twice.
    for _attempt in range(2):
        with pytest.raises(ValueError):
            infinite_mixture.logpdf(None, {0: 1}, constraints={2: 10})

    # Convert to/from metadata and assert unobserves return correct data.
    metadata = infinite_mixture.to_metadata()
    infinite_mixture2 = FlexibleRowMixture.from_metadata(metadata, prng)
    for rowid, values in enumerate(observed):
        removed = infinite_mixture2.unobserve(rowid)
        # Expected: the observed values plus the cluster assignment stored by
        # the first mixture's row divider for this row.
        expected_values = dict(values)
        expected_values[2] = infinite_mixture.cgpm_row_divide.data[rowid]
        assert removed == (expected_values, {})