def get_crosscat(prng):
    """Return a one-view crosscat Product over Normal outputs 0 and 1.

    The single view is a FlexibleRowMixture whose row divider is a CRP with
    output 2. Every constituent CGPM is given the same generator `prng`.
    """
    # Objects are created in the same order as the nested-call form would
    # evaluate them: the CRP first, then the Normals, then their Product.
    row_divide = CRP([2], [], rng=prng)
    columns = [
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
    ]
    components_base = Product(columns, rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    return Product(cgpms=[view], rng=prng)
def test_simple_product():
    """Check outputs, inputs, simulate and logpdf of a three-column Product."""
    prng = get_prng(2)
    columns = [
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
        Categorical([2], [], distargs={'k': 4}, rng=prng),
    ]
    product = Product(columns, prng)

    # The product exposes the union of its columns' outputs and no inputs.
    assert product.outputs == [0, 1, 2]
    assert product.inputs == []

    # Query order differs from output order; only the key set is checked.
    query = [1, 2, 0]
    sample = product.simulate(None, query)
    assert set(sample.keys()) == set(query)

    logp = product.logpdf(None, sample)
    assert logp < 0
def test_finite_mixture_two_component__ci_():
    """Run the shared mixture test on a two-component FiniteRowMixture."""
    prng = get_prng(2)
    # NOTE(review): the pytest.config global was removed in pytest 5.0;
    # this lookup presumably needs request.config on newer pytest -- confirm.
    integration = pytest.config.getoption('--integration')

    # Row divider first, then the two identical component products, which
    # is the order the nested constructor call would build them in.
    row_divide = Categorical([2], [], distargs={'k': 2}, rng=prng)
    components = [
        Product([
            Normal([0], [], rng=prng),
            Normal([1], [], rng=prng),
        ], rng=prng)
        for _component in range(2)
    ]
    finite_mixture = FiniteRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components=components,
        rng=prng)

    run_mixture_test(finite_mixture, integration, prng)
def get_crosscat_synthesizer(prng):
    """Return a GibbsCrossCat wrapping a one-view crosscat with data observed.

    The crosscat has Normal outputs 0 and 1 under a CRP row divider on
    output 2. Each row from make_bivariate_two_clusters(prng) is observed
    with its enumeration index as the rowid.
    """
    row_divide = CRP([2], [], rng=prng)
    components_base = Product([
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
    ], rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    crosscat = Product(cgpms=[view], rng=prng)

    # Only the first two entries of each data row are used.
    rows = make_bivariate_two_clusters(prng)
    for rowid, row in enumerate(rows):
        observation = {0: row[0], 1: row[1]}
        crosscat.observe(rowid, observation)

    return GibbsCrossCat(crosscat)
def test_transition_hypers_basic():
    """Check that hyperparameter transitions vary and raise the log score.

    Builds a flexible mixture of (Poisson, Normal) components, observes
    three values on the Normal output, runs two hyperparameter transitions
    on the Normal CGPMs, and asserts that the results differ between runs
    and that the mixture's log score increased.
    """
    prng = get_prng(2)
    component0 = Product([
        Poisson([0], [], hypers={'m': 100}, rng=prng),
        Normal([1], [], hypers={'m': 100}, rng=prng),
    ], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component0,
        rng=prng)

    # Make normal observations.
    infinite_mixture.observe(0, {1: 100})
    infinite_mixture.observe(1, {1: 300})
    infinite_mixture.observe(2, {1: -300})

    # Fetch log score.
    log_score0 = infinite_mixture.logpdf_score()

    # Run inference.
    normal_cgpms = get_cgpms_by_output_index(infinite_mixture, 1)
    grids_normal = transition_hyper_grids(normal_cgpms, 30)
    # `range` instead of the Python-2-only `xrange`: identical iteration,
    # and it does not raise NameError on Python 3.
    hypers_normal = [
        transition_hypers(normal_cgpms, grids_normal, prng)
        for _i in range(2)
    ]

    # The two transition results must not all be equal to the first one.
    assert not all(hypers == hypers_normal[0] for hypers in hypers_normal)
    log_score1 = infinite_mixture.logpdf_score()
    assert log_score0 < log_score1
def test_transition_rows_fixed_mixture():
    """Check that row transitions move rows to their matching components.

    Three components have Normal hyperparameter `m` set to distinct values
    per output. All six rows are first observed with the row-divide output
    (2) fixed to 0; after repeated transition_rows sweeps each pair of rows
    is expected to report the component index noted beside its observation.
    """
    prng = get_prng(2)
    component0 = Product([
        Normal([0], [], hypers={'m': 1000}, rng=prng),
        Normal([1], [], hypers={'m': 0}, rng=prng),
    ], rng=prng)
    component1 = Product([
        Normal([0], [], hypers={'m': -1000}, rng=prng),
        Normal([1], [], hypers={'m': 1000}, rng=prng),
    ], rng=prng)
    component2 = Product([
        Normal([0], [], hypers={'m': 0}, rng=prng),
        Normal([1], [], hypers={'m': -100}, rng=prng),
    ], rng=prng)
    cgpm_row_divide = Categorical([2], [], distargs={'k': 3}, rng=prng)
    finite_mixture = FiniteRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components=[component0, component1, component2],
        rng=prng)

    # (observation, component expected after the transitions), by rowid.
    rows = [
        # For component 0.
        ({0: 1000, 1: 0, 2: 0}, 0),
        ({0: 990, 1: -10, 2: 0}, 0),
        # For component 1.
        ({0: -1000, 1: 1000, 2: 0}, 1),
        ({0: -990, 1: 990, 2: 0}, 1),
        # For component 2.
        ({0: 0, 1: -1000, 2: 0}, 2),
        ({0: 10, 1: -990, 2: 0}, 2),
    ]
    for rowid, (observation, _expected) in enumerate(rows):
        finite_mixture.observe(rowid, observation)

    # Confirm all rows in component 0.
    for rowid in range(len(rows)):
        assert finite_mixture.simulate(rowid, [2]) == {2: 0}

    # Run transitions. `range` replaces the Python-2-only `xrange`, which
    # also matches the inner loop.
    for _i in range(10):
        for rowid in range(len(rows)):
            transition_rows(finite_mixture, rowid, prng)

    # Confirm all rows in correct components.
    for rowid, (_observation, expected) in enumerate(rows):
        assert finite_mixture.simulate(rowid, [2]) == {2: expected}
def test_crosscat_three_component_cpp__ci_():
    """Run the shared crosscat test using the cpp structure transition."""
    prng = get_prng(12)
    # NOTE(review): the pytest.config global was removed in pytest 5.0;
    # this lookup presumably needs request.config on newer pytest -- confirm.
    integration = pytest.config.getoption('--integration')

    # One view: a single Normal on output 0 under a CRP on output 1.
    row_divide = CRP([1], [], rng=prng)
    components_base = Product(cgpms=[Normal([0], [], rng=prng)], rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    crosscat = Product(cgpms=[view], rng=prng)

    def func_inference(crosscat):
        """Wrap `crosscat` in a GibbsCrossCat, run transitions, return it."""
        # A single step unless the integration flag is set.
        if integration:
            n_step = 1000
        else:
            n_step = 1
        synthesizer = GibbsCrossCat(crosscat)
        synthesizer.transition_structure_cpp(N=n_step)
        synthesizer.transition_hypers_distributions()
        synthesizer.transition_hypers_row_divide()
        return synthesizer

    run_crosscat_test(crosscat, func_inference, integration, prng)
def test_flexible_mixture_two_component__ci_():
    """Run the shared mixture test on a FlexibleRowMixture of two Normals."""
    prng = get_prng(2)
    # NOTE(review): the pytest.config global was removed in pytest 5.0;
    # this lookup presumably needs request.config on newer pytest -- confirm.
    integration = pytest.config.getoption('--integration')

    row_divide = CRP([2], [], rng=prng)
    components_base = Product([
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
    ], rng=prng)
    flexible_mixture = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)

    run_mixture_test(flexible_mixture, integration, prng)
def make_random_crosscat(args):
    """Build a crosscat Product with a randomly drawn view partition.

    Args:
        args: A single 5-sequence `(outputs, distributions, Cd, Ci, seed)`.
            `outputs` and `distributions` are indexed in parallel; `Cd` and
            `Ci` are forwarded unchanged to make_random_partition; `seed`
            is passed to get_prng.

    Returns:
        A Product whose cgpms are the views from make_random_view, one per
        block of the partition.

    The signature used to unpack the tuple directly in the parameter list,
    which is a SyntaxError on Python 3 (PEP 3113). Callers still pass one
    tuple; it is now unpacked in the body.
    """
    outputs, distributions, Cd, Ci, seed = args
    rng = get_prng(seed)
    alpha = rng.gamma(2, 1)
    N = len(outputs)
    partition = make_random_partition(N, alpha, Cd, Ci, rng)
    # Each block is iterated as indices into outputs/distributions.
    views = [
        make_random_view(
            outputs=[outputs[i] for i in block],
            distributions=[distributions[i] for i in block],
            rng=rng)
        for block in partition
    ]
    crosscat = Product(cgpms=views, rng=rng)
    return crosscat
def make_random_view(outputs, distributions, rng):
    """Return a FlexibleRowMixture view over the given outputs.

    The CRP row divider's output index is drawn from `rng`; one primitive
    per (output, distribution) pair is made with make_random_primitive and
    the primitives are combined into the Product used as the component base.
    """
    # The CRP output draw happens before any primitive is constructed.
    crp_output = rng.randint(2**32 - 1)
    row_divide = CRP([crp_output], [], rng=rng)

    primitives = []
    for output, distribution in zip(outputs, distributions):
        primitives.append(make_random_primitive(output, distribution, rng))

    components_base = Product(primitives, rng=rng)
    return FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=rng)
def test_simple_product_finite_array():
    """Exercise a Product of two FiniteArray CGPMs selected by indexers.

    Each array holds three Normals with different `m` hyperparameters and
    takes an indexer input (128 and 129). The test checks the required
    inputs, that samples land near the selected component's `m`, and that
    the product logpdf matches the sum of the selected components' logpdfs.
    """
    prng = get_prng(2)

    # First array: Normals on output 0, selected through input 128.
    array0_components = [
        Normal([0], [], hypers={'m': mean}, rng=prng)
        for mean in (100, -100, 0)
    ]
    indexer_0 = 128
    cgpm_array_0 = FiniteArray(
        cgpms=array0_components,
        indexer=indexer_0,
        rng=prng)
    with pytest.raises(Exception):
        # Missing indexer_0 as a required input.
        cgpm_array_0.simulate(None, [0])

    # Second array: Normals on output 1, selected through input 129.
    array1_components = [
        Normal([1], [], hypers={'m': mean}, rng=prng)
        for mean in (1000, -1000, 50)
    ]
    indexer_1 = 129
    cgpm_array_1 = FiniteArray(
        cgpms=array1_components,
        indexer=indexer_1,
        rng=prng)

    product = Product([cgpm_array_0, cgpm_array_1], prng)
    assert product.outputs == [0, 1]
    assert product.inputs == [indexer_0, indexer_1]

    with pytest.raises(Exception):
        # Missing indexer_0.
        product.simulate(None, [0, 1], inputs={indexer_1: 0})
    with pytest.raises(Exception):
        # Missing indexer_1.
        product.simulate(None, [0, 1], inputs={indexer_0: 1})
    # Should work, since output 1 is not being queried.
    product.simulate(None, [0], inputs={indexer_0: 1})

    # Sampling from correct components.
    selection = {indexer_0: 1, indexer_1: 0}
    sample = product.simulate(None, [0, 1], inputs=selection)
    assert abs(-100 - sample[0]) < 10
    assert abs(1000 - sample[1]) < 10

    # The joint logpdf equals the sum over the two selected components.
    logp = product.logpdf(None, sample, inputs={indexer_0: 1, indexer_1: 0})
    expected_logp = (
        array0_components[1].logpdf(None, {0: sample[0]})
        + array1_components[0].logpdf(None, {1: sample[1]}))
    assert np.allclose(logp, expected_logp)
def get_crosscat(prng):
    """Return a three-view crosscat Product over outputs 0 through 5.

    View 0 (CRP output -1) holds Normals on outputs 0 and 1; view 1 (CRP
    output -2) holds a Poisson on 2 and Normals on 3 and 4; view 2 (CRP
    output -3) holds a four-category Categorical on 5.
    """
    # Within each view the CRP is created before its column CGPMs, and the
    # views are created in order, as in the nested-call form.
    row_divide0 = CRP([-1], [], rng=prng)
    base0 = Product([
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
    ], rng=prng)
    view0 = FlexibleRowMixture(
        cgpm_row_divide=row_divide0,
        cgpm_components_base=base0,
        rng=prng)

    row_divide1 = CRP([-2], [], rng=prng)
    base1 = Product([
        Poisson([2], [], rng=prng),
        Normal([3], [], rng=prng),
        Normal([4], [], rng=prng),
    ], rng=prng)
    view1 = FlexibleRowMixture(
        cgpm_row_divide=row_divide1,
        cgpm_components_base=base1,
        rng=prng)

    row_divide2 = CRP([-3], [], rng=prng)
    base2 = Product([
        Categorical([5], [], distargs={'k': 4}, rng=prng),
    ], rng=prng)
    view2 = FlexibleRowMixture(
        cgpm_row_divide=row_divide2,
        cgpm_components_base=base2,
        rng=prng)

    return Product([view0, view1, view2], rng=prng)
def test_product_mixture_constraints():
    """Check constrained and unconstrained simulation from a finite mixture.

    Three components place the Normal hyperparameter `m` at 1000, -1000
    and 0 for both outputs. The same checks are run on the mixture and on a
    copy rebuilt through to_metadata/from_metadata.
    """
    prng = get_prng(2)
    component0 = Product([
        Normal([0], [], hypers={'m': 1000}, rng=prng),
        Normal([1], [], hypers={'m': 1000}, rng=prng),
    ], rng=prng)
    component1 = Product([
        Normal([0], [], hypers={'m': -1000}, rng=prng),
        Normal([1], [], hypers={'m': -1000}, rng=prng),
    ], rng=prng)
    component2 = Product([
        Normal([0], [], hypers={'m': 0}, rng=prng),
        Normal([1], [], hypers={'m': 0}, rng=prng),
    ], rng=prng)
    cgpm_row_divide = Categorical([2], [], distargs={'k': 3}, rng=prng)
    finite_mixture = FiniteRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components=[component0, component1, component2],
        rng=prng)

    def run_mixture_tests(mixture):
        """Run the three simulation checks against `mixture`."""
        N = 100
        # Simulate from component 1.
        samples = mixture.simulate(None, [0], constraints={2: 1}, N=N)
        assert len([s for s in samples if -1100 < s[0] < -900]) > int(.9*N)
        # Simulate from random components.
        samples = mixture.simulate(None, [0], N=N)
        # The previous range `-900 < s[0] < -1100` can never hold, so the
        # count was always 0 and the assertion checked nothing. The range
        # now matches the component-1 window used above.
        # NOTE(review): presumably about a third of unconstrained draws
        # come from component 1, so the old bound of .33*N would sit at the
        # expected count; .5*N is used to keep the check stable -- confirm.
        assert len([s for s in samples if -1100 < s[0] < -900]) < int(.5*N)
        # Simulate (implicitly) from component 0.
        samples = mixture.simulate(None, [1, 2], constraints={0: 1000}, N=N)
        assert len([s for s in samples if 900 < s[1] < 1100]) > int(.9*N)
        assert len([s for s in samples if s[2] == 0]) == N

    # Run tests on finite_mixture.
    run_mixture_tests(finite_mixture)
    # Run tests after to/from metadata conversion.
    metadata = finite_mixture.to_metadata()
    finite_mixture2 = FiniteRowMixture.from_metadata(metadata, prng)
    run_mixture_tests(finite_mixture2)
def test_crosscat_add_remove():
    """Check crosscat outputs after adding a view and removing CGPMs."""
    prng = get_prng(2)
    crosscat = get_crosscat(prng)

    # A fourth view: CRP on output -4 over a Categorical on output 6.
    row_divide4 = CRP([-4], [], rng=prng)
    base4 = Product([
        Categorical([6], [], distargs={'k': 4}, rng=prng),
    ], rng=prng)
    infinite_mixture4 = FlexibleRowMixture(
        cgpm_row_divide=row_divide4,
        cgpm_components_base=base4,
        rng=prng)

    # The new view's outputs are appended after the existing ones.
    crosscat = add_cgpm(crosscat, infinite_mixture4)
    assert crosscat.outputs == [-1, 0, 1, -2, 2, 3, 4, -3, 5, -4, 6]

    # Removing by output -1 drops outputs -1, 0 and 1.
    crosscat = remove_cgpm(crosscat, -1)
    assert crosscat.outputs == [-2, 2, 3, 4, -3, 5, -4, 6]

    # Removing by output 5 drops outputs -3 and 5.
    crosscat = remove_cgpm(crosscat, 5)
    assert crosscat.outputs == [-2, 2, 3, 4, -4, 6]
def test_crosscat_two_component_nominal__ci_():
    """Run Gibbs inference on two-cluster data plus a nominal column.

    The sample-quality checks and the merged-view assertion only run when
    the integration flag is set; otherwise the test only exercises the code
    paths with a single transition each.
    """
    prng = get_prng(10)
    # NOTE(review): the pytest.config global was removed in pytest 5.0;
    # this lookup presumably needs request.config on newer pytest -- confirm.
    integration = pytest.config.getoption('--integration')

    # Build CGPM with adversarial initialization.
    view_a = FlexibleRowMixture(
        cgpm_row_divide=CRP([-1], [], rng=prng),
        cgpm_components_base=Product([
            Normal([0], [], rng=prng),
        ], rng=prng),
        rng=prng)
    view_b = FlexibleRowMixture(
        cgpm_row_divide=CRP([-2], [], rng=prng),
        cgpm_components_base=Product([
            Normal([1], [], rng=prng),
            Categorical([50], [], distargs={'k': 4}, rng=prng),
        ], rng=prng),
        rng=prng)
    crosscat = Product([view_a, view_b], rng=prng)

    # Fetch data and add a nominal variable.
    data_xy = make_bivariate_two_clusters(prng)
    data_z = np.zeros(len(data_xy))
    # Label consecutive runs of 15 rows with 0, 1, 2, 3.
    for label in range(4):
        data_z[15 * label:15 * (label + 1)] = label
    data = np.column_stack((data_xy, data_z))

    # Observe.
    for rowid, row in enumerate(data):
        crosscat.observe(rowid, {0: row[0], 1: row[1], 50: row[2]})

    # Run inference.
    n_all_kernels = 50 if integration else 1
    n_hyper_kernels = 100 if integration else 1
    synthesizer = GibbsCrossCat(crosscat)
    synthesizer.transition(N=n_all_kernels, progress=False)
    synthesizer.transition(
        N=n_hyper_kernels,
        kernels=['hypers_distributions', 'hypers_row_divide'],
        progress=False)

    # Assert views are merged into one.
    assert not integration or len(synthesizer.crosscat.cgpms) == 1
    crp_output = synthesizer.crosscat.cgpms[0].cgpm_row_divide.outputs[0]

    # Check joint samples for all nominals.
    samples = synthesizer.crosscat.simulate(None, [crp_output, 0, 1, 50], N=250)
    if integration:
        check_sampled_data(samples, [0, 7], 3, 110)

    # Check joint samples for nominals [0, 2].
    samples_a = [s for s in samples if s[50] in [0, 2]]
    if integration:
        check_sampled_data(samples_a, [0, 7], 3, 45)

    # Check joint samples for nominals [1, 3].
    samples_b = [s for s in samples if s[50] in [1, 3]]
    if integration:
        check_sampled_data(samples_b, [0, 7], 3, 45)

    # Check conditional samples in correct quadrants.
    for z, mean in ((0, 0), (1, 0), (2, 7), (3, 7)):
        samples = synthesizer.crosscat.simulate(None, [0, 1], {50: z}, N=100)
        if integration:
            check_sampled_data(samples, [mean], 3, 90)
def test_add_remove():
    """Check add_cgpm/remove_cgpm on a mixture leave earlier mixtures intact.

    After every operation the outputs of the new mixture and of all
    previously created mixtures are re-asserted, oldest first.
    """
    prng = get_prng(2)
    mixture0 = FlexibleRowMixture(
        cgpm_row_divide=CRP([2], [], rng=prng),
        cgpm_components_base=Product([
            Normal([0], [], rng=prng),
            Normal([1], [], rng=prng),
        ], rng=prng),
        rng=prng)
    rows = [[0, .9], [.5, 1], [-.5, 1.2]]
    for rowid, row in enumerate(rows):
        mixture0.observe(rowid, {0: row[0], 1: row[1]})

    # (mixture, expected outputs) pairs in creation order. The starting
    # mixture is registered without a check; it is first verified after
    # the first removal.
    history = [(mixture0, [2, 0, 1])]

    def register_and_verify(mixture, expected_outputs):
        """Record the new mixture, then assert outputs of all so far."""
        history.append((mixture, expected_outputs))
        for earlier, outputs in history:
            assert earlier.outputs == outputs

    mixture1 = remove_cgpm(mixture0, 0)
    register_and_verify(mixture1, [2, 1])

    mixture2 = add_cgpm(mixture1, Normal([0], [], rng=prng))
    register_and_verify(mixture2, [2, 1, 0])

    mixture3 = remove_cgpm(mixture2, 1)
    register_and_verify(mixture3, [2, 0])

    mixture4 = remove_cgpm(mixture3, 0)
    register_and_verify(mixture4, [2])

    with pytest.raises(Exception):
        # Cannot remove the cgpm_row_divide for a mixture.
        remove_cgpm(mixture2, 2)
def test_product_mixture_walk():
    """Check get_cgpms_by_output_index before and after an observation."""
    prng = get_prng(2)
    poisson_base = Poisson([0], [], hypers={'a': 10, 'b': 1}, rng=prng)
    normal_base = Normal([1], [], hypers={'m': 100}, rng=prng)
    component_base = Product([poisson_base, normal_base], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component_base,
        rng=prng)

    # Only the base CGPMs in the flexible mixture.
    found_poisson = get_cgpms_by_output_index(infinite_mixture, 0)
    found_normal = get_cgpms_by_output_index(infinite_mixture, 1)
    found_crp = get_cgpms_by_output_index(infinite_mixture, 2)
    assert found_poisson == [component_base.cgpms[0]]
    assert found_normal == [component_base.cgpms[1]]
    assert found_crp == [cgpm_row_divide]

    # Observe a value for the Poisson output only.
    infinite_mixture.observe(0, {0: 1})

    # New CGPMs in the flexible CGPM after observing.
    found_poisson = get_cgpms_by_output_index(infinite_mixture, 0)
    found_normal = get_cgpms_by_output_index(infinite_mixture, 1)
    assert len(found_poisson) == len(found_normal) == 2
    # The base CGPMs are listed last.
    assert [found_poisson[-1]] == [component_base.cgpms[0]]
    assert [found_normal[-1]] == [component_base.cgpms[1]]
    # Only the Poisson saw data in the first entry.
    assert found_poisson[0].N == 1
    assert found_normal[0].N == 0

    found_crp = get_cgpms_by_output_index(infinite_mixture, 2)
    assert len(found_crp) == 1
    assert found_crp[0].N == 1
    assert found_crp[0].data[0] == 0

    # Misc. errors, no such output.
    with pytest.raises(Exception):
        get_cgpms_by_output_index(infinite_mixture, -1)