def test_crosscat_two_component_nominal__ci_():
    """Fit a two-view crosscat to bivariate data plus one nominal column.

    The long inference runs and every sample check are only performed when
    pytest is invoked with ``--integration``; otherwise each transition
    call runs for a single step and only the plumbing is exercised.
    """
    prng = get_prng(10)
    # NOTE(review): ``pytest.config`` was removed in pytest 5.0 -- confirm the
    # pinned pytest version still provides it.
    integration = pytest.config.getoption('--integration')

    # Adversarial initialization: output 0 is alone in the first view while
    # outputs 1 and 50 (the nominal) share the second view.
    view_numeric = FlexibleRowMixture(
        cgpm_row_divide=CRP([-1], [], rng=prng),
        cgpm_components_base=Product([Normal([0], [], rng=prng)], rng=prng),
        rng=prng)
    view_mixed = FlexibleRowMixture(
        cgpm_row_divide=CRP([-2], [], rng=prng),
        cgpm_components_base=Product(
            [
                Normal([1], [], rng=prng),
                Categorical([50], [], distargs={'k': 4}, rng=prng),
            ],
            rng=prng),
        rng=prng)
    crosscat = Product([view_numeric, view_mixed], rng=prng)

    # Fetch data and append a nominal column: rows 0-14 get 0, 15-29 get 1,
    # 30-44 get 2 and 45-59 get 3.
    data_xy = make_bivariate_two_clusters(prng)
    data_z = np.zeros(len(data_xy))
    for label, start in enumerate(range(0, 60, 15)):
        data_z[start:start + 15] = label
    data = np.column_stack((data_xy, data_z))

    # Observe every row.
    for rowid, row in enumerate(data):
        observation = {0: row[0], 1: row[1], 50: row[2]}
        crosscat.observe(rowid, observation)

    # Run inference: default kernels first, then hyperparameter kernels only.
    steps_default = 50 if integration else 1
    steps_hypers = 100 if integration else 1
    synthesizer = GibbsCrossCat(crosscat)
    synthesizer.transition(N=steps_default, progress=False)
    synthesizer.transition(
        N=steps_hypers,
        kernels=['hypers_distributions', 'hypers_row_divide'],
        progress=False)

    # Assert views are merged into one.
    if integration:
        assert len(synthesizer.crosscat.cgpms) == 1
    crp_output = synthesizer.crosscat.cgpms[0].cgpm_row_divide.outputs[0]

    # Check joint samples for all nominals.
    samples = synthesizer.crosscat.simulate(
        None, [crp_output, 0, 1, 50], N=250)
    if integration:
        check_sampled_data(samples, [0, 7], 3, 110)

    # Check joint samples for nominals [0, 2].
    samples_a = [sample for sample in samples if sample[50] in (0, 2)]
    if integration:
        check_sampled_data(samples_a, [0, 7], 3, 45)

    # Check joint samples for nominals [1, 3].
    samples_b = [sample for sample in samples if sample[50] in (1, 3)]
    if integration:
        check_sampled_data(samples_b, [0, 7], 3, 45)

    # Check conditional samples in correct quadrants; each pair is
    # (nominal value, mean handed to check_sampled_data).
    for z, mean in ((0, 0), (1, 0), (2, 7), (3, 7)):
        samples = synthesizer.crosscat.simulate(None, [0, 1], {50: z}, N=100)
        if integration:
            check_sampled_data(samples, [mean], 3, 90)
# Example no. 2
# 0
def test_transition_hypers_basic():
    """Transition the hypers of output 1 twice and compare the log scores.

    The two transitions must not return identical results, and the
    mixture's log score afterwards must exceed the score before inference.
    """
    prng = get_prng(2)
    poisson = Poisson([0], [], hypers={'m': 100}, rng=prng)
    normal = Normal([1], [], hypers={'m': 100}, rng=prng)
    component0 = Product([poisson, normal], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component0,
        rng=prng)

    # Make normal observations (output 1 only).
    for rowid, value in enumerate([100, 300, -300]):
        infinite_mixture.observe(rowid, {1: value})

    # Fetch log score before inference.
    score_before = infinite_mixture.logpdf_score()

    # Run inference on the cgpms attached to output 1.
    normal_cgpms = get_cgpms_by_output_index(infinite_mixture, 1)
    grids_normal = transition_hyper_grids(normal_cgpms, 30)
    hypers_normal = []
    for _i in xrange(2):
        hypers_normal.append(
            transition_hypers(normal_cgpms, grids_normal, prng))

    # At least one result has to differ from the first one.
    first = hypers_normal[0]
    assert any(not (hypers == first) for hypers in hypers_normal)

    score_after = infinite_mixture.logpdf_score()
    assert score_before < score_after
def test_flexible_mixture_three_component__ci_():
    """Hand a Normal-based FlexibleRowMixture to ``run_mixture_test``."""
    prng = get_prng(2)
    row_divide = CRP([1], [], rng=prng)
    components_base = Normal([0], [], rng=prng)
    flexible_mixture = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    # The --integration command line flag is forwarded to the shared runner.
    integration = pytest.config.getoption('--integration')
    run_mixture_test(flexible_mixture, integration, prng)
def get_crosscat(prng):
    """Return a Product wrapping one view over Normal outputs 0 and 1.

    The view's row-divide CRP uses output 2; ``prng`` is passed to every
    constructed cgpm.
    """
    row_divide = CRP([2], [], rng=prng)
    normals = [Normal([output], [], rng=prng) for output in (0, 1)]
    components_base = Product(normals, rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    return Product(cgpms=[view], rng=prng)
# Example no. 5
# 0
def make_random_view(outputs, distributions, rng):
    """Build a FlexibleRowMixture whose row-divide output is drawn from rng.

    ``outputs`` and ``distributions`` are paired element-wise with ``zip``
    and each pair is turned into a primitive by ``make_random_primitive``.
    """
    # Draw the output index used by the row-divide CRP.
    crp_output = rng.randint(2**32 - 1)
    row_divide = CRP([crp_output], [], rng=rng)
    primitives = []
    for output, distribution in zip(outputs, distributions):
        primitives.append(make_random_primitive(output, distribution, rng))
    components_base = Product(primitives, rng=rng)
    return FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=rng)
# Example no. 6
# 0
def get_crosscat_synthesizer(prng):
    """Return a GibbsCrossCat over a one-view crosscat loaded with data.

    The rows come from ``make_bivariate_two_clusters(prng)``; columns 0 and
    1 of every row are observed as outputs 0 and 1.
    """
    row_divide = CRP([2], [], rng=prng)
    normals = [Normal([output], [], rng=prng) for output in (0, 1)]
    components_base = Product(normals, rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    crosscat = Product(cgpms=[view], rng=prng)
    for rowid, row in enumerate(make_bivariate_two_clusters(prng)):
        observation = {0: row[0], 1: row[1]}
        crosscat.observe(rowid, observation)
    return GibbsCrossCat(crosscat)
# Example no. 7
# 0
def get_crosscat(prng):
    """Return a Product of three views covering outputs 0 through 5.

    View layout (row-divide output -> member outputs):
      -1 -> Normal 0, Normal 1
      -2 -> Poisson 2, Normal 3, Normal 4
      -3 -> Categorical 5 (distargs k=4)
    """
    def build_view(crp_output, specs):
        # The row divider is created before the primitives, and the
        # primitives in list order, so constructor calls happen in the same
        # sequence as writing each view out by hand.
        row_divide = CRP([crp_output], [], rng=prng)
        primitives = [
            primitive([output], [], rng=prng, **extra)
            for primitive, output, extra in specs
        ]
        return FlexibleRowMixture(
            cgpm_row_divide=row_divide,
            cgpm_components_base=Product(primitives, rng=prng),
            rng=prng)

    view0 = build_view(-1, [
        (Normal, 0, {}),
        (Normal, 1, {}),
    ])
    view1 = build_view(-2, [
        (Poisson, 2, {}),
        (Normal, 3, {}),
        (Normal, 4, {}),
    ])
    view2 = build_view(-3, [
        (Categorical, 5, {'distargs': {'k': 4}}),
    ])
    return Product([view0, view1, view2], rng=prng)
# Example no. 8
# 0
def test_product_mixture_walk():
    """Check what ``get_cgpms_by_output_index`` returns for a mixture.

    The lookup is exercised before and after a single observation, and with
    an output index (-1) that the mixture does not have.
    """
    prng = get_prng(2)
    poisson = Poisson([0], [], hypers={'a': 10, 'b': 1}, rng=prng)
    normal = Normal([1], [], hypers={'m': 100}, rng=prng)
    component_base = Product([poisson, normal], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component_base,
        rng=prng)

    # Only the base CGPMs in the flexible mixture.
    found_poisson = get_cgpms_by_output_index(infinite_mixture, 0)
    found_normal = get_cgpms_by_output_index(infinite_mixture, 1)
    found_crp = get_cgpms_by_output_index(infinite_mixture, 2)
    assert found_poisson == [component_base.cgpms[0]]
    assert found_normal == [component_base.cgpms[1]]
    assert found_crp == [cgpm_row_divide]

    infinite_mixture.observe(0, {0: 1})

    # New CGPMs in the flexible CGPM after observing; the base CGPMs are
    # still reported, as the last entry of each list.
    found_poisson = get_cgpms_by_output_index(infinite_mixture, 0)
    found_normal = get_cgpms_by_output_index(infinite_mixture, 1)
    assert len(found_poisson) == len(found_normal)
    assert len(found_normal) == 2
    assert [found_poisson[-1]] == [component_base.cgpms[0]]
    assert [found_normal[-1]] == [component_base.cgpms[1]]
    # Only output 0 was observed, so only the Poisson has a data point.
    assert found_poisson[0].N == 1
    assert found_normal[0].N == 0

    found_crp = get_cgpms_by_output_index(infinite_mixture, 2)
    assert len(found_crp) == 1
    row_divider = found_crp[0]
    assert row_divider.N == 1
    assert row_divider.data[0] == 0

    # Misc. errors, no such output.
    with pytest.raises(Exception):
        get_cgpms_by_output_index(infinite_mixture, -1)
# Example no. 9
# 0
def test_add_remove():
    """Check ``add_cgpm`` / ``remove_cgpm`` on a FlexibleRowMixture.

    After every step the outputs of all earlier mixtures are re-checked, so
    the test also verifies that the inputs are left unchanged.
    """
    def check_outputs(*pairs):
        # Each pair is (mixture, expected list of outputs).
        for mixture, expected in pairs:
            assert mixture.outputs == expected

    prng = get_prng(2)
    row_divide = CRP([2], [], rng=prng)
    normals = [Normal([output], [], rng=prng) for output in (0, 1)]
    mixture0 = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=Product(normals, rng=prng),
        rng=prng)
    rows = [[0, .9], [.5, 1], [-.5, 1.2]]
    for rowid, (value0, value1) in enumerate(rows):
        mixture0.observe(rowid, {0: value0, 1: value1})

    mixture1 = remove_cgpm(mixture0, 0)
    check_outputs(
        (mixture0, [2, 0, 1]),
        (mixture1, [2, 1]))

    mixture2 = add_cgpm(mixture1, Normal([0], [], rng=prng))
    check_outputs(
        (mixture0, [2, 0, 1]),
        (mixture1, [2, 1]),
        (mixture2, [2, 1, 0]))

    mixture3 = remove_cgpm(mixture2, 1)
    check_outputs(
        (mixture0, [2, 0, 1]),
        (mixture1, [2, 1]),
        (mixture2, [2, 1, 0]),
        (mixture3, [2, 0]))

    mixture4 = remove_cgpm(mixture3, 0)
    check_outputs(
        (mixture0, [2, 0, 1]),
        (mixture1, [2, 1]),
        (mixture2, [2, 1, 0]),
        (mixture3, [2, 0]),
        (mixture4, [2]))

    # Cannot remove the cgpm_row_divide for a mixture.
    with pytest.raises(Exception):
        remove_cgpm(mixture2, 2)
# Example no. 10
# 0
def test_crosscat_add_remove():
    """Add a view to a crosscat, then remove two views by output index."""
    prng = get_prng(2)
    base_crosscat = get_crosscat(prng)

    # Extra view: row-divide output -4 over a single Categorical output 6.
    categorical = Categorical([6], [], distargs={'k': 4}, rng=prng)
    extra_view = FlexibleRowMixture(
        cgpm_row_divide=CRP([-4], [], rng=prng),
        cgpm_components_base=Product([categorical], rng=prng),
        rng=prng)

    with_extra = add_cgpm(base_crosscat, extra_view)
    assert with_extra.outputs == [-1, 0, 1, -2, 2, 3, 4, -3, 5, -4, 6]

    # Removing by a row-divide output (-1) drops outputs -1, 0 and 1.
    without_first = remove_cgpm(with_extra, -1)
    assert without_first.outputs == [-2, 2, 3, 4, -3, 5, -4, 6]

    # Removing by a member output (5) drops outputs -3 and 5.
    without_third = remove_cgpm(without_first, 5)
    assert without_third.outputs == [-2, 2, 3, 4, -4, 6]
def test_crosscat_three_component_cpp__ci_():
    """Run ``run_crosscat_test`` with ``transition_structure_cpp`` inference.

    With ``--integration`` the structure kernel runs for 1000 steps,
    otherwise for a single step.
    """
    prng = get_prng(12)
    integration = pytest.config.getoption('--integration')
    row_divide = CRP([1], [], rng=prng)
    components_base = Product(cgpms=[Normal([0], [], rng=prng)], rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    crosscat = Product(cgpms=[view], rng=prng)

    def func_inference(crosscat):
        # Inference callback handed to run_crosscat_test: one structure pass
        # followed by one pass over each hyperparameter kernel.
        if integration:
            n_step = 1000
        else:
            n_step = 1
        synthesizer = GibbsCrossCat(crosscat)
        synthesizer.transition_structure_cpp(N=n_step)
        synthesizer.transition_hypers_distributions()
        synthesizer.transition_hypers_row_divide()
        return synthesizer

    run_crosscat_test(crosscat, func_inference, integration, prng)
# Example no. 12
# 0
def test_transition_crp_mixture():
    """Run row and hyper transitions on three separated groups of draws.

    After 50 sweeps, more than 95% of the rows in each block of 20 must
    share that block's most common simulated value of output 1.
    """
    prng = get_prng(2)
    # Three blocks of 20 normal draws located at 0, 30 and -30.
    block_a = prng.normal(loc=0, scale=2, size=20)
    block_b = prng.normal(loc=30, scale=1, size=20)
    block_c = prng.normal(loc=-30, scale=1, size=20)
    data = np.concatenate((block_a, block_b, block_c))

    row_divide = CRP([1], [], rng=prng)
    components_base = Normal([0], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=row_divide,
        cgpm_components_base=components_base,
        rng=prng)
    for rowid, value in enumerate(data):
        infinite_mixture.observe(rowid, {0: value})

    # cgpms and hyperparameter grids (30 points each), keyed by output.
    cgpms = {
        output: get_cgpms_by_output_index(infinite_mixture, output)
        for output in (0, 1)
    }
    grids = {
        output: transition_hyper_grids(cgpms[output], 30)
        for output in (0, 1)
    }

    # Each sweep visits the rows in a fresh random order, then transitions
    # the hypers of every output.
    n_rows = len(data)
    for _step in xrange(50):
        for rowid in prng.permutation(range(n_rows)):
            transition_rows(infinite_mixture, rowid, prng)
        for output in infinite_mixture.outputs:
            transition_hypers(cgpms[output], grids[output], prng)

    # Simulate output 1 for every row, grouped by the block it came from.
    rowids = range(60)
    blocks = [rowids[start:start + 20] for start in (0, 20, 40)]
    assignments = [
        [infinite_mixture.simulate(r, [1])[1] for r in block]
        for block in blocks
    ]
    for group in assignments:
        mode = Counter(group).most_common(1)[0][0]
        n_agree = sum(a == mode for a in group)
        assert n_agree > int(0.95 * len(group))
# Example no. 13
# 0
def test_simple_product_as_chain():
    """Exercise logpdf, observe/unobserve and metadata round-tripping.

    The mixture's component base is a Chain of a Poisson (output 0) and a
    Normal (output 1); the row divider is a CRP on output 2.
    """
    prng = get_prng(2)
    poisson = Poisson([0], [], hypers={'a': 10, 'b': 1}, rng=prng)
    normal = Normal([1], [], hypers={'m': 100}, rng=prng)
    component0 = Chain([poisson, normal], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component0,
        rng=prng)
    assert infinite_mixture.cgpm_row_divide.support() == [0]

    # Test logpdf identities.
    lp_marginal = infinite_mixture.logpdf(None, {0: 1})
    assert lp_marginal < 0
    lp_cluster0 = infinite_mixture.logpdf(None, {0: 1, 2: 0})
    assert np.allclose(lp_marginal, lp_cluster0)
    lp_cluster1 = infinite_mixture.logpdf(None, {0: 1, 2: 1})
    assert lp_cluster1 == -float('inf')

    # Add an observation, then query output 1 jointly with clusters 0, 1, 2.
    infinite_mixture.observe(0, {1: 100})
    constraints = {0: 1}
    lp_cluster0 = infinite_mixture.logpdf(
        None, {1: 100, 2: 0}, constraints=constraints)
    lp_cluster1 = infinite_mixture.logpdf(
        None, {1: 100, 2: 1}, constraints=constraints)
    lp_cluster2 = infinite_mixture.logpdf(
        None, {1: 100, 2: 2}, constraints=constraints)
    assert lp_cluster1 < lp_cluster0
    assert lp_cluster2 == float('-inf')

    # Remove observation.
    observation = infinite_mixture.unobserve(0)
    assert observation == ({1: 100, 2: 0}, {})

    # Remove observation again.
    with pytest.raises(Exception):
        infinite_mixture.unobserve(0)

    # Add more observations.
    infinite_mixture.observe(0, {1: 100})
    infinite_mixture.observe(1, {1: 300})
    infinite_mixture.observe(2, {0: 2})

    # Constrained cluster has zero density.
    # NOTE(review): the identical check runs twice; the second pass may have
    # been meant to probe a different constraint -- confirm.
    for _attempt in (0, 1):
        with pytest.raises(ValueError):
            infinite_mixture.logpdf(None, {0: 1}, constraints={2: 10})

    # Convert to/from metadata and assert unobserves return correct data.
    metadata = infinite_mixture.to_metadata()
    infinite_mixture2 = FlexibleRowMixture.from_metadata(metadata, prng)
    observed_rows = [(0, {1: 100}), (1, {1: 300}), (2, {0: 2})]
    for rowid, observed in observed_rows:
        # Unobserve first, then read the assignment from the source mixture.
        removed = infinite_mixture2.unobserve(rowid)
        expected = dict(observed)
        expected[2] = infinite_mixture.cgpm_row_divide.data[rowid]
        assert removed == (expected, {})