Example #1
 def __init__(self,
              outputs,
              inputs,
              k=None,
              hypers=None,
              params=None,
              distargs=None,
              rng=None):
     self.rng = gu.gen_rng() if rng is None else rng
     self.outputs = outputs
     self.inputs = inputs
     assert len(self.outputs) == 1
     assert len(self.inputs) >= 1
     assert self.outputs[0] not in self.inputs
     assert len(distargs['inputs']['stattypes']) == len(self.inputs)
     self.stattypes = distargs['inputs']['stattypes']
     # Number of output categories and input dimension.
     # XXX WHATTA HACK. BayesDB passes in top-level kwargs, not in distargs.
     self.k = k if k is not None else int(distargs['k'])
     self.p = len(distargs['inputs']['stattypes'])
     # Sufficient statistics.
     self.N = 0
     self.data = Data(x=OrderedDict(), Y=OrderedDict())
     self.counts = [0] * self.k
     # Outlier and random forest parameters.
     if params is None: params = {}
     self.alpha = params.get('alpha', .1)
     self.regressor = params.get('forest', None)
     if self.regressor is None:
         self.regressor = RandomForestClassifier(random_state=self.rng)
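A hedged instantiation sketch for this constructor: the distargs layout (a top-level 'k' plus an 'inputs' dict carrying 'stattypes') is implied by the asserts above, while the class name RandomForest and the concrete stattypes are assumptions for illustration.

# Hypothetical call, assuming this __init__ belongs to a class named
# RandomForest and gu is cgpm's general utilities module.
distargs = {
    'k': 3,  # number of output categories
    'inputs': {'stattypes': ['normal', 'categorical']},
}
forest = RandomForest(
    outputs=[0], inputs=[1, 2], distargs=distargs, rng=gu.gen_rng(7))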
Example #2
def serialize_generic(Model, additional=None):
    """Model is either State or Engine class."""
    # Create a 100x5 dataset: column 0 constant zero (bernoulli), rest normal.
    data = np.random.normal(size=(100,5))
    data[:,0] = 0
    # Run a single chain for a few iterations.
    model = Model(
        data,
        cctypes=['bernoulli','normal','normal','normal','normal'],
        rng=gu.gen_rng(0))
    model.transition(N=1, checkpoint=1)
    # To metadata.
    metadata = model.to_metadata()
    modname = importlib.import_module(metadata['factory'][0])
    builder = getattr(modname, metadata['factory'][1])
    model = builder.from_metadata(metadata)
    # To JSON.
    json_metadata = json.dumps(model.to_metadata())
    model = builder.from_metadata(json.loads(json_metadata))
    # To pickle.
    with tempfile.NamedTemporaryFile(prefix='gpmcc-serialize') as temp:
        with open(temp.name, 'w') as f:
            model.to_pickle(f)
        with open(temp.name, 'r') as f:
            # Use the file itself
            model = Model.from_pickle(f, rng=gu.gen_rng(10))
            if additional:
                additional(model)
        # Use the filename as a string
        model = Model.from_pickle(temp.name, rng=gu.gen_rng(10))
        if additional:
            additional(model)
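serialize_generic is parameterized over the model class; a hedged driver, assuming the docstring's two candidates are importable from the usual cgpm module paths.

# Hypothetical usage; the module paths are assumptions based on cgpm's layout.
from cgpm.crosscat.engine import Engine
from cgpm.crosscat.state import State

serialize_generic(State)
serialize_generic(Engine, additional=lambda model: model.transition(N=1))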
Example #3
def test_dependence_probability_pairwise():
    cctypes, distargs = cu.parse_distargs(['normal', 'normal', 'normal'])

    T, Zv, _Zc = tu.gen_data_table(10, [.5, .5], [[.25, .25, .5], [.3, .7]],
                                   cctypes,
                                   distargs, [.95] * len(cctypes),
                                   rng=gu.gen_rng(100))

    outputs = [0, 1, 2]
    engine = Engine(
        T.T,
        outputs=outputs,
        cctypes=cctypes,
        num_states=4,
        distargs=distargs,
        Zv={o: z for o, z in zip(outputs, Zv)},
        rng=gu.gen_rng(0))

    Ds = engine.dependence_probability_pairwise(multiprocess=0)
    assert len(Ds) == engine.num_states()
    assert all(np.shape(D) == (len(outputs), len(outputs)) for D in Ds)
    for D in Ds:
        for col0, col1 in itertools.product(outputs, outputs):
            i0 = outputs.index(col0)
            i1 = outputs.index(col1)
            actual = D[i0, i1]
            expected = Zv[i0] == Zv[i1]
            assert actual == expected

    Ds = engine.dependence_probability_pairwise(colnos=[0, 2], multiprocess=0)
    assert len(Ds) == engine.num_states()
    assert all(np.shape(D) == (2, 2) for D in Ds)
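The per-state 0/1 matrices are typically pooled into a single probability matrix by averaging across states, as Example #11 below does; a minimal sketch continuing from the last call:

# Hypothetical follow-up: average the per-state dependence matrices.
D_mean = np.mean(Ds, axis=0)
assert np.shape(D_mean) == (2, 2)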
Example #4
def engine():
    # Set up the data generation
    cctypes, distargs = cu.parse_distargs([
        'normal',
        'poisson',
        'bernoulli',
        'categorical(k=4)',
        'lognormal',
        'exponential',
        'beta',
        'geometric',
        'vonmises',
    ])

    T, Zv, Zc = tu.gen_data_table(20, [1], [[.25, .25, .5]],
                                  cctypes,
                                  distargs, [.95] * len(cctypes),
                                  rng=gu.gen_rng(10))

    return Engine(T.T,
                  cctypes=cctypes,
                  distargs=distargs,
                  num_states=4,
                  rng=gu.gen_rng(312),
                  multiprocess=False)
Example #5
def test_transition_hypers(cctype):
    name, arg = cctype
    model = cu.cctype_class(name)(outputs=[0],
                                  inputs=None,
                                  distargs=arg,
                                  rng=gu.gen_rng(10))
    D, Zv, Zc = tu.gen_data_table(50, [1], [[.33, .33, .34]], [name], [arg],
                                  [.8],
                                  rng=gu.gen_rng(1))

    hypers_previous = model.get_hypers()
    for rowid, x in enumerate(np.ravel(D)[:25]):
        model.incorporate(rowid, {0: x}, None)
    model.transition_hypers(N=3)
    hypers_new = model.get_hypers()
    assert not all(
        np.allclose(hypers_new[hyper], hypers_previous[hyper])
        for hyper in hypers_new)

    for rowid, x in enumerate(np.ravel(D)[:25]):
        model.incorporate(rowid + 25, {0: x}, None)
    model.transition_hypers(N=3)
    hypers_newer = model.get_hypers()
    assert not all(
        np.allclose(hypers_new[hyper], hypers_newer[hyper])
        for hyper in hypers_newer)
Example #6
def test_crp_simple(N, alpha, seed):
    # Obtain the partitions.
    A = gu.simulate_crp(N, alpha, rng=gu.gen_rng(seed))
    Nk = list(np.bincount(A))

    crp = simulate_crp_gpm(N, alpha, rng=gu.gen_rng(seed))

    assert A == crp.data.values()
    assert_crp_equality(alpha, Nk, crp)
Example #7
def test_complex_independent_relationships_lovecat():
    rng = gu.gen_rng(1)
    D = rng.normal(size=(10, 1))
    T = np.repeat(D, 10, axis=1)
    Ci = [(2, 8), (0, 3)]
    Cd = [(2, 3), (0, 8)]
    state = State(T, cctypes=['normal'] * 10, Ci=Ci, Cd=Cd, rng=gu.gen_rng(0))
    state.transition_lovecat(N=1000, progress=1)
    vu.validate_crp_constrained_partition(state.Zv(), Cd, Ci, {}, {})
Example #8
def test_naive_bayes_independence_lovecat():
    rng = gu.gen_rng(1)
    D = rng.normal(size=(10, 1))
    T = np.repeat(D, 10, axis=1)
    Ci = list(itertools.combinations(range(10), 2))
    state = State(T, cctypes=['normal'] * 10, Ci=Ci, rng=gu.gen_rng(0))
    state.transition(N=10, progress=0)
    vu.validate_crp_constrained_partition(state.Zv(), [], Ci, {}, {})
    state.transition_lovecat(N=100, progress=0)
    vu.validate_crp_constrained_partition(state.Zv(), [], Ci, {}, {})
Example #9
def test_no_constraints():
    N, alpha = 10, .4
    Cd = Ci = []
    Rd = Ri = {}

    Z = gu.simulate_crp_constrained(
        N, alpha, Cd, Ci, Rd, Ri, rng=gu.gen_rng(0))
    assert vu.validate_crp_constrained_partition(Z, Cd, Ci, Rd, Ri)

    Z = gu.simulate_crp_constrained_dependent(
        N, alpha, Cd, rng=gu.gen_rng(0))
    assert vu.validate_crp_constrained_partition(Z, Cd, [], [], [])
Example #10
def test_all_friends():
    N, alpha = 10, 1.4
    Cd = [range(N)]
    Ci = []
    Rd = Ri = {}

    Z = gu.simulate_crp_constrained(
        N, alpha, Cd, Ci, Rd, Ri, rng=gu.gen_rng(0))
    assert vu.validate_crp_constrained_partition(Z, Cd, Ci, Rd, Ri)

    Z = gu.simulate_crp_constrained_dependent(
        N, alpha, Cd, rng=gu.gen_rng(0))
    assert vu.validate_crp_constrained_partition(Z, Cd, [], [], [])
Example #11
def test_dependence_probability():
    '''Test that Loom correctly recovers a 2-view dataset.'''
    D, Zv, Zc = tu.gen_data_table(n_rows=150,
                                  view_weights=None,
                                  cluster_weights=[
                                      [.2, .2, .2, .4],
                                      [.3, .2, .5],
                                  ],
                                  cctypes=['normal'] * 6,
                                  distargs=[None] * 6,
                                  separation=[0.95] * 6,
                                  view_partition=[0, 0, 0, 1, 1, 1],
                                  rng=gu.gen_rng(12))

    engine = Engine(
        D.T,
        outputs=[7, 2, 12, 80, 129, 98],
        cctypes=['normal'] * len(D),
        distargs=[None] * 6,
        rng=gu.gen_rng(122),
        num_states=20,
    )

    logscore0 = engine.logpdf_score()
    engine.transition_loom(N=100)
    logscore1 = engine.logpdf_score()
    assert numpy.mean(logscore1) > numpy.mean(logscore0)

    dependence_probability = numpy.mean(
        engine.dependence_probability_pairwise(), axis=0)

    assert dependence_probability[0, 1] > 0.8
    assert dependence_probability[1, 2] > 0.8
    assert dependence_probability[0, 2] > 0.8

    assert dependence_probability[3, 4] > 0.8
    assert dependence_probability[4, 5] > 0.8
    assert dependence_probability[3, 5] > 0.8

    assert dependence_probability[0, 3] < 0.2
    assert dependence_probability[0, 4] < 0.2
    assert dependence_probability[0, 5] < 0.2

    assert dependence_probability[1, 3] < 0.2
    assert dependence_probability[1, 4] < 0.2
    assert dependence_probability[1, 5] < 0.2

    assert dependence_probability[2, 3] < 0.2
    assert dependence_probability[2, 4] < 0.2
    assert dependence_probability[2, 5] < 0.2
Example #12
File: test_mvkde.py Project: wilsondy/cgpm
def test_serialize():
    rng = gu.gen_rng(1)

    data = rng.rand(20, 5)
    data[:10,-1] = 0
    data[10:,-1] = 1

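    # O, ST, SA, N, and C below are presumably module-level aliases defined
    # in test_mvkde.py, e.g. O='outputs', ST='stattypes', SA='statargs',
    # N='numerical', C='categorical'.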
    kde = MultivariateKde(
        range(5), None,
        distargs={O: {ST: [N, N, N, N, C], SA: [{},{},{},{},{'k':1}]}}, rng=rng)
    for rowid, x in enumerate(data):
        kde.incorporate(rowid, dict(zip(range(5), x)))
    kde.transition()

    metadata_s = json.dumps(kde.to_metadata())
    metadata_l = json.loads(metadata_s)

    modname = importlib.import_module(metadata_l['factory'][0])
    builder = getattr(modname, metadata_l['factory'][1])
    kde2 = builder.from_metadata(metadata_l, rng=rng)

    # Variable indexes.
    assert kde2.outputs == kde.outputs
    assert kde2.inputs == kde.inputs
    # Distargs.
    assert kde2.get_distargs() == kde.get_distargs()
    # Dataset.
    assert kde2.data == kde.data
    assert kde2.N == kde.N
    # Bandwidth params.
    assert np.allclose(kde2.bw, kde.bw)
    # Statistical types.
    assert kde2.stattypes == kde.stattypes
Example #13
 def __init__(self, outputs, inputs, hypers, params, distargs, rng):
     assert len(outputs) == 1
     assert not inputs
     self.outputs = list(outputs)
     self.inputs = []
     self.data = dict()
     self.rng = gu.gen_rng() if rng is None else rng
Example #14
def test_serialize():
    # Direct factor analysis.
    rng = gu.gen_rng(12)
    iris = sklearn.datasets.load_iris()

    fact = FactorAnalysis([1, 2, 3, 4, -5, 47], None, L=2, rng=rng)
    for i, row in enumerate(iris.data):
        fact.incorporate(i, {q: v for q, v in zip(fact.outputs, row)})

    metadata = json.dumps(fact.to_metadata())
    metadata = json.loads(metadata)

    modname = importlib.import_module(metadata['factory'][0])
    builder = getattr(modname, metadata['factory'][1])
    fact2 = builder.from_metadata(metadata, rng=rng)

    assert fact2.L == fact.L
    assert fact2.D == fact.D
    # Variable indexes.
    assert fact2.outputs == fact.outputs
    assert fact2.latents == fact.latents
    # Dataset.
    assert fact2.data == fact.data
    assert fact2.N == fact.N
    # Parameters of Factor Analysis.
    assert np.allclose(fact2.mux, fact.mux)
    assert np.allclose(fact2.Psi, fact.Psi)
    assert np.allclose(fact2.W, fact.W)
    # Parameters of joint distribution [x,z].
    assert np.allclose(fact2.mu, fact.mu)
    assert np.allclose(fact2.cov, fact.cov)
Example #15
 def __init__(self, outputs, inputs, K=None, M=None, distargs=None,
         params=None, rng=None):
     # Input validation.
     self._validate_init(outputs, inputs, K, M, distargs, params, rng)
     # Default arguments.
     if params is None:
         params = {}
     if rng is None:
         rng = gu.gen_rng(1)
     if M is None:
         M = K
     # Build the object.
     self.rng = rng
     # Variable indexes.
     self.outputs = outputs
     self.inputs = []
     # Distargs.
     self.stattypes = distargs['outputs']['stattypes']
     self.statargs = distargs['outputs']['statargs']
     self.levels = {
         o: self.statargs[i]['k']
         for i, o in enumerate(outputs) if self.stattypes[i] != 'numerical'
     }
     # Dataset.
     self.data = OrderedDict()
     self.N = 0
     # Ordering of the chain.
     self.ordering = list(self.rng.permutation(self.outputs))
     # Number of nearest neighbors.
     self.K = K
     self.M = M
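A hedged instantiation sketch for this nearest-neighbors-style constructor; the distargs layout (an 'outputs' dict with 'stattypes' and 'statargs') follows the attribute accesses above, while the class name MultivariateKnn is an assumption.

# Hypothetical call; K is the number of nearest neighbors.
distargs = {'outputs': {
    'stattypes': ['numerical', 'categorical'],
    'statargs': [{}, {'k': 3}],
}}
knn = MultivariateKnn(
    outputs=[0, 1], inputs=None, K=5, distargs=distargs, rng=gu.gen_rng(2))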
Example #16
def state():
    # Set up the data generation
    cctypes, distargs = cu.parse_distargs(
        ['normal', 'poisson', 'bernoulli', 'lognormal', 'beta', 'vonmises'])
    T, Zv, Zc = tu.gen_data_table(30, [1], [[.25, .25, .5]],
                                  cctypes,
                                  distargs, [.95] * len(cctypes),
                                  rng=gu.gen_rng(0))
    T = T.T
    s = State(T,
              cctypes=cctypes,
              distargs=distargs,
              Zv={i: 0 for i in xrange(len(cctypes))},
              rng=gu.gen_rng(0))
    return s
Example #17
def test_categorical_forest_manual_inputs_errors():
    state = State(
        T, cctypes=CCTYPES, distargs=DISTARGS, rng=gu.gen_rng(1))
    state.transition(N=1, progress=False)
    cat_id = CCTYPES.index('categorical')

    # Put 1201 into the first view.
    view_idx = min(state.views)
    state.incorporate_dim(
        T[:,CCTYPES.index('categorical')], outputs=[1201],
        cctype='categorical', distargs=DISTARGS[cat_id], v=view_idx)

    # Updating cctype with completely invalid input should raise.
    with pytest.raises(Exception):
        distargs = DISTARGS[cat_id].copy()
        distargs['inputs'] = [10000]
        state.update_cctype(1201, 'random_forest', distargs=distargs)

    # Updating cctype with input dimensions outside the view should raise.
    cols_in_view = state.views[view_idx].dims.keys()
    cols_out_view = [c for c in state.outputs if c not in cols_in_view]
    assert len(cols_in_view) > 0 and len(cols_out_view) > 0
    with pytest.raises(Exception):
        distargs = DISTARGS[cat_id].copy()
        distargs['inputs'] = cols_out_view
        state.update_cctype(1201, 'random_forest', distargs=distargs)

    # Updating cctype with no input dimensions should raise.
    with pytest.raises(Exception):
        distargs = DISTARGS[cat_id].copy()
        distargs['inputs'] = []
        state.update_cctype(1201, 'random_forest', distargs=distargs)
Example #18
File: vsinline.py Project: wilsondy/cgpm
 def __init__(self, outputs, inputs, rng=None, expression=None, **kwargs):
     # Set the rng.
     self.rng = rng if rng is not None else gu.gen_rng(1)
     seed = self.rng.randint(1, 2**31 - 1)
     # Basic input and output checking.
     if len(outputs) != 1:
         raise ValueError('InlineVsCgpm produces 1 output only.')
     if len(set(inputs)) != len(inputs):
         raise ValueError('Non unique inputs: %s' % inputs)
     if not all(o not in inputs for o in outputs):
         raise ValueError('Duplicates: %s, %s' % (inputs, outputs))
     if not all(i not in outputs for i in inputs):
         raise ValueError('Duplicates: %s, %s' % (inputs, outputs))
     # Retrieve the expression.
     if expression is None:
         raise ValueError('Missing expression: %s' % expression)
     # Save the outputs.
     self.outputs = outputs
     # Check correct inputs against the expression.
     self._validate_expression_concrete(expression, inputs)
     # Store the inputs and expression.
     self.inputs = inputs
     self.expression = expression
     # Execute the program in the ripl to make sure it parses.
     self.ripl = vs.make_lite_ripl(seed=seed)
     self.ripl.execute_program(self.expression)
     self.ripl.execute_program('assume uniform = uniform_continuous')
Example #19
def test_incorporate_session():
    rng = gu.gen_rng(4)
    state = State(X,
                  cctypes=['normal'] * 5,
                  Zv={0: 0, 1: 0, 2: 1, 3: 1, 4: 2},
                  rng=rng)
    # Incorporate row into a singleton cluster for all views.
    previous = [len(state.views[v].Nk()) for v in [0, 1, 2]]
    data = {i: rng.normal() for i in xrange(5)}
    clusters = {
        state.views[0].outputs[0]: previous[0],
        state.views[1].outputs[0]: previous[1],
        state.views[2].outputs[0]: previous[2],
    }
    state.incorporate(state.n_rows(), gu.merged(data, clusters))
    assert [len(state.views[v].Nk()) for v in [0,1,2]] == \
        [p+1 for p in previous]
    # Incorporate a row without specifying clusters, with some values missing.
    data = {i: rng.normal() for i in xrange(2)}
    state.incorporate(state.n_rows(), data)
    state.transition(N=3)
    # Remove the incorporated rowid.
    state.unincorporate(state.n_rows() - 1)
    state.transition(N=3)
Example #20
def test_view_serialize():
    data = np.random.normal(size=(100,5))
    data[:,0] = 0
    # Run a single chain for a few iterations.
    outputs = [2,4,6,8,10]
    X = {c:data[:,i].tolist() for i,c in enumerate(outputs)}
    model = View(
        X,
        cctypes=['bernoulli','normal','normal','normal','normal'],
        outputs=[1000]+outputs,
        rng=gu.gen_rng(0))
    model.transition(N=1)
    # To metadata.
    metadata = model.to_metadata()
    modname = importlib.import_module(metadata['factory'][0])
    builder = getattr(modname, metadata['factory'][1])
    model2 = builder.from_metadata(metadata)
    # Compare the original and deserialized models.
    assert np.allclose(model.alpha(), model2.alpha())
    assert dict(model2.Zr()) == dict(model.Zr())
    assert np.allclose(
        model.logpdf(-1, {0:0, 1:1}, {2:0}),
        model2.logpdf(-1, {0:0, 1:1}, {2:0}))
    assert np.allclose(
        model.logpdf(-1, {0:0, 1:1}),
        model2.logpdf(-1, {0:0, 1:1}))
Example #21
 def __init__(self,
              outputs,
              inputs,
              hypers=None,
              params=None,
              distargs=None,
              rng=None):
     if params is None:
         params = {}
     self.outputs = outputs
     self.inputs = inputs
     self.rng = gu.gen_rng() if rng is None else rng
     assert len(self.outputs) == 1
     assert len(self.inputs) >= 1
     assert self.outputs[0] not in self.inputs
     assert len(distargs['inputs']['stattypes']) == len(self.inputs)
     self.input_cctypes = distargs['inputs']['stattypes']
     self.input_ccargs = distargs['inputs']['statargs']
     # Determine number of covariates (with 1 bias term) and number of
     # categories for categorical covariates.
     p, counts = zip(*[
         self._predictor_count(cctype, ccarg)
         for cctype, ccarg in zip(self.input_cctypes, self.input_ccargs)
     ])
     self.p = sum(p) + 1
     self.inputs_discrete = {i: c for i, c in enumerate(counts) if c}
     # Dataset.
     self.N = 0
     self.data = Data(x=OrderedDict(), Y=OrderedDict())
     # Noise of the regression.
     self.noise = params.get('noise', 1)
     # Regressor.
     self.regressor = params.get('regressor', None)
     if self.regressor is None:
         self.regressor = LinearRegression()
Example #22
File: test_cmi.py Project: wilsondy/cgpm
def test_cmi_different_views__ci_():
    rng = gen_rng(0)
    T = np.zeros((50,3))
    T[:,0] = rng.normal(loc=-5, scale=1, size=50)
    T[:,1] = rng.normal(loc=2, scale=2, size=50)
    T[:,2] = rng.normal(loc=12, scale=3, size=50)
    state = State(
        T,
        outputs=[0, 1, 2],
        cctypes=['normal','normal','normal'],
        Zv={0:0, 1:1, 2:2},
        rng=rng
    )
    state.transition(N=30,
        kernels=['alpha','view_alphas','column_params','column_hypers','rows'])

    mi01 = state.mutual_information([0], [1])
    mi02 = state.mutual_information([0], [2])
    mi12 = state.mutual_information([1], [2])

    # Marginal MI all zero.
    assert np.allclose(mi01, 0)
    assert np.allclose(mi02, 0)
    assert np.allclose(mi12, 0)

    # CMI on variable in other view equal to MI.
    assert np.allclose(state.mutual_information([0], [1], {2:10}), mi01)
    assert np.allclose(state.mutual_information([0], [2], {1:0}), mi02)
    assert np.allclose(state.mutual_information([1], [2], {0:-2}), mi12)
    assert np.allclose(state.mutual_information([1], [2], {0:None}, T=5), mi12)
Example #23
File: linreg.py Project: wilsondy/cgpm
 def __init__(self, outputs, inputs, hypers=None, params=None, distargs=None,
         rng=None):
     # io data.
     self.outputs = outputs
     self.inputs = inputs
     self.rng = gu.gen_rng() if rng is None else rng
     assert len(self.outputs) == 1
     assert len(self.inputs) >= 1
     assert self.outputs[0] not in self.inputs
     assert len(distargs['inputs']['stattypes']) == len(self.inputs)
     self.input_cctypes = distargs['inputs']['stattypes']
     self.input_ccargs = distargs['inputs']['statargs']
     # Determine number of covariates (with 1 bias term) and number of
     # categories for categorical covariates.
     p, counts = zip(*[
         self._predictor_count(cctype, ccarg) for cctype, ccarg
         in zip(self.input_cctypes, self.input_ccargs)])
     self.p = sum(p)+1
     self.inputs_discrete = {i:c for i, c in enumerate(counts) if c}
     # For numerical covariates, map index in inputs to index in code.
     self.lookup_numerical_index = self.input_to_code_index()
     # Dataset.
     self.N = 0
     self.data = Data(x=OrderedDict(), Y=OrderedDict())
     # Hyper parameters.
     if hypers is None: hypers = {}
     self.a = hypers.get('a', 1.)
     self.b = hypers.get('b', 1.)
     self.mu = hypers.get('mu', np.zeros(self.p))
     self.V = hypers.get('V', np.eye(self.p))
Example #24
def test_two_views_column_partition_normal__ci_(lovecat):
    D = retrieve_normal_dataset()

    engine = Engine(D.T,
                    outputs=[5, 0, 1, 2, 3, 4],
                    cctypes=['normal'] * len(D),
                    rng=gu.gen_rng(12),
                    num_states=64)

    if lovecat:
        engine.transition_lovecat(N=200)
    else:
        engine.transition(N=200)

    P = engine.dependence_probability_pairwise()
    R1 = engine.row_similarity_pairwise(cols=[5, 0, 1])
    R2 = engine.row_similarity_pairwise(cols=[2, 3, 4])

    pu.plot_clustermap(P)
    pu.plot_clustermap(R1)
    pu.plot_clustermap(R2)

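    # P_THEORY records the expected dependence-probability block structure
    # (two independent groups of three columns); the test does not assert
    # against it, relying instead on visual inspection of the clustermaps.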
    P_THEORY = [
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]
    return engine
Example #25
def test_two_views_row_partition_bernoulli__ci_(lovecat):
    D = retrieve_bernoulli_dataset()

    if lovecat:
        engine = Engine(D.T,
                        cctypes=['categorical'] * len(D),
                        distargs=[{'k': 2}] * len(D),
                        Zv={0: 0, 1: 0, 2: 1, 3: 1},
                        rng=gu.gen_rng(12),
                        num_states=64)
        engine.transition_lovecat(N=100,
                                  kernels=[
                                      'row_partition_assignments',
                                      'row_partition_hyperparameters',
                                      'column_hyperparameters',
                                  ])
    else:
        engine = Engine(D.T,
                        cctypes=['bernoulli'] * len(D),
                        Zv={0: 0, 1: 0, 2: 1, 3: 1},
                        rng=gu.gen_rng(12),
                        num_states=64)
        engine.transition(N=100,
                          kernels=[
                              'view_alphas',
                              'rows',
                              'column_hypers',
                          ])

    R1 = engine.row_similarity_pairwise(cols=[0, 1])
    R2 = engine.row_similarity_pairwise(cols=[2, 3])

    pu.plot_clustermap(R1)
    pu.plot_clustermap(R2)
    return engine
Example #26
File: piecewise.py Project: wilsondy/cgpm
 def from_metadata(cls, metadata, rng=None):
     if rng is None:
         rng = gu.gen_rng(0)
     return cls(outputs=metadata['outputs'],
                inputs=metadata['inputs'],
                sigma=metadata['sigma'],
                flip=metadata['flip'],
                rng=rng)
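from_metadata pairs with a to_metadata method on the instance; a hedged roundtrip sketch, assuming an existing PieceWise instance named model.

# Hypothetical roundtrip through plain-dict metadata.
metadata = model.to_metadata()
model2 = PieceWise.from_metadata(metadata, rng=gu.gen_rng(5))
assert model2.outputs == model.outputs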
Example #27
def test_poisson_categorical():
    state = State(
        T, cctypes=CCTYPES, distargs=DISTARGS, rng=gu.gen_rng(0))
    state.transition(N=1, progress=False)
    state.update_cctype(CCTYPES.index('categorical'), 'poisson')
    state.transition(N=1, progress=False)
    state.update_cctype(CCTYPES.index('categorical'), 'categorical',
        distargs={'k':2})
Example #28
def test_independence_inference_quality_lovecat():
    rng = gu.gen_rng(584)
    column_view_1 = rng.normal(loc=0, size=(50, 1))

    column_view_2 = np.concatenate((
        rng.normal(loc=10, size=(25, 1)),
        rng.normal(loc=20, size=(25, 1)),
    ))

    data_view_1 = np.repeat(column_view_1, 4, axis=1)
    data_view_2 = np.repeat(column_view_2, 4, axis=1)
    data = np.column_stack((data_view_1, data_view_2))

    Zv0 = {i: 0 for i in xrange(8)}
    state = State(data, Zv=Zv0, cctypes=['normal'] * 8, rng=gu.gen_rng(10))
    state.transition_lovecat(N=100, progress=1)
    for col in [0, 1, 2, 3]:
        assert state.Zv(col) == state.Zv(0)
    for col in [4, 5, 6, 7]:
        assert state.Zv(col) == state.Zv(4)
    assert state.Zv(0) != state.Zv(4)

    # Get lovecat to merge the dependent columns into one view.
    Cd = [(0, 1), (2, 3), (4, 5), (6, 7)]
    Zv0 = {0: 0, 1: 0, 2: 1, 3: 1, 4: 2, 5: 2, 6: 3, 7: 3}
    state = State(data,
                  Zv=Zv0,
                  cctypes=['normal'] * 8,
                  Cd=Cd,
                  rng=gu.gen_rng(1))
    state.transition_lovecat(N=100, progress=1)
    for col in [0, 1, 2, 3]:
        assert state.Zv(col) == state.Zv(0)
    for col in [4, 5, 6, 7]:
        assert state.Zv(col) == state.Zv(4)
    assert state.Zv(0) != state.Zv(4)
Example #29
 def from_metadata(cls, metadata, rng=None):
     if rng is None:
         rng = gu.gen_rng(0)
     return cls(
         outputs=metadata['outputs'],
         inputs=metadata['inputs'],
         rng=rng,
     )
Example #30
def test_complex_relationships():
    N, alpha = 15, 10
    Cd = [(0,1,4), (2,3,5), (8,7)]
    Ci = [(2,8), (0,3)]
    Rd = Ri = {}
    Z = gu.simulate_crp_constrained(
        N, alpha, Cd, Ci, Rd, Ri, rng=gu.gen_rng(0))
    assert vu.validate_crp_constrained_partition(Z, Cd, Ci, Rd, Ri)
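Here Cd groups columns that must share a block and Ci lists pairs that must be separated; a hedged negative check, assuming the validator returns False on violations.

# Hypothetical: an all-in-one-block partition violates the Ci pairs
# (2, 8) and (0, 3), so validation should fail.
Z_bad = [0] * N
assert not vu.validate_crp_constrained_partition(Z_bad, Cd, Ci, Rd, Ri)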