def test_cmi_different_views__ci_():
    """Compare marginal and conditional MI for columns in separate views.

    Three normal columns are assigned to three distinct views through Zv.
    The test asserts that each pairwise mutual information is numerically
    zero, and that conditioning on the remaining column yields the same
    value as the marginal estimate.
    """
    rng = gen_rng(0)
    n_rows = 50
    # (loc, scale) of the normal draw filling each column, in column order.
    normal_params = [(-5, 1), (2, 2), (12, 3)]
    T = np.zeros((n_rows, len(normal_params)))
    for col, (loc, scale) in enumerate(normal_params):
        T[:, col] = rng.normal(loc=loc, scale=scale, size=n_rows)

    state = State(
        T,
        outputs=[0, 1, 2],
        cctypes=['normal'] * 3,
        Zv={0: 0, 1: 1, 2: 2},
        rng=rng,
    )
    state.transition(
        N=30,
        kernels=[
            'alpha', 'view_alphas', 'column_params', 'column_hypers', 'rows',
        ],
    )

    # All marginal estimates are computed before any assertion runs.
    mi01 = state.mutual_information([0], [1])
    mi02 = state.mutual_information([0], [2])
    mi12 = state.mutual_information([1], [2])

    # Marginal MI all zero.
    for marginal in (mi01, mi02, mi12):
        assert np.allclose(marginal, 0)

    # CMI on variable in other view equal to MI.
    cmi01 = state.mutual_information([0], [1], {2: 10})
    assert np.allclose(cmi01, mi01)
    cmi02 = state.mutual_information([0], [2], {1: 0})
    assert np.allclose(cmi02, mi02)
    cmi12 = state.mutual_information([1], [2], {0: -2})
    assert np.allclose(cmi12, mi12)
    # Same pair again, with the constraint on column 0 given as None.
    cmi12_marginalized = state.mutual_information(
        [1], [2], {0: None}, T=5)
    assert np.allclose(cmi12_marginalized, mi12)
def test_naive_bayes_independence():
    """Constrain every pair of columns to be independent, then validate Zv.

    The table holds ten copies of one random column; Ci lists all unordered
    column pairs. After ten transitions the column partition is checked with
    validate_crp_constrained_partition.
    """
    rng = gu.gen_rng(1)
    n_cols = 10
    base_column = rng.normal(size=(10, 1))
    table = np.repeat(base_column, n_cols, axis=1)

    # Every unordered pair of distinct columns.
    independent_pairs = list(itertools.combinations(range(n_cols), 2))

    state = State(
        table,
        cctypes=['normal'] * n_cols,
        Ci=independent_pairs,
        rng=rng,
    )
    state.transition(N=10, progress=0)
    vu.validate_crp_constrained_partition(
        state.Zv(), [], independent_pairs, {}, {})
def test_complex_independent_relationships():
    """Validate the column partition under two independence constraints.

    The table holds ten copies of one random column; columns (2, 8) and
    (0, 3) are declared independent through Ci.
    """
    rng = gu.gen_rng(1)
    n_cols = 10
    base_column = rng.normal(size=(10, 1))
    table = np.repeat(base_column, n_cols, axis=1)

    independent_pairs = [(2, 8), (0, 3)]
    state = State(
        table,
        cctypes=['normal'] * n_cols,
        Ci=independent_pairs,
        rng=rng,
    )
    state.transition(N=10, progress=0)
    vu.validate_crp_constrained_partition(
        state.Zv(), [], independent_pairs, {}, {})
def test_poisson_categorical():
    """Switch the categorical column to poisson and back to categorical."""
    state = State(
        T,
        cctypes=CCTYPES,
        distargs=DISTARGS,
        rng=gu.gen_rng(0),
    )
    state.transition(N=1, progress=False)

    # Position of the categorical column in the module-level type list.
    categorical_col = CCTYPES.index('categorical')

    state.update_cctype(categorical_col, 'poisson')
    state.transition(N=1, progress=False)
    state.update_cctype(categorical_col, 'categorical', distargs={'k':2})
def test_geometric_exponential():
    """Convert the geometric column to exponential; the reverse must raise."""
    state = State(
        T,
        cctypes=CCTYPES,
        distargs=DISTARGS,
        rng=gu.gen_rng(0),
    )
    state.transition(N=1, progress=False)

    geometric_col = CCTYPES.index('geometric')
    state.update_cctype(geometric_col, 'exponential')
    state.transition(N=1, progress=False)

    # Incompatible numeric conversion.
    # The index lookup is deliberately left inside the guarded block so the
    # set of exceptions accepted by pytest.raises is unchanged.
    with pytest.raises(Exception):
        state.update_cctype(CCTYPES.index('exponential'), 'geometric')
def generate_gaussian_samples():
    """Return 100 simulated samples from a two-column normal State.

    Both columns of the module-level table D are placed in view 0. Samples
    are drawn from that view for columns 0, 1 and the view's first output;
    each sample is then passed through replace_key with that output and -1
    (presumably renaming the key -- see replace_key).
    """
    state = State(
        D,
        cctypes=['normal', 'normal'],
        Zv={0: 0, 1: 0},
        rng=gu.gen_rng(0),
    )
    view = state.view_for(1)
    state.transition(
        S=15, kernels=['rows', 'column_params', 'column_hypers'])

    first_output = view.outputs[0]
    draws = view.simulate(-1, [0, 1, first_output], N=100)

    relabeled = []
    for draw in draws:
        relabeled.append(replace_key(draw, first_output, -1))
    return relabeled
def state():
    """Build a three-column State (normal, bernoulli, normal) and run it.

    The table stacks the output of generate_quadrants with one generated
    bernoulli column, reorders the columns as [0, 2, 1], and the resulting
    State is advanced by 20 transitions before being returned.
    """
    rng = gu.gen_rng(5)
    n_rows = 120

    quadrants = generate_quadrants(n_rows, rng)
    # Only the data table is used; the generated partitions are discarded.
    bernoulli, _Zv, _Zrv = tu.gen_data_table(
        n_rows, [1], [[.5, .5]], ['bernoulli'], [None], [.95], rng=rng)

    stacked = np.column_stack((quadrants, bernoulli.T))
    # Reorder to original columns 0, 2, 1 so the column order lines up with
    # the cctypes given below.
    table = stacked[:, [0, 2, 1]]

    model = State(
        table,
        outputs=[0, 1, 2],
        cctypes=['normal', 'bernoulli', 'normal'],
        rng=rng,
    )
    model.transition(N=20)
    return model
def init_view_state(data, iters, cctypes):
    """Create a View and a State over the same data, optionally transitioned.

    Args:
        data: Row-major table; a list is converted to a numpy array.
        iters: Passed to view.transition and state.transition when positive;
            no transition is run otherwise.
        cctypes: Column types forwarded to both View and State.

    Returns:
        Tuple (view, state).
    """
    table = np.array(data) if isinstance(data, list) else data
    n_cols = len(table[0])
    outputs = range(n_cols)

    # Column-oriented copy of the table, keyed by output index.
    columns = {}
    for position, output in enumerate(outputs):
        columns[output] = table[:, position].tolist()

    # NOTE: the concatenation relies on range() returning a list (Python 2).
    view = View(columns, cctypes=cctypes, outputs=[1000] + outputs, rng=RNG)
    state = State(
        table[:, 0:n_cols], outputs=outputs, cctypes=cctypes, rng=RNG)

    if iters > 0:
        view.transition(iters)
        state.transition(iters)
    return view, state
def gen_state_cgpm(get_data):
    """Build a single-view State from get_data() and tune its dim hypers.

    Args:
        get_data: Callable returning the tuple
            (outputs, data, assignments, cctypes, distargs).

    Returns:
        The State after ten calls to transition_dim_hypers.
    """
    outputs, data, assignments, cctypes, distargs = get_data()

    state = State(
        outputs=outputs,
        X=data,
        cctypes=cctypes,
        distargs=distargs,
        # Every output goes to view 0, whose row partition is `assignments`.
        Zv=dict.fromkeys(outputs, 0),
        Zrv={0: assignments},
        view_alphas={0: 1.5},
        rng=gu.gen_rng(1),
    )
    for _ in xrange(10):
        state.transition_dim_hypers()
    return state
def test_convert_cgpm_to_cgpm2():
    """Check that converting a cgpm State to cgpm2 preserves its first view.

    Compared quantities: the row partition, the hyperparameters of the single
    dimension, and the CRP hyperparameters.
    """
    prng = get_prng(2)
    # Three groups of 20 draws each, concatenated into one column.
    groups = (
        prng.normal(loc=0, scale=2, size=20),
        prng.normal(loc=30, scale=1, size=20),
        prng.normal(loc=-30, scale=1, size=20),
    )
    data = np.concatenate(groups)
    state = State(
        X=np.reshape(data, (len(data), 1)),
        outputs=[0],
        cctypes=['normal'],
        rng=prng,
    )
    view_cgpm1 = state.views[0]
    view_cgpm1.transition(N=5)

    # Convert
    product = convert_cgpm_state_to_cgpm2(state, prng)
    view_cgpm2 = product.cgpms[0]

    def sorted_partition(assignments):
        # Group row ids by cluster label, then order blocks by smallest row.
        blocks = {}
        for row, label in assignments.iteritems():
            blocks.setdefault(label, []).append(row)
        return sorted(blocks.values(), key=min)

    # Verify row assignments.
    partition_before = sorted_partition(view_cgpm1.Zr())
    partition_after = sorted_partition(view_cgpm2.cgpm_row_divide.data)
    assert partition_before == partition_after

    # Verify hyperparameters.
    base_cgpm = view_cgpm2.cgpm_components_array.cgpm_base.cgpms[0]
    assert view_cgpm1.dims[0].hypers == base_cgpm.get_hypers()

    # Verify CRP alpha.
    assert view_cgpm1.crp.hypers == view_cgpm2.cgpm_row_divide.get_hypers()
def test_cmi_marginal_crash():
    """Smoke-test CMI with constraint variables whose value is None.

    No result is asserted; the calls only need to complete.
    """
    state = State(
        np.eye(5),
        Zv={0: 0, 1: 0, 2: 0, 3: 1, 4: 1},
        cctypes=['normal'] * 5,
    )
    constraint_sets = [
        # One marginalized constraint variable.
        {2: None},
        # Two marginalized constraint variables.
        {2: None, 3: None},
        # Two marginalized constraint variables and one constrained variable.
        {2: None, 3: None, 4: 0},
    ]
    for constraints in constraint_sets:
        state.mutual_information([0], [1], constraints, T=10, N=10)
def test_zero_based_outputs():
    """Constraints must have zero-based output variables for now.

    Building a State with outputs 10..19 together with either a dependence
    (Cd) or an independence (Ci) constraint is expected to raise ValueError.
    """
    rng = gu.gen_rng(1)
    base_column = rng.normal(size=(10, 1))
    table = np.repeat(base_column, 10, axis=1)
    outputs = range(10, 20)
    cctypes = ['normal'] * 10

    # Dependence constraint.
    with pytest.raises(ValueError):
        State(
            table, outputs=outputs, cctypes=cctypes, Cd=[(2, 0)], rng=rng)

    # Independence constraint, with a freshly seeded generator.
    with pytest.raises(ValueError):
        State(
            table, outputs=outputs, cctypes=cctypes, Ci=[(2, 0)],
            rng=gu.gen_rng(0))
def get_state():
    """Return a five-column normal State built from the module-level X.

    Columns 0-2 are assigned to view 0 and columns 3-4 to view 1.
    """
    view_assignment = {0: 0, 1: 0, 2: 0, 3: 1, 4: 1}
    model = State(
        X,
        outputs=range(5),
        cctypes=['normal'] * 5,
        Zv=view_assignment,
        rng=gu.gen_rng(0),
    )
    return model
def gen_simple_state():
    """Return a fully specified State over a single row of three ones.

    All columns are bernoulli; columns 0 and 1 share view 0 and column 2 is
    in view 1. Hyperparameters, alphas and row partitions are given
    explicitly.
    """
    data = np.array([[1, 1, 1]])
    n_rows, n_cols = len(data), len(data[0])
    outputs = range(n_cols)

    # One hyperparameter dict per column.
    column_hypers = [{'alpha': 1., 'beta': 1.} for _ in outputs]
    # Each view puts every row into cluster 0.
    row_partitions = {0: [0] * n_rows, 1: [0] * n_rows}

    return State(
        X=data,
        outputs=outputs,
        alpha=1.,
        cctypes=['bernoulli'] * n_cols,
        hypers=column_hypers,
        Zv={0: 0, 1: 0, 2: 1},
        view_alphas=[1.] * n_cols,
        Zrv=row_partitions,
    )
def test_simple_dependence_constraint(Ci):
    """Validate the column partition under dependence constraints.

    Args:
        Ci: Independence constraints supplied by the caller and combined
            with the fixed dependence constraints (2, 0) and (8, 3).
    """
    rng = gu.gen_rng(1)
    base_column = rng.normal(size=(10, 1))
    table = np.repeat(base_column, 10, axis=1)
    dependent_pairs = [(2, 0), (8, 3)]

    state = State(
        table,
        cctypes=['normal'] * 10,
        Ci=Ci,
        Cd=dependent_pairs,
        rng=rng,
    )

    # Cannot transition columns with dependencies.
    with pytest.raises(ValueError):
        state.transition(N=10, kernels=['columns'], progress=0)

    # The remaining kernels are allowed ('alpha' is listed twice on purpose
    # to mirror the original kernel list exactly).
    allowed_kernels = [
        'rows', 'alpha', 'column_hypers', 'alpha', 'view_alphas',
    ]
    state.transition(N=10, kernels=allowed_kernels, progress=False)
    vu.validate_crp_constrained_partition(
        state.Zv(), dependent_pairs, Ci, {}, {})
def generate_regression_samples():
    """Return 100 simulated samples after making column 1 a linear regression.

    The State starts with two normal columns of the module-level table D in
    view 0. The function asserts that state._composite is falsy before, and
    truthy after, column 1 is switched to 'linear_regression'. Each sample is
    passed through replace_key with the view's first output and -1.
    """
    state = State(
        D,
        cctypes=['normal', 'normal'],
        Zv={0: 0, 1: 0},
        rng=gu.gen_rng(4),
    )
    # The view handle is taken before the cctype update.
    view = state.view_for(1)

    assert not state._composite
    state.update_cctype(1, 'linear_regression')
    assert state._composite

    state.transition(
        S=30, kernels=['rows', 'column_params', 'column_hypers'])

    first_output = view.outputs[0]
    draws = view.simulate(-1, [0, 1, first_output], N=100)

    relabeled = []
    for draw in draws:
        relabeled.append(replace_key(draw, first_output, -1))
    return relabeled
def test_errors():
    """Targets loomcat._validate_transition.

    For both a State and an Engine built over the same generated table, a
    set of unsupported transition_loom arguments must raise ValueError, while
    a plain transition_loom(N=2) must succeed.
    """
    # Only the data table is used; the generated partitions are discarded.
    D, _Zv, _Zc = tu.gen_data_table(
        n_rows=150,
        view_weights=None,
        cluster_weights=[
            [.2, .2, .2, .4],
            [.3, .2, .5],
        ],
        cctypes=['normal'] * 6,
        distargs=[None] * 6,
        separation=[0.95] * 6,
        view_partition=[0, 0, 0, 1, 1, 1],
        rng=gu.gen_rng(12))

    def build(model_class):
        # Each model gets its own, identically seeded generator.
        return model_class(
            D.T,
            outputs=range(10, 16),
            cctypes=['normal'] * len(D),
            distargs=[None] * 6,
            rng=gu.gen_rng(122),
        )

    state = build(State)
    engine = build(Engine)

    def check_errors(cgpm):
        # Built per call so each model receives fresh argument objects.
        # NOTE(review): progress=True is listed twice, as in the original
        # sequence of checks; kept so the calls made are unchanged.
        rejected = [
            {'S': 5},
            {'kernels': ['alpha']},
            {'progress': True},
            {'progress': True},
            {'checkpoint': 2},
        ]
        for kwargs in rejected:
            with pytest.raises(ValueError):
                cgpm.transition_loom(N=10, **kwargs)
        cgpm.transition_loom(N=2)

    check_errors(state)
    check_errors(engine)
def generate_state(T):
    """Build a single-view State from T with column 1 left out.

    Every retained column whose type is not 'categorical' is modeled as
    'normal'. The enclosing scope supplies cctypes, distargs and rng.

    Args:
        T: Table indexed as T[:, column].

    Returns:
        The constructed State (no transitions are run).
    """
    # Remember that c1 is ignored.
    skipped = 1
    outputs_prime = [0, 2, 3, 4, 5, 6, 7]

    cctypes_prime = []
    for position, cctype in enumerate(cctypes):
        if position == skipped:
            continue
        if cctype == 'categorical':
            cctypes_prime.append(cctype)
        else:
            cctypes_prime.append('normal')

    distargs_prime = []
    for position, distarg in enumerate(distargs):
        if position != skipped:
            distargs_prime.append(distarg)

    columns = [T[:, output] for output in outputs_prime]
    return State(
        X=np.transpose(columns),
        outputs=outputs_prime,
        cctypes=cctypes_prime,
        distargs=distargs_prime,
        Zv=dict.fromkeys(outputs_prime, 0),
        rng=rng,
    )
def retrieve_state():
    """Return a seven-column normal State over the 7x7 identity matrix.

    Outputs are numbered 10..16 and spread over views 0, 1 and 2 as given
    by the Zv mapping below.
    """
    outputs = [10, 11, 12, 13, 14, 15, 16]
    # View of each output, in the same order as `outputs`.
    views = [0, 0, 1, 2, 2, 2, 0]
    return State(
        np.eye(7),
        outputs=outputs,
        Zv=dict(zip(outputs, views)),
        cctypes=['normal'] * 7,
        rng=gen_rng(2),
    )
def retrieve_state():
    """Return a five-column normal State over a 3-row table containing NaNs.

    All columns sit in view 0, and the three rows are assigned to clusters
    0, 1 and 2 respectively.
    """
    nan = np.nan
    table = np.asarray([
        [1, nan, 2, -1, nan],
        [1, 3, 2, -1, -5],
        [1, nan, nan, nan, nan],
    ])
    outputs = [0, 1, 2, 3, 4]
    return State(
        table,
        outputs=outputs,
        cctypes=['normal'] * 5,
        Zv={0: 0, 1: 0, 2: 0, 3: 0, 4: 0},
        Zrv={0: [0, 1, 2]},
    )
def test_Zv_without_Zrv():
    """Construct a State from Zv alone, without supplying Zrv.

    Nothing is asserted; construction only needs to complete. The outputs
    are listed in descending order and the view ids are 0, 1, 2 and 4.
    """
    rng = gu.gen_rng(2)
    table = rng.normal(size=(10, 4))
    n_cols = table.shape[1]
    State(
        table,
        outputs=[3, 2, 1, 0],
        cctypes=['normal'] * n_cols,
        Zv={3: 0, 2: 1, 1: 2, 0: 4},
        rng=rng,
    )
def state():
    """Build a single-view, four-column State whose column 0 is a forest.

    Column types are parsed from distribution strings, a 50-row table is
    generated for them, column 0 is switched to 'random_forest', and one
    transition over the listed kernels is run before returning.
    """
    cctypes, distargs = cu.parse_distargs(
        ['categorical(k=5)', 'normal', 'poisson', 'bernoulli'])
    n_cols = len(cctypes)

    # Only the data table is used; the generated partitions are discarded.
    T, _Zv, _Zc = tu.gen_data_table(
        50, [1], [[.33, .33, .34]], cctypes, distargs,
        [.95] * n_cols, rng=gu.gen_rng(0))

    model = State(
        T.T,
        cctypes=cctypes,
        distargs=distargs,
        Zv=dict.fromkeys(xrange(n_cols), 0),
        rng=gu.gen_rng(0),
    )
    model.update_cctype(0, 'random_forest', distargs={'k': 5})

    # XXX Uncomment me for a bug!
    # model.update_cctype(1, 'linear_regression')

    model.transition(
        N=1,
        kernels=[
            'rows', 'view_alphas', 'alpha', 'column_params', 'column_hypers',
        ],
    )
    return model
def test_categorical_forest():
    """Exercise categorical -> random_forest updates and their restrictions.

    Steps:
      1. Make sure the categorical column is not alone in its view, then
         convert it to 'random_forest'.
      2. Add a bernoulli column (output 191) to the same view and convert it
         to 'random_forest' as well.
      3. Run row/parameter/hyper kernels on that view.
      4. Check that a 'columns' transition raises ValueError.
      5. Check that converting a column in a singleton view raises.
    """
    state = State(
        T, cctypes=CCTYPES, distargs=DISTARGS, rng=gu.gen_rng(1))
    state.transition(N=1, progress=False)
    cat_id = CCTYPES.index('categorical')

    # Bind distargs unconditionally: update_cctype below needs it even when
    # the migration branch is skipped. Binding it only inside the branch
    # raised UnboundLocalError whenever cat_id was not a singleton.
    distargs = DISTARGS[cat_id].copy()

    # If cat_id is singleton migrate first.
    if len(state.view_for(cat_id).dims) == 1:
        state.unincorporate_dim(cat_id)
        state.incorporate_dim(
            T[:,cat_id], outputs=[cat_id], cctype='categorical',
            distargs=distargs, v=0)
    state.update_cctype(cat_id, 'random_forest', distargs=distargs)

    bernoulli_id = CCTYPES.index('bernoulli')
    state.incorporate_dim(
        T[:,bernoulli_id], outputs=[191], cctype='bernoulli',
        v=state.Zv(cat_id))
    state.update_cctype(191, 'random_forest', distargs={'k':2})

    # Run valid transitions.
    state.transition(
        N=2, kernels=['rows','column_params','column_hypers'],
        views=[state.Zv(cat_id)], progress=False)

    # Running column transition should raise.
    with pytest.raises(ValueError):
        state.transition(N=1, kernels=['columns'], progress=False)

    # Updating cctype in singleton View should raise.
    distargs = DISTARGS[cat_id].copy()
    state.incorporate_dim(
        T[:,CCTYPES.index('categorical')], outputs=[98],
        cctype='categorical', distargs=distargs, v=max(state.views)+1)
    with pytest.raises(Exception):
        state.update_cctype(98, 'random_forest', distargs=distargs)
def test_cmi_multivariate_crash():
    """Smoke-test multivariate CMI queries and duplicated-variable errors.

    The first three queries only need to complete; the last two repeat a
    variable across the query/constraint arguments and must raise
    ValueError.
    """
    state = State(
        np.eye(5),
        Zv={0: 0, 1: 0, 2: 0, 3: 1, 4: 1},
        cctypes=['normal'] * 5,
    )

    # (first variable set, second variable set, constraints)
    valid_queries = [
        ([0, 1], [0, 1], {2: 1}),
        ([0, 1], [0, 1], {2: None}),
        ([2, 4], [0, 1, 3], {}),
    ]
    for cols_a, cols_b, constraints in valid_queries:
        state.mutual_information(cols_a, cols_b, constraints, T=10, N=10)

    # Duplicate in 2 query and constraint.
    with pytest.raises(ValueError):
        state.mutual_information(
            [2, 4], [1, 3], {0: 1, 2: None}, T=10, N=10)

    # Duplicate in 3 query.
    with pytest.raises(ValueError):
        state.mutual_information(
            [2, 3, 4], [1, 3], {0: None}, T=10, N=10)
def test_categorical_forest_manual_inputs_errors():
    """Check that bad 'inputs' distargs are rejected by update_cctype.

    A categorical column (output 1201) is added to the first view; switching
    it to 'random_forest' must raise when 'inputs' names an unknown column,
    columns outside the view, or no columns at all.

    The distargs setup is performed outside the pytest.raises blocks so that
    only update_cctype itself can satisfy the expectation; an error while
    preparing the arguments now fails the test instead of passing it.
    """
    state = State(
        T, cctypes=CCTYPES, distargs=DISTARGS, rng=gu.gen_rng(1))
    state.transition(N=1, progress=False)
    cat_id = CCTYPES.index('categorical')

    # Put 1201 into the first view.
    view_idx = min(state.views)
    state.incorporate_dim(
        T[:,CCTYPES.index('categorical')], outputs=[1201],
        cctype='categorical', distargs=DISTARGS[cat_id], v=view_idx)

    def assert_inputs_rejected(inputs):
        # Prepare the arguments first, then guard only the call under test.
        distargs = DISTARGS[cat_id].copy()
        distargs['inputs'] = inputs
        with pytest.raises(Exception):
            state.update_cctype(1201, 'random_forest', distargs=distargs)

    # Updating cctype with completely invalid input should raise.
    assert_inputs_rejected([10000])

    # Updating cctype with input dimensions outside the view should raise.
    cols_in_view = state.views[view_idx].dims.keys()
    cols_out_view = [c for c in state.outputs if c not in cols_in_view]
    assert len(cols_in_view) > 0 and len(cols_out_view) > 0
    assert_inputs_rejected(cols_out_view)

    # Updating cctype with no input dimensions should raise.
    assert_inputs_rejected([])
def test_linreg_missing_data_ignore():
    """Check linear-regression inputs when a covariate cell is missing.

    Column 2 is switched to 'linear_regression' with inputs [0, 1] (row 2 of
    column 0 is NaN), transitioned, switched back to 'normal', and finally
    switched to 'linear_regression' with inputs [1]. After each regression
    update, the dimension's inputs past the first entry must equal the
    requested list.
    """
    nan = float('nan')
    dataset = [
        [1, 3, 1],
        [2, 4, 1.5],
        [nan, 5, 1],
    ]
    state = State(
        dataset,
        cctypes=['normal'] * 3,
        Zv={0: 0, 1: 0, 2: 0},
        rng=gu.gen_rng(1),
    )

    # Make sure that missing covariates are handled as missing cells.
    requested = [0, 1]
    state.update_cctype(2, 'linear_regression', distargs={'inputs': [0,1]})
    assert state.dim_for(2).inputs[1:] == requested
    state.transition(N=5, kernels=['rows', 'column_hypers', 'view_alphas'])
    state.update_cctype(2, 'normal', distargs={'inputs': [0,1]})

    # Make sure that specified inputs are set correctly.
    requested = [1]
    state.update_cctype(2, 'linear_regression', distargs={'inputs': [1]})
    assert state.dim_for(2).inputs[1:] == requested
def test_vonmises_normal():
    """Switch the vonmises column to normal and back; normal -> vonmises on
    the column originally typed normal must raise."""
    state = State(
        T,
        cctypes=CCTYPES,
        distargs=DISTARGS,
        rng=gu.gen_rng(0),
    )
    state.transition(N=1, progress=False)

    vonmises_col = CCTYPES.index('vonmises')
    state.update_cctype(vonmises_col, 'normal')
    state.transition(N=1, progress=False)
    state.update_cctype(vonmises_col, 'vonmises')

    # Incompatible numeric conversion.
    # The index lookup is deliberately left inside the guarded block so the
    # set of exceptions accepted by pytest.raises is unchanged.
    with pytest.raises(Exception):
        state.update_cctype(CCTYPES.index('normal'), 'vonmises')
def test_independence_inference_quality_lovecat():
    """Check that transition_lovecat separates two groups of four columns.

    Columns 0-3 are copies of one random column and columns 4-7 are copies
    of another. Starting from (a) all columns in one view and (b) four
    two-column views with matching dependence constraints, lovecat must end
    with columns 0-3 sharing a view, columns 4-7 sharing a view, and the two
    views being different.
    """
    rng = gu.gen_rng(584)
    first_source = rng.normal(loc=0, size=(50, 1))
    second_source = np.concatenate((
        rng.normal(loc=10, size=(25, 1)),
        rng.normal(loc=20, size=(25, 1)),
    ))
    data = np.column_stack((
        np.repeat(first_source, 4, axis=1),
        np.repeat(second_source, 4, axis=1),
    ))
    cctypes = ['normal'] * 8

    def assert_two_views(model):
        # Each group is compared against its first column.
        for col in [0, 1, 2, 3]:
            assert model.Zv(col) == model.Zv(0)
        for col in [4, 5, 6, 7]:
            assert model.Zv(col) == model.Zv(4)
        assert model.Zv(0) != model.Zv(4)

    # Start with every column in view 0.
    single_view = dict.fromkeys(xrange(8), 0)
    state = State(data, Zv=single_view, cctypes=cctypes, rng=gu.gen_rng(10))
    state.transition_lovecat(N=100, progress=1)
    assert_two_views(state)

    # Get lovecat to merge the dependent columns into one view.
    Cd = [(0, 1), (2, 3), (4, 5), (6, 7)]
    paired_views = {0: 0, 1: 0, 2: 1, 3: 1, 4: 2, 5: 2, 6: 3, 7: 3}
    state = State(
        data, Zv=paired_views, cctypes=cctypes, Cd=Cd, rng=gu.gen_rng(1))
    state.transition_lovecat(N=100, progress=1)
    assert_two_views(state)
def test_incorporate_session():
    """Incorporate and unincorporate rows in a three-view State.

    A first row is added with an explicit cluster for every view, chosen as
    the current cluster count so that each view gains one cluster. A second
    row is added with only columns 0 and 1 observed and no clusters given,
    then removed again, with transitions in between.
    """
    rng = gu.gen_rng(4)
    state = State(
        X,
        cctypes=['normal'] * 5,
        Zv={0: 0, 1: 0, 2: 1, 3: 1, 4: 2},
        rng=rng,
    )
    view_ids = [0, 1, 2]

    def cluster_counts():
        return [len(state.views[v].Nk()) for v in view_ids]

    # Incorporate row into a singleton cluster for all views.
    previous = cluster_counts()
    data = dict((i, rng.normal()) for i in xrange(5))
    clusters = {}
    for v in view_ids:
        # Key: the view's first output; value: index of a brand-new cluster.
        clusters[state.views[v].outputs[0]] = previous[v]
    state.incorporate(state.n_rows(), gu.merged(data, clusters))
    assert cluster_counts() == [count + 1 for count in previous]

    # Incorporate row without specifying clusters, and some missing values
    data = dict((i, rng.normal()) for i in xrange(2))
    state.incorporate(state.n_rows(), data)
    state.transition(N=3)

    # Remove the incorporated rowid.
    state.unincorporate(state.n_rows() - 1)
    state.transition(N=3)
def test_incorporate_state():
    """Walk a State through many incorporate_dim / unincorporate_dim calls.

    Covers adding dimensions to an existing view, to a new singleton view and
    to an unspecified view; malformed incorporate requests that must raise
    ValueError; removing and re-adding dimensions; and removing dimensions
    until a single one is left, which must not be removable.
    """
    state = State(
        T[:,:2], cctypes=CCTYPES[:2], distargs=DISTARGS[:2],
        rng=gu.gen_rng(0))
    state.transition(N=5)

    def add_dim(column, idx, outputs, **extra):
        # Incorporate `column` with the cctype and distargs stored at `idx`
        # in the module-level lists; `extra` is forwarded untouched.
        state.incorporate_dim(
            column, outputs=outputs, cctype=CCTYPES[idx],
            distargs=DISTARGS[idx], **extra)

    target = state.views.keys()[0]

    # Incorporate a new dim into view[0].
    add_dim(T[:,2], 2, [2], v=target)
    assert state.Zv(2) == target
    state.transition(N=1)

    # Incorporate a new dim into view[0] with a non-contiguous output.
    add_dim(T[:,2], 2, [10], v=target)
    assert state.Zv(10) == target
    state.transition(N=1)

    # Some crash testing queries.
    state.logpdf(-1, {10:1}, constraints={0:2, 1:1})
    state.simulate(-1, [10], constraints={0:2})

    # Incorporating with a duplicated output should raise.
    with pytest.raises(ValueError):
        add_dim(T[:,2], 2, [10], v=target)

    # Multivariate incorporate should raise.
    with pytest.raises(ValueError):
        add_dim(T[:,2], 2, [10, 2], v=target)

    # Missing output should raise.
    with pytest.raises(ValueError):
        add_dim(T[:,2], 2, [], v=target)

    # Wrong number of rows should raise.
    with pytest.raises(ValueError):
        add_dim(T[:,2][:-1], 2, [11], v=target)

    # Inputs should raise.
    with pytest.raises(ValueError):
        add_dim(T[:,2], 2, [11], inputs=[2], v=target)

    # Incorporate dim into a newly created singleton view.
    target = max(state.views)+1
    add_dim(T[:,3], 3, [3], v=target)
    assert state.Zv(3) == target
    state.transition(N=1)

    # Incorporate dim without specifying a view.
    add_dim(T[:,4], 4, [4])
    state.transition(N=1)

    # Unincorporate first dim.
    previous = state.n_cols()
    state.unincorporate_dim(0)
    assert state.n_cols() == previous-1
    state.transition(N=1)

    # Reincorporate dim without specifying a view.
    add_dim(T[:,0], 0, [0])
    state.transition(N=1)

    # Incorporate dim into singleton view, remove it, assert destroyed.
    target = max(state.views)+1
    add_dim(T[:,5], 5, [5], v=target)
    previous = len(state.views)
    state.unincorporate_dim(5)
    assert len(state.views) == previous-1
    state.transition(N=1)

    # Reincorporate dim into a singleton view.
    target = max(state.views)+1
    add_dim(T[:,5], 5, [5], v=target)
    state.transition(N=1)

    # Incorporate the rest of the dims in the default way.
    for i in xrange(6, len(CCTYPES)):
        add_dim(T[:,i], i, [max(state.outputs)+1])
    state.transition(N=1)

    # Unincorporating non-existent dim should raise.
    with pytest.raises(ValueError):
        state.unincorporate_dim(9999)

    # Unincorporate all the dims, except the last one.
    # The slice is taken once, before any dimension is removed.
    for output in state.outputs[:-1]:
        state.unincorporate_dim(output)
    assert state.n_cols() == 1
    state.transition(N=1)

    # Unincorporating last dim should raise.
    with pytest.raises(ValueError):
        state.unincorporate_dim(state.outputs[0])