def get_generative_clustering(M_c, M_r, T, data_inverse_permutation_indices,
                              num_clusters, num_views):
    """Build a latent state (X_L, X_D) in the known generative configuration.

    Rows are assigned evenly across ``num_clusters`` clusters (then permuted
    back via the inverse permutation indices) and columns evenly across
    ``num_views`` views.  A single analyze step over hyperparameter kernels
    leaves the hyperparameters in a reasonable state.

    Args:
        M_c, M_r: column / row metadata dicts.
        T: data table (list of rows).
        data_inverse_permutation_indices: one inverse permutation per view.
        num_clusters: number of row clusters per view.
        num_views: number of column views.

    Returns:
        (gen_X_L, gen_X_D): the generative latent state.
    """
    from crosscat.LocalEngine import LocalEngine
    import crosscat.cython_code.State as State
    # NOTE: this function only works because State.p_State doesn't use
    # column_component_suffstats
    num_rows = len(T)
    num_cols = len(T[0])
    # Floor division: numpy.repeat requires integer repeat counts; true
    # division under Python 3 would yield a float and raise.
    X_D_helper = numpy.repeat(range(num_clusters), (num_rows // num_clusters))
    gen_X_D = [
        X_D_helper[numpy.argsort(data_inverse_permutation_index)]
        for data_inverse_permutation_index in data_inverse_permutation_indices
    ]
    gen_X_L_assignments = numpy.repeat(range(num_views),
                                       (num_cols // num_views))
    # initialize to generate an X_L to manipulate
    local_engine = LocalEngine()
    bad_X_L, bad_X_D = local_engine.initialize(M_c, M_r, T,
                                               initialization='apart')
    bad_X_L['column_partition']['assignments'] = gen_X_L_assignments
    # manually construct state in the generative configuration
    state = State.p_State(M_c, T, bad_X_L, gen_X_D)
    gen_X_L = state.get_X_L()
    gen_X_D = state.get_X_D()
    # run inference on hyperparameters to leave them in a reasonable state
    kernel_list = (
        'row_partition_hyperparameters',
        'column_hyperparameters',
        'column_partition_hyperparameter',
    )
    gen_X_L, gen_X_D = local_engine.analyze(M_c, T, gen_X_L, gen_X_D,
                                            n_steps=1, kernel_list=kernel_list)
    return gen_X_L, gen_X_D
def quick_le(seed, n_chains=1):
    """Construct a small synthetic two-view dataset and an initialized engine.

    Args:
        seed: RNG seed passed to data generation and engine initialization.
        n_chains: number of Markov chains to initialize.

    Returns:
        (T, M_r, M_c, X_L, X_D, engine).
    """
    # Synthetic dataset layout: columns 0-2 form view 0, columns 3-4 view 1.
    cctypes = [
        'continuous',
        'continuous',
        'multinomial',
        'multinomial',
        'continuous',
    ]
    distargs = [None, None, dict(K=9), dict(K=7), None]
    cols_to_views = [0, 0, 0, 1, 1]
    separation = [0.6, 0.9]
    cluster_weights = [[.2, .3, .5], [.9, .1]]

    # Obtain the generated dataset and metadata/
    T, M_c, M_r = sdg.gen_data(
        cctypes, N_ROWS, cols_to_views, cluster_weights, separation,
        seed=seed, distargs=distargs, return_structure=True)

    # Create, initialize, and analyze the engine.
    engine = LocalEngine()
    X_L, X_D = engine.initialize(M_c, M_r, T, seed, n_chains=n_chains)
    return T, M_r, M_c, X_L, X_D, engine
def get_generative_clustering(M_c, M_r, T, data_inverse_permutation_indices,
                              num_clusters, num_views):
    """Build a latent state (X_L, X_D) in the known generative configuration.

    Rows are assigned evenly across ``num_clusters`` clusters (then permuted
    back via the inverse permutation indices) and columns evenly across
    ``num_views`` views.  A single analyze step over hyperparameter kernels
    leaves the hyperparameters in a reasonable state.

    Args:
        M_c, M_r: column / row metadata dicts.
        T: data table (list of rows).
        data_inverse_permutation_indices: one inverse permutation per view.
        num_clusters: number of row clusters per view.
        num_views: number of column views.

    Returns:
        (gen_X_L, gen_X_D): the generative latent state.
    """
    from crosscat.LocalEngine import LocalEngine
    import crosscat.cython_code.State as State
    # NOTE: this function only works because State.p_State doesn't use
    # column_component_suffstats
    num_rows = len(T)
    num_cols = len(T[0])
    # Floor division: numpy.repeat requires integer repeat counts; true
    # division under Python 3 would yield a float and raise.
    X_D_helper = numpy.repeat(range(num_clusters), (num_rows // num_clusters))
    gen_X_D = [
        X_D_helper[numpy.argsort(data_inverse_permutation_index)]
        for data_inverse_permutation_index in data_inverse_permutation_indices
    ]
    gen_X_L_assignments = numpy.repeat(range(num_views),
                                       (num_cols // num_views))
    # initialize to generate an X_L to manipulate
    local_engine = LocalEngine()
    bad_X_L, bad_X_D = local_engine.initialize(M_c, M_r, T,
                                               initialization='apart')
    bad_X_L['column_partition']['assignments'] = gen_X_L_assignments
    # manually construct state in the generative configuration
    state = State.p_State(M_c, T, bad_X_L, gen_X_D)
    gen_X_L = state.get_X_L()
    gen_X_D = state.get_X_D()
    # run inference on hyperparameters to leave them in a reasonable state
    kernel_list = (
        'row_partition_hyperparameters',
        'column_hyperparameters',
        'column_partition_hyperparameter',
    )
    gen_X_L, gen_X_D = local_engine.analyze(M_c, T, gen_X_L, gen_X_D,
                                            n_steps=1, kernel_list=kernel_list)
    return gen_X_L, gen_X_D
def runner(config):
    """Generate a clean synthetic state, run inference, and compare likelihoods.

    Args:
        config: dict of engine kwargs plus runner-only keys ``num_views``
            (renamed to ``num_splits``), ``n_steps``, and ``n_test``.

    Returns:
        dict with the original ``config``, a ``summary`` of generative vs.
        final data/test-set log likelihoods, and the full diagnostics dict.
    """
    # helpers
    def munge_config(config):
        # Separate the engine kwargs from the runner-only settings.
        kwargs = config.copy()
        kwargs['num_splits'] = kwargs.pop('num_views')
        n_steps = kwargs.pop('n_steps')
        n_test = kwargs.pop('n_test')
        return kwargs, n_steps, n_test

    def calc_ll(T, p_State):
        # list(...) so numpy.mean receives a sequence — map is a lazy
        # iterator on Python 3 and numpy cannot reduce it.
        log_likelihoods = list(map(p_State.calc_row_predictive_logp, T))
        mean_log_likelihood = numpy.mean(log_likelihoods)
        return mean_log_likelihood

    def gen_data(**kwargs):
        T, M_c, M_r, gen_X_L, gen_X_D = du.generate_clean_state(**kwargs)
        #
        engine = LocalEngine()
        sampled_T = gu.sample_T(engine, M_c, T, gen_X_L, gen_X_D)
        T_test = random.sample(sampled_T, n_test)
        gen_data_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, gen_X_L, gen_X_D, T)
        gen_test_set_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, gen_X_L, gen_X_D, T_test)
        #
        return T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll

    kwargs, n_steps, n_test = munge_config(config)
    T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll = gen_data(**kwargs)
    # set up to run inference
    calc_data_ll = partial(calc_ll, T)
    calc_test_set_ll = partial(calc_ll, T_test)
    diagnostic_func_dict = dict(
        data_ll=calc_data_ll,
        test_set_ll=calc_test_set_ll,
    )
    # run inference
    engine = LocalEngine()
    X_L, X_D = engine.initialize(M_c, M_r, T)
    X_L, X_D, diagnostics_dict = engine.analyze(
        M_c, T, X_L, X_D, do_diagnostics=diagnostic_func_dict,
        n_steps=n_steps)
    # package result: diagnostics are per-step lists of per-chain values;
    # take the last step of the last chain.
    final_data_ll = diagnostics_dict['data_ll'][-1][-1]
    final_test_set_ll = diagnostics_dict['test_set_ll'][-1][-1]
    summary = dict(
        gen_data_ll=gen_data_ll,
        gen_test_set_ll=gen_test_set_ll,
        final_data_ll=final_data_ll,
        final_test_set_ll=final_test_set_ll,
    )
    result = dict(
        config=config,
        summary=summary,
        diagnostics_dict=diagnostics_dict,
    )
    return result
def runner(config):
    """Generate a clean synthetic state, run inference, and compare likelihoods.

    Args:
        config: dict of engine kwargs plus runner-only keys ``num_views``
            (renamed to ``num_splits``), ``n_steps``, and ``n_test``.

    Returns:
        dict with the original ``config``, a ``summary`` of generative vs.
        final data/test-set log likelihoods, and the full diagnostics dict.
    """
    # helpers
    def munge_config(config):
        # Separate the engine kwargs from the runner-only settings.
        kwargs = config.copy()
        kwargs['num_splits'] = kwargs.pop('num_views')
        n_steps = kwargs.pop('n_steps')
        n_test = kwargs.pop('n_test')
        return kwargs, n_steps, n_test

    def calc_ll(T, p_State):
        # list(...) so numpy.mean receives a sequence — map is a lazy
        # iterator on Python 3 and numpy cannot reduce it.
        log_likelihoods = list(map(p_State.calc_row_predictive_logp, T))
        mean_log_likelihood = numpy.mean(log_likelihoods)
        return mean_log_likelihood

    def gen_data(**kwargs):
        T, M_c, M_r, gen_X_L, gen_X_D = du.generate_clean_state(**kwargs)
        #
        engine = LocalEngine()
        sampled_T = gu.sample_T(engine, M_c, T, gen_X_L, gen_X_D)
        T_test = random.sample(sampled_T, n_test)
        gen_data_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, gen_X_L, gen_X_D, T)
        gen_test_set_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, gen_X_L, gen_X_D, T_test)
        #
        return T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll

    kwargs, n_steps, n_test = munge_config(config)
    T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll = gen_data(**kwargs)
    # set up to run inference
    calc_data_ll = partial(calc_ll, T)
    calc_test_set_ll = partial(calc_ll, T_test)
    diagnostic_func_dict = dict(
        data_ll=calc_data_ll,
        test_set_ll=calc_test_set_ll,
    )
    # run inference
    engine = LocalEngine()
    X_L, X_D = engine.initialize(M_c, M_r, T)
    X_L, X_D, diagnostics_dict = engine.analyze(
        M_c, T, X_L, X_D, do_diagnostics=diagnostic_func_dict,
        n_steps=n_steps)
    # package result: diagnostics are per-step lists of per-chain values;
    # take the last step of the last chain.
    final_data_ll = diagnostics_dict['data_ll'][-1][-1]
    final_test_set_ll = diagnostics_dict['test_set_ll'][-1][-1]
    summary = dict(
        gen_data_ll=gen_data_ll,
        gen_test_set_ll=gen_test_set_ll,
        final_data_ll=final_data_ll,
        final_test_set_ll=final_test_set_ll,
    )
    result = dict(
        config=config,
        summary=summary,
        diagnostics_dict=diagnostics_dict,
    )
    return result
def quick_le(seed, n_chains=1):
    """Construct a small synthetic two-view dataset and a seeded LocalEngine.

    Args:
        seed: RNG seed for both data generation and the engine.
        n_chains: number of Markov chains to initialize.

    Returns:
        (T, M_r, M_c, X_L, X_D, engine).
    """
    # Synthetic dataset layout: columns 0-2 form view 0, columns 3-4 view 1.
    cctypes = [
        'continuous',
        'continuous',
        'multinomial',
        'multinomial',
        'continuous',
    ]
    distargs = [None, None, dict(K=9), dict(K=7), None]
    cols_to_views = [0, 0, 0, 1, 1]
    separation = [0.6, 0.9]
    cluster_weights = [[.2, .3, .5], [.9, .1]]

    # Obtain the generated dataset and metadata/
    T, M_c, M_r = sdg.gen_data(
        cctypes, N_ROWS, cols_to_views, cluster_weights, separation,
        seed=seed, distargs=distargs, return_structure=True)

    # Create, initialize, and analyze the engine.
    engine = LocalEngine(seed=seed)
    X_L, X_D = engine.initialize(M_c, M_r, T, n_chains=n_chains)
    return T, M_r, M_c, X_L, X_D, engine
from crosscat.LocalEngine import LocalEngine
import crosscat.utils.data_utils as data_utils

# Script settings: input table, RNG seed, and transition count.
data_filename = 'T.csv'
inference_seed = 0
num_full_transitions = 10

# read the data table into internal json representation
data_table, row_metadata, column_metadata, header = (
    data_utils.read_data_objects(data_filename))

# create an engine to run analysis, inference
engine = LocalEngine(seed=inference_seed)

# initialize markov chain samples
initial_latent_state, initial_latent_state_clustering = engine.initialize(
    column_metadata, row_metadata, data_table)

# run markov chain transition kernels on samples
latent_state, latent_state_clustering = engine.analyze(
    column_metadata, data_table, initial_latent_state,
    initial_latent_state_clustering, n_steps=num_full_transitions)
from crosscat.LocalEngine import LocalEngine
import crosscat.utils.data_utils as data_utils

# Script settings: input table, RNG seed, and transition count.
data_filename = "T.csv"
inference_seed = 0
num_full_transitions = 10

# read the data table into internal json representation
data_table, row_metadata, column_metadata, header = \
    data_utils.read_data_objects(data_filename)

# create an engine to run analysis, inference
engine = LocalEngine(seed=inference_seed)

# initialize markov chain samples
initial_latent_state, initial_latent_state_clustering = \
    engine.initialize(column_metadata, row_metadata, data_table)

# run markov chain transition kernels on samples
latent_state, latent_state_clustering = engine.analyze(
    column_metadata,
    data_table,
    initial_latent_state,
    initial_latent_state_clustering,
    n_steps=num_full_transitions,
)
#!/usr/bin/env python
from crosscat.LocalEngine import LocalEngine
import crosscat.utils.data_utils as data_utils

# Script settings: DREAM5 input table, RNG seed, and transition count.
data_filename = '/vagrant/DREAM5_network_inference_challenge/Network1/input data/net1_chip_features.tsv'
inference_seed = 0
num_full_transitions = 10

# Read the data table into the internal representation.
data_table, row_metadata, column_metadata, header = (
    data_utils.read_data_objects(data_filename))

# Create a seeded engine, initialize a chain, and run the transitions.
engine = LocalEngine(seed=inference_seed)
initial_latent_state, initial_latent_state_clustering = engine.initialize(
    column_metadata, row_metadata, data_table)
latent_state, latent_state_clustering = engine.analyze(
    column_metadata, data_table, initial_latent_state,
    initial_latent_state_clustering, n_steps=num_full_transitions)