Example #1
def get_generative_clustering(M_c, M_r, T,
                              data_inverse_permutation_indices,
                              num_clusters, num_views):
    from crosscat.LocalEngine import LocalEngine
    import crosscat.cython_code.State as State
    import numpy
    # NOTE: this function only works because State.p_State doesn't use
    #       column_component_suffstats
    num_rows = len(T)
    num_cols = len(T[0])
    X_D_helper = numpy.repeat(range(num_clusters), num_rows // num_clusters)
    gen_X_D = [
        X_D_helper[numpy.argsort(data_inverse_permutation_index)]
        for data_inverse_permutation_index in data_inverse_permutation_indices
        ]
    gen_X_L_assignments = numpy.repeat(range(num_views), num_cols // num_views)
    # initialize to generate an X_L to manipulate
    local_engine = LocalEngine()
    bad_X_L, bad_X_D = local_engine.initialize(M_c, M_r, T,
                                               initialization='apart')
    bad_X_L['column_partition']['assignments'] = gen_X_L_assignments
    # manually construct a state in the generative configuration
    state = State.p_State(M_c, T, bad_X_L, gen_X_D)
    gen_X_L = state.get_X_L()
    gen_X_D = state.get_X_D()
    # run inference on hyperparameters to leave them in a reasonable state
    kernel_list = (
        'row_partition_hyperparameters',
        'column_hyperparameters',
        'column_partition_hyperparameter',
        )
    gen_X_L, gen_X_D = local_engine.analyze(M_c, T, gen_X_L, gen_X_D, n_steps=1,
                                            kernel_list=kernel_list)
    #
    return gen_X_L, gen_X_D
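
A hedged sketch of how get_generative_clustering might be driven follows. The data-generation call is an assumption: gen_factorial_data_objects and its send_data_inverse_permutation_indices flag stand in for whatever helper produces T, M_r, M_c, and the inverse permutation indices.

import crosscat.utils.data_utils as du

# Assumed data-generation helper; any routine returning the table plus
# inverse permutation indices would do here. Row and column counts are
# chosen to divide evenly by num_clusters and num_views.
num_clusters, num_views = 4, 2
T, M_r, M_c, data_inverse_permutation_indices = du.gen_factorial_data_objects(
    0, num_clusters, 8, 100, num_views,
    send_data_inverse_permutation_indices=True)

gen_X_L, gen_X_D = get_generative_clustering(
    M_c, M_r, T, data_inverse_permutation_indices, num_clusters, num_views)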
Example #2
def quick_le(seed, n_chains=1):
    # Specify synthetic dataset structure.
    cctypes = [
        'continuous', 'continuous', 'multinomial', 'multinomial', 'continuous'
    ]
    distargs = [None, None, dict(K=9), dict(K=7), None]
    cols_to_views = [0, 0, 0, 1, 1]
    separation = [0.6, 0.9]
    cluster_weights = [[.2, .3, .5], [.9, .1]]

    # Obtain the generated dataset and metadata.
    T, M_c, M_r = sdg.gen_data(cctypes,
                               N_ROWS,
                               cols_to_views,
                               cluster_weights,
                               separation,
                               seed=seed,
                               distargs=distargs,
                               return_structure=True)

    # Create and initialize the engine.
    engine = LocalEngine()
    X_L, X_D = engine.initialize(M_c, M_r, T, seed, n_chains=n_chains)

    return T, M_r, M_c, X_L, X_D, engine
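
A minimal usage sketch for quick_le. Note that sdg (a synthetic data generator) and N_ROWS are bound at module level in the original test file, and that depending on the crosscat version, analyze may also take an explicit seed; the call below mirrors the scripted examples further down.

T, M_r, M_c, X_L, X_D, engine = quick_le(seed=0)

# Run a few full transitions on the initialized chain.
X_L, X_D = engine.analyze(M_c, T, X_L, X_D, n_steps=5)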
Example #3
def runner(config):
    # NOTE: relies on module-level imports in the original test file:
    #       numpy, random, functools.partial, LocalEngine, and the
    #       crosscat utility modules bound to du, gu, and ctu
    # helpers
    def munge_config(config):
        kwargs = config.copy()
        kwargs['num_splits'] = kwargs.pop('num_views')
        n_steps = kwargs.pop('n_steps')
        n_test = kwargs.pop('n_test')
        return kwargs, n_steps, n_test

    def calc_ll(T, p_State):
        log_likelihoods = list(map(p_State.calc_row_predictive_logp, T))
        mean_log_likelihood = numpy.mean(log_likelihoods)
        return mean_log_likelihood

    def gen_data(**kwargs):
        T, M_c, M_r, gen_X_L, gen_X_D = du.generate_clean_state(**kwargs)
        #
        engine = LocalEngine()
        sampled_T = gu.sample_T(engine, M_c, T, gen_X_L, gen_X_D)
        T_test = random.sample(sampled_T, n_test)
        gen_data_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, gen_X_L, gen_X_D, T)
        gen_test_set_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, gen_X_L, gen_X_D, T_test)
        #
        return T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll

    kwargs, n_steps, n_test = munge_config(config)
    T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll = gen_data(**kwargs)
    # set up to run inference
    calc_data_ll = partial(calc_ll, T)
    calc_test_set_ll = partial(calc_ll, T_test)
    diagnostic_func_dict = dict(
        data_ll=calc_data_ll,
        test_set_ll=calc_test_set_ll,
    )
    # run inference
    engine = LocalEngine()
    X_L, X_D = engine.initialize(M_c, M_r, T)
    X_L, X_D, diagnostics_dict = engine.analyze(
        M_c, T, X_L, X_D, do_diagnostics=diagnostic_func_dict, n_steps=n_steps)
    # package result
    final_data_ll = diagnostics_dict['data_ll'][-1][-1]
    final_test_set_ll = diagnostics_dict['test_set_ll'][-1][-1]
    summary = dict(
        gen_data_ll=gen_data_ll,
        gen_test_set_ll=gen_test_set_ll,
        final_data_ll=final_data_ll,
        final_test_set_ll=final_test_set_ll,
    )

    result = dict(
        config=config,
        summary=summary,
        diagnostics_dict=diagnostics_dict,
    )
    return result
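
A hedged sketch of a config for runner. Only num_views, n_steps, and n_test are consumed directly by munge_config; the remaining keys are assumptions about what du.generate_clean_state accepts.

config = dict(
    gen_seed=0,       # assumed generate_clean_state parameter
    num_clusters=4,   # assumed generate_clean_state parameter
    num_cols=8,       # assumed generate_clean_state parameter
    num_rows=100,     # assumed generate_clean_state parameter
    num_views=2,      # renamed to num_splits by munge_config
    n_steps=10,       # transitions run during analyze
    n_test=20,        # held-out rows sampled for test_set_ll
)
result = runner(config)
print(result['summary'])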
from crosscat.LocalEngine import LocalEngine
import crosscat.utils.data_utils as data_utils


data_filename = 'T.csv'
inference_seed = 0
num_full_transitions = 10

# read the data table into an internal JSON representation
data_table, row_metadata, column_metadata, header = \
        data_utils.read_data_objects(data_filename)

# create an engine to run analysis, inference
engine = LocalEngine(seed=inference_seed)

# initialize Markov chain samples
initial_latent_state, initial_latent_state_clustering = \
        engine.initialize(column_metadata, row_metadata, data_table)

# run Markov chain transition kernels on samples
latent_state, latent_state_clustering = engine.analyze(column_metadata,
        data_table, initial_latent_state, initial_latent_state_clustering,
        n_steps=num_full_transitions)
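
Once analyze returns, the inferred column partition can be read directly off the latent state; the 'column_partition'/'assignments' keys are the same ones manipulated in Example #1.

# which view each column was assigned to after inference
print(latent_state['column_partition']['assignments'])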

Example #9
#!/usr/bin/env python

from crosscat.LocalEngine import LocalEngine
import crosscat.utils.data_utils as data_utils

data_filename = '/vagrant/DREAM5_network_inference_challenge/Network1/input data/net1_chip_features.tsv'
inference_seed = 0
num_full_transitions = 10
data_table, row_metadata, column_metadata, header = data_utils.read_data_objects(data_filename)
engine = LocalEngine(seed=inference_seed)
initial_latent_state, initial_latent_state_clustering = engine.initialize(column_metadata, row_metadata, data_table)
latent_state, latent_state_clustering = engine.analyze(
    column_metadata, data_table, initial_latent_state,
    initial_latent_state_clustering, n_steps=num_full_transitions)
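
If the fitted states need to be kept for later analysis, they can be pickled as ordinary Python structures; the output filename below is illustrative.

import pickle

# persist the fitted latent state and clustering for later inspection
with open('net1_crosscat_state.pkl', 'wb') as f:
    pickle.dump((latent_state, latent_state_clustering), f)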