# Example 1
def run_geweke(config):
    """Run a Geweke (joint-distribution) test and return its results.

    ``config`` is a dict supplying the data dimensions (``num_rows``,
    ``num_cols``), seeds, chain/iteration counts, hyperparameter grids,
    column types, probe columns, and the column-transition kernel.

    Returns a dict with the original config, the summary KL divergences,
    the forward (prior) samples, the posterior-chain samples, and the
    post-processed data.
    """
    num_rows = config['num_rows']
    num_cols = config['num_cols']
    inf_seed = config['inf_seed']
    gen_seed = config['gen_seed']
    num_chains = config['num_chains']
    num_iters = config['num_iters']
    row_crp_alpha_grid = config['row_crp_alpha_grid']
    column_crp_alpha_grid = config['column_crp_alpha_grid']
    max_mu_grid = config['max_mu_grid']
    max_s_grid = config['max_s_grid']
    n_grid = config['n_grid']
    cctypes = config['cctypes']
    num_multinomial_values = config['num_multinomial_values']
    probe_columns = config['probe_columns']
    CT_KERNEL = config['CT_KERNEL']

    # Every column gets the same number of multinomial values.
    num_values_list = [num_multinomial_values] * num_cols
    M_c = gen_M_c(cctypes, num_values_list)
    # The actual data values are irrelevant for a Geweke test; only the
    # table shape matters, so fill with uniform noise.
    T = numpy.random.uniform(0, 10, (num_rows, num_cols)).tolist()
    # NOTE(review): may be an issue if this n_grid doesn't match the other
    # grids in the C++ — confirm against the C++ grid construction.
    mu_grid = numpy.linspace(-max_mu_grid, max_mu_grid, n_grid)
    s_grid = numpy.linspace(1, max_s_grid, n_grid)

    # Run geweke: forward sample only (from the prior).
    with gu.Timer('generating forward samples'):
        forward_diagnostics_data = forward_sample_from_prior(inf_seed,
                num_iters, M_c, T, probe_columns,
                row_crp_alpha_grid, column_crp_alpha_grid,
                s_grid, mu_grid,
                do_multiprocessing=True,
                N_GRID=n_grid,
                )
    # Run geweke: transition-erase loop (posterior chains).
    with gu.Timer('generating posterior samples'):
        diagnostics_data_list = run_posterior_chains(M_c, T, num_chains, num_iters, probe_columns,
                row_crp_alpha_grid, column_crp_alpha_grid,
                s_grid, mu_grid,
                N_GRID=n_grid,
                CT_KERNEL=CT_KERNEL,
                )
    # Post process data: compare forward vs posterior sample distributions.
    with gu.Timer('post processing data'):
        processed_data = post_process(forward_diagnostics_data, diagnostics_data_list)
    result = dict(
            config=config,
            summary=processed_data['summary_kls'],
            forward_diagnostics_data=forward_diagnostics_data,
            diagnostics_data_list=diagnostics_data_list,
            processed_data=processed_data,
            )
    return result
# Example 2
def _do_analyze_with_diagnostic(
    SEED,
    X_L,
    X_D,
    M_c,
    T,
    kernel_list,
    n_steps,
    c,
    r,
    max_iterations,
    max_time,
    diagnostic_func_dict,
    every_N,
    ROW_CRP_ALPHA_GRID,
    COLUMN_CRP_ALPHA_GRID,
    S_GRID,
    MU_GRID,
    N_GRID,
    do_timing,
    CT_KERNEL,
):
    """Run ``n_steps`` transitions on a state, collecting diagnostics.

    Transitions are run in chunks of ``every_N`` steps; after each chunk,
    every function in ``diagnostic_func_dict`` is applied to the state and
    its value appended to the corresponding list in the returned dict.

    Returns ``(X_L_prime, X_D_prime, diagnostics_dict)``; when ``do_timing``
    is truthy, the third element is instead the elapsed seconds (diagnostics
    and timing are exclusive).
    """
    diagnostics_dict = collections.defaultdict(list)
    if diagnostic_func_dict is None:
        # No diagnostics requested: run all steps in a single chunk.
        diagnostic_func_dict = dict()
        every_N = None
    child_n_steps_list = get_child_n_steps_list(n_steps, every_N)
    #
    p_State = State.p_State(
        M_c,
        T,
        X_L,
        X_D,
        SEED=SEED,
        ROW_CRP_ALPHA_GRID=ROW_CRP_ALPHA_GRID,
        COLUMN_CRP_ALPHA_GRID=COLUMN_CRP_ALPHA_GRID,
        S_GRID=S_GRID,
        MU_GRID=MU_GRID,
        N_GRID=N_GRID,
        CT_KERNEL=CT_KERNEL,
    )
    with gu.Timer('all transitions', verbose=False) as timer:
        for child_n_steps in child_n_steps_list:
            p_State.transition(kernel_list, child_n_steps, c, r,
                               max_iterations, max_time)
            # .items() instead of the Python-2-only .iteritems(): works on
            # both Python 2 and 3, matching the rest of the file.
            for diagnostic_name, diagnostic_func in diagnostic_func_dict.items():
                diagnostic_value = diagnostic_func(p_State)
                diagnostics_dict[diagnostic_name].append(diagnostic_value)
    X_L_prime = p_State.get_X_L()
    X_D_prime = p_State.get_X_D()
    #
    if do_timing:
        # diagnostics and timing are exclusive
        diagnostics_dict = timer.elapsed_secs
    return X_L_prime, X_D_prime, diagnostics_dict
def time_analyze_helper(table_data, data_dict, command_dict):
    """Time one analyze run on a freshly generated clean state.

    Generates a synthetic state from the parameters in ``data_dict``
    (ignoring the incoming ``table_data``, which is rebuilt locally), runs
    ``analyze_helper`` under a timer, and returns a dict describing the
    table shape, the state dimensions before and after, the elapsed
    seconds, and the kernel settings used.
    """
    # FIXME: this is a kludge
    command_dict.update(data_dict)

    T, M_c, M_r, X_L, X_D = ttu.generate_clean_state(
        data_dict['SEED'],
        data_dict['num_clusters'],
        data_dict['num_cols'],
        data_dict['num_rows'],
        data_dict['num_views'],
        max_mean=10,
        max_std=1,
    )
    # Rebuild table_data from the generated state, discarding the argument.
    table_data = dict(T=T, M_c=M_c)

    data_dict['X_L'] = X_L
    data_dict['X_D'] = X_D
    start_dims = du.get_state_shape(X_L)
    with gu.Timer('time_analyze_helper', verbose=False) as timer:
        inner_ret_dict = analyze_helper(table_data, data_dict, command_dict)
    end_dims = du.get_state_shape(inner_ret_dict['X_L'])

    rows = table_data['T']
    return dict(
        table_shape=(len(rows), len(rows[0])),
        start_dims=start_dims,
        end_dims=end_dims,
        elapsed_secs=timer.elapsed_secs,
        kernel_list=command_dict['kernel_list'],
        n_steps=command_dict['n_steps'],
    )
# Example 4
def _do_analyze_with_diagnostic(
        SEED, X_L, X_D, M_c, T, kernel_list, n_steps, c, r, max_iterations,
        max_time, diagnostic_func_dict, every_N, ROW_CRP_ALPHA_GRID,
        COLUMN_CRP_ALPHA_GRID, S_GRID, MU_GRID, N_GRID, do_timing, CT_KERNEL,
        progress,):
    """Run ``n_steps`` transitions on a state, collecting diagnostics.

    Diagnostic collection is delegated to ``p_State.transition`` via the
    ``diagnostic_func_dict`` / ``diagnostics_every_N`` arguments.

    Returns ``(X_L_prime, X_D_prime, diagnostics_dict)``; when ``do_timing``
    is truthy, the third element is instead the elapsed seconds (diagnostics
    and timing are exclusive).
    """
    if diagnostic_func_dict is None:
        # No diagnostics requested: disable periodic collection.
        diagnostic_func_dict = dict()
        every_N = None

    diagnostics_dict = collections.defaultdict(list)

    engine_state = State.p_State(
        M_c, T, X_L, X_D, SEED=SEED, ROW_CRP_ALPHA_GRID=ROW_CRP_ALPHA_GRID,
        COLUMN_CRP_ALPHA_GRID=COLUMN_CRP_ALPHA_GRID, S_GRID=S_GRID,
        MU_GRID=MU_GRID, N_GRID=N_GRID, CT_KERNEL=CT_KERNEL)

    with gu.Timer('all transitions', verbose=False) as timer:
        engine_state.transition(
            kernel_list, n_steps, c, r, max_iterations, max_time,
            progress=progress,
            diagnostic_func_dict=diagnostic_func_dict,
            diagnostics_dict=diagnostics_dict,
            diagnostics_every_N=every_N)

    if do_timing:
        # Diagnostics and timing are exclusive.
        return engine_state.get_X_L(), engine_state.get_X_D(), timer.elapsed_secs

    return engine_state.get_X_L(), engine_state.get_X_D(), diagnostics_dict
def time_analyze_helper(table_data, dict_in):
    """Wrap ``analyze_helper`` with timing and state-shape bookkeeping.

    Returns a dict with the table shape, the state dimensions before and
    after the analyze run, the elapsed seconds, and the kernel settings.
    """
    start_dims = du.get_state_shape(dict_in['X_L'])

    with gu.Timer('time_analyze_helper', verbose=False) as timer:
        result = analyze_helper(table_data, dict_in)

    end_dims = du.get_state_shape(result['X_L'])
    data = table_data['T']
    return {
        'table_shape': (len(data), len(data[0])),
        'start_dims': start_dims,
        'end_dims': end_dims,
        'elapsed_secs': timer.elapsed_secs,
        'kernel_list': dict_in['kernel_list'],
        'n_steps': dict_in['n_steps'],
    }
def convergence_analyze_helper(table_data, data_dict, command_dict):
    """Measure convergence of analyze() on synthetic factorial data.

    Generates a synthetic table with known view/cluster structure, then runs
    analyze in blocks of ``block_size`` transitions, recording after each
    block the column ARI against the ground-truth view assignment, the mean
    test-set log likelihood, and the elapsed seconds of that block.

    ``table_data`` and ``command_dict`` are accepted but never read here;
    all settings come from ``data_dict``.

    Returns a dict of the run settings plus the recorded progressions.
    """
    gen_seed = data_dict['SEED']
    num_clusters = data_dict['num_clusters']
    num_cols = data_dict['num_cols']
    num_rows = data_dict['num_rows']
    num_views = data_dict['num_views']
    max_mean = data_dict['max_mean']
    n_test = data_dict['n_test']
    num_transitions = data_dict['n_steps']
    block_size = data_dict['block_size']
    init_seed = data_dict['init_seed']

    # generate some data
    T, M_r, M_c, data_inverse_permutation_indices = \
            du.gen_factorial_data_objects(gen_seed, num_clusters,
                    num_cols, num_rows, num_views,
                    max_mean=max_mean, max_std=1,
                    send_data_inverse_permutation_indices=True)
    view_assignment_ground_truth = \
            ctu.determine_synthetic_column_ground_truth_assignments(num_cols,
                    num_views)
    # Reference clustering/likelihood from the generative (true) latent state.
    X_L_gen, X_D_gen = ttu.get_generative_clustering(
        M_c, M_r, T, data_inverse_permutation_indices, num_clusters, num_views)
    # NOTE(review): seed_seed=0 fixes the test set across runs — confirm
    # that is intended when comparing multiple configurations.
    T_test = ctu.create_test_set(M_c, T, X_L_gen, X_D_gen, n_test, seed_seed=0)
    generative_mean_test_log_likelihood = \
            ctu.calc_mean_test_log_likelihood(M_c, T, X_L_gen, X_D_gen, T_test)

    # additional set up
    engine = LE.LocalEngine(init_seed)
    column_ari_list = []
    mean_test_ll_list = []
    elapsed_seconds_list = []

    # get initial ARI, test_ll (from-the-prior state, before any transitions)
    with gu.Timer('initialize', verbose=False) as timer:
        X_L, X_D = engine.initialize(M_c,
                                     M_r,
                                     T,
                                     initialization='from_the_prior')
    column_ari = ctu.get_column_ARI(X_L, view_assignment_ground_truth)
    column_ari_list.append(column_ari)
    mean_test_ll = ctu.calc_mean_test_log_likelihood(M_c, T, X_L, X_D, T_test)
    mean_test_ll_list.append(mean_test_ll)
    elapsed_seconds_list.append(timer.elapsed_secs)

    # run blocks of transitions, recording ARI, test_ll progression
    completed_transitions = 0
    n_steps = min(block_size, num_transitions)
    # NOTE(review): when num_transitions is not a multiple of block_size the
    # final iteration still runs a full n_steps, so slightly more than
    # num_transitions transitions may execute — confirm this is acceptable.
    while (completed_transitions < num_transitions):
        # We won't be limiting by time in the convergence runs
        # NOTE(review): timer label 'initialize' looks copy-pasted; this
        # block times engine.analyze, not initialization.
        with gu.Timer('initialize', verbose=False) as timer:
            X_L, X_D = engine.analyze(M_c,
                                      T,
                                      X_L,
                                      X_D,
                                      kernel_list=(),
                                      n_steps=n_steps,
                                      max_time=-1)
        completed_transitions = completed_transitions + block_size
        #
        column_ari = ctu.get_column_ARI(X_L, view_assignment_ground_truth)
        column_ari_list.append(column_ari)
        mean_test_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, X_L, X_D, T_test)
        mean_test_ll_list.append(mean_test_ll)
        elapsed_seconds_list.append(timer.elapsed_secs)

    ret_dict = dict(
        num_rows=num_rows,
        num_cols=num_cols,
        num_views=num_views,
        num_clusters=num_clusters,
        max_mean=max_mean,
        column_ari_list=column_ari_list,
        mean_test_ll_list=mean_test_ll_list,
        generative_mean_test_log_likelihood=generative_mean_test_log_likelihood,
        elapsed_seconds_list=elapsed_seconds_list,
        n_steps=num_transitions,
        block_size=block_size,
    )
    return ret_dict