Ejemplo n.º 1
0
def runner(config):
    """Time a single analysis run on synthetic data and report shape info.

    ``config`` is split by ``_munge_config`` into keyword arguments for data
    generation, keyword arguments for analysis, and a seed for the engine.
    A synthetic state is generated, analyzed once with timing enabled, and
    the result of ``du.get_state_shape`` before and after analysis is
    compared.

    Returns a dict with the keys ``config``, ``summary`` (holding
    ``elapsed_secs`` and ``same_shape``), ``table_shape``, ``start_dims``
    and ``end_dims``.
    """
    generate_args, analyze_args, inf_seed = _munge_config(config)
    # generate synthetic data
    T, M_c, M_r, X_L, X_D = du.generate_clean_state(max_mean=10, max_std=1,
            **generate_args)
    # Build a concrete list rather than calling map(): on Python 3 map()
    # returns a one-shot iterator, which is useless once stored in the
    # returned dict.  On Python 2 this is the same value map() produced.
    table_shape = [len(T), len(T[0])]
    start_dims = du.get_state_shape(X_L)
    # run engine with do_timing = True; the timing comes back as a 1-tuple
    engine = LocalEngine(inf_seed)
    X_L, X_D, (elapsed_secs,) = engine.analyze(M_c, T, X_L, X_D,
            do_timing=True,
            **analyze_args
            )
    # compare the state shape after analysis with the one recorded before
    end_dims = du.get_state_shape(X_L)
    same_shape = start_dims == end_dims
    summary = dict(
        elapsed_secs=elapsed_secs,
        same_shape=same_shape,
        )
    ret_dict = dict(
        config=config,
        summary=summary,
        table_shape=table_shape,
        start_dims=start_dims,
        end_dims=end_dims,
        )
    return ret_dict
Ejemplo n.º 2
0
 def gen_data(**kwargs):
     """Generate a synthetic table, a sampled test set and their scores.

     ``kwargs`` are forwarded unchanged to ``du.generate_clean_state``.
     The number of test rows, ``n_test``, is read from the enclosing scope.

     Returns ``(T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll)``, where
     the last two are the results of ``ctu.calc_mean_test_log_likelihood``
     for the generating state evaluated on ``T`` and on ``T_test``.
     """
     state = du.generate_clean_state(**kwargs)
     T, M_c, M_r, gen_X_L, gen_X_D = state

     def mean_ll(rows):
         # Score ``rows`` under the generating state.
         return ctu.calc_mean_test_log_likelihood(
             M_c, T, gen_X_L, gen_X_D, rows)

     # Sample rows via a fresh engine, then keep n_test of them as test set.
     local_engine = LocalEngine()
     candidates = gu.sample_T(local_engine, M_c, T, gen_X_L, gen_X_D)
     T_test = random.sample(candidates, n_test)

     gen_data_ll = mean_ll(T)
     gen_test_set_ll = mean_ll(T_test)
     return T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll
Ejemplo n.º 3
0
 def gen_data(**kwargs):
     """Build synthetic data plus a held-out sample and score both.

     All keyword arguments go straight to ``du.generate_clean_state``.
     ``n_test`` (the size of the test sample) is not a parameter; it is
     looked up in the enclosing scope.

     Returns the tuple
     ``(T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll)``.
     """
     T, M_c, M_r, gen_X_L, gen_X_D = du.generate_clean_state(**kwargs)

     # Draw rows through a fresh engine and pick n_test of them at random.
     drawn_rows = gu.sample_T(LocalEngine(), M_c, T, gen_X_L, gen_X_D)
     T_test = random.sample(drawn_rows, n_test)

     # Both scores share every argument except the rows being evaluated.
     shared_args = (M_c, T, gen_X_L, gen_X_D)
     gen_data_ll = ctu.calc_mean_test_log_likelihood(*(shared_args + (T,)))
     gen_test_set_ll = ctu.calc_mean_test_log_likelihood(
         *(shared_args + (T_test,)))

     return T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll
Ejemplo n.º 4
0
def draw_a_cc_state(filename):
    """Plot a crosscat state for synthetic data and save it to ``filename``.

    Generates a 100 x 50 table with ``du.generate_clean_state``, replaces
    about a quarter of its cells with NaN, loads it into a fresh BayesDB
    handle, creates and analyzes a crosscat generator, then draws the state
    with ``draw_state`` and writes the figure via ``plt.savefig``.

    Side effects: sets ``BAYESDB_WIZARD_MODE`` in ``os.environ``, creates a
    matplotlib figure and writes ``filename``.
    """
    rng_seed = random.randrange(10000)
    num_rows = 100
    num_cols = 50
    num_splits = 5
    num_clusters = 5

    nan_prop = .25

    table_name = 'plottest'
    generator_name = 'plottest_cc'

    # generate some clustered data
    ccmd = du.generate_clean_state(rng_seed, num_clusters, num_cols, num_rows,
                                   num_splits)
    T, _M_c, _M_r, _X_L, _X_D = ccmd

    # Knock out each cell independently with probability nan_prop so the
    # plot has missing values to render.
    for row in range(num_rows):
        for col in range(num_cols):
            if random.random() < nan_prop:
                T[row][col] = float('nan')

    input_df = pd.DataFrame(T, columns=['col_%i' % i for i in range(num_cols)])

    # NOTE(review): set before opening the database; presumably read by
    # bayeslite at open time -- confirm against the bayeslite docs.
    os.environ['BAYESDB_WIZARD_MODE'] = '1'
    bdb = bayeslite.bayesdb_open()
    bayesdb_read_pandas_df(bdb, table_name, input_df, create=True)
    bdb.execute('''
        create generator {} for {} using crosscat(guess(*))
    '''.format(generator_name, table_name))
    bdb.execute('initialize 4 models for {}'.format(generator_name))
    bdb.execute('analyze {} for 10 iterations wait'.format(generator_name))
    plt.figure(facecolor='white', tight_layout=False)
    # Reuse table_name / generator_name instead of repeating the literals so
    # the plot always targets the table and generator created above.
    draw_state(
        bdb,
        table_name,
        generator_name,
        0,
        separator_width=1,
        separator_color=(0., 0., 1., 1.),
        short_names=False,
        nan_color=(1, .15, .25, 1.))
    plt.savefig(filename)
Ejemplo n.º 5
0
def draw_a_cc_state(filename):
    """Plot a crosscat state for synthetic data and save it to ``filename``.

    Generates a 100 x 50 table with ``du.generate_clean_state``, replaces
    about a quarter of its cells with NaN, loads it into a fresh BayesDB
    handle, creates and analyzes a crosscat generator, then draws the state
    with ``draw_state`` and writes the figure via ``plt.savefig``.

    Side effects: sets ``BAYESDB_WIZARD_MODE`` in ``os.environ``, creates a
    matplotlib figure and writes ``filename``.
    """
    rng_seed = random.randrange(10000)
    num_rows = 100
    num_cols = 50
    num_splits = 5
    num_clusters = 5

    nan_prop = .25

    table_name = 'plottest'
    generator_name = 'plottest_cc'

    # generate some clustered data
    ccmd = du.generate_clean_state(rng_seed, num_clusters, num_cols, num_rows,
                                   num_splits)
    T, _M_c, _M_r, _X_L, _X_D = ccmd

    # Knock out each cell independently with probability nan_prop so the
    # plot has missing values to render.
    for row in range(num_rows):
        for col in range(num_cols):
            if random.random() < nan_prop:
                T[row][col] = float('nan')

    input_df = pd.DataFrame(T, columns=['col_%i' % i for i in range(num_cols)])

    # NOTE(review): set before opening the database; presumably read by
    # bayeslite at open time -- confirm against the bayeslite docs.
    os.environ['BAYESDB_WIZARD_MODE'] = '1'
    bdb = bayeslite.bayesdb_open()
    bayesdb_read_pandas_df(bdb, table_name, input_df, create=True)
    bdb.execute('''
        create generator {} for {} using crosscat(guess(*))
    '''.format(generator_name, table_name))
    bdb.execute('initialize 4 models for {}'.format(generator_name))
    bdb.execute('analyze {} for 10 iterations wait'.format(generator_name))
    plt.figure(facecolor='white', tight_layout=False)
    # Reuse table_name / generator_name instead of repeating the literals so
    # the plot always targets the table and generator created above.
    draw_state(bdb, table_name, generator_name, 0,
               separator_width=1, separator_color=(0., 0., 1., 1.),
               short_names=False, nan_color=(1, .15, .25, 1.))
    plt.savefig(filename)