def runner(config):
    """Time a single ``LocalEngine.analyze`` call on freshly generated data.

    ``config`` is split by ``_munge_config`` into the keyword arguments for
    data generation, the keyword arguments for analysis, and the inference
    seed handed to ``LocalEngine``.

    Returns a dict with the keys:

    * ``config`` -- the ``config`` object that was passed in, unchanged.
    * ``summary`` -- dict with ``elapsed_secs`` (the timing value returned by
      ``engine.analyze``) and ``same_shape`` (whether the state shape
      reported by ``du.get_state_shape`` is equal before and after analysis).
    * ``table_shape`` -- ``[len(T), len(T[0])]``.
    * ``start_dims`` / ``end_dims`` -- ``du.get_state_shape(X_L)`` before and
      after analysis.
    """
    generate_args, analyze_args, inf_seed = _munge_config(config)

    # generate synthetic data
    T, M_c, M_r, X_L, X_D = du.generate_clean_state(
        max_mean=10, max_std=1, **generate_args)
    # Build a concrete list rather than calling ``map``: on Python 3 ``map``
    # returns a lazy, single-use iterator, which would be stored in the
    # result dict and could not be indexed, re-read or pickled.
    table_shape = [len(T), len(T[0])]
    start_dims = du.get_state_shape(X_L)

    # run engine with do_timing = True
    engine = LocalEngine(inf_seed)
    # With do_timing=True the third returned element is unpacked as a
    # one-element sequence holding the elapsed time.
    X_L, X_D, (elapsed_secs,) = engine.analyze(
        M_c, T, X_L, X_D, do_timing=True, **analyze_args)

    end_dims = du.get_state_shape(X_L)
    same_shape = start_dims == end_dims
    summary = dict(
        elapsed_secs=elapsed_secs,
        same_shape=same_shape,
    )
    ret_dict = dict(
        config=config,
        summary=summary,
        table_shape=table_shape,
        start_dims=start_dims,
        end_dims=end_dims,
    )
    return ret_dict
def gen_data(**kwargs):
    """Generate a synthetic state plus a held-out test set and score both.

    All keyword arguments are forwarded verbatim to
    ``du.generate_clean_state``.

    Returns the tuple ``(T, M_c, M_r, T_test, gen_data_ll, gen_test_set_ll)``
    where ``T_test`` is a ``random.sample`` of rows drawn via ``gu.sample_T``
    and the two log-likelihood values come from
    ``ctu.calc_mean_test_log_likelihood`` evaluated on ``T`` itself and on
    ``T_test`` respectively, both under the generating state.
    """
    # NOTE(review): ``n_test`` is not bound inside this function; it is
    # presumably a module-level setting -- confirm it is defined before use.
    generated = du.generate_clean_state(**kwargs)
    T, M_c, M_r, gen_X_L, gen_X_D = generated

    # Draw candidate rows from the generating state, then keep n_test of them.
    sampler = LocalEngine()
    candidate_rows = gu.sample_T(sampler, M_c, T, gen_X_L, gen_X_D)
    T_test = random.sample(candidate_rows, n_test)

    # Score the training table first, then the held-out sample.
    ll_train = ctu.calc_mean_test_log_likelihood(
        M_c, T, gen_X_L, gen_X_D, T)
    ll_held_out = ctu.calc_mean_test_log_likelihood(
        M_c, T, gen_X_L, gen_X_D, T_test)

    return T, M_c, M_r, T_test, ll_train, ll_held_out
def draw_a_cc_state(filename):
    """Analyze a small synthetic table and save a plot of one model's state.

    Generates clustered data with ``du.generate_clean_state``, overwrites a
    random fraction (``nan_prop``) of the cells with NaN, loads the table
    into a new BayesDB handle, creates and analyzes a crosscat generator,
    draws the state with ``draw_state`` and writes the current matplotlib
    figure to ``filename``.

    Side effects: sets ``os.environ['BAYESDB_WIZARD_MODE']`` to ``'1'`` and
    consumes values from the global ``random`` generator. The BayesDB handle
    is closed before returning, including when an error is raised.
    """
    rng_seed = random.randrange(10000)
    num_rows = 100
    num_cols = 50
    num_splits = 5
    num_clusters = 5

    nan_prop = .25

    table_name = 'plottest'
    generator_name = 'plottest_cc'

    # generate some clustered data
    ccmd = du.generate_clean_state(rng_seed, num_clusters, num_cols, num_rows,
                                   num_splits)
    T, _M_c, _M_r, _X_L, _X_D = ccmd

    # Replace roughly nan_prop of the cells with NaN so the plot exercises
    # the missing-value colouring.
    for row in range(num_rows):
        for col in range(num_cols):
            if random.random() < nan_prop:
                T[row][col] = float('nan')

    input_df = pd.DataFrame(T, columns=['col_%i' % i for i in range(num_cols)])

    os.environ['BAYESDB_WIZARD_MODE'] = '1'
    bdb = bayeslite.bayesdb_open()
    try:
        bayesdb_read_pandas_df(bdb, table_name, input_df, create=True)
        bdb.execute('''
            create generator {} for {} using crosscat(guess(*))
        '''.format(generator_name, table_name))
        bdb.execute('initialize 4 models for {}'.format(generator_name))
        bdb.execute('analyze {} for 10 iterations wait'.format(generator_name))

        plt.figure(facecolor='white', tight_layout=False)
        # Use the same names the table/generator were created with instead of
        # repeating the literals; the 0 is passed through to draw_state
        # (presumably the model number -- confirm against draw_state).
        draw_state(
            bdb, table_name, generator_name, 0,
            separator_width=1, separator_color=(0., 0., 1., 1.),
            short_names=False, nan_color=(1, .15, .25, 1.))
        plt.savefig(filename)
    finally:
        # Release the database handle even if analysis or plotting fails.
        bdb.close()
def draw_a_cc_state(filename):
    """Build, analyze and plot a synthetic crosscat state, saving to a file.

    Steps: generate clustered data via ``du.generate_clean_state``; set each
    cell to NaN with probability ``nan_prop``; read the resulting DataFrame
    into a new BayesDB handle; create a crosscat generator, initialize 4
    models and analyze for 10 iterations; draw the state with ``draw_state``
    and save the current matplotlib figure to ``filename``.

    Side effects: sets ``os.environ['BAYESDB_WIZARD_MODE']`` to ``'1'`` and
    draws from the global ``random`` generator. The BayesDB handle is always
    closed on exit from this function.
    """
    rng_seed = random.randrange(10000)
    num_rows = 100
    num_cols = 50
    num_splits = 5
    num_clusters = 5

    nan_prop = .25

    table_name = 'plottest'
    generator_name = 'plottest_cc'

    # generate some clustered data
    ccmd = du.generate_clean_state(rng_seed, num_clusters, num_cols, num_rows,
                                   num_splits)
    T, _M_c, _M_r, _X_L, _X_D = ccmd

    # Punch NaN holes into the table, visiting cells in row-major order.
    for row in range(num_rows):
        for col in range(num_cols):
            if random.random() < nan_prop:
                T[row][col] = float('nan')

    input_df = pd.DataFrame(T, columns=['col_%i' % i for i in range(num_cols)])

    os.environ['BAYESDB_WIZARD_MODE'] = '1'
    bdb = bayeslite.bayesdb_open()
    try:
        bayesdb_read_pandas_df(bdb, table_name, input_df, create=True)
        bdb.execute('''
            create generator {} for {} using crosscat(guess(*))
        '''.format(generator_name, table_name))
        bdb.execute('initialize 4 models for {}'.format(generator_name))
        bdb.execute('analyze {} for 10 iterations wait'.format(generator_name))

        plt.figure(facecolor='white', tight_layout=False)
        # Pass the variables rather than repeating the string literals so the
        # plotted table/generator cannot drift from the ones created above.
        draw_state(bdb, table_name, generator_name, 0,
                   separator_width=1, separator_color=(0., 0., 1., 1.),
                   short_names=False, nan_color=(1, .15, .25, 1.))
        plt.savefig(filename)
    finally:
        # Close the handle on both the success and the error path.
        bdb.close()