def analyze_helper(table_data, data_dict, command_dict):
    # unpack the table and the latent state to resume from
    M_c = table_data['M_c']
    T = table_data['T']
    SEED = data_dict['SEED']
    X_L = data_dict['X_L']
    X_D = data_dict['X_D']
    # unpack the analyze parameters
    kernel_list = command_dict['kernel_list']
    n_steps = command_dict['n_steps']
    c = command_dict['c']
    r = command_dict['r']
    max_time = command_dict['max_time']
    # run the transitions and advance the seed
    engine = LE.LocalEngine(SEED)
    X_L_prime, X_D_prime = engine.analyze(
        M_c, T, X_L, X_D, kernel_list=kernel_list, n_steps=n_steps,
        c=c, r=r, max_time=max_time)
    SEED = engine.get_next_seed()
    #
    ret_dict = dict(SEED=SEED, X_L=X_L_prime, X_D=X_D_prime)
    return ret_dict

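# A minimal usage sketch for analyze_helper (illustrative only). The
# _example_analyze name and all parameter values are hypothetical; M_c, T,
# X_L, and X_D are assumed to come from a prior initialize_helper call
# (defined below). kernel_list=() and max_time=-1 mirror the values used in
# convergence_analyze_helper below; c=() and r=() are assumed to mean "all
# columns/rows", matching crosscat's LocalEngine.analyze defaults.
def _example_analyze(M_c, T, X_L, X_D):
    table_data = dict(M_c=M_c, T=T)
    data_dict = dict(SEED=0, X_L=X_L, X_D=X_D)
    command_dict = dict(kernel_list=(), n_steps=10, c=(), r=(), max_time=-1)
    return analyze_helper(table_data, data_dict, command_dict)
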
def mi_analyze_helper(table_data, data_dict, command_dict):
    # unpack the generation and inference parameters
    gen_seed = data_dict['SEED']
    crosscat_seed = data_dict['CCSEED']
    num_clusters = data_dict['num_clusters']
    num_cols = data_dict['num_cols']
    num_rows = data_dict['num_rows']
    num_views = data_dict['num_views']
    corr = data_dict['corr']
    burn_in = data_dict['burn_in']

    mean_range = float(num_clusters) * 2.0

    # seeds are 32-bit signed ints
    random.seed(gen_seed)
    get_next_seed = lambda: random.randrange(2147483647)

    # generate a synthetic state with known correlation structure
    T, M_c, M_r, X_L, X_D, view_assignment = mitu.generate_correlated_state(
        num_rows, num_cols, num_views, num_clusters, mean_range, corr,
        seed=gen_seed)
    table_data = dict(T=T, M_c=M_c)

    # burn in, then read off the inferred column partition
    engine = LE.LocalEngine(crosscat_seed)
    X_L, X_D = engine.analyze(M_c, T, X_L, X_D, n_steps=burn_in)
    view_assignment = numpy.array(X_L['column_partition']['assignments'])

    # for each view, calculate the average MI between all pairs of columns
    n_views = max(view_assignment) + 1
    queries = []
    MI = 0.0
    pairs = 0.0
    for view in range(n_views):
        columns_in_view = numpy.nonzero(view_assignment == view)[0]
        for pair in itertools.combinations(columns_in_view, 2):
            queries.append(pair)
            MI_i, Linfoot_i = iu.mutual_information(
                M_c, [X_L], [X_D], [pair], n_samples=1000)
            MI += MI_i[0][0]
            pairs += 1.0

    if pairs > 0.0:
        MI /= pairs

    ret_dict = dict(
        id=data_dict['id'],
        dataset=data_dict['dataset'],
        sample=data_dict['sample'],
        mi=MI,
    )
    return ret_dict

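# A minimal usage sketch for mi_analyze_helper (illustrative only); the
# _example_mi_run name and all parameter values are hypothetical. Note that
# mi_analyze_helper reads only data_dict, so the other two arguments can be
# empty dicts here.
def _example_mi_run():
    data_dict = dict(SEED=0, CCSEED=0, num_clusters=2, num_cols=4,
                     num_rows=100, num_views=2, corr=0.9, burn_in=10,
                     id=0, dataset='synthetic', sample=0)
    return mi_analyze_helper(dict(), data_dict, dict())
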
def initialize_helper(table_data, data_dict, command_dict):
    M_c = table_data['M_c']
    M_r = table_data['M_r']
    T = table_data['T']
    SEED = data_dict['SEED']
    initialization = command_dict['initialization']
    engine = LE.LocalEngine(SEED)
    X_L, X_D = engine.initialize(M_c, M_r, T, initialization=initialization)
    SEED = engine.get_next_seed()
    #
    ret_dict = dict(SEED=SEED, X_L=X_L, X_D=X_D)
    return ret_dict

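# A minimal usage sketch for initialize_helper (illustrative only); the
# _example_initialize name and all parameter values are hypothetical, and the
# data generation assumes du.gen_factorial_data_objects returns (T, M_r, M_c)
# when called with default keyword arguments, as its use in
# convergence_analyze_helper below suggests.
def _example_initialize():
    T, M_r, M_c = du.gen_factorial_data_objects(0, 2, 4, 100, 2)
    table_data = dict(M_c=M_c, M_r=M_r, T=T)
    data_dict = dict(SEED=0)
    command_dict = dict(initialization='from_the_prior')
    return initialize_helper(table_data, data_dict, command_dict)
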
def convergence_analyze_helper(table_data, data_dict, command_dict):
    # unpack the generation and run parameters
    gen_seed = data_dict['SEED']
    num_clusters = data_dict['num_clusters']
    num_cols = data_dict['num_cols']
    num_rows = data_dict['num_rows']
    num_views = data_dict['num_views']
    max_mean = data_dict['max_mean']
    n_test = data_dict['n_test']
    num_transitions = data_dict['n_steps']
    block_size = data_dict['block_size']
    init_seed = data_dict['init_seed']

    # generate some data
    T, M_r, M_c, data_inverse_permutation_indices = \
        du.gen_factorial_data_objects(gen_seed, num_clusters, num_cols,
                                      num_rows, num_views, max_mean=max_mean,
                                      max_std=1,
                                      send_data_inverse_permutation_indices=True)
    view_assignment_ground_truth = \
        ctu.determine_synthetic_column_ground_truth_assignments(num_cols,
                                                                num_views)
    X_L_gen, X_D_gen = ttu.get_generative_clustering(
        M_c, M_r, T, data_inverse_permutation_indices, num_clusters, num_views)
    T_test = ctu.create_test_set(M_c, T, X_L_gen, X_D_gen, n_test, seed_seed=0)
    generative_mean_test_log_likelihood = \
        ctu.calc_mean_test_log_likelihood(M_c, T, X_L_gen, X_D_gen, T_test)

    # additional set up
    engine = LE.LocalEngine(init_seed)
    column_ari_list = []
    mean_test_ll_list = []
    elapsed_seconds_list = []

    # get initial ARI, test_ll
    with gu.Timer('initialize', verbose=False) as timer:
        X_L, X_D = engine.initialize(M_c, M_r, T,
                                     initialization='from_the_prior')
    column_ari = ctu.get_column_ARI(X_L, view_assignment_ground_truth)
    column_ari_list.append(column_ari)
    mean_test_ll = ctu.calc_mean_test_log_likelihood(M_c, T, X_L, X_D, T_test)
    mean_test_ll_list.append(mean_test_ll)
    elapsed_seconds_list.append(timer.elapsed_secs)

    # run blocks of transitions, recording ARI, test_ll progression
    completed_transitions = 0
    n_steps = min(block_size, num_transitions)
    while completed_transitions < num_transitions:
        # We won't be limiting by time in the convergence runs
        with gu.Timer('analyze', verbose=False) as timer:
            X_L, X_D = engine.analyze(M_c, T, X_L, X_D, kernel_list=(),
                                      n_steps=n_steps, max_time=-1)
        completed_transitions += block_size
        # record ARI and test log likelihood after this block
        column_ari = ctu.get_column_ARI(X_L, view_assignment_ground_truth)
        column_ari_list.append(column_ari)
        mean_test_ll = ctu.calc_mean_test_log_likelihood(
            M_c, T, X_L, X_D, T_test)
        mean_test_ll_list.append(mean_test_ll)
        elapsed_seconds_list.append(timer.elapsed_secs)

    ret_dict = dict(
        num_rows=num_rows,
        num_cols=num_cols,
        num_views=num_views,
        num_clusters=num_clusters,
        max_mean=max_mean,
        column_ari_list=column_ari_list,
        mean_test_ll_list=mean_test_ll_list,
        generative_mean_test_log_likelihood=generative_mean_test_log_likelihood,
        elapsed_seconds_list=elapsed_seconds_list,
        n_steps=num_transitions,
        block_size=block_size,
    )
    return ret_dict

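# A minimal usage sketch for convergence_analyze_helper (illustrative only);
# the _example_convergence_run name and all parameter values are hypothetical.
# Like mi_analyze_helper, this helper reads only data_dict, so the other two
# arguments can be empty dicts.
def _example_convergence_run():
    data_dict = dict(SEED=0, num_clusters=2, num_cols=4, num_rows=100,
                     num_views=2, max_mean=10, n_test=20, n_steps=10,
                     block_size=5, init_seed=0)
    return convergence_analyze_helper(dict(), data_dict, dict())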