def generate_multinomial_data(next_seed, n_cols, n_rows, n_views):
    """Build a synthetic, all-multinomial table from seeded row partitions.

    ``random`` is seeded with ``next_seed``; then ``n_views`` partitions are
    obtained from ``eu.CRP(n_rows, 2.0)`` and shuffled in place.  Every column
    is mapped to view 0, so each column of the table carries the labels of the
    first partition.

    Returns:
        A tuple ``(T, M_r, M_c)``: the table as a list of rows, the result of
        ``du.gen_M_r_from_T`` and the column metadata returned by
        ``du.convert_columns_to_multinomial`` applied to all columns.
    """
    random.seed(next_seed)

    # Every column is assigned to view 0.
    view_of_column = [0] * n_cols

    # One shuffled partition per view.  Keep the CRP-then-shuffle order:
    # presumably eu.CRP draws from the seeded `random` state as well.
    view_partitions = []
    for _ in range(n_views):
        labels = eu.CRP(n_rows, 2.0)
        random.shuffle(labels)
        view_partitions.append(labels)

    # Fill the table cell by cell with the cluster label of each row.
    table = numpy.zeros((n_rows, n_cols), dtype=float)
    for col_idx, view_idx in enumerate(view_of_column):
        for row_idx in range(n_rows):
            table[row_idx, col_idx] = view_partitions[view_idx][row_idx]

    T = table.tolist()
    M_r = du.gen_M_r_from_T(T)
    M_c = du.gen_M_c_from_T(T)
    all_columns = list(range(n_cols))
    T, M_c = du.convert_columns_to_multinomial(T, M_c, all_columns)
    return T, M_r, M_c
def generate_multinomial_data(next_seed, n_cols, n_rows, n_views):
    """Generate a synthetic table whose columns are all multinomial.

    The ``random`` module is seeded with ``next_seed``.  For each of the
    ``n_views`` views a partition is requested from ``eu.CRP(n_rows, 2.0)``
    and shuffled.  All columns are assigned to view 0, so every column of the
    generated table contains the labels of the first partition.

    Args:
        next_seed: seed passed to ``random.seed``.
        n_cols: number of columns of the generated table.
        n_rows: number of rows of the generated table.
        n_views: number of partitions to draw.

    Returns:
        ``(T, M_r, M_c)`` where ``T`` is the table as a list of rows, ``M_r``
        comes from ``du.gen_M_r_from_T`` and ``M_c`` is the column metadata
        after ``du.convert_columns_to_multinomial`` ran on every column.
    """
    # generate the partitions
    random.seed(next_seed)

    cols_to_views = [0 for _ in range(n_cols)]
    rows_in_views_to_cols = []
    for view in range(n_views):
        partition = eu.CRP(n_rows, 2.0)
        random.shuffle(partition)
        rows_in_views_to_cols.append(partition)

    # generate the data
    data = numpy.zeros((n_rows, n_cols), dtype=float)
    for col in range(n_cols):
        view = cols_to_views[col]
        for row in range(n_rows):
            cluster = rows_in_views_to_cols[view][row]
            data[row, col] = cluster

    T = data.tolist()
    M_r = du.gen_M_r_from_T(T)
    M_c = du.gen_M_c_from_T(T)
    # Pass a concrete list of column indices: on Python 3 a bare range() is a
    # lazy sequence object, not the list this helper received on Python 2.
    multinomial_columns = list(range(n_cols))
    T, M_c = du.convert_columns_to_multinomial(T, M_c, multinomial_columns)
    return T, M_r, M_c
num_cols, num_rows, num_splits, max_mean=max_mean, max_std=max_std, ) else: with open('SynData2.csv') as fh: import numpy import csv T = numpy.array([row for row in csv.reader(fh)], dtype=float).tolist() M_r = du.gen_M_r_from_T(T) M_c = du.gen_M_c_from_T(T) T = du.discretize_data(T, multinomial_column_indices) T, M_c = du.convert_columns_to_multinomial(T, M_c, multinomial_column_indices) # create the state p_State = State.p_State(M_c, T, N_GRID=N_GRID, SEED=inf_seed) p_State.plot_T(filename='T') print(M_c) print(numpy.array(T)) print(p_State) print("multinomial_column_indices: %s" % str(multinomial_column_indices)) def summarize_p_State(p_State): counts = [ view_state['row_partition_model']['counts'] for view_state in p_State.get_X_L()['view_state'] ]
# create the data if True: T, M_r, M_c = du.gen_factorial_data_objects( gen_seed, num_clusters, num_cols, num_rows, num_splits, max_mean=max_mean, max_std=max_std ) else: with open("SynData2.csv") as fh: import numpy import csv T = numpy.array([row for row in csv.reader(fh)], dtype=float).tolist() M_r = du.gen_M_r_from_T(T) M_c = du.gen_M_c_from_T(T) T = du.discretize_data(T, multinomial_column_indices) T, M_c = du.convert_columns_to_multinomial(T, M_c, multinomial_column_indices) # create the state p_State = State.p_State(M_c, T, N_GRID=N_GRID, SEED=inf_seed) p_State.plot_T(filename="T") print(M_c) print(numpy.array(T)) print(p_State) print("multinomial_column_indices: %s" % str(multinomial_column_indices)) def summarize_p_State(p_State): counts = [view_state["row_partition_model"]["counts"] for view_state in p_State.get_X_L()["view_state"]] format_list = "; ".join( ["s.num_views: %s", "cluster counts: %s", "s.column_crp_score: %.3f", "s.data_score: %.1f", "s.score:%.1f"] )