std_0_variable_flags = variables_train.std() == 0 variables_train = variables_train.drop( variables_train.columns[std_0_variable_flags], axis=1) variables_test = variables_test.drop( variables_test.columns[std_0_variable_flags], axis=1) numbers_of_x = np.arange(numbers_of_y[-1] + 1, variables_train.shape[1]) # standardize x and y autoscaled_variables_train = (variables_train - variables_train.mean(axis=0) ) / variables_train.std(axis=0, ddof=1) autoscaled_variables_test = (variables_test - variables_train.mean(axis=0) ) / variables_train.std(axis=0, ddof=1) # optimize hyperparameter in GTMR with CV model = GTM() model.cv_opt(autoscaled_variables_train, numbers_of_x, numbers_of_y, candidates_of_shape_of_map, candidates_of_shape_of_rbf_centers, candidates_of_variance_of_rbfs, candidates_of_lambda_in_em_algorithm, fold_number, number_of_iterations) model.display_flag = display_flag print('optimized shape of map :', model.shape_of_map) print('optimized shape of RBF centers :', model.shape_of_rbf_centers) print('optimized variance of RBFs :', model.variance_of_rbfs) print('optimized lambda in EM algorithm :', model.lambda_in_em_algorithm) # construct GTMR model model.fit(autoscaled_variables_train) if model.success_flag: # calculate of responsibilities
# Load the iris dataset; the targets are used only to color the plot.
iris = load_iris()
# input_dataset = pd.DataFrame(iris.data, columns=iris.feature_names)
input_dataset = iris.data
color = iris.target

# Autoscale every feature to zero mean and unit sample variance (ddof=1).
input_dataset = (input_dataset - input_dataset.mean(axis=0)) / input_dataset.std(axis=0, ddof=1)

# Construct the sparse GTM (SGTM) model.
model = GTM(shape_of_map, shape_of_rbf_centers, variance_of_rbfs,
            lambda_in_em_algorithm, number_of_iterations, display_flag,
            sparse_flag=True)
model.fit(input_dataset)

if model.success_flag:
    # calculate of responsibilities
    responsibilities = model.responsibility(input_dataset)
    means, modes = model.means_modes(input_dataset)

    # Plot the means of the responsibilities in the 2-D latent space.
    plt.rcParams['font.size'] = 18
    plt.figure(figsize=figure.figaspect(1))
    plt.scatter(means[:, 0], means[:, 1], c=color)
    plt.ylim(-1.1, 1.1)
plt.rcParams['font.size'] = 18

# 3-D scatter plot of the raw data, colored by y.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
p = ax.scatter(x[:, 0], x[:, 1], x[:, 2], c=y)
fig.colorbar(p)
plt.show()

# Stack x and y side by side so that GTMR models them jointly.
variables = np.c_[x, y]

# Standardize x and y (zero mean, unit sample variance).
autoscaled_variables = (variables - variables.mean(axis=0)) / variables.std(
    axis=0, ddof=1)
# Scale the target y value with the same statistics, y columns only.
autoscaled_target_y_value = (
    target_y_value - variables.mean(axis=0)[numbers_of_y]
) / variables.std(axis=0, ddof=1)[numbers_of_y]

# Construct the GTMR model.
model = GTM(shape_of_map, shape_of_rbf_centers, variance_of_rbfs,
            lambda_in_em_algorithm, number_of_iterations, display_flag)
model.fit(autoscaled_variables)

if model.success_flag:
    # calculate of responsibilities
    responsibilities = model.responsibility(autoscaled_variables)
    means, modes = model.means_modes(autoscaled_variables)

    plt.rcParams['font.size'] = 18
    for y_number in numbers_of_y:
        # Plot the means of the responsibilities, colored by this y column.
        plt.scatter(means[:, 0], means[:, 1], c=variables[:, y_number])
        plt.colorbar()
        plt.ylim(-1.1, 1.1)
        plt.xlim(-1.1, 1.1)
        plt.xlabel('z1 (mean)')
parameters_and_k3nerror = [] all_calculation_numbers = len(candidates_of_shape_of_map) * len( candidates_of_shape_of_rbf_centers) * len( candidates_of_variance_of_rbfs) * len( candidates_of_lambda_in_em_algorithm) calculation_number = 0 for shape_of_map_grid in candidates_of_shape_of_map: for shape_of_rbf_centers_grid in candidates_of_shape_of_rbf_centers: for variance_of_rbfs_grid in candidates_of_variance_of_rbfs: for lambda_in_em_algorithm_grid in candidates_of_lambda_in_em_algorithm: calculation_number += 1 print([calculation_number, all_calculation_numbers]) # construct GTM model model = GTM( [shape_of_map_grid, shape_of_map_grid], [shape_of_rbf_centers_grid, shape_of_rbf_centers_grid], variance_of_rbfs_grid, lambda_in_em_algorithm_grid, number_of_iterations, display_flag) model.fit(input_dataset) if model.success_flag: # calculate of responsibilities responsibilities = model.responsibility(input_dataset) # calculate the mean of responsibilities means = responsibilities.dot(model.map_grids) # calculate k3n-error k3nerror_of_gtm = k3nerror( input_dataset, means, k_in_k3nerror) + k3nerror( means, input_dataset, k_in_k3nerror) else: k3nerror_of_gtm = 10**100 parameters_and_k3nerror.append([
k_in_k3nerror = 10
bo_iteration_number = 15

# Load the iris dataset; targets are kept only for coloring.
iris = load_iris()
# input_dataset = pd.DataFrame(iris.data, columns=iris.feature_names)
input_dataset = iris.data
color = iris.target

# Autoscale every feature to zero mean and unit sample variance (ddof=1).
input_dataset = (input_dataset - input_dataset.mean(axis=0)) / input_dataset.std(axis=0, ddof=1)

# Optimize the GTM hyperparameters with Bayesian optimization of k3n-error.
model = GTM(display_flag=True)
model.k3nerror_bo(input_dataset, candidates_of_shape_of_map,
                  candidates_of_shape_of_rbf_centers,
                  candidates_of_variance_of_rbfs,
                  candidates_of_lambda_in_em_algorithm,
                  number_of_iterations, k_in_k3nerror, bo_iteration_number)
print('Optimized hyperparameters')
print('optimized shape of map :', model.shape_of_map)
print('optimized shape of RBF centers :', model.shape_of_rbf_centers)
print('optimized variance of RBFs :', model.variance_of_rbfs)
print('optimized lambda in EM algorithm :', model.lambda_in_em_algorithm)

# Construct the GTM model with the optimized hyperparameters.
model.fit(input_dataset)
# calculate of responsibilities