def total_sobol_indices(self,
                        nx_samples=None,
                        directory_path=None,
                        create_plot=True,
                        batch_size=10):
    """Compute total Sobol indices and optionally save one bar plot per output.

    Args:
        nx_samples: number of sample points for the Monte Carlo integration.
            Defaults to ``300 * self.n_inputs``.
        directory_path: directory where the bar plots are saved. Defaults to
            the current working directory.
        create_plot: whether the total Sobol bar plots are generated.
        batch_size: maximum number of variables passed to
            ``sensitivity.compute_remaining_effect`` per call when there are
            more than 6 input variables.

    Returns:
        Dictionary mapping each input label to ``si_total[:, i]`` (one value
        per task), or ``None`` when the model has a single input.

    Raises:
        Exception: if no hyperparameter samples are available or
            ``directory_path`` is not an existing directory.
        ValueError: if batching is needed and ``batch_size`` is smaller than 1.
    """
    if self.n_inputs == 1:
        print('Not enough variables to perform sensitivity analysis.')
        return
    if len(self.hyperpar_samples) == 0:
        raise Exception(
            'Hyperparameter samples must be generated or retrieved first.')
    if directory_path is None:
        directory_path = os.getcwd()
    if not os.path.isdir(directory_path):
        raise Exception('Invalid directory path ', directory_path)

    hyperpar_samples = [
        self.hyperpar_samples['gp_constant_mean_function'],
        self.hyperpar_samples['kernel_variance'],
        self.hyperpar_samples['kernel_inverse_lengthscales'],
        self.hyperpar_samples['common_kernel_variance'],
    ]
    M = self.n_tasks
    D = self.n_inputs
    if nx_samples is None:
        nx_samples = 300 * D
    selected_vars = list(range(D))

    ybase = sensitivity.allEffect(self.model, self.Rangenorm, nx_samples,
                                  hyperpar_samples)
    ey_square = sensitivity.direct_samples(self.model, self.Rangenorm,
                                           nx_samples, hyperpar_samples)
    ey_square = np.reshape(ey_square, (M, nx_samples, 2))

    if D <= 6:
        y_remaining = sensitivity.compute_remaining_effect(
            self.model, self.Rangenorm, selected_vars, nx_samples,
            hyperpar_samples)
    else:
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        y_remaining = {}
        # Ceiling division: floor division gave 0 groups for D < batch_size
        # (np.array_split then raises) and groups larger than batch_size
        # whenever D is not a multiple of it.
        n_batches = -(-D // batch_size)
        completed = 0
        for group in np.array_split(selected_vars, n_batches):
            y_group = sensitivity.compute_remaining_effect(
                self.model, self.Rangenorm, group, nx_samples,
                hyperpar_samples)
            completed += len(group)
            progress = 100.0 * completed / D
            print(
                "Total Sobol indices computation: {:.2f}% complete".format(
                    progress))
            y_remaining.update(y_group)

    # The last axis of the sensitivity arrays is combined as
    # col1 + col0**2; presumably (mean, variance) -- confirm against the
    # sensitivity module.
    e1 = np.mean(ey_square[:, :, 1] + np.square(ey_square[:, :, 0]), axis=1)
    e2 = ybase[0][0, :, 1] + np.square(ybase[0][0, :, 0])

    si_remaining = np.zeros((M, D))
    for i in range(D):
        y_i = y_remaining[(i, )]
        si_remaining[:, i] = np.mean(y_i[:, :, 1] + np.square(y_i[:, :, 0]),
                                     axis=0)
    si_remaining = (si_remaining - e2[:, np.newaxis]) / (
        e1[:, np.newaxis] - e2[:, np.newaxis])
    si_remaining = np.maximum(si_remaining, 0)
    # Clip to zero so Monte Carlo noise cannot produce negative indices.
    si_total = np.maximum(1 - si_remaining, 0)

    if create_plot:
        n_selected = min(40, D)  # only the 40 largest indices are plotted
        y_pos = np.arange(n_selected)
        for j in range(M):
            order = np.argsort(-si_total[j, :])
            selected = order[:n_selected]
            plt.figure(figsize=(12, 12))
            plt.barh(y_pos, si_total[j, selected])
            new_labels = [self.input_labels[idx] for idx in selected]
            title = 'top_total_sobol_indices_for_' + self.output_labels[j]
            plt.title(title)
            plt.yticks(y_pos, new_labels)
            figpath = os.path.join(directory_path, title + '.png')
            plt.savefig(figpath)
            plt.close()

    return {self.input_labels[i]: si_total[:, i] for i in range(D)}
def sobol_indices(self,
                  max_order=2,
                  S=None,
                  nx_samples=None,
                  directory_path=None,
                  create_plot=True,
                  batch_size=10):
    """Compute Sobol indices up to ``max_order`` and optionally save bar plots.

    Args:
        max_order: maximum order of the Sobol indices to compute.
        S: dictionary of previously computed Sobol indices keyed by label.
            Subsets whose label is already present are skipped, and ``S``
            itself is reused as the result dictionary (so it is presumably
            updated in place by ``sensitivity.compute_Sobol``).
        nx_samples: number of sample points for the Monte Carlo integration.
            Defaults to ``300 * self.n_inputs``.
        directory_path: directory where the bar plots are saved. Defaults to
            the current working directory.
        create_plot: whether the Sobol bar plots are generated.
        batch_size: maximum number of variable subsets passed to
            ``sensitivity.mainHigherOrder`` per call.

    Returns:
        Dictionary containing the Sobol indices, or ``None`` when the model
        has a single input.

    Raises:
        Exception: if ``max_order`` exceeds the number of inputs, no
            hyperparameter samples are available, or ``directory_path`` is
            not an existing directory.
        ValueError: if ``S`` cannot be used as a Sobol indices dictionary.
    """
    if max_order > self.n_inputs:
        raise Exception(
            'max_order cannot be greater than the number of variables')
    if self.n_inputs == 1:
        print('Not enough variables to perform sensitivity analysis.')
        return
    if len(self.hyperpar_samples) == 0:
        raise Exception(
            'Hyperparameter samples must be generated or retrieved first.')
    if directory_path is None:
        directory_path = os.getcwd()
    if not os.path.isdir(directory_path):
        raise Exception('Invalid directory path ', directory_path)

    hyperpar_samples = [
        self.hyperpar_samples['gp_constant_mean_function'],
        self.hyperpar_samples['kernel_variance'],
        self.hyperpar_samples['kernel_inverse_lengthscales'],
        self.hyperpar_samples['common_kernel_variance'],
    ]
    if nx_samples is None:
        nx_samples = 300 * self.n_inputs
    selected_vars = list(range(self.n_inputs))
    initial_list = sensitivity.powerset(selected_vars, 1, max_order)

    if S is not None:
        print('Initial number of Sobol computations: ', len(initial_list))
        try:
            subsets_list = [
                item for item in initial_list
                if sensitivity.generate_label(item, self.input_labels)
                not in S.keys()
            ]
        except Exception as err:
            # Previously the error was printed and the computation went on
            # with a partially filtered list, giving silently incomplete
            # results or a later, unrelated failure.
            raise ValueError('Invalid Sobol indices dictionary') from err
        print('New number of Sobol computations: ', len(subsets_list))
    else:
        subsets_list = initial_list

    n_subset = len(subsets_list)
    M = self.n_tasks
    # Stores E*[Vsub] / E*(Var(Y)) where Vsub = E[Y|Xsub] and Y is normalized.
    quotient_variances = {}
    if n_subset > 0:
        ybase = sensitivity.allEffect(self.model, self.Rangenorm, nx_samples,
                                      hyperpar_samples)
        ey_square = sensitivity.direct_samples(self.model, self.Rangenorm,
                                               nx_samples, hyperpar_samples)
        ey_square = np.reshape(ey_square, (M, nx_samples, 2))

        if n_subset <= batch_size:
            y_higher_order = sensitivity.mainHigherOrder(
                self.model, self.Rangenorm, subsets_list, nx_samples,
                hyperpar_samples)
        else:
            y_higher_order = {}
            completed = 0
            n_groups = math.ceil(n_subset / batch_size)
            for g in range(n_groups):
                group = subsets_list[g * batch_size:(g + 1) * batch_size]
                y_group = sensitivity.mainHigherOrder(
                    self.model, self.Rangenorm, group, nx_samples,
                    hyperpar_samples)
                completed += len(group)
                progress = 100.0 * completed / n_subset
                print("Sobol indices computation: {:.2f}% complete".format(
                    progress))
                y_higher_order.update(y_group)

        e1 = np.mean(ey_square[:, :, 1] + np.square(ey_square[:, :, 0]),
                     axis=1)
        e2 = ybase[0][0, :, 1] + np.square(ybase[0][0, :, 0])

        for subset in subsets_list:
            key = tuple(subset)
            y_sub = y_higher_order[key]
            second_moment = np.mean(
                y_sub[:, :, 1] + np.square(y_sub[:, :, 0]), axis=0)
            quotient_variances[key] = (second_moment - e2) / (e1 - e2)

    Sobol = S if S is not None else {}
    for subset in subsets_list:
        sensitivity.compute_Sobol(Sobol, quotient_variances, tuple(subset),
                                  self.input_labels)

    if create_plot:
        all_labels = list(Sobol.keys())
        n_selected = min(40, len(all_labels))  # plot the 40 largest at most
        y_pos = np.arange(n_selected)
        for j in range(M):
            si_j = np.array([Sobol[label][j] for label in all_labels])
            order = np.argsort(-si_j)
            selected = order[:n_selected]
            plt.figure(figsize=(12, 12))
            plt.barh(y_pos, si_j[selected])
            new_labels = [all_labels[idx] for idx in selected]
            title = 'top_sobol_indices_for_' + self.output_labels[j]
            plt.title(title)
            plt.yticks(y_pos, new_labels)
            figpath = os.path.join(directory_path, title + '.png')
            plt.savefig(figpath)
            plt.close()

    return Sobol
def maineffect_and_interaction(self,
                               grid_points=30,
                               nx_samples=None,
                               directory_path1=None,
                               directory_path2=None,
                               create_plot=True,
                               batch_size=10):
    """Compute main-effect and pairwise-interaction functions, with plots.

    Args:
        grid_points: number of grid points per input for the plots.
        nx_samples: number of sample points for the Monte Carlo integration.
            Defaults to ``400 * self.n_inputs``.
        directory_path1: directory where the main-effect plot is saved.
            Defaults to the current working directory.
        directory_path2: directory where the interaction surface plots are
            saved. Defaults to the current working directory.
        create_plot: whether the plots are generated.
        batch_size: maximum number of variables (main effect, when there are
            more than 6 inputs) or variable pairs (interaction) passed to the
            ``sensitivity`` routines per call.

    Returns:
        ``(main, interaction)``: dictionaries keyed by
        ``'<output>_vs_<input>'`` and ``'<output>_vs_<input1>_&_<input2>'``
        holding the input grids, the rescaled output mean and the approximate
        output standard deviation. Returns ``None`` when the model has a
        single input.

    Raises:
        Exception: if no hyperparameter samples are available or a directory
            path is not an existing directory.
        ValueError: if batching is needed and ``batch_size`` is smaller than 1.
    """
    if self.n_inputs == 1:
        print('Not enough variables to perform sensitivity analysis.')
        return
    if len(self.hyperpar_samples) == 0:
        raise Exception(
            'Hyperparameter samples must be generated or retrieved first.')
    if directory_path1 is None:
        directory_path1 = os.getcwd()
    if not os.path.isdir(directory_path1):
        raise Exception('Invalid directory path ', directory_path1)
    if directory_path2 is None:
        directory_path2 = os.getcwd()
    if not os.path.isdir(directory_path2):
        raise Exception('Invalid directory path ', directory_path2)

    M = self.n_tasks
    D = self.n_inputs
    mean_y, std_y = self.scaling_output
    hyperpar_samples = [
        self.hyperpar_samples['gp_constant_mean_function'],
        self.hyperpar_samples['kernel_variance'],
        self.hyperpar_samples['kernel_inverse_lengthscales'],
        self.hyperpar_samples['common_kernel_variance'],
    ]
    if nx_samples is None:
        nx_samples = 400 * D
    selected_vars = list(range(D))

    # ------------------------------------------------------------------
    # Main effect
    if D <= 6:
        y_main = sensitivity.mainEffect(self.model, self.Rangenorm,
                                        selected_vars, nx_samples,
                                        hyperpar_samples, grid_points)
    else:
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        y_main = {}
        # Ceiling division: floor division gave 0 groups for D < batch_size,
        # which makes np.array_split raise.
        n_batches = -(-D // batch_size)
        completed = 0
        for group in np.array_split(selected_vars, n_batches):
            y_group = sensitivity.mainEffect(self.model, self.Rangenorm,
                                             group, nx_samples,
                                             hyperpar_samples, grid_points)
            completed += len(group)
            progress = 100.0 * completed / D
            print("Main effect computation: {:.2f}% complete".format(
                progress))
            y_main.update(y_group)

    ybase = sensitivity.allEffect(self.model, self.Rangenorm, nx_samples,
                                  hyperpar_samples)

    z_mean = np.zeros((M, D, grid_points))
    z_std = np.zeros((M, D, grid_points))
    for i in range(D):
        y_i = y_main[(i, )]
        for j in range(M):
            z_mean[j, i, :] = y_i[:, j, 0] - ybase[0][0, j, 0]
            # Approximate standard deviation of the normalized main effect
            # function E[Y|xi] as the midpoint of a lower and upper bound.
            # NOTE(review): the outer sqrt on the lower bound is kept from
            # the original; the interaction term below has none -- confirm.
            lower_app = np.sqrt(
                np.abs(np.sqrt(y_i[:, j, 1]) - np.sqrt(ybase[0][0, j, 1])))
            upper_app = np.sqrt(y_i[:, j, 1]) + np.sqrt(ybase[0][0, j, 1])
            z_std[j, i, :] = (lower_app + upper_app) / 2.0

    # Converting to the proper scale and storing
    main = {}
    for i in range(D):
        x = np.linspace(self.Range[i, 0], self.Range[i, 1], grid_points)
        for j in range(M):
            key = self.output_labels[j] + '_vs_' + self.input_labels[i]
            main[key] = {
                'inputs': x,
                'output_mean': z_mean[j, i, :] * std_y[0, j] + mean_y[j],
                'output_std': z_std[j, i, :] * std_y[0, j],
            }

    if create_plot:
        # squeeze=False keeps `axes` two-dimensional even when M == 1, so
        # axes[j, i] is always valid.
        fig, axes = plt.subplots(nrows=M,
                                 ncols=D,
                                 sharex='col',
                                 sharey='row',
                                 figsize=(15, 15),
                                 squeeze=False)
        for i in range(D):
            for j in range(M):
                key = self.output_labels[j] + '_vs_' + self.input_labels[i]
                x = main[key]['inputs']
                y = main[key]['output_mean']
                y_std = main[key]['output_std']
                axes[j, i].plot(x, y, label=self.input_labels[i])
                axes[j, i].fill_between(x,
                                        y - 2 * y_std,
                                        y + 2 * y_std,
                                        alpha=0.2,
                                        color='orange')
                axes[j, i].grid()
                axes[j, i].legend()
        title = 'main_effects'
        plt.title(title)
        figpath = os.path.join(directory_path1, title + '.png')
        plt.savefig(figpath)
        plt.close(fig)

    # ------------------------------------------------------------------
    # Interaction effect
    selected_pairs = np.array([[i, j] for i in range(D - 1)
                               for j in range(i + 1, D)])
    n_pairs = len(selected_pairs)

    if n_pairs <= batch_size:
        y_int = sensitivity.mainInteraction(self.model, self.Rangenorm,
                                            selected_pairs, nx_samples,
                                            hyperpar_samples, grid_points)
    else:
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        y_int = {}
        n_batches = -(-n_pairs // batch_size)
        completed = 0
        for group in np.array_split(selected_pairs, n_batches, axis=0):
            y_group = sensitivity.mainInteraction(self.model, self.Rangenorm,
                                                  group, nx_samples,
                                                  hyperpar_samples,
                                                  grid_points)
            completed += len(group)
            progress = 100.0 * completed / n_pairs
            print("Main interaction computation: {:.2f}% complete".format(
                progress))
            y_int.update(y_group)

    z_intmean = np.zeros((M, n_pairs, grid_points, grid_points))
    z_intstd = np.zeros((M, n_pairs, grid_points, grid_points))
    for k in range(n_pairs):
        j1, j2 = selected_pairs[k]
        y_slice = np.reshape(y_int[tuple(selected_pairs[k])],
                             (grid_points, grid_points, M, 2))
        y_first = y_main[(int(j1), )]
        y_second = y_main[(int(j2), )]
        for j in range(M):
            # Both main-effect terms use the mean (column 0); the original
            # read column 1 for the first variable.
            v1 = y_first[:, j, 0]
            v2 = y_second[:, j, 0]
            p1, p2 = np.meshgrid(v1, v2)
            w1 = np.sqrt(y_first[:, j, 1])
            w2 = np.sqrt(y_second[:, j, 1])
            q1, q2 = np.meshgrid(w1, w2)
            z_intmean[j, k, :, :] = (y_slice[:, :, j, 0] - p1 - p2 +
                                     ybase[0][0, j, 0])
            upper_app = (np.sqrt(y_slice[:, :, j, 1]) + q1 + q2 +
                         np.sqrt(ybase[0][0, j, 1]))
            lower_app = np.abs(
                np.sqrt(y_slice[:, :, j, 1]) - q1 - q2 +
                np.sqrt(ybase[0][0, j, 1]))
            z_intstd[j, k, :, :] = (upper_app + lower_app) / 2.0

    # Converting to the proper scale and storing
    interaction = {}
    for k in range(n_pairs):
        j1, j2 = selected_pairs[k]
        x = np.linspace(self.Range[j1, 0], self.Range[j1, 1], grid_points)
        y = np.linspace(self.Range[j2, 0], self.Range[j2, 1], grid_points)
        X, Y = np.meshgrid(x, y)
        for j in range(M):
            key = (self.output_labels[j] + '_vs_' + self.input_labels[j1] +
                   '_&_' + self.input_labels[j2])
            interaction[key] = {
                'input1': X,
                'input2': Y,
                'output_mean': z_intmean[j, k, :, :] * std_y[0, j] + mean_y[j],
                'output_std': z_intstd[j, k, :, :] * std_y[0, j],
            }

    if create_plot:
        # Per-output bounds shared by all surfaces of the same output.
        zmin = np.amin(z_intmean, axis=(1, 2, 3)) * std_y[0, :] + mean_y
        zmax = np.amax(z_intmean, axis=(1, 2, 3)) * std_y[0, :] + mean_y
        minn = np.amin(z_intstd, axis=(1, 2, 3)) * std_y[0, :]
        maxx = np.amax(z_intstd, axis=(1, 2, 3)) * std_y[0, :]

        for k in range(n_pairs):
            j1, j2 = selected_pairs[k]
            for j in range(M):
                key = (self.output_labels[j] + '_vs_' +
                       self.input_labels[j1] + '_&_' + self.input_labels[j2])
                X = interaction[key]['input1']
                Y = interaction[key]['input2']
                Z = interaction[key]['output_mean']
                Zstd = interaction[key]['output_std']

                fig = plt.figure(figsize=(20, 10))
                norm = mpl.colors.Normalize(minn[j], maxx[j])
                m = plt.cm.ScalarMappable(norm=norm, cmap='jet')
                m.set_array(Zstd)
                m.set_clim(minn[j], maxx[j])
                fcolors = m.to_rgba(Zstd)  # surface colored by the std

                # fig.gca(projection=...) was removed in Matplotlib 3.6.
                ax = fig.add_subplot(projection='3d')
                ax.plot_surface(Y,
                                X,
                                Z,
                                rstride=1,
                                cstride=1,
                                facecolors=fcolors,
                                shade=False)
                title = key
                ax.set_title(title)
                ax.set_xlabel(self.input_labels[j2])
                ax.set_ylabel(self.input_labels[j1])
                ax.set_zlim(zmin[j], zmax[j])
                ax.invert_xaxis()
                fig.colorbar(m, ax=ax)
                figpath = os.path.join(directory_path2, title + '.png')
                plt.savefig(figpath)
                plt.close(fig)

    return main, interaction
def total_sobol_indices(self,
                        type='simulator',
                        nx_samples=None,
                        directory_path=None,
                        create_plot=True,
                        batch_size=10):
    """Compute total Sobol indices and optionally save a bar plot.

    Args:
        type: which Gaussian process is analysed. ``'simulator'`` uses all
            ``self.n_inputs + self.n_pars`` variables and ``self.labels``;
            ``'discrepancy'`` uses the first ``self.n_inputs`` variables and
            ``self.input_labels``.
        nx_samples: number of sample points for the Monte Carlo integration.
            Defaults to 300 times the number of variables.
        directory_path: directory where the bar plot is saved. Defaults to
            the current working directory.
        create_plot: whether the bar plot is generated.
        batch_size: maximum number of variables passed to
            ``sensitivity.compute_remaining_effect`` per call.

    Returns:
        Dictionary mapping each variable label to its total Sobol index, or
        ``None`` when there is a single variable.

    Raises:
        Exception: if no hyperparameter samples are available,
            ``directory_path`` is not an existing directory, or ``type`` is
            not one of the allowed values.
        ValueError: if batching is needed and ``batch_size`` is smaller than 1.
    """
    if len(self.hyperpar_samples) == 0:
        raise Exception(
            'Hyperparameter samples must be generated or retrieved first.')
    if directory_path is None:
        directory_path = os.getcwd()
    if not os.path.isdir(directory_path):
        raise Exception('Invalid directory path ', directory_path)

    if type == 'simulator':
        used_labels = self.labels
        used_Rangenorm = self.Rangenorm
        n_vars = self.n_inputs + self.n_pars
    elif type == 'discrepancy':
        used_labels = self.input_labels
        used_Rangenorm = self.Rangenorm[:self.n_inputs, :]
        n_vars = self.n_inputs
    else:
        raise Exception('Invalid type')

    if n_vars == 1:
        print('Not enough variables to perform sensitivity analysis.')
        return

    par_samples = self.par_samples
    # The order of this list is the one the sensitivity routines are called
    # with throughout this class; keep it unchanged.
    hyperpar_samples = [
        self.hyperpar_samples['sim_gp_constant_mean_function'],
        self.hyperpar_samples['sim_kernel_variance'],
        self.hyperpar_samples['sim_pars_kernel_inverse_lengthscales'],
        self.hyperpar_samples['sim_inputs_kernel_inverse_lengthscales'],
        self.hyperpar_samples['disc_kernel_inverse_lengthscales'],
        self.hyperpar_samples['disc_kernel_variance'],
    ]

    if nx_samples is None:
        nx_samples = 300 * n_vars
    selected_vars = list(range(n_vars))

    ybase = sensitivity.allEffect(self.model, used_Rangenorm, nx_samples,
                                  hyperpar_samples, par_samples, type)
    ey_square = sensitivity.direct_samples(self.model, used_Rangenorm,
                                           nx_samples, hyperpar_samples,
                                           par_samples, type)

    # NOTE(review): the positional argument 60 is passed through unchanged;
    # its meaning is defined by sensitivity.compute_remaining_effect.
    if n_vars <= batch_size:
        y_remaining = sensitivity.compute_remaining_effect(
            self.model, used_Rangenorm, selected_vars, nx_samples,
            hyperpar_samples, 60, par_samples, type)
    else:
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        y_remaining = {}
        # Ceiling division so that no group exceeds batch_size (floor
        # division put e.g. 19 variables into a single group of 19).
        n_batches = -(-n_vars // batch_size)
        completed = 0
        for group in np.array_split(selected_vars, n_batches):
            y_group = sensitivity.compute_remaining_effect(
                self.model, used_Rangenorm, group, nx_samples,
                hyperpar_samples, 60, par_samples, type)
            completed += len(group)
            progress = 100.0 * completed / n_vars
            print(
                "Total Sobol indices computation: {:.2f}% complete".format(
                    progress))
            y_remaining.update(y_group)

    e1 = np.mean(ey_square[:, 1] + np.square(ey_square[:, 0]))
    e2 = ybase[0][0, 1] + np.square(ybase[0][0, 0])

    si_remaining = np.zeros(n_vars)
    for i in range(n_vars):
        y_i = y_remaining[(i, )]
        si_remaining[i] = np.mean(y_i[:, 1] + np.square(y_i[:, 0]))
    si_remaining = (si_remaining - e2) / (e1 - e2)
    si_remaining = np.maximum(si_remaining, 0)
    # Clip to zero so Monte Carlo noise cannot produce negative indices.
    si_total = np.maximum(1 - si_remaining, 0)

    if create_plot:
        order = np.argsort(-si_total)
        n_selected = min(40, len(si_total))  # plot the 40 largest at most
        selected = order[:n_selected]
        y_pos = np.arange(n_selected)
        plt.figure(figsize=(12, 12))
        plt.barh(y_pos, si_total[selected])
        new_labels = [used_labels[idx] for idx in selected]
        title = 'top_total_sobol_indices'
        plt.title(title)
        plt.yticks(y_pos, new_labels)
        figpath = os.path.join(directory_path, title + '.png')
        plt.savefig(figpath)
        plt.close()

    return {used_labels[i]: si_total[i] for i in range(n_vars)}
def maineffect_and_interaction(self,
                               type='simulator',
                               grid_points=30,
                               nx_samples=None,
                               directory_path1=None,
                               directory_path2=None,
                               create_plot=True,
                               batch_size=10):
    """Compute main-effect and pairwise-interaction functions, with plots.

    Args:
        type: which Gaussian process is analysed; allowed values are
            ``'simulator'`` (all ``self.n_inputs + self.n_pars`` variables,
            labelled by ``self.labels``) and ``'discrepancy'`` (the first
            ``self.n_inputs`` variables, labelled by ``self.input_labels``).
        grid_points: number of grid points per variable for the plots.
        nx_samples: number of sample points for the Monte Carlo integration.
            Defaults to 300 times the number of variables.
        directory_path1: directory where the main-effect plot is saved.
            Defaults to the current working directory.
        directory_path2: directory where the interaction surface plots are
            saved. Defaults to the current working directory.
        create_plot: whether the plots are generated.
        batch_size: maximum number of variables (main effect) or variable
            pairs (interaction) passed to the ``sensitivity`` routines per
            call.

    Returns:
        ``(main, interaction)``: dictionaries keyed by variable label and by
        ``'<label1>_&_<label2>'`` holding the input grids, the rescaled
        output mean and the approximate output standard deviation. Returns
        ``None`` when there is a single variable.

    Raises:
        Exception: if no hyperparameter samples are available, a directory
            path is not an existing directory, or ``type`` is invalid.
        ValueError: if batching is needed and ``batch_size`` is smaller than 1.
    """
    if len(self.hyperpar_samples) == 0:
        raise Exception('Execute run_mcmc first.')
    if directory_path1 is None:
        directory_path1 = os.getcwd()
    if not os.path.isdir(directory_path1):
        raise Exception('Invalid directory path ', directory_path1)
    if directory_path2 is None:
        directory_path2 = os.getcwd()
    if not os.path.isdir(directory_path2):
        raise Exception('Invalid directory path ', directory_path2)

    mean_y, std_y = self.scaling_output
    if type == 'simulator':
        used_labels = self.labels
        used_Range = self.Range
        used_Rangenorm = self.Rangenorm
        n_vars = self.n_inputs + self.n_pars
    elif type == 'discrepancy':
        used_labels = self.input_labels
        used_Range = self.Range[:self.n_inputs, :]
        used_Rangenorm = self.Rangenorm[:self.n_inputs, :]
        n_vars = self.n_inputs
    else:
        raise Exception('Invalid type')

    if n_vars == 1:
        print('Not enough variables to perform sensitivity analysis.')
        return

    par_samples = self.par_samples
    # The order of this list is the one the sensitivity routines are called
    # with throughout this class; keep it unchanged.
    hyperpar_samples = [
        self.hyperpar_samples['sim_gp_constant_mean_function'],
        self.hyperpar_samples['sim_kernel_variance'],
        self.hyperpar_samples['sim_pars_kernel_inverse_lengthscales'],
        self.hyperpar_samples['sim_inputs_kernel_inverse_lengthscales'],
        self.hyperpar_samples['disc_kernel_inverse_lengthscales'],
        self.hyperpar_samples['disc_kernel_variance'],
    ]

    # ------------------------------------------------------------------
    # Main effect
    if nx_samples is None:
        nx_samples = 300 * n_vars
    selected_vars = list(range(n_vars))

    ybase = sensitivity.allEffect(self.model, used_Rangenorm, nx_samples,
                                  hyperpar_samples, par_samples, type)
    if n_vars <= batch_size:
        y_main = sensitivity.mainEffect(self.model, used_Rangenorm,
                                        selected_vars, nx_samples,
                                        hyperpar_samples, grid_points,
                                        par_samples, type)
    else:
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        y_main = {}
        # Ceiling division so that no group exceeds batch_size.
        n_batches = -(-n_vars // batch_size)
        completed = 0
        # The original iterated over an undefined name (`var_groups`),
        # raising NameError whenever batching was needed.
        for group in np.array_split(selected_vars, n_batches):
            y_group = sensitivity.mainEffect(self.model, used_Rangenorm,
                                             group, nx_samples,
                                             hyperpar_samples, grid_points,
                                             par_samples, type)
            completed += len(group)
            progress = 100.0 * completed / n_vars
            print("Main effect computation: {:.2f}% complete".format(
                progress))
            y_main.update(y_group)

    z_mean = np.zeros((n_vars, grid_points))
    z_std = np.zeros((n_vars, grid_points))
    for i in range(n_vars):
        y_i = y_main[(i, )]
        z_mean[i, :] = y_i[:, 0] - ybase[0][0, 0]
        # Approximate standard deviation of the normalized main effect
        # function E[Y|xi] - E[Y] as the midpoint of a lower and upper bound.
        # NOTE(review): the outer sqrt on the lower bound is kept from the
        # original; the interaction term below has none -- confirm.
        lower_app = np.sqrt(
            np.abs(np.sqrt(y_i[:, 1]) - np.sqrt(ybase[0][0, 1])))
        upper_app = np.sqrt(y_i[:, 1]) + np.sqrt(ybase[0][0, 1])
        z_std[i, :] = (lower_app + upper_app) / 2.0

    # Converting to the proper scale and storing
    main = {}
    for i in range(n_vars):
        main[used_labels[i]] = {
            'inputs': np.linspace(used_Range[i, 0], used_Range[i, 1],
                                  grid_points),
            'output_mean': z_mean[i, :] * std_y + mean_y,
            'output_std': z_std[i, :] * std_y,
        }

    if create_plot:
        print('Generating main effect plots.')
        # One row of up to 6 panels, or a 6-column grid for more variables.
        # The row count is derived from n_vars (the original used
        # self.n_inputs, which is too small when parameters are included) and
        # squeeze=False keeps `axes` two-dimensional in every case.
        if n_vars <= 6:
            n_rows, n_cols, figsize = 1, n_vars, (20, 10)
        else:
            n_rows, n_cols, figsize = math.ceil(n_vars / 6), 6, (20, 15)
        fig, axes = plt.subplots(nrows=n_rows,
                                 ncols=n_cols,
                                 sharey=True,
                                 figsize=figsize,
                                 squeeze=False)
        for i in range(n_vars):
            ax = axes[i // n_cols, i % n_cols]
            # Look up by used_labels, the labels `main` is keyed with.
            key = used_labels[i]
            x = main[key]['inputs']
            y = main[key]['output_mean']
            y_std = main[key]['output_std']
            ax.plot(x, y, label=key)
            ax.fill_between(x,
                            y - 2 * y_std,
                            y + 2 * y_std,
                            alpha=0.2,
                            color='orange')
            ax.grid()
            ax.legend()
        title = 'main_effects'
        plt.title(title)
        figpath = os.path.join(directory_path1, title + '.png')
        plt.savefig(figpath)
        plt.close(fig)

    # ------------------------------------------------------------------
    # Interaction effect
    print("Starting interaction computations.")
    selected_pairs = np.array([[i, j] for i in range(n_vars - 1)
                               for j in range(i + 1, n_vars)])
    n_pairs = len(selected_pairs)

    if n_pairs <= batch_size:
        y_int = sensitivity.mainInteraction(self.model, used_Rangenorm,
                                            selected_pairs, nx_samples,
                                            hyperpar_samples, grid_points,
                                            par_samples, type)
    else:
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        y_int = {}
        n_batches = -(-n_pairs // batch_size)
        completed = 0
        for group in np.array_split(selected_pairs, n_batches, axis=0):
            y_group = sensitivity.mainInteraction(
                self.model, used_Rangenorm, group, nx_samples,
                hyperpar_samples, grid_points, par_samples, type)
            completed += len(group)
            progress = 100.0 * completed / n_pairs
            print(
                "Interaction effect computation: {:.2f}% complete".format(
                    progress))
            y_int.update(y_group)

    z_intmean = np.zeros((n_pairs, grid_points, grid_points))
    z_intstd = np.zeros((n_pairs, grid_points, grid_points))
    for k in range(n_pairs):
        j1, j2 = selected_pairs[k]
        y_slice = np.reshape(y_int[tuple(selected_pairs[k])],
                             (grid_points, grid_points, 2))
        y_first = y_main[(j1, )]
        y_second = y_main[(j2, )]
        p1, p2 = np.meshgrid(y_first[:, 0], y_second[:, 0])
        q1, q2 = np.meshgrid(np.sqrt(y_first[:, 1]), np.sqrt(y_second[:, 1]))
        z_intmean[k, :, :] = y_slice[:, :, 0] - p1 - p2 + ybase[0][0, 0]
        upper_app = (np.sqrt(y_slice[:, :, 1]) + q1 + q2 +
                     np.sqrt(ybase[0][0, 1]))
        lower_app = np.abs(
            np.sqrt(y_slice[:, :, 1]) - q1 - q2 + np.sqrt(ybase[0][0, 1]))
        z_intstd[k, :, :] = (upper_app + lower_app) / 2.0

    # Converting to the proper scale and storing
    interaction = {}
    for k in range(n_pairs):
        j1, j2 = selected_pairs[k]
        x = np.linspace(used_Range[j1, 0], used_Range[j1, 1], grid_points)
        y = np.linspace(used_Range[j2, 0], used_Range[j2, 1], grid_points)
        X, Y = np.meshgrid(x, y)
        key = used_labels[j1] + '_&_' + used_labels[j2]
        interaction[key] = {
            'input1': X,
            'input2': Y,
            'output_mean': z_intmean[k, :, :] * std_y + mean_y,
            'output_std': z_intstd[k, :, :] * std_y,
        }

    if create_plot:
        print('Generating interaction surfaces plots.')
        # Bounds shared by all interaction surfaces.
        zmin = np.min(z_intmean) * std_y + mean_y
        zmax = np.max(z_intmean) * std_y + mean_y
        minn = np.min(z_intstd) * std_y
        maxx = np.max(z_intstd) * std_y

        for k in range(n_pairs):
            j1, j2 = selected_pairs[k]
            key = used_labels[j1] + '_&_' + used_labels[j2]
            X = interaction[key]['input1']
            Y = interaction[key]['input2']
            Z = interaction[key]['output_mean']
            Zstd = interaction[key]['output_std']

            fig = plt.figure(figsize=(20, 10))
            norm = mpl.colors.Normalize(minn, maxx)
            m = plt.cm.ScalarMappable(norm=norm, cmap='jet')
            m.set_array(Zstd)
            m.set_clim(minn, maxx)
            fcolors = m.to_rgba(Zstd)  # surface colored by the std

            # fig.gca(projection=...) was removed in Matplotlib 3.6.
            ax = fig.add_subplot(projection='3d')
            ax.plot_surface(Y,
                            X,
                            Z,
                            rstride=1,
                            cstride=1,
                            facecolors=fcolors,
                            shade=False)
            title = key
            ax.set_title(title)
            ax.set_xlabel(used_labels[j2])
            ax.set_ylabel(used_labels[j1])
            ax.set_zlim(zmin, zmax)
            ax.invert_xaxis()
            fig.colorbar(m, ax=ax)
            figpath = os.path.join(directory_path2, title + '.png')
            plt.savefig(figpath)
            plt.close(fig)

    return main, interaction
def group_sobol_indices(self,
                        max_order=2,
                        partition=None,
                        S=None,
                        nx_samples=None,
                        directory_path=None,
                        create_plot=True,
                        batch_size=10):
    """Compute group Sobol indices and optionally save a bar plot.

    Args:
        max_order: maximum order of the group Sobol indices to compute.
        partition: list of lists specifying the partition of the variables
            into groups. Required when ``S`` is not given.
        S: previously computed group Sobol dictionary with the keys
            ``'mapping'`` and ``'results'``. Its mapping is reused, subsets
            whose label is already in ``S['results']`` are skipped, and ``S``
            itself is reused as the result dictionary.
        nx_samples: number of sample points for the Monte Carlo integration.
            Defaults to ``300 * self.n_inputs``.
        directory_path: directory where the bar plot is saved. Defaults to
            the current working directory.
        create_plot: whether the bar plot is generated.
        batch_size: maximum number of variable subsets passed to
            ``sensitivity.mainHigherOrder`` per call.

    Returns:
        ``(Sobol, label_mapping)`` where ``Sobol`` has the keys ``'mapping'``
        (groups to variable indices) and ``'results'`` (the Sobol index
        values), and ``label_mapping`` maps each group to the labels of its
        variables. Returns ``None`` when there is a single group.

    Raises:
        ValueError: if neither ``partition`` nor ``S`` is given, or if ``S``
            cannot be used as a group Sobol indices dictionary.
        Exception: if ``max_order`` exceeds the number of groups, no
            hyperparameter samples are available, or ``directory_path`` is
            not an existing directory.
    """
    if S is None:
        # generating the group mapping
        if partition is None:
            raise ValueError(
                'specify a partition of the labels or provide previoulsy computed group Sobol indices dictionary'
            )
        groups_map = sensitivity.create_groups(partition, self.labels)
    else:
        groups_map = S['mapping']

    n_group = len(groups_map)
    if max_order > n_group:
        raise Exception(
            'max_order cannot be greater than the number of groups')
    if n_group == 1:
        print('Not enough groups to perform sensitivity analysis.')
        return
    if len(self.hyperpar_samples) == 0:
        raise Exception(
            'Hyperparameter samples must be generated or retrieved first.')
    if directory_path is None:
        directory_path = os.getcwd()
    if not os.path.isdir(directory_path):
        raise Exception('Invalid directory path ', directory_path)

    hyperpar_samples = [
        self.hyperpar_samples['gp_constant_mean_function'],
        self.hyperpar_samples['kernel_variance'],
        self.hyperpar_samples['kernel_inverse_lengthscales'],
    ]
    if nx_samples is None:
        nx_samples = 300 * self.n_inputs

    selected_groups = list(range(n_group))
    initial_list = sensitivity.powerset(selected_groups, 1, max_order)
    if S is not None:
        print('Initial number of Sobol computations: ', len(initial_list))
        try:
            subsets_of_groups = [
                item for item in initial_list
                if sensitivity.generate_group_label(item)
                not in S['results'].keys()
            ]
        except Exception as err:
            # Previously the error was printed and the computation went on
            # with a partially filtered list, giving silently incomplete
            # results or a later, unrelated failure.
            raise ValueError('Invalid Sobol indices dictionary') from err
        print('New number of Sobol computations: ', len(subsets_of_groups))
    else:
        subsets_of_groups = initial_list

    # Variable indices covered by each subset of groups, in the same order.
    variable_subsets = [
        sensitivity.get_variable_indices_list(entry, groups_map)
        for entry in subsets_of_groups
    ]
    n_subset = len(subsets_of_groups)

    quotient_variances = {}
    if n_subset > 0:
        ybase = sensitivity.allEffect(self.model, self.Rangenorm, nx_samples,
                                      hyperpar_samples)
        ey_square = sensitivity.direct_samples(self.model, self.Rangenorm,
                                               nx_samples, hyperpar_samples)
        if n_subset <= batch_size:
            y_higher_order = sensitivity.mainHigherOrder(
                self.model, self.Rangenorm, variable_subsets, nx_samples,
                hyperpar_samples)
        else:
            y_higher_order = {}
            completed = 0
            n_batches = math.ceil(n_subset / batch_size)
            for b in range(n_batches):
                batch = variable_subsets[b * batch_size:(b + 1) * batch_size]
                y_batch = sensitivity.mainHigherOrder(
                    self.model, self.Rangenorm, batch, nx_samples,
                    hyperpar_samples)
                completed += len(batch)
                progress = 100.0 * completed / n_subset
                print("Sobol indices computation: {:.2f}% complete".format(
                    progress))
                y_higher_order.update(y_batch)

        e1 = np.mean(ey_square[:, 1] + np.square(ey_square[:, 0]))
        e2 = ybase[0][0, 1] + np.square(ybase[0][0, 0])

        # Results come back keyed by variable subset; re-key them by the
        # subset of groups they were requested for.
        for entry, subset in zip(subsets_of_groups, variable_subsets):
            y_sub = y_higher_order[tuple(subset)]
            second_moment = np.mean(y_sub[:, 1] + np.square(y_sub[:, 0]))
            quotient_variances[tuple(entry)] = (second_moment - e2) / (e1 -
                                                                       e2)

    if S is not None:
        Sobol = S
    else:
        Sobol = {'mapping': groups_map, 'results': {}}

    for entry in subsets_of_groups:
        sensitivity.compute_group_Sobol(Sobol['results'], quotient_variances,
                                        tuple(entry))

    if create_plot:
        print('Generating group Sobol indices barplot.')
        all_labels = list(Sobol['results'].keys())
        si_all = np.array(list(Sobol['results'].values()))
        order = np.argsort(-si_all)
        n_selected = min(40, len(si_all))  # plot the 40 largest at most
        selected = order[:n_selected]
        y_pos = np.arange(n_selected)
        plt.figure(figsize=(12, 12))
        plt.barh(y_pos, si_all[selected])
        new_labels = [all_labels[idx] for idx in selected]
        title = 'top_group_sobol_indices'
        plt.title(title)
        plt.yticks(y_pos, new_labels)
        figpath = os.path.join(directory_path, title + '.png')
        plt.savefig(figpath)
        plt.close()

    # generate label_mapping
    label_mapping = {
        group: [self.labels[i] for i in groups_map[group]]
        for group in groups_map
    }
    return Sobol, label_mapping