Example #1
    def total_sobol_indices(self,
                            nx_samples=None,
                            directory_path=None,
                            create_plot=True,
                            batch_size=10):
        # Computes total Sobol indices and generates a bar plot.
        # Inputs:
        #   nx_samples := number of sample points for the Monte Carlo integration. Defaults
        #           to a multiple of the number of variables if not provided
        #   directory_path := directory where to save the Sobol bar plot if needed. Defaults
        #           to the current directory if not specified
        #   create_plot := specifies whether the total Sobol bar plot should be generated or not
        #   batch_size := number of variables to process per call when the input dimension is large
        # Outputs:
        #   Sobol_total := dictionary containing the total Sobol indices values

        if self.n_inputs == 1:
            print('Not enough variables to perform sensitivity analysis.')
            return
        if len(self.hyperpar_samples) == 0:
            raise Exception(
                'Hyperparameter samples must be generated or retrieved first.')
        if directory_path is None:
            directory_path = os.getcwd()
        if not os.path.isdir(directory_path):
            raise Exception('Invalid directory path ', directory_path)

        loc_samples = self.hyperpar_samples['gp_constant_mean_function']
        varm_samples = self.hyperpar_samples['kernel_variance']
        beta_samples = self.hyperpar_samples['kernel_inverse_lengthscales']
        varc_samples = self.hyperpar_samples['common_kernel_variance']
        hyperpar_samples = [
            loc_samples, varm_samples, beta_samples, varc_samples
        ]
        if nx_samples is None:
            nx_samples = 300 * self.n_inputs
        selected_vars = list(range(self.n_inputs))
        M = self.n_tasks
        D = self.n_inputs

        ybase = sensitivity.allEffect(self.model, self.Rangenorm, nx_samples,
                                      hyperpar_samples)
        ey_square = sensitivity.direct_samples(self.model, self.Rangenorm,
                                               nx_samples, hyperpar_samples)
        ey_square = np.reshape(ey_square, (M, nx_samples, 2))
        if D <= 6:
            y_remaining = sensitivity.compute_remaining_effect(
                self.model, self.Rangenorm, selected_vars, nx_samples,
                hyperpar_samples)
        else:
            y_remaining = {}
            # ceil so at least one batch is formed even when D < batch_size
            n_batches = math.ceil(D / batch_size)
            vars_groups = np.array_split(selected_vars, n_batches)
            completed = 0
            for group in vars_groups:
                y_group = sensitivity.compute_remaining_effect(
                    self.model, self.Rangenorm, group, nx_samples,
                    hyperpar_samples)
                completed += len(group)
                progress = 100.0 * completed / D
                print(
                    "Total Sobol indices computation: {:.2f}% complete".format(
                        progress))
                y_remaining.update(y_group)

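        # For each task, e1 estimates the second moment E[Y^2] of the (normalized) output,
        # while e2 estimates (E[Y])^2 from the overall-effect baseline; e1 - e2 thus
        # approximates Var(Y) and is used to normalize the conditional variances below.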
        e1 = np.mean(ey_square[:, :, 1] + np.square(ey_square[:, :, 0]),
                     axis=1)
        e2 = ybase[0][0, :, 1] + np.square(ybase[0][0, :, 0])

        si_remaining = np.zeros((M, D))
        for i in range(D):
            key = tuple([i])
            si_remaining[:, i] = np.mean(y_remaining[key][:, :, 1] +
                                         np.square(y_remaining[key][:, :, 0]),
                                         axis=0)
        # normalize once, after all conditional second moments have been accumulated
        si_remaining = (si_remaining - e2[:, np.newaxis]) / (
            e1[:, np.newaxis] - e2[:, np.newaxis])
        si_remaining = np.maximum(si_remaining, 0)
        si_total = 1 - si_remaining
        si_total = np.maximum(si_total, 0)

        if create_plot:
            #  generating the plot
            n_selected = min(40, D)
            y_pos = np.arange(n_selected)
            for j in range(M):
                order = np.argsort(-si_total[j, :])
                selected = order[:n_selected]  # plot only the top values
                plt.figure(figsize=(12, 12))
                # Create bars
                plt.barh(y_pos, si_total[j, selected])
                new_labels = [
                    self.input_labels[selected[i]] for i in range(n_selected)
                ]
                title = 'top_total_sobol_indices_for_' + self.output_labels[j]
                plt.title(title)
                # Label the y-axis with the variable names
                plt.yticks(y_pos, new_labels)
                figpath = title + '.png'
                figpath = os.path.join(directory_path, figpath)
                plt.savefig(figpath)
                plt.close()

        Sobol_total = {}
        for i in range(self.n_inputs):
            l = self.input_labels[i]
            Sobol_total[l] = si_total[:, i]

        return Sobol_total
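
A minimal usage sketch (hypothetical driver code; `model` stands for an already-fitted
multi-task GP object exposing this method, with MCMC hyperparameter samples generated
beforehand):

    sobol_total = model.total_sobol_indices(nx_samples=2000,
                                            directory_path='./plots',
                                            create_plot=True)
    for label, values in sobol_total.items():
        print(label, values)  # one array of per-task total indices per input label
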
Example #2
    def sobol_indices(self,
                      max_order=2,
                      S=None,
                      nx_samples=None,
                      directory_path=None,
                      create_plot=True,
                      batch_size=10):
        # Computes Sobol indices and generates a bar plot.
        # Inputs:
        #   max_order := maximum order of Sobol indices to compute
        #   S := dictionary containing previously computed Sobol indices. The computation of
        #      Sobol indices is a recursive computation
        #   nx_samples := number of sample points for the Monte Carlo integration. Defaults
        #           to a multiple of the number of variables if not provided
        #   directory_path := directory where to save the Sobol bar plot if needed. Defaults
        #           to the current directory if not specified
        #   create_plot := specifies whether the Sobol bar plot should be generated or not
        #   batch_size := number of variable subsets to process per call when their count is large
        # Outputs:
        #   Sobol := dictionary containing the Sobol indices values
        if max_order > self.n_inputs:
            raise Exception(
                'max_order cannot be greater than the number of variables')
        if self.n_inputs == 1:
            print('Not enough variables to perform sensitivity analysis.')
            return
        if len(self.hyperpar_samples) == 0:
            raise Exception(
                'Hyperparameter samples must be generated or retrieved first.')
        if directory_path is None:
            directory_path = os.getcwd()
        if not os.path.isdir(directory_path):
            raise Exception('Invalid directory path ', directory_path)

        loc_samples = self.hyperpar_samples['gp_constant_mean_function']
        varm_samples = self.hyperpar_samples['kernel_variance']
        beta_samples = self.hyperpar_samples['kernel_inverse_lengthscales']
        varc_samples = self.hyperpar_samples['common_kernel_variance']
        hyperpar_samples = [
            loc_samples, varm_samples, beta_samples, varc_samples
        ]
        if nx_samples is None:
            nx_samples = 300 * self.n_inputs
        selected_vars = list(range(self.n_inputs))

        initial_list = sensitivity.powerset(selected_vars, 1, max_order)
        subsets_list = []
        if S is not None:
            print('Initial number of Sobol computations: ', len(initial_list))
            try:
                for item in initial_list:
                    l = sensitivity.generate_label(item, self.input_labels)
                    if l not in S:
                        subsets_list.append(item)
                print('New number of Sobol computations: ', len(subsets_list))
            except Exception as e:
                traceback.print_exc()
                print('Invalid Sobol indices dictionary')
        else:
            subsets_list = initial_list

        n_subset = len(subsets_list)
        M = self.n_tasks
        if n_subset > 0:
            ybase = sensitivity.allEffect(self.model, self.Rangenorm,
                                          nx_samples, hyperpar_samples)
            ey_square = sensitivity.direct_samples(self.model, self.Rangenorm,
                                                   nx_samples,
                                                   hyperpar_samples)
            ey_square = np.reshape(ey_square, (M, nx_samples, 2))
            if n_subset <= batch_size:
                y_higher_order = sensitivity.mainHigherOrder(
                    self.model, self.Rangenorm, subsets_list, nx_samples,
                    hyperpar_samples)
            else:
                y_higher_order = {}
                completed = 0
                n_groups = math.ceil(n_subset / batch_size)
                for i in range(n_groups):
                    group = subsets_list[i * batch_size:(i + 1) * batch_size]
                    y_group = sensitivity.mainHigherOrder(
                        self.model, self.Rangenorm, group, nx_samples,
                        hyperpar_samples)
                    completed += len(group)
                    progress = 100.0 * completed / n_subset
                    print("Sobol indices computation: {:.2f}% complete".format(
                        progress))
                    y_higher_order.update(y_group)

            e1 = np.mean(ey_square[:, :, 1] + np.square(ey_square[:, :, 0]),
                         axis=1)
            e2 = ybase[0][0, :, 1] + np.square(ybase[0][0, :, 0])
            # This will store the quantities E*[Vsub]/E*(Var(Y)) where Vsub = E[Y|Xsub] and Y is normalized
            quotient_variances = {}

            for idx in range(n_subset):
                key = tuple(subsets_list[idx])
                quotient_variances[key] = np.mean(
                    y_higher_order[key][:, :, 1] +
                    np.square(y_higher_order[key][:, :, 0]),
                    axis=0)
                quotient_variances[key] = (quotient_variances[key] -
                                           e2) / (e1 - e2)
        if S is not None:
            Sobol = S
        else:
            Sobol = {}
        for i in range(n_subset):
            key = tuple(subsets_list[i])
            sensitivity.compute_Sobol(Sobol, quotient_variances, key,
                                      self.input_labels)

        all_labels = list(Sobol.keys())

        # plotting
        n_selected = min(40, len(all_labels))
        y_pos = np.arange(n_selected)
        if create_plot:
            si_all = {}
            for j in range(M):
                si_all[j] = []
                for i in range(len(all_labels)):
                    key = all_labels[i]
                    si_all[j].append(Sobol[key][j])
                si_all[j] = np.array(si_all[j])
                order = np.argsort(-si_all[j])
                selected = order[:n_selected]  # plot only the top values
                plt.figure(figsize=(12, 12))
                # Create bars
                plt.barh(y_pos, si_all[j][selected])
                new_labels = [
                    all_labels[selected[i]] for i in range(n_selected)
                ]
                title = 'top_sobol_indices_for_' + self.output_labels[j]
                plt.title(title)
                # Label the y-axis with the subset labels
                plt.yticks(y_pos, new_labels)
                figpath = title + '.png'
                figpath = os.path.join(directory_path, figpath)
                plt.savefig(figpath)
                plt.close()

        return Sobol
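
A minimal usage sketch (hypothetical driver code; Sobol indices computed once can be passed
back through S to extend the computation to higher orders without recomputing known entries):

    sobol = model.sobol_indices(max_order=1, create_plot=False)
    sobol = model.sobol_indices(max_order=2, S=sobol)  # reuses first-order results
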
Example #3
    def maineffect_and_interaction(self,
                                   grid_points=30,
                                   nx_samples=None,
                                   directory_path1=None,
                                   directory_path2=None,
                                   create_plot=True,
                                   batch_size=10):
        # Computes and generates main effect and interaction function plots.
        # Inputs:
        #   grid_points := the number of grid points for the plots
        #   nx_samples := number of sample points for the Monte Carlo integration. Defaults
        #       to a multiple of the number of variables if not provided
        #   directory_path1 := directory where to save the main effect plots if needed. Defaults
        #       to the current directory if not specified
        #   directory_path2 := directory where to save the interaction surface plots if needed. Defaults
        #       to the current directory if not specified
        #   create_plot := specifies whether the plots should be generated or not
        #   batch_size := number of variables or pairs to process per call when the dimension is large
        # Outputs:
        #   main, interaction := dictionaries containing values for the main effect and interaction functions

        # Main effect
        if self.n_inputs == 1:
            print('Not enough variables to perform sensitivity analysis.')
            return
        if len(self.hyperpar_samples) == 0:
            raise Exception(
                'Hyperparameter samples must be generated or retrieved first.')
        if directory_path1 is None:
            directory_path1 = os.getcwd()
        if not os.path.isdir(directory_path1):
            raise Exception('Invalid directory path ', directory_path1)
        if directory_path2 is None:
            directory_path2 = os.getcwd()
        if not os.path.isdir(directory_path2):
            raise Exception('Invalid directory path ', directory_path2)

        M = self.n_tasks
        D = self.n_inputs
        mean_y, std_y = self.scaling_output
        loc_samples = self.hyperpar_samples['gp_constant_mean_function']
        varm_samples = self.hyperpar_samples['kernel_variance']
        beta_samples = self.hyperpar_samples['kernel_inverse_lengthscales']
        varc_samples = self.hyperpar_samples['common_kernel_variance']
        hyperpar_samples = [
            loc_samples, varm_samples, beta_samples, varc_samples
        ]
        if nx_samples is None:
            nx_samples = 400 * D
        selected_vars = list(range(D))
        if D <= 6:
            y_main = sensitivity.mainEffect(self.model, self.Rangenorm,
                                            selected_vars, nx_samples,
                                            hyperpar_samples, grid_points)
        else:
            y_main = {}
            # ceil so at least one batch is formed even when D < batch_size
            n_batches = math.ceil(D / batch_size)
            vars_groups = np.array_split(selected_vars, n_batches)
            completed = 0
            for group in vars_groups:
                y_group = sensitivity.mainEffect(self.model, self.Rangenorm,
                                                 group, nx_samples,
                                                 hyperpar_samples, grid_points)
                completed += len(group)
                progress = 100.0 * completed / self.n_inputs
                print("Main effect computation: {:.2f}% complete".format(
                    progress))
                y_main.update(y_group)

        ybase = sensitivity.allEffect(self.model, self.Rangenorm, nx_samples,
                                      hyperpar_samples)
        z_mean = np.zeros((M, D, grid_points))
        z_std = np.zeros((M, D, grid_points))
        for i in range(D):
            key = tuple([i])
            for j in range(M):
                z_mean[j, i, :] = y_main[key][:, j, 0] - ybase[0][0, j, 0]
                # The next 3 lines give an approximation of the standard deviation of the normalized main effect function E[Y|xi]
                lower_app = np.sqrt(
                    np.abs(
                        np.sqrt(y_main[key][:, j, 1]) -
                        np.sqrt(ybase[0][0, j, 1])))
                upper_app = np.sqrt(y_main[key][:, j, 1]) + np.sqrt(
                    ybase[0][0, j, 1])
                z_std[j, i, :] = (lower_app + upper_app) / 2.0

        # Converting to the proper scale and plotting
        main = {}
        for i in range(D):
            for j in range(M):
                y = z_mean[j, i, :] * std_y[0, j] + mean_y[j]
                y_std = z_std[j, i, :] * std_y[0, j]
                x = np.linspace(self.Range[i, 0], self.Range[i, 1],
                                grid_points)
                key = self.output_labels[j] + '_vs_' + self.input_labels[i]
                main[key] = {}
                main[key]['inputs'] = x
                main[key]['output_mean'] = y
                main[key]['output_std'] = y_std
        if create_plot:
            fig, axes = plt.subplots(nrows=M,
                                     ncols=D,
                                     sharex='col',
                                     sharey='row',
                                     figsize=(15, 15))
            for i in range(D):
                for j in range(M):
                    key = self.output_labels[j] + '_vs_' + self.input_labels[i]
                    x = main[key]['inputs']
                    y = main[key]['output_mean']
                    y_std = main[key]['output_std']
                    axes[j, i].plot(x, y, label=self.input_labels[i])
                    axes[j, i].fill_between(x,
                                            y - 2 * y_std,
                                            y + 2 * y_std,
                                            alpha=0.2,
                                            color='orange')
                    axes[j, i].grid()
                    axes[j, i].legend()
            title = 'main_effects'
            plt.title(title)
            figpath = title + '.png'
            figpath = os.path.join(directory_path1, figpath)
            plt.savefig(figpath)
            plt.close(fig)

        #---------------------------------------------------------------------
        # Interaction effect
        selected_pairs = []
        for i in range(D - 1):
            for j in range(i + 1, D):
                selected_pairs.append([i, j])
        selected_pairs = np.array(selected_pairs)
        n_pairs = len(selected_pairs)
        if n_pairs <= batch_size:
            y_int = sensitivity.mainInteraction(self.model, self.Rangenorm,
                                                selected_pairs, nx_samples,
                                                hyperpar_samples, grid_points)
        else:
            y_int = {}
            n_batches = n_pairs // batch_size
            pairs_groups = np.array_split(selected_pairs, n_batches, axis=0)
            completed = 0
            for group in pairs_groups:
                y_group = sensitivity.mainInteraction(self.model,
                                                      self.Rangenorm, group,
                                                      nx_samples,
                                                      hyperpar_samples,
                                                      grid_points)
                completed += len(group)
                progress = 100.0 * completed / n_pairs
                print("Main interaction computation: {:.2f}% complete".format(
                    progress))
                y_int.update(y_group)
        z_intmean = np.zeros((M, n_pairs, grid_points, grid_points))
        z_intstd = np.zeros((M, n_pairs, grid_points, grid_points))
        for k in range(n_pairs):
            key = tuple(selected_pairs[k])
            j1, j2 = selected_pairs[k]
            idx1 = selected_vars.index(j1)
            idx2 = selected_vars.index(j2)
            key1 = tuple([idx1])
            key2 = tuple([idx2])
            y_slice = np.reshape(y_int[key], (grid_points, grid_points, M, 2))
            for j in range(M):
                # main-effect means for each variable of the pair
                v1 = y_main[key1][:, j, 0]
                v2 = y_main[key2][:, j, 0]
                p1, p2 = np.meshgrid(v1, v2)
                w1 = np.sqrt(y_main[key1][:, j, 1])
                w2 = np.sqrt(y_main[key2][:, j, 1])
                q1, q2 = np.meshgrid(w1, w2)
                z_intmean[j,
                          k, :, :] = y_slice[:, :, j,
                                             0] - p1 - p2 + ybase[0][0, j, 0]
                upper_app = np.sqrt(y_slice[:, :, j, 1]) + q1 + q2 + np.sqrt(
                    ybase[0][0, j, 1])
                lower_app = np.abs(
                    np.sqrt(y_slice[:, :, j, 1]) - q1 - q2 +
                    np.sqrt(ybase[0][0, j, 1]))
                z_intstd[j, k, :, :] = (upper_app + lower_app) / 2.0

        # Converting to the proper scale and storing
        interaction = {}
        for k in range(n_pairs):
            for j in range(M):
                item = selected_pairs[k]
                j1, j2 = item
                x = np.linspace(self.Range[j1, 0], self.Range[j1, 1],
                                grid_points)
                y = np.linspace(self.Range[j2, 0], self.Range[j2, 1],
                                grid_points)
                Z = z_intmean[j, k, :, :] * std_y[0, j] + mean_y[j]
                Zstd = z_intstd[j, k, :, :] * std_y[0, j]
                key = self.output_labels[j] + '_vs_' + self.input_labels[
                    j1] + '_&_' + self.input_labels[j2]
                X, Y = np.meshgrid(x, y)
                interaction[key] = {}
                interaction[key]['input1'] = X
                interaction[key]['input2'] = Y
                interaction[key]['output_mean'] = Z
                interaction[key]['output_std'] = Zstd

        if create_plot:
            # Bounds for the interaction surface plot
            zmin = np.amin(z_intmean, axis=(1, 2, 3)) * std_y[0, :] + mean_y
            zmax = np.amax(z_intmean, axis=(1, 2, 3)) * std_y[0, :] + mean_y
            minn = np.amin(z_intstd, axis=(1, 2, 3)) * std_y[0, :]
            maxx = np.amax(z_intstd, axis=(1, 2, 3)) * std_y[0, :]

            for k in range(n_pairs):
                for j in range(M):
                    item = selected_pairs[k]
                    j1, j2 = item
                    key = self.output_labels[j] + '_vs_' + self.input_labels[
                        j1] + '_&_' + self.input_labels[j2]
                    X = interaction[key]['input1']
                    Y = interaction[key]['input2']
                    Z = interaction[key]['output_mean']
                    Zstd = interaction[key]['output_std']
                    fig = plt.figure(figsize=(20, 10))
                    norm = mpl.colors.Normalize(minn[j], maxx[j])
                    m = plt.cm.ScalarMappable(norm=norm, cmap='jet')
                    m.set_array(Zstd)
                    m.set_clim(minn[j], maxx[j])
                    color_dimension = Zstd
                    fcolors = m.to_rgba(color_dimension)
                    # gca(projection=...) was removed in Matplotlib 3.6; use add_subplot
                    ax = fig.add_subplot(projection='3d')
                    ax.plot_surface(Y,
                                    X,
                                    Z,
                                    rstride=1,
                                    cstride=1,
                                    facecolors=fcolors,
                                    shade=False)
                    title = key
                    ax.set_title(title)
                    ax.set_xlabel(self.input_labels[j2])
                    ax.set_ylabel(self.input_labels[j1])
                    ax.set_zlim(zmin[j], zmax[j])
                    ax.invert_xaxis()
                    fig.colorbar(m, ax=ax)
                    figpath = title + '.png'
                    figpath = os.path.join(directory_path2, figpath)
                    plt.savefig(figpath)
                    plt.close(fig)

        return main, interaction
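
A minimal usage sketch (hypothetical driver code; the returned dictionaries can also be
inspected directly instead of relying on the saved figures):

    main, interaction = model.maineffect_and_interaction(grid_points=30,
                                                         create_plot=False)
    curve = main['y1_vs_x1']  # assumes 'y1' and 'x1' are actual output/input labels
    x, mu, sd = curve['inputs'], curve['output_mean'], curve['output_std']
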
Example #4
    def total_sobol_indices(self,
                            type='simulator',
                            nx_samples=None,
                            directory_path=None,
                            create_plot=True,
                            batch_size=10):
        # Computes total Sobol indices and generates a bar plot.
        # Inputs:
        #   type := string that specifies for which Gaussian process the sensitivity analysis
        #           is performed. Allowed values = 'simulator', 'discrepancy'
        #   nx_samples := number of sample points for the Monte Carlo integration. Defaults
        #           to a multiple of the number of variables if not provided
        #   directory_path := directory where to save the Sobol bar plot if needed. Defaults
        #           to the current directory if not specified
        #   create_plot := specifies whether the Sobol bar plot should be generated or not
        #   batch_size := number of variables to process per call when the dimension is large
        # Outputs:
        #   Sobol_total := dictionary containing the total Sobol indices values

        if len(self.hyperpar_samples) == 0:
            raise Exception(
                'Hyperparameter samples must be generated or retrieved first.')
        if directory_path is None:
            directory_path = os.getcwd()
        if not os.path.isdir(directory_path):
            raise Exception('Invalid directory path ', directory_path)

        mean_sim, std_sim = self.scaling_input
        mean_y, std_y = self.scaling_output

        if type == 'simulator':
            used_labels = self.labels
            used_Range = self.Range
            used_Rangenorm = self.Rangenorm
            n_vars = self.n_inputs + self.n_pars
        elif type == 'discrepancy':
            used_labels = self.input_labels
            used_Range = self.Range[:self.n_inputs, :]
            used_Rangenorm = self.Rangenorm[:self.n_inputs, :]
            n_vars = self.n_inputs
        else:
            raise Exception('Invalid type')

        if n_vars == 1:
            print('Not enough variables to perform sensitivity analysis.')
            return

        varsim_samples = self.hyperpar_samples['sim_kernel_variance']
        vard_samples = self.hyperpar_samples['disc_kernel_variance']
        betasx_samples = self.hyperpar_samples[
            'sim_inputs_kernel_inverse_lengthscales']
        betaspar_samples = self.hyperpar_samples[
            'sim_pars_kernel_inverse_lengthscales']
        betad_samples = self.hyperpar_samples[
            'disc_kernel_inverse_lengthscales']
        loc_samples = self.hyperpar_samples['sim_gp_constant_mean_function']
        par_samples = self.par_samples

        hyperpar_samples = [
            loc_samples, varsim_samples, betaspar_samples, betasx_samples,
            betad_samples, vard_samples
        ]

        if nx_samples is None:
            nx_samples = 300 * n_vars
        selected_vars = list(range(n_vars))

        ybase = sensitivity.allEffect(self.model, used_Rangenorm, nx_samples,
                                      hyperpar_samples, par_samples, type)
        ey_square = sensitivity.direct_samples(self.model, used_Rangenorm,
                                               nx_samples, hyperpar_samples,
                                               par_samples, type)

        if n_vars <= batch_size:
            y_remaining = sensitivity.compute_remaining_effect(
                self.model, used_Rangenorm, selected_vars, nx_samples,
                hyperpar_samples, 60, par_samples, type)
        else:
            y_remaining = {}
            n_batches = n_vars // batch_size
            vars_groups = np.array_split(selected_vars, n_batches)
            completed = 0
            for group in vars_groups:
                y_group = sensitivity.compute_remaining_effect(
                    self.model, used_Rangenorm, group, nx_samples,
                    hyperpar_samples, 60, par_samples, type)
                completed += len(group)
                progress = 100.0 * completed / n_vars
                print(
                    "Total Sobol indices computation: {:.2f}% complete".format(
                        progress))
                y_remaining.update(y_group)

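        # e1 estimates the second moment E[Y^2] of the (normalized) output and e2 estimates
        # (E[Y])^2 from the overall-effect baseline, so e1 - e2 approximates Var(Y).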
        e1 = np.mean(ey_square[:, 1] + np.square(ey_square[:, 0]))
        e2 = ybase[0][0, 1] + np.square(ybase[0][0, 0])
        si_remaining = np.zeros(n_vars)
        for i in range(n_vars):
            key = tuple([i])
            si_remaining[i] = np.mean(y_remaining[key][:, 1] +
                                      np.square(y_remaining[key][:, 0]))
        si_remaining = (si_remaining - e2) / (e1 - e2)
        si_remaining = np.maximum(si_remaining, 0)
        si_total = 1 - si_remaining
        si_total = np.maximum(si_total, 0)
        if create_plot:
            #  generating the plot
            order = np.argsort(-si_total)
            n_selected = min(40, len(si_total))
            selected = order[:n_selected]  # plot only the top values
            y_pos = np.arange(n_selected)
            plt.figure(figsize=(12, 12))
            # Create bars
            plt.barh(y_pos, si_total[selected])
            new_labels = [used_labels[selected[i]] for i in range(n_selected)]
            title = 'top_total_sobol_indices'
            plt.title(title)
            # Label the y-axis with the variable names
            plt.yticks(y_pos, new_labels)
            figpath = title + '.png'
            figpath = os.path.join(directory_path, figpath)
            plt.savefig(figpath)
            plt.close()

        Sobol_total = {}
        for i in range(n_vars):
            l = used_labels[i]
            Sobol_total[l] = si_total[i]

        return Sobol_total
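
A minimal usage sketch (hypothetical driver code; type selects which Gaussian process of
the calibration model is analyzed):

    totals_sim = model.total_sobol_indices(type='simulator', create_plot=False)
    totals_disc = model.total_sobol_indices(type='discrepancy', create_plot=False)
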
Example #5
    def maineffect_and_interaction(self,
                                   type='simulator',
                                   grid_points=30,
                                   nx_samples=None,
                                   directory_path1=None,
                                   directory_path2=None,
                                   create_plot=True,
                                   batch_size=10):
        # Computes and generates main effect and interaction function plots.
        # Inputs:
        #   type := string that specifies for which Gaussian process the sensitivity analysis
        #           is performed. Allowed values = 'simulator', 'discrepancy'
        #   grid_points := the number of grid points for the plots
        #   nx_samples := number of sample points for the Monte Carlo integration. Defaults
        #       to a multiple of the number of variables if not provided
        #   directory_path1 := directory where to save the main effect plots if needed. Defaults
        #       to the current directory if not specified
        #   directory_path2 := directory where to save the interaction surface plots if needed. Defaults
        #       to the current directory if not specified
        #   create_plot := specifies whether the plots should be generated or not
        #   batch_size := number of variables or pairs to process per call when the dimension is large
        # Outputs:
        #   main, interaction := dictionaries containing values for the main effect and interaction functions

        if len(self.hyperpar_samples) == 0:
            raise Exception('Execute run_mcmc first.')
        if directory_path1 is None:
            directory_path1 = os.getcwd()
        if not os.path.isdir(directory_path1):
            raise Exception('Invalid directory path ', directory_path1)
        if directory_path2 is None:
            directory_path2 = os.getcwd()
        if not os.path.isdir(directory_path2):
            raise Exception('Invalid directory path ', directory_path2)

        mean_sim, std_sim = self.scaling_input
        mean_y, std_y = self.scaling_output

        if type == 'simulator':
            used_labels = self.labels
            used_Range = self.Range
            used_Rangenorm = self.Rangenorm
            n_vars = self.n_inputs + self.n_pars
        elif type == 'discrepancy':
            used_labels = self.input_labels
            used_Range = self.Range[:self.n_inputs, :]
            used_Rangenorm = self.Rangenorm[:self.n_inputs, :]
            n_vars = self.n_inputs
        else:
            raise Exception('Invalid type')

        if n_vars == 1:
            print('Not enough variables to perform sensitivity analysis.')
            return
        varsim_samples = self.hyperpar_samples['sim_kernel_variance']
        vard_samples = self.hyperpar_samples['disc_kernel_variance']
        betasx_samples = self.hyperpar_samples[
            'sim_inputs_kernel_inverse_lengthscales']
        betaspar_samples = self.hyperpar_samples[
            'sim_pars_kernel_inverse_lengthscales']
        betad_samples = self.hyperpar_samples[
            'disc_kernel_inverse_lengthscales']
        loc_samples = self.hyperpar_samples['sim_gp_constant_mean_function']
        par_samples = self.par_samples

        hyperpar_samples = [
            loc_samples, varsim_samples, betaspar_samples, betasx_samples,
            betad_samples, vard_samples
        ]
        # Main effect
        if nx_samples is None:
            nx_samples = 300 * n_vars
        selected_vars = list(range(n_vars))
        ybase = sensitivity.allEffect(self.model, used_Rangenorm, nx_samples,
                                      hyperpar_samples, par_samples, type)

        if n_vars <= batch_size:
            y_main = sensitivity.mainEffect(self.model, used_Rangenorm,
                                            selected_vars, nx_samples,
                                            hyperpar_samples, grid_points,
                                            par_samples, type)
        else:
            y_main = {}
            n_batches = n_vars // batch_size
            vars_groups = np.array_split(selected_vars, n_batches)
            completed = 0
            for group in vars_groups:
                y_group = sensitivity.mainEffect(self.model, used_Rangenorm,
                                                 group, nx_samples,
                                                 hyperpar_samples, grid_points,
                                                 par_samples, type)
                completed += len(group)
                progress = 100.0 * completed / n_vars
                print("Main effect computation: {:.2f}% complete".format(
                    progress))
                y_main.update(y_group)

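        # z_mean stores the centered main effects E[Y|x_i] - E[Y] on the normalized scale;
        # z_std stores the standard-deviation approximation computed in the loop below.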
        z_mean = np.zeros((n_vars, grid_points))
        z_std = np.zeros((n_vars, grid_points))
        for i in range(n_vars):
            key = tuple([i])
            z_mean[i, :] = y_main[key][:, 0] - ybase[0][0, 0]
            # The next 3 lines give an approximation of the standard deviation of the normalized main effect function E[Y|xi] - E[Y]
            lower_app = np.sqrt(
                np.abs(np.sqrt(y_main[key][:, 1]) - np.sqrt(ybase[0][0, 1])))
            upper_app = np.sqrt(y_main[key][:, 1]) + np.sqrt(ybase[0][0, 1])
            z_std[i, :] = (lower_app + upper_app) / 2.0

        # Converting to the proper scale and storing
        main = {}
        for i in range(n_vars):
            y = z_mean[i, :] * std_y + mean_y
            y_std = z_std[i, :] * std_y
            x = np.linspace(used_Range[i, 0], used_Range[i, 1], grid_points)
            key = used_labels[i]
            main[key] = {}
            main[key]['inputs'] = x
            main[key]['output_mean'] = y
            main[key]['output_std'] = y_std
        if create_plot:
            print('Generating main effect plots.')
            if n_vars <= 6:
                fig, axes = plt.subplots(nrows=1,
                                         ncols=n_vars,
                                         sharey=True,
                                         figsize=(20, 10))
                for i in range(n_vars):
                    key = used_labels[i]
                    x = main[key]['inputs']
                    y = main[key]['output_mean']
                    y_std = main[key]['output_std']
                    axes[i].plot(x, y, label=used_labels[i])
                    axes[i].fill_between(x,
                                         y - 2 * y_std,
                                         y + 2 * y_std,
                                         alpha=0.2,
                                         color='orange')
                    axes[i].grid()
                    axes[i].legend()
                title = 'main_effects'
                plt.title(title)
                figpath = title + '.png'
                figpath = os.path.join(directory_path1, figpath)
                plt.savefig(figpath)
                plt.close(fig)
            else:
                plot_rows = math.ceil(n_vars / 6)
                fig, axes = plt.subplots(nrows=plot_rows,
                                         ncols=6,
                                         sharey=True,
                                         figsize=(20, 15))
                for i in range(n_vars):
                    row_idx = i // 6
                    col_idx = i % 6
                    key = used_labels[i]
                    x = main[key]['inputs']
                    y = main[key]['output_mean']
                    y_std = main[key]['output_std']
                    axes[row_idx, col_idx].plot(x, y, label=used_labels[i])
                    axes[row_idx, col_idx].fill_between(x,
                                                        y - 2 * y_std,
                                                        y + 2 * y_std,
                                                        alpha=0.2,
                                                        color='orange')
                    axes[row_idx, col_idx].grid()
                    axes[row_idx, col_idx].legend()
                title = 'main_effects'
                plt.title(title)
                figpath = title + '.png'
                figpath = os.path.join(directory_path1, figpath)
                plt.savefig(figpath)
                plt.close(fig)

        #---------------------------------------------------------------------
        # Interaction effect
        print("Starting interaction computations.")
        selected_pairs = []
        for i in range(n_vars - 1):
            for j in range(i + 1, n_vars):
                selected_pairs.append([i, j])
        selected_pairs = np.array(selected_pairs)
        n_pairs = len(selected_pairs)
        if n_pairs <= batch_size:
            y_int = sensitivity.mainInteraction(self.model, used_Rangenorm,
                                                selected_pairs, nx_samples,
                                                hyperpar_samples, grid_points,
                                                par_samples, type)
        else:
            y_int = {}
            n_batches = n_pairs // batch_size
            pairs_groups = np.array_split(selected_pairs, n_batches, axis=0)
            completed = 0
            for group in pairs_groups:
                y_group = sensitivity.mainInteraction(
                    self.model, used_Rangenorm, group, nx_samples,
                    hyperpar_samples, grid_points, par_samples, type)
                completed += len(group)
                progress = 100.0 * completed / n_pairs
                print(
                    "Interaction effect computation: {:.2f}% complete".format(
                        progress))
                y_int.update(y_group)
        z_intmean = np.zeros((n_pairs, grid_points, grid_points))
        z_intstd = np.zeros((n_pairs, grid_points, grid_points))
        for k in range(n_pairs):
            key = tuple(selected_pairs[k])
            y_slice = np.reshape(y_int[key], (grid_points, grid_points, 2))
            j1, j2 = selected_pairs[k]
            key1 = tuple([j1])
            key2 = tuple([j2])
            v1 = y_main[key1][:, 0]
            v2 = y_main[key2][:, 0]
            p1, p2 = np.meshgrid(v1, v2)
            w1 = np.sqrt(y_main[key1][:, 1])
            w2 = np.sqrt(y_main[key2][:, 1])
            q1, q2 = np.meshgrid(w1, w2)
            z_intmean[k, :, :] = y_slice[:, :, 0] - p1 - p2 + ybase[0][0, 0]
            upper_app = np.sqrt(y_slice[:, :, 1]) + q1 + q2 + np.sqrt(
                ybase[0][0, 1])
            lower_app = np.abs(
                np.sqrt(y_slice[:, :, 1]) - q1 - q2 + np.sqrt(ybase[0][0, 1]))
            z_intstd[k, :, :] = (upper_app + lower_app) / 2.0

        # Converting to the proper scale and storing
        interaction = {}
        for k in range(n_pairs):
            item = selected_pairs[k]
            j1, j2 = item
            x = np.linspace(used_Range[j1, 0], used_Range[j1, 1], grid_points)
            y = np.linspace(used_Range[j2, 0], used_Range[j2, 1], grid_points)
            Z = z_intmean[k, :, :] * std_y + mean_y
            Zstd = z_intstd[k, :, :] * std_y
            X, Y = np.meshgrid(x, y)
            key = used_labels[j1] + '_&_' + used_labels[j2]
            interaction[key] = {}
            interaction[key]['input1'] = X
            interaction[key]['input2'] = Y
            interaction[key]['output_mean'] = Z
            interaction[key]['output_std'] = Zstd

        if create_plot:
            print('Generating interaction surface plots.')
            # Bounds for the interaction surface plot
            zmin = np.min(z_intmean) * std_y + mean_y
            zmax = np.max(z_intmean) * std_y + mean_y
            minn = np.min(z_intstd) * std_y
            maxx = np.max(z_intstd) * std_y

            for k in range(n_pairs):
                item = selected_pairs[k]
                j1, j2 = item
                key = used_labels[j1] + '_&_' + used_labels[j2]
                X = interaction[key]['input1']
                Y = interaction[key]['input2']
                Z = interaction[key]['output_mean']
                Zstd = interaction[key]['output_std']
                fig = plt.figure(figsize=(20, 10))
                norm = mpl.colors.Normalize(minn, maxx)
                m = plt.cm.ScalarMappable(norm=norm, cmap='jet')
                m.set_array(Zstd)
                m.set_clim(minn, maxx)
                color_dimension = Zstd
                fcolors = m.to_rgba(color_dimension)
                # gca(projection=...) was removed in Matplotlib 3.6; use add_subplot
                ax = fig.add_subplot(projection='3d')
                ax.plot_surface(Y,
                                X,
                                Z,
                                rstride=1,
                                cstride=1,
                                facecolors=fcolors,
                                shade=False)
                title = key
                ax.set_title(title)
                ax.set_xlabel(used_labels[j2])
                ax.set_ylabel(used_labels[j1])
                ax.set_zlim(zmin, zmax)
                ax.invert_xaxis()
                fig.colorbar(m, ax=ax)
                figpath = title + '.png'
                figpath = os.path.join(directory_path2, figpath)
                plt.savefig(figpath)
                plt.close(fig)

        return main, interaction
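
A minimal usage sketch (hypothetical driver code):

    main, interaction = model.maineffect_and_interaction(type='discrepancy',
                                                         grid_points=20,
                                                         directory_path1='./plots',
                                                         directory_path2='./plots')
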
Example #6
    def group_sobol_indices(self,
                            max_order=2,
                            partition=None,
                            S=None,
                            nx_samples=None,
                            directory_path=None,
                            create_plot=True,
                            batch_size=10):
        # Computes group Sobol indices and generates a bar plot.
        # Inputs:
        #   max_order := maximum order of Sobol indices to compute
        #   partition := list of lists specifying the partition of the variables
        #   S := dictionary containing previously computed Sobol indices. The computation of
        #      Sobol indices is a recursive computation
        #   nx_samples := number of sample points for the Monte Carlo integration. Defaults
        #           to a multiple of the number of variables if not provided
        #   directory_path := directory where to save the Sobol bar plot if needed. Defaults
        #           to the current directory if not specified
        #   create_plot := specifies whether the Sobol bar plot should be generated or not
        #   batch_size := number of group subsets to process per call when their count is large
        # Outputs:
        #   Sobol := dictionary consisting of two main keys:
        #       'mapping' := dictionary specifying the mapping between groups and variable indices
        #       'results' := dictionary containing the Sobol indices values
        #   label_mapping := dictionary specifying the mapping between group labels and variable labels

        if S is None:
            # generating the group mapping
            if partition is None:
                raise ValueError(
                    'specify a partition of the labels or provide a previously computed group Sobol indices dictionary'
                )
            groups_map = sensitivity.create_groups(partition, self.labels)
        else:
            groups_map = S['mapping']
        n_group = len(groups_map)

        if max_order > n_group:
            raise Exception(
                'max_order cannot be greater than the number of groups')
        if n_group == 1:
            print('Not enough groups to perform sensitivity analysis.')
            return
        if len(self.hyperpar_samples) == 0:
            raise Exception(
                'Hyperparameter samples must be generated or retrieved first.')
        if directory_path is None:
            directory_path = os.getcwd()
        if not os.path.isdir(directory_path):
            raise Exception('Invalid directory path ', directory_path)

        loc_samples = self.hyperpar_samples['gp_constant_mean_function']
        varm_samples = self.hyperpar_samples['kernel_variance']
        beta_samples = self.hyperpar_samples['kernel_inverse_lengthscales']
        hyperpar_samples = [loc_samples, varm_samples, beta_samples]
        if nx_samples is None:
            nx_samples = 300 * self.n_inputs
        selected_groups = list(range(n_group))

        initial_list = sensitivity.powerset(selected_groups, 1, max_order)
        subsets_of_groups = []
        if S is not None:
            print('Initial number of Sobol computations: ', len(initial_list))
            try:
                for item in initial_list:
                    l = sensitivity.generate_group_label(item)
                    if l not in S['results']:
                        subsets_of_groups.append(item)
                print('New number of Sobol computations: ',
                      len(subsets_of_groups))
            except Exception as e:
                traceback.print_exc()
                print('Invalid Sobol indices dictionary')
        else:
            subsets_of_groups = initial_list
        variable_subsets = []
        for entry in subsets_of_groups:
            variable_subsets.append(
                sensitivity.get_variable_indices_list(entry, groups_map))

        n_subset = len(subsets_of_groups)
        if n_subset > 0:
            ybase = sensitivity.allEffect(self.model, self.Rangenorm,
                                          nx_samples, hyperpar_samples)
            ey_square = sensitivity.direct_samples(self.model, self.Rangenorm,
                                                   nx_samples,
                                                   hyperpar_samples)
            if n_subset <= batch_size:
                y_higher_order = sensitivity.mainHigherOrder(
                    self.model, self.Rangenorm, variable_subsets, nx_samples,
                    hyperpar_samples)
            else:
                y_higher_order = {}
                completed = 0
                n_batches = math.ceil(n_subset / batch_size)
                for i in range(n_batches):
                    batch = variable_subsets[i * batch_size:(i + 1) *
                                             batch_size]
                    y_batch = sensitivity.mainHigherOrder(
                        self.model, self.Rangenorm, batch, nx_samples,
                        hyperpar_samples)
                    completed += len(batch)
                    progress = 100.0 * completed / n_subset
                    print("Sobol indices computation: {:.2f}% complete".format(
                        progress))
                    y_higher_order.update(y_batch)

            e1 = np.mean(ey_square[:, 1] + np.square(ey_square[:, 0]))
            e2 = ybase[0][0, 1] + np.square(ybase[0][0, 0])
            y_higher_order_group = {}
            for entry in subsets_of_groups:
                subset = sensitivity.get_variable_indices_list(
                    entry, groups_map)
                k_group = tuple(entry)
                k_subset = tuple(subset)
                y_higher_order_group[k_group] = y_higher_order[k_subset]
            quotient_variances = {}
            for entry in subsets_of_groups:
                k_group = tuple(entry)
                quotient_variances[k_group] = np.mean(
                    y_higher_order_group[k_group][:, 1] +
                    np.square(y_higher_order_group[k_group][:, 0]))
                quotient_variances[k_group] = (quotient_variances[k_group] -
                                               e2) / (e1 - e2)

        if S is not None:
            Sobol = S
        else:
            Sobol = {}
            Sobol['mapping'] = groups_map
            Sobol['results'] = {}
        for i in range(n_subset):
            key = tuple(subsets_of_groups[i])
            sensitivity.compute_group_Sobol(Sobol['results'],
                                            quotient_variances, key)

        all_labels = list(Sobol['results'].keys())
        si_all = list(Sobol['results'].values())

        # plotting
        si_all = np.array(si_all)
        order = np.argsort(-si_all)
        n_selected = min(40, len(si_all))
        selected = order[:n_selected]  # plot only the top values
        y_pos = np.arange(n_selected)
        if create_plot:
            print('Generating group Sobol indices barplot.')
            plt.figure(figsize=(12, 12))
            # Create bars
            plt.barh(y_pos, si_all[selected])
            new_labels = [all_labels[selected[i]] for i in range(n_selected)]
            title = 'top_group_sobol_indices'
            plt.title(title)
            # Label the y-axis with the group labels
            plt.yticks(y_pos, new_labels)
            figpath = title + '.png'
            figpath = os.path.join(directory_path, figpath)
            plt.savefig(figpath)
            plt.close()
        # generate label_mapping
        label_mapping = {}
        for k in groups_map:
            label_mapping[k] = [self.labels[i] for i in groups_map[k]]

        return Sobol, label_mapping
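
A minimal usage sketch (hypothetical driver code; the partition groups variable labels, and
the returned Sobol dictionary can be fed back through S for higher orders):

    partition = [['x1', 'x2'], ['x3']]  # assumes these are actual variable labels
    sobol, label_map = model.group_sobol_indices(max_order=1, partition=partition)
    sobol, label_map = model.group_sobol_indices(max_order=2, S=sobol)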