Esempio n. 1
0
    def total_sobol_indices(self,
                            nx_samples=None,
                            directory_path=None,
                            create_plot=True,
                            batch_size=10):
        """Compute total Sobol indices per task and optionally save bar plots.

        Args:
            nx_samples: Number of sample points for the Monte Carlo
                integration. Defaults to ``300 * self.n_inputs``.
            directory_path: Directory where the bar plots are saved. Defaults
                to the current working directory.
            create_plot: Whether to save, for every task, a bar plot of the
                (at most 40) largest total Sobol indices.
            batch_size: Maximum number of variables handed to
                ``sensitivity.compute_remaining_effect`` in a single call.
                Values below 1 are treated as 1.

        Returns:
            Dictionary mapping every input label to an array with one total
            Sobol index per task, or ``None`` when the model has one input.

        Raises:
            Exception: If no hyperparameter samples are available or if
                ``directory_path`` is not an existing directory.
        """
        if self.n_inputs == 1:
            print('Not enough variables to perform sensitivity analysis.')
            return None
        if len(self.hyperpar_samples) == 0:
            raise Exception(
                'Hyperparameter samples must be generated or retrieved first.')
        if directory_path is None:
            directory_path = os.getcwd()
        if not os.path.isdir(directory_path):
            raise Exception('Invalid directory path ', directory_path)

        hyperpar_samples = [
            self.hyperpar_samples['gp_constant_mean_function'],
            self.hyperpar_samples['kernel_variance'],
            self.hyperpar_samples['kernel_inverse_lengthscales'],
            self.hyperpar_samples['common_kernel_variance'],
        ]
        if nx_samples is None:
            nx_samples = 300 * self.n_inputs

        n_tasks = self.n_tasks
        n_vars = self.n_inputs
        selected_vars = list(range(n_vars))
        # A non-positive batch size would make the batch count meaningless.
        batch_size = max(1, int(batch_size))

        ybase = sensitivity.allEffect(self.model, self.Rangenorm, nx_samples,
                                      hyperpar_samples)
        ey_square = sensitivity.direct_samples(self.model, self.Rangenorm,
                                               nx_samples, hyperpar_samples)
        ey_square = np.reshape(ey_square, (n_tasks, nx_samples, 2))

        if n_vars <= batch_size:
            y_remaining = sensitivity.compute_remaining_effect(
                self.model, self.Rangenorm, selected_vars, nx_samples,
                hyperpar_samples)
        else:
            y_remaining = {}
            # Ceiling division: always at least one batch and no batch larger
            # than batch_size (floor division could yield zero batches, which
            # np.array_split rejects).
            n_batches = (n_vars + batch_size - 1) // batch_size
            completed = 0
            for group in np.array_split(selected_vars, n_batches):
                y_group = sensitivity.compute_remaining_effect(
                    self.model, self.Rangenorm, group, nx_samples,
                    hyperpar_samples)
                completed += len(group)
                progress = 100.0 * completed / n_vars
                print(
                    "Total Sobol indices computation: {:.2f}% complete".format(
                        progress))
                y_remaining.update(y_group)

        # NOTE(review): the last axis presumably holds (mean, variance) pairs,
        # so "entry 1 + entry 0 squared" is a second moment -- confirm against
        # the sensitivity module.
        e1 = np.mean(ey_square[:, :, 1] + np.square(ey_square[:, :, 0]),
                     axis=1)
        e2 = ybase[0][0, :, 1] + np.square(ybase[0][0, :, 0])

        si_remaining = np.zeros((n_tasks, n_vars))
        for i in range(n_vars):
            effect = y_remaining[(i,)]
            si_remaining[:, i] = np.mean(
                effect[:, :, 1] + np.square(effect[:, :, 0]), axis=0)
        # Normalise once, after every column is filled. Doing this inside the
        # loop would rescale already-normalised columns again on each pass.
        si_remaining = (si_remaining - e2[:, np.newaxis]) / (
            e1[:, np.newaxis] - e2[:, np.newaxis])
        # Same as clamping si_remaining at 0 and then 1 - si_remaining at 0.
        si_total = np.clip(1 - si_remaining, 0, 1)

        if create_plot:
            n_selected = min(40, n_vars)
            y_pos = np.arange(n_selected)
            for j in range(n_tasks):
                # Indices of the n_selected largest values for this task.
                selected = np.argsort(-si_total[j, :])[:n_selected]
                plt.figure(figsize=(12, 12))
                plt.barh(y_pos, si_total[j, selected])
                new_labels = [self.input_labels[k] for k in selected]
                title = 'top_total_sobol_indices_for_' + self.output_labels[j]
                plt.title(title)
                plt.yticks(y_pos, new_labels)
                plt.savefig(os.path.join(directory_path, title + '.png'))
                plt.close()

        return {
            self.input_labels[i]: si_total[:, i]
            for i in range(n_vars)
        }
Esempio n. 2
0
    def total_sobol_indices(self,
                            type='simulator',
                            nx_samples=None,
                            directory_path=None,
                            create_plot=True,
                            batch_size=10):
        """Compute total Sobol indices and optionally save a bar plot.

        Args:
            type: ``'simulator'`` analyses ``self.n_inputs + self.n_pars``
                variables labelled by ``self.labels``; ``'discrepancy'``
                analyses the first ``self.n_inputs`` variables labelled by
                ``self.input_labels``.
            nx_samples: Number of sample points for the Monte Carlo
                integration. Defaults to 300 times the number of variables.
            directory_path: Directory where the bar plot is saved. Defaults
                to the current working directory.
            create_plot: Whether to save a bar plot of the (at most 40)
                largest total Sobol indices.
            batch_size: Maximum number of variables handed to
                ``sensitivity.compute_remaining_effect`` in a single call.

        Returns:
            Dictionary mapping every variable label to its total Sobol index,
            or ``None`` when only one variable is available.

        Raises:
            Exception: If no hyperparameter samples are available, if
                ``directory_path`` is not an existing directory, or if
                ``type`` is not one of the two supported values.
        """
        if len(self.hyperpar_samples) == 0:
            raise Exception(
                'Hyperparameter samples must be generated or retrieved first.')
        if directory_path is None:
            directory_path = os.getcwd()
        if not os.path.isdir(directory_path):
            raise Exception('Invalid directory path ', directory_path)

        if type == 'simulator':
            used_labels = self.labels
            used_Rangenorm = self.Rangenorm
            n_vars = self.n_inputs + self.n_pars
        elif type == 'discrepancy':
            used_labels = self.input_labels
            used_Rangenorm = self.Rangenorm[:self.n_inputs, :]
            n_vars = self.n_inputs
        else:
            raise Exception('Invalid type')

        if n_vars == 1:
            print('Not enough variables to perform sensitivity analysis.')
            return None

        # The order of this list is the one expected by the sensitivity
        # helpers; keep it unchanged.
        hyperpar_samples = [
            self.hyperpar_samples['sim_gp_constant_mean_function'],
            self.hyperpar_samples['sim_kernel_variance'],
            self.hyperpar_samples['sim_pars_kernel_inverse_lengthscales'],
            self.hyperpar_samples['sim_inputs_kernel_inverse_lengthscales'],
            self.hyperpar_samples['disc_kernel_inverse_lengthscales'],
            self.hyperpar_samples['disc_kernel_variance'],
        ]
        par_samples = self.par_samples

        if nx_samples is None:
            nx_samples = 300 * n_vars
        selected_vars = list(range(n_vars))

        ybase = sensitivity.allEffect(self.model, used_Rangenorm, nx_samples,
                                      hyperpar_samples, par_samples, type)
        ey_square = sensitivity.direct_samples(self.model, used_Rangenorm,
                                               nx_samples, hyperpar_samples,
                                               par_samples, type)

        # NOTE(review): the positional 60 below is passed through unchanged;
        # its meaning is defined by sensitivity.compute_remaining_effect.
        if n_vars <= batch_size:
            y_remaining = sensitivity.compute_remaining_effect(
                self.model, used_Rangenorm, selected_vars, nx_samples,
                hyperpar_samples, 60, par_samples, type)
        else:
            y_remaining = {}
            # Ceiling division keeps every group within batch_size; floor
            # division allowed groups of almost twice that size.
            n_batches = (n_vars + batch_size - 1) // batch_size
            completed = 0
            for group in np.array_split(selected_vars, n_batches):
                y_group = sensitivity.compute_remaining_effect(
                    self.model, used_Rangenorm, group, nx_samples,
                    hyperpar_samples, 60, par_samples, type)
                completed += len(group)
                progress = 100.0 * completed / n_vars
                print(
                    "Total Sobol indices computation: {:.2f}% complete".format(
                        progress))
                y_remaining.update(y_group)

        # NOTE(review): columns 0 and 1 are presumably mean and variance, so
        # "column 1 + column 0 squared" is a second moment -- confirm.
        e1 = np.mean(ey_square[:, 1] + np.square(ey_square[:, 0]))
        e2 = ybase[0][0, 1] + np.square(ybase[0][0, 0])
        si_remaining = np.zeros(n_vars)
        for i in range(n_vars):
            effect = y_remaining[(i,)]
            si_remaining[i] = np.mean(effect[:, 1] + np.square(effect[:, 0]))
        si_remaining = (si_remaining - e2) / (e1 - e2)
        # Same as clamping si_remaining at 0 and then 1 - si_remaining at 0.
        si_total = np.clip(1 - si_remaining, 0, 1)

        if create_plot:
            n_selected = min(40, len(si_total))
            # Indices of the n_selected largest values.
            selected = np.argsort(-si_total)[:n_selected]
            y_pos = np.arange(n_selected)
            plt.figure(figsize=(12, 12))
            plt.barh(y_pos, si_total[selected])
            new_labels = [used_labels[k] for k in selected]
            title = 'top_total_sobol_indices'
            plt.title(title)
            plt.yticks(y_pos, new_labels)
            plt.savefig(os.path.join(directory_path, title + '.png'))
            plt.close()

        return {used_labels[i]: si_total[i] for i in range(n_vars)}
Esempio n. 3
0
    def sobol_indices(self,
                      max_order=2,
                      S=None,
                      nx_samples=None,
                      directory_path=None,
                      create_plot=True,
                      batch_size=10):
        """Compute Sobol indices up to ``max_order`` and optionally plot them.

        Args:
            max_order: Largest subset size requested from
                ``sensitivity.powerset``; must not exceed ``self.n_inputs``.
            S: Dictionary of previously computed Sobol indices. Subsets whose
                label (from ``sensitivity.generate_label``) is already a key
                are skipped; this same dictionary is passed on to
                ``sensitivity.compute_Sobol`` and returned.
            nx_samples: Number of sample points for the Monte Carlo
                integration. Defaults to ``300 * self.n_inputs``.
            directory_path: Directory where the bar plots are saved. Defaults
                to the current working directory.
            create_plot: Whether to save, for every task, a bar plot of the
                (at most 40) largest Sobol indices.
            batch_size: Number of subsets handed to
                ``sensitivity.mainHigherOrder`` per call.

        Returns:
            Dictionary containing the Sobol indices, or ``None`` when the
            model has a single input.

        Raises:
            Exception: If ``max_order`` exceeds the number of inputs, if no
                hyperparameter samples are available, or if
                ``directory_path`` is not an existing directory.
        """
        if max_order > self.n_inputs:
            raise Exception(
                'max_order cannot be greater than the number of variables')
        if self.n_inputs == 1:
            print('Not enough variables to perform sensitivity analysis.')
            return
        if len(self.hyperpar_samples) == 0:
            raise Exception(
                'Hyperparameter samples must be generated or retrieved first.')
        if directory_path is None:
            directory_path = os.getcwd()
        if not os.path.isdir(directory_path):
            raise Exception('Invalid directory path ', directory_path)

        sample_names = (
            'gp_constant_mean_function',
            'kernel_variance',
            'kernel_inverse_lengthscales',
            'common_kernel_variance',
        )
        hyperpar_samples = [self.hyperpar_samples[n] for n in sample_names]
        if nx_samples is None:
            nx_samples = 300 * self.n_inputs

        candidates = sensitivity.powerset(list(range(self.n_inputs)), 1,
                                          max_order)
        if S is None:
            subsets_list = candidates
        else:
            # Keep only the subsets that are not already stored in S.
            subsets_list = []
            print('Initial number of Sobol computations: ', len(candidates))
            try:
                for subset in candidates:
                    label = sensitivity.generate_label(subset,
                                                       self.input_labels)
                    if label not in S.keys():
                        subsets_list.append(subset)
                print('New number of Sobol computations: ', len(subsets_list))
            except Exception:
                # Best effort: report the problem and carry on with whatever
                # subsets were collected before the failure.
                traceback.print_exc()
                print('Invalid Sobol indices dictionary')

        n_subset = len(subsets_list)
        n_tasks = self.n_tasks
        # Holds E*[Vsub]/E*(Var(Y)) where Vsub = E[Y|Xsub], Y normalized.
        quotient_variances = {}
        if n_subset > 0:
            ybase = sensitivity.allEffect(self.model, self.Rangenorm,
                                          nx_samples, hyperpar_samples)
            ey_square = sensitivity.direct_samples(self.model, self.Rangenorm,
                                                   nx_samples,
                                                   hyperpar_samples)
            ey_square = np.reshape(ey_square, (n_tasks, nx_samples, 2))

            if n_subset <= batch_size:
                y_higher_order = sensitivity.mainHigherOrder(
                    self.model, self.Rangenorm, subsets_list, nx_samples,
                    hyperpar_samples)
            else:
                y_higher_order = {}
                completed = 0
                for g in range(math.ceil(n_subset / batch_size)):
                    start = g * batch_size
                    chunk = subsets_list[start:start + batch_size]
                    y_chunk = sensitivity.mainHigherOrder(
                        self.model, self.Rangenorm, chunk, nx_samples,
                        hyperpar_samples)
                    completed += len(chunk)
                    progress = 100.0 * completed / n_subset
                    print("Sobol indices computation: {:.2f}% complete".format(
                        progress))
                    y_higher_order.update(y_chunk)

            e1 = np.mean(ey_square[:, :, 1] + np.square(ey_square[:, :, 0]),
                         axis=1)
            e2 = ybase[0][0, :, 1] + np.square(ybase[0][0, :, 0])

            for subset in subsets_list:
                key = tuple(subset)
                effect = y_higher_order[key]
                second_moment = np.mean(
                    effect[:, :, 1] + np.square(effect[:, :, 0]), axis=0)
                quotient_variances[key] = (second_moment - e2) / (e1 - e2)

        Sobol = {} if S is None else S
        for subset in subsets_list:
            sensitivity.compute_Sobol(Sobol, quotient_variances,
                                      tuple(subset), self.input_labels)

        all_labels = list(Sobol.keys())
        if not create_plot:
            return Sobol

        # One bar plot per task with the largest indices.
        n_selected = min(40, len(all_labels))
        y_pos = np.arange(n_selected)
        for task in range(n_tasks):
            values = np.array([Sobol[label][task] for label in all_labels])
            top = np.argsort(-values)[:n_selected]
            plt.figure(figsize=(12, 12))
            plt.barh(y_pos, values[top])
            tick_labels = [all_labels[k] for k in top]
            title = 'top_sobol_indices_for_' + self.output_labels[task]
            plt.title(title)
            plt.yticks(y_pos, tick_labels)
            plt.savefig(os.path.join(directory_path, title + '.png'))
            plt.close()

        return Sobol
Esempio n. 4
0
    def group_sobol_indices(self,
                            max_order=2,
                            partition=None,
                            S=None,
                            nx_samples=None,
                            directory_path=None,
                            create_plot=True,
                            batch_size=10):
        """Compute group Sobol indices and optionally save a bar plot.

        Args:
            max_order: Largest number of groups combined in one subset; must
                not exceed the number of groups.
            partition: List of lists specifying the partition of the
                variables. Required when ``S`` is not given.
            S: Previously computed group Sobol dictionary with the keys
                ``'mapping'`` and ``'results'``. When given, its mapping is
                reused and subsets already present in ``S['results']`` are
                skipped.
            nx_samples: Number of sample points for the Monte Carlo
                integration. Defaults to ``300 * self.n_inputs``.
            directory_path: Directory where the bar plot is saved. Defaults
                to the current working directory.
            create_plot: Whether to save a bar plot of the (at most 40)
                largest group Sobol indices.
            batch_size: Number of subsets handed to
                ``sensitivity.mainHigherOrder`` per call.

        Returns:
            A tuple ``(Sobol, label_mapping)``. ``Sobol`` has the keys
            ``'mapping'`` (groups to variable indices) and ``'results'``
            (the Sobol index values); ``label_mapping`` maps every group to
            the labels of its variables. Returns ``None`` when there is only
            one group.

        Raises:
            ValueError: If neither ``partition`` nor ``S`` is provided.
            Exception: If ``max_order`` exceeds the number of groups, if no
                hyperparameter samples are available, or if
                ``directory_path`` is not an existing directory.
        """
        if S is not None:
            groups_map = S['mapping']
        elif partition is None:
            raise ValueError(
                'specify a partition of the labels or provide previoulsy computed group Sobol indices dictionary'
            )
        else:
            groups_map = sensitivity.create_groups(partition, self.labels)
        n_group = len(groups_map)

        if max_order > n_group:
            raise Exception(
                'max_order cannot be greater than the number of groups')
        if n_group == 1:
            print('Not enough groups to perform sensitivity analysis.')
            return
        if len(self.hyperpar_samples) == 0:
            raise Exception(
                'Hyperparameter samples must be generated or retrieved first.')
        if directory_path is None:
            directory_path = os.getcwd()
        if not os.path.isdir(directory_path):
            raise Exception('Invalid directory path ', directory_path)

        sample_names = (
            'gp_constant_mean_function',
            'kernel_variance',
            'kernel_inverse_lengthscales',
        )
        hyperpar_samples = [self.hyperpar_samples[n] for n in sample_names]
        if nx_samples is None:
            nx_samples = 300 * self.n_inputs

        candidates = sensitivity.powerset(list(range(n_group)), 1, max_order)
        if S is None:
            subsets_of_groups = candidates
        else:
            # Keep only the group subsets that are not already stored in S.
            subsets_of_groups = []
            print('Initial number of Sobol computations: ', len(candidates))
            try:
                for entry in candidates:
                    label = sensitivity.generate_group_label(entry)
                    if label not in S['results'].keys():
                        subsets_of_groups.append(entry)
                print('New number of Sobol computations: ',
                      len(subsets_of_groups))
            except Exception:
                # Best effort: report the problem and carry on with whatever
                # subsets were collected before the failure.
                traceback.print_exc()
                print('Invalid Sobol indices dictionary')

        # Translate every subset of groups into the variable indices it spans.
        variable_subsets = [
            sensitivity.get_variable_indices_list(entry, groups_map)
            for entry in subsets_of_groups
        ]

        n_subset = len(subsets_of_groups)
        quotient_variances = {}
        if n_subset > 0:
            ybase = sensitivity.allEffect(self.model, self.Rangenorm,
                                          nx_samples, hyperpar_samples)
            ey_square = sensitivity.direct_samples(self.model, self.Rangenorm,
                                                   nx_samples,
                                                   hyperpar_samples)
            if n_subset <= batch_size:
                y_higher_order = sensitivity.mainHigherOrder(
                    self.model, self.Rangenorm, variable_subsets, nx_samples,
                    hyperpar_samples)
            else:
                y_higher_order = {}
                completed = 0
                for b in range(math.ceil(n_subset / batch_size)):
                    start = b * batch_size
                    chunk = variable_subsets[start:start + batch_size]
                    y_chunk = sensitivity.mainHigherOrder(
                        self.model, self.Rangenorm, chunk, nx_samples,
                        hyperpar_samples)
                    completed += len(chunk)
                    progress = 100.0 * completed / n_subset
                    print("Sobol indices computation: {:.2f}% complete".format(
                        progress))
                    y_higher_order.update(y_chunk)

            e1 = np.mean(ey_square[:, 1] + np.square(ey_square[:, 0]))
            e2 = ybase[0][0, 1] + np.square(ybase[0][0, 0])

            # Results are keyed by variable indices; re-key them by group.
            for entry in subsets_of_groups:
                members = sensitivity.get_variable_indices_list(
                    entry, groups_map)
                effect = y_higher_order[tuple(members)]
                second_moment = np.mean(effect[:, 1] +
                                        np.square(effect[:, 0]))
                quotient_variances[tuple(entry)] = (second_moment -
                                                    e2) / (e1 - e2)

        if S is None:
            Sobol = {'mapping': groups_map, 'results': {}}
        else:
            Sobol = S
        for entry in subsets_of_groups:
            sensitivity.compute_group_Sobol(Sobol['results'],
                                            quotient_variances, tuple(entry))

        all_labels = list(Sobol['results'].keys())
        si_all = np.array(list(Sobol['results'].values()))

        # Rank the indices; the top entries are the ones shown in the plot.
        n_selected = min(40, len(si_all))
        selected = np.argsort(-si_all)[:n_selected]
        y_pos = np.arange(n_selected)
        if create_plot:
            print('Generating group Sobol indices barplot.')
            plt.figure(figsize=(12, 12))
            plt.barh(y_pos, si_all[selected])
            tick_labels = [all_labels[k] for k in selected]
            title = 'top_group_sobol_indices'
            plt.title(title)
            plt.yticks(y_pos, tick_labels)
            plt.savefig(os.path.join(directory_path, title + '.png'))
            plt.close()

        label_mapping = {
            group: [self.labels[i] for i in groups_map[group]]
            for group in groups_map
        }

        return Sobol, label_mapping