Ejemplo n.º 1
0
 def test_get_cover_array_and_size(self):
     """Check the size reported by ``ps.get_cover_array_and_size``.

     Exercises a selector evaluated against ``self.data``, two slices
     combined with an explicit data length, and an integer index array.
     """
     # Selector: no explicit length, the data itself is supplied.
     selector = ps.EqualitySelector('checking_status', b'no checking')
     _cover, n_covered = ps.get_cover_array_and_size(selector, None,
                                                     self.data)
     self.assertEqual(n_covered, 394)

     # Slices: only the data length is needed to derive the size.
     n_rows = len(self.data)
     slice_cases = (
         ((slice(None), n_rows, None), n_rows),
         ((slice(0, 10), n_rows), 10),
     )
     for call_args, expected in slice_cases:
         _cover, n_covered = ps.get_cover_array_and_size(*call_args)
         self.assertEqual(n_covered, expected)

     # Integer index array: passed on its own.
     indices = np.array([1, 3, 5, 7, 11], dtype=int)
     _cover, n_covered = ps.get_cover_array_and_size(indices)
     self.assertEqual(n_covered, 5)
Ejemplo n.º 2
0
    def calculate_statistics(self, subgroup, data, cached_statistics=None):
        """Collect size, target-sum and metric statistics for a subgroup.

        Parameters
        ----------
        subgroup
            Subgroup specification forwarded to
            ``ps.get_cover_array_and_size``.
        data
            Dataset; ``len(data)`` and ``data.shape[0]`` are read.
        cached_statistics : dict, optional
            Previously computed statistics. Returned unchanged when it
            already contains every key in
            ``PredictionTarget.statistic_types``; otherwise it is filled
            in place. Anything that is not a dict is ignored.

        Returns
        -------
        dict
            Sizes, sums of the target and of ``1 - target``, the evaluation
            metric, and one ``<key>_sg`` / ``<key>_dataset`` pair per entry
            of ``self.eval_dict`` (when that is not ``None``).
        """
        if isinstance(cached_statistics, dict):
            if all(name in cached_statistics
                   for name in PredictionTarget.statistic_types):
                return cached_statistics
            stats = cached_statistics
        else:
            stats = {}

        cover_arr, size_sg = ps.get_cover_array_and_size(
            subgroup, len(data), data)

        stats['size_sg'] = size_sg
        stats['size_dataset'] = data.shape[0]

        # Sums of the target and of its complement, subgroup vs. dataset.
        stats['pos_sg'] = self.target_variable[cover_arr].sum()
        stats['pos_dataset'] = self.target_variable.sum()
        stats['neg_sg'] = (1 - self.target_variable[cover_arr]).sum()
        stats['neg_dataset'] = (1 - self.target_variable).sum()

        stats['metric_sg'] = self.evaluation_metric(
            self.target_variable[cover_arr], self.target_estimate[cover_arr])
        stats['metric_dataset'] = self.evaluation_metric(
            self.target_variable, self.target_estimate)

        if self.eval_dict is not None:
            # Additional metrics are stored under "<key>_sg"/"<key>_dataset".
            for key, metric in self.eval_dict.items():
                stats[key + "_sg"] = metric(
                    self.target_variable[cover_arr],
                    self.target_estimate[cover_arr])
                stats[key + "_dataset"] = metric(
                    self.target_variable, self.target_estimate)

        return stats
Ejemplo n.º 3
0
    def calculate_statistics(self, subgroup, data, cached_statistics=None):
        """Compute descriptive statistics of the target for a subgroup.

        Parameters
        ----------
        subgroup
            Subgroup specification forwarded to
            ``ps.get_cover_array_and_size``.
        data
            Dataset indexed by ``self.target_variable``; the selected
            column is converted with ``.to_numpy()``.
        cached_statistics : dict, optional
            Returned unchanged when it already holds every key in
            ``NumericTarget.statistic_types``; otherwise filled in place.
            Anything that is not a dict is ignored.

        Returns
        -------
        dict
            Sizes plus mean/std/median/max/min for subgroup and dataset,
            and the ``mean_lift`` / ``median_lift`` ratios.
        """
        if isinstance(cached_statistics, dict):
            if all(name in cached_statistics
                   for name in NumericTarget.statistic_types):
                return cached_statistics
            stats = cached_statistics
        else:
            stats = {}

        cover_arr, _ = ps.get_cover_array_and_size(subgroup, len(data), data)
        dataset_values = data[self.target_variable].to_numpy()
        subgroup_values = dataset_values[cover_arr]

        stats['size_sg'] = len(subgroup_values)
        stats['size_dataset'] = len(data)

        # Each aggregate is stored as "<label>_sg" then "<label>_dataset".
        aggregates = (
            ('mean', np.mean),
            ('std', np.std),
            ('median', np.median),
            ('max', np.max),
            ('min', np.min),
        )
        for label, aggregate in aggregates:
            stats[label + '_sg'] = aggregate(subgroup_values)
            stats[label + '_dataset'] = aggregate(dataset_values)

        # Lift = subgroup value relative to the dataset-wide value.
        for label in ('mean', 'median'):
            stats[label + '_lift'] = (stats[label + '_sg']
                                      / stats[label + '_dataset'])
        return stats
Ejemplo n.º 4
0
    def calculate_statistics(self, subgroup, data):
        """Return the subgroup size and the dataset size as a dict.

        ``size_sg`` is the size reported by ``ps.get_cover_array_and_size``
        for ``subgroup``; ``size_dataset`` is ``len(data)``.
        """
        _cover, n_covered = ps.get_cover_array_and_size(
            subgroup, len(data), data)
        return {
            'size_sg': n_covered,
            'size_dataset': len(data),
        }
Ejemplo n.º 5
0
 def calculate_statistics(self,
                          subgroup_description,
                          target,
                          data,
                          statistics=None):
     """Wrap the size of ``subgroup_description`` in a ``SimpleCountQF.tpl``.

     The size is obtained from ``ps.get_cover_array_and_size`` using
     ``self.size_dataset`` as the data length. ``target`` and
     ``statistics`` are accepted but not used here.
     """
     _cover, n_covered = ps.get_cover_array_and_size(
         subgroup_description, self.size_dataset, data)
     return SimpleCountQF.tpl(n_covered)
Ejemplo n.º 6
0
 def get_base_statistics(self, subgroup, data):
     """Return sizes and evaluation metric for a subgroup and the dataset.

     Parameters
     ----------
     subgroup
         Subgroup specification forwarded to
         ``ps.get_cover_array_and_size``.
     data
         Dataset; only ``data.shape[0]`` is read here, the object itself
         is passed on for cover computation.

     Returns
     -------
     tuple
         ``(size_sg, size_dataset, metric_sg, metric_dataset)``.
     """
     size_dataset = data.shape[0]
     # Pass the dataset length and the data along with the subgroup.
     # Calling the helper with the subgroup alone leaves it without the
     # information it is given elsewhere for slices (a length) and
     # selectors (the data).
     # NOTE(review): confirm against ps.get_cover_array_and_size.
     cover_arr, size_sg = ps.get_cover_array_and_size(
         subgroup, size_dataset, data)
     metric_sg = self.evaluation_metric(self.target_variable[cover_arr],
                                        self.target_estimate[cover_arr])
     metric_dataset = self.evaluation_metric(self.target_variable,
                                             self.target_estimate)
     return (size_sg, size_dataset, metric_sg, metric_dataset)
Ejemplo n.º 7
0
 def fit(self, subgroup, data=None):
     """Fit a polynomial of degree ``self.degree`` on the covered points.

     Returns ``beta_tuple(coefficients, size)``. When the subgroup holds
     at most ``self.degree + 1`` points no fit is attempted and the
     coefficients are all NaN.
     """
     cover_arr, n_covered = ps.get_cover_array_and_size(
         subgroup, len(self.x), data)
     n_coefficients = self.degree + 1
     if n_covered <= n_coefficients:
         # Too few points: report NaN coefficients instead of fitting.
         coefficients = np.full(n_coefficients, np.nan)
     else:
         coefficients = np.polyfit(self.x[cover_arr], self.y[cover_arr],
                                   deg=self.degree)
     return beta_tuple(coefficients, n_covered)
Ejemplo n.º 8
0
 def get_base_statistics(self, subgroup, data):
     """Count instances and positives for the dataset and a subgroup.

     Positives are taken from ``self.covers(data)``; the subgroup cover
     and size come from ``ps.get_cover_array_and_size``.

     Returns
     -------
     tuple
         ``(instances_dataset, positives_dataset, instances_subgroup,
         positives_subgroup)``.
     """
     cover_arr, n_covered = ps.get_cover_array_and_size(
         subgroup, len(data), data)
     target_mask = self.covers(data)
     n_positive_total = np.sum(target_mask)
     n_total = len(data)
     n_positive_covered = np.sum(target_mask[cover_arr])
     return n_total, n_positive_total, n_covered, n_positive_covered
Ejemplo n.º 9
0
 def covers(self, instance):
     """Return a boolean mask over ``instance`` for the subgroup.

     Parameters
     ----------
     instance
         Dataset passed to ``ps.get_cover_array_and_size``;
         ``len(instance)`` fixes the length of the returned mask.

     Returns
     -------
     numpy.ndarray
         Boolean array of length ``len(instance)`` that is ``True`` at
         every position selected by the cover of
         ``self.subgroup_description``.
     """
     cover_arr, _ = ps.get_cover_array_and_size(self.subgroup_description,
                                                len(instance), instance)
     # The old guard, ``isinstance(cover_arr, type(np.array))``, tested
     # against the type of the ``np.array`` factory function, not
     # ``np.ndarray``. It was never true, so the "return cover_arr as is"
     # branch was unreachable. Always building the mask keeps the
     # behaviour callers actually got, and it works for any indexer NumPy
     # accepts here.
     mask = np.zeros(len(instance), dtype=bool)
     mask[cover_arr] = True
     return mask
Ejemplo n.º 10
0
 def get_base_statistics(self, subgroup, data):
     """Return sizes and target means for the dataset and a subgroup.

     The target column is ``data[self.target_variable]``; the subgroup
     values are that column indexed by the cover from
     ``ps.get_cover_array_and_size``.

     Returns
     -------
     tuple
         ``(instances_dataset, mean_dataset, instances_subgroup, mean_sg)``.
     """
     cover_arr, n_covered = ps.get_cover_array_and_size(
         subgroup, len(data), data)
     dataset_values = data[self.target_variable]
     subgroup_values = dataset_values[cover_arr]
     n_total = len(data)
     subgroup_mean = np.mean(subgroup_values)
     dataset_mean = np.mean(dataset_values)
     return (n_total, dataset_mean, n_covered, subgroup_mean)
Ejemplo n.º 11
0
 def calculate_statistics(self, subgroup, target, data, statistics=None):
     """Return size, median and optimistic estimate for a subgroup.

     The result is a ``StandardQFNumericMedian.tpl``. For an empty
     subgroup the median is ``np.array([0])`` and the estimate is
     ``-inf``. ``target`` and ``statistics`` are accepted but not used
     here.
     """
     cover_arr, sg_size = ps.get_cover_array_and_size(
         subgroup, len(self.all_target_values), data)
     if sg_size > 0:
         covered_values = self.all_target_values[cover_arr]
         median = np.median(covered_values)
         estimate = self.estimator.get_estimate(
             subgroup, sg_size, median, cover_arr, covered_values)
         return StandardQFNumericMedian.tpl(sg_size, median, estimate)

     # Empty subgroup: placeholder median, worst possible estimate.
     return StandardQFNumericMedian.tpl(sg_size, np.array([0]),
                                        float('-inf'))
Ejemplo n.º 12
0
 def calculate_statistics(self, subgroup, target, data, statistics=None):
     """Return size, evaluation metric and estimate for a subgroup.

     The result is a ``PredictionQFNumeric.tpl``. When the subgroup is
     empty, or its target values have zero standard deviation, the
     metric is ``0`` and the estimate is ``-inf``. ``statistics`` is
     accepted but not used here.
     """
     cover_arr, sg_size = ps.get_cover_array_and_size(
         subgroup, len(self.all_target_variable), data)

     if sg_size > 0:
         covered_truth = self.all_target_variable[cover_arr]
         if np.std(covered_truth) != 0:
             covered_estimate = self.all_target_estimate[cover_arr]
             estimate = self.estimator.get_estimate(sg_size, self.a)
             metric_sg = target.evaluation_metric(covered_truth,
                                                  covered_estimate)
             return PredictionQFNumeric.tpl(sg_size, metric_sg, estimate)

     # Degenerate subgroup (empty or constant target): neutral metric and
     # the worst possible estimate.
     return PredictionQFNumeric.tpl(sg_size, 0, float('-inf'))
Ejemplo n.º 13
0
    def calculate_statistics(self,
                             subgroup_description,
                             data,
                             cached_statistics=None):
        """Return the subgroup size and the dataset size as a dict.

        ``cached_statistics`` is returned unchanged when it is a dict that
        already contains every key in ``FITarget.statistic_types``. A dict
        that lacks some keys is filled in place; any other value is
        ignored and a new dict is created.
        """
        if isinstance(cached_statistics, dict):
            if all(name in cached_statistics
                   for name in FITarget.statistic_types):
                return cached_statistics
            stats = cached_statistics
        else:
            stats = {}

        _cover, n_covered = ps.get_cover_array_and_size(
            subgroup_description, len(data), data)

        stats['size_sg'] = n_covered
        stats['size_dataset'] = len(data)
        return stats
Ejemplo n.º 14
0
 def calculate_statistics(self, subgroup, data):
     """Compute descriptive statistics of the target for a subgroup.

     The target column ``data[self.target_variable]`` is converted with
     ``.to_numpy()`` and indexed by the cover returned from
     ``ps.get_cover_array_and_size``.

     Returns
     -------
     dict
         Sizes plus mean/std/median/max/min for subgroup and dataset,
         and the ``mean_lift`` / ``median_lift`` ratios.
     """
     cover_arr, _ = ps.get_cover_array_and_size(subgroup, len(data), data)
     population = data[self.target_variable].to_numpy()
     members = population[cover_arr]

     statistics = {
         'size_sg': len(members),
         'size_dataset': len(data),
         'mean_sg': np.mean(members),
         'mean_dataset': np.mean(population),
         'std_sg': np.std(members),
         'std_dataset': np.std(population),
         'median_sg': np.median(members),
         'median_dataset': np.median(population),
         'max_sg': np.max(members),
         'max_dataset': np.max(population),
         'min_sg': np.min(members),
         'min_dataset': np.min(population),
     }
     # Lift = subgroup value relative to the dataset-wide value.
     statistics['mean_lift'] = (statistics['mean_sg']
                                / statistics['mean_dataset'])
     statistics['median_lift'] = (statistics['median_sg']
                                  / statistics['median_dataset'])
     return statistics
Ejemplo n.º 15
0
    def calculate_statistics(self, subgroup, data=None):
        """Fit ``self.model`` on a subgroup and package the result.

        The cover and size come from ``ps.get_cover_array_and_size`` with
        ``self.data_size`` as the data length. The fitted parameters are
        passed, together with the size and the cover, to ``self.get_tuple``.
        """
        cover, n_covered = ps.get_cover_array_and_size(
            subgroup, self.data_size, data)
        fitted = self.model.fit(cover, data)
        return self.get_tuple(n_covered, fitted, cover)
Ejemplo n.º 16
0
 def get_base_statistics(self, subgroup, data):
     """Return the size of ``subgroup`` within ``data``.

     The size is the second element of what
     ``ps.get_cover_array_and_size`` returns; the cover is discarded.
     """
     _cover, n_covered = ps.get_cover_array_and_size(
         subgroup, len(data), data)
     return n_covered
Ejemplo n.º 17
0
    def calculate_statistics(self, subgroup, data=None):
        """Return subgroup size and positive count as a ``SimplePositivesQF.tpl``.

        The positive count is the number of non-zero entries of
        ``self.positives`` at the positions selected by the subgroup cover.
        """
        cover, n_covered = ps.get_cover_array_and_size(
            subgroup, len(self.positives), data)
        n_positive = np.count_nonzero(self.positives[cover])
        return SimplePositivesQF.tpl(n_covered, n_positive)