Beispiel #1
0
 def test_multi(self):
     """Repeat ``test_correctness`` on five freshly drawn tables.

     Every round replaces ``self.table`` with ``self.data_num`` new
     standard-normal samples, installs a new ``QuantileSummaries`` and
     then delegates the actual checking to ``self.test_correctness()``.
     """
     # Fixed construction settings shared by every round.
     sketch_options = {"compress_thres": 10000, "head_size": 5000}
     for _ in range(5):
         self.table = np.random.randn(self.data_num)
         self.quantile_summaries = QuantileSummaries(
             error=self.error, **sketch_options)
         self.test_correctness()
Beispiel #2
0
    def setUp(self):
        """Prepare the fixture used by the tests of this case.

        Sets ``percentile_rate`` to ``[90]``, draws ``data_num`` (1000)
        standard-normal samples into ``table`` and creates a new
        ``QuantileSummaries`` configured with ``error`` (0.001).
        """
        self.percentile_rate = [90]
        self.data_num = 1000

        # Seed NumPy's global RNG so the sampled table -- and therefore any
        # failure it triggers -- is the same on every run.
        np.random.seed(15)
        self.table = np.random.randn(self.data_num)
        compress_thres = 1000
        head_size = 500
        self.error = 0.001
        self.quantile_summaries = QuantileSummaries(
            compress_thres=compress_thres,
            head_size=head_size,
            error=self.error)
class TestQuantileSummaries(unittest.TestCase):
    """Compare ``QuantileSummaries.query`` with exact order statistics."""

    def setUp(self):
        """Create a seeded random table and a new ``QuantileSummaries``.

        ``percentile_rate`` holds the integer percentiles 0..99 that
        ``test_correctness`` queries; ``table`` holds ``data_num`` (10000)
        standard-normal samples drawn after seeding NumPy's global RNG.
        """
        self.percentile_rate = list(range(0, 100, 1))
        self.data_num = 10000
        np.random.seed(15)
        self.table = np.random.randn(self.data_num)
        compress_thres = 1000
        head_size = 500
        self.error = 0.00001
        self.quantile_summaries = QuantileSummaries(
            compress_thres=compress_thres,
            head_size=head_size,
            error=self.error)

    def test_correctness(self):
        """Insert every table value, then verify each queried percentile.

        For a fraction ``p`` the queried value must lie between the sorted
        table entries at ranks ``floor((p - 2 * error) * data_num)`` and
        ``ceil((p + 2 * error) * data_num)``, both clamped to valid indices.
        """
        for num in self.table:
            self.quantile_summaries.insert(num)

        x = sorted(self.table)
        last_rank = len(x) - 1

        for q_num in self.percentile_rate:
            percent = q_num / 100
            sk2 = self.quantile_summaries.query(percent)
            # Clamp the tolerated rank window to the valid index range.
            min_rank = max(
                math.floor((percent - 2 * self.error) * self.data_num), 0)
            max_rank = min(
                math.ceil((percent + 2 * self.error) * self.data_num),
                last_rank)
            min_value, max_value = x[min_rank], x[max_rank]
            # Attach the diagnostics to the assertion itself instead of
            # printing them and re-raising a second AssertionError.
            self.assertTrue(
                min_value <= sk2 <= max_value,
                msg=(
                    f"min_value: {min_value}, max_value: {max_value}, sk2: {sk2}, percent: {percent},"
                    f"total_max_value: {x[-1]}"))

    def test_multi(self):
        """Run ``test_correctness`` on five new tables and summaries."""
        for _ in range(5):
            self.table = np.random.randn(self.data_num)
            compress_thres = 10000
            head_size = 5000
            self.quantile_summaries = QuantileSummaries(
                compress_thres=compress_thres,
                head_size=head_size,
                error=self.error)
            self.test_correctness()
Beispiel #4
0
class TestQuantileSummaries(unittest.TestCase):
    """Compare ``QuantileSummaries.query`` with exact order statistics."""

    def setUp(self):
        """Create a seeded random table and a new ``QuantileSummaries``.

        ``percentile_rate`` lists the integer percentiles queried by
        ``test_correctness`` (only 90 here); ``table`` holds ``data_num``
        (1000) standard-normal samples.
        """
        self.percentile_rate = [90]
        self.data_num = 1000

        # Seed NumPy's global RNG so the sampled table -- and therefore any
        # failure it triggers -- is the same on every run.
        np.random.seed(15)
        self.table = np.random.randn(self.data_num)
        compress_thres = 1000
        head_size = 500
        self.error = 0.001
        self.quantile_summaries = QuantileSummaries(
            compress_thres=compress_thres,
            head_size=head_size,
            error=self.error)

    def test_correctness(self):
        """Insert every table value, then verify each queried percentile.

        For a fraction ``p`` the queried value must lie between the sorted
        table entries at ranks ``floor((p - 2 * error) * data_num)`` and
        ``ceil((p + 2 * error) * data_num)``, both clamped to valid indices.
        """
        for num in self.table:
            self.quantile_summaries.insert(num)

        x = sorted(self.table)
        last_rank = len(x) - 1

        for q_num in self.percentile_rate:
            percent = q_num / 100
            sk2 = self.quantile_summaries.query(percent)
            # Clamp the tolerated rank window to the valid index range.
            min_rank = max(
                math.floor((percent - 2 * self.error) * self.data_num), 0)
            max_rank = min(
                math.ceil((percent + 2 * self.error) * self.data_num),
                last_rank)
            self.assertTrue(
                x[min_rank] <= sk2 <= x[max_rank],
                msg=(f"query({percent}) returned {sk2}, expected a value in "
                     f"[{x[min_rank]}, {x[max_rank]}]"))

    def test_multi(self):
        """Run ``test_correctness`` on five new tables and summaries."""
        for _ in range(5):
            self.table = np.random.randn(self.data_num)
            compress_thres = 10000
            head_size = 5000
            self.quantile_summaries = QuantileSummaries(
                compress_thres=compress_thres,
                head_size=head_size,
                error=self.error)
            self.test_correctness()
Beispiel #5
0
    def static_summaries_in_partition(data_instances, cols_dict, abnormal_list,
                                      error):
        """
        Build one QuantileSummaries per selected column in a single traversal

        Parameters
        ----------
        data_instances : iterable of (key, value) pairs
            The input data. The key is ignored. Each value is either an
            ``Instance``, whose ``features`` attribute is read, or the
            indexable feature container itself.

        cols_dict : dict
            Maps each column name to the index of that column in the
            features. Only these columns are summarized.

        abnormal_list: list
            Specify which values are not permitted. Forwarded unchanged to
            every ``QuantileSummaries`` as ``abnormal_list``.

        error :
            Forwarded unchanged to every ``QuantileSummaries`` as ``error``.

        Returns
        -------
        dict
            Column name -> the ``QuantileSummaries`` object into which every
            value of that column was inserted.

        """
        summary_dict = {
            col_name: QuantileSummaries(abnormal_list=abnormal_list,
                                        error=error)
            for col_name in cols_dict
        }
        # Resolve each column's index and summary once instead of per row.
        targets = [(col_index, summary_dict[col_name])
                   for col_name, col_index in cols_dict.items()]

        for _, instances in data_instances:
            if isinstance(instances, Instance):
                features = instances.features
            else:
                features = instances

            for col_index, summary_obj in targets:
                summary_obj.insert(features[col_index])

        return summary_dict