Beispiel #1
0
 def test_multi(self):
     """Run the correctness check five times, each time on fresh random data.

     Every round replaces ``self.table`` with ``self.data_num`` new samples
     from ``np.random.randn`` and rebuilds ``self.quantile_summaries`` with
     a compress threshold of 10000 and a head size of 5000 before calling
     ``self.test_correctness()``.
     """
     rounds = 5
     for _ in range(rounds):
         # New sample table first, so the check sees data matching the
         # freshly built (empty) summaries object.
         self.table = np.random.randn(self.data_num)
         summary_kwargs = dict(
             compress_thres=10000,
             head_size=5000,
             error=self.error,
         )
         self.quantile_summaries = QuantileSummaries(**summary_kwargs)
         self.test_correctness()
Beispiel #2
0
    def setUp(self):
        """Prepare the fixture: a random sample table and a summaries object.

        Sets ``percentile_rate``, ``data_num`` and ``error`` on the test
        instance, draws ``data_num`` samples with ``np.random.randn`` into
        ``table``, and builds ``quantile_summaries`` with a compress
        threshold of 1000 and a head size of 500.
        """
        # Plain scalar configuration.
        self.percentile_rate = [90]
        self.data_num = 1000
        self.error = 0.001

        # Random input data sized by the configuration above.
        self.table = np.random.randn(self.data_num)

        self.quantile_summaries = QuantileSummaries(
            compress_thres=1000, head_size=500, error=self.error)
Beispiel #3
0
    def static_summaries_in_partition(data_instances, cols_dict, abnormal_list,
                                      error):
        """
        Build one QuantileSummaries per requested column in a single pass.

        Parameters
        ----------
        data_instances : iterable of (key, instance) pairs
            The input rows. Each instance is either an ``Instance`` (its
            ``features`` attribute is used) or the feature container itself.
            The key is ignored.

        cols_dict : dict
            Maps column name to the index of that column inside the
            features container. Only these columns are summarised.

        abnormal_list : list
            Forwarded to every ``QuantileSummaries`` as ``abnormal_list``
            (presumably the values that are not permitted; confirm against
            ``QuantileSummaries``).

        error : float
            Forwarded to every ``QuantileSummaries`` as ``error``.

        Returns
        -------
        dict
            Column name -> ``QuantileSummaries`` that has received, via
            ``insert``, that column's value from every row.

        """
        per_column = {
            name: QuantileSummaries(abnormal_list=abnormal_list, error=error)
            for name in cols_dict
        }

        for _, row in data_instances:
            # Rows may arrive wrapped in an Instance or as bare features.
            row_features = row.features if isinstance(row, Instance) else row

            for name, index in cols_dict.items():
                cell = row_features[index]
                per_column[name].insert(cell)

        return per_column