def test_multi(self):
    """Repeat the correctness check five times on freshly drawn data.

    Every round replaces ``self.table`` with a new ``np.random.randn``
    sample of ``self.data_num`` values and rebuilds
    ``self.quantile_summaries`` with a compress threshold of 10000 and a
    head size of 5000 before delegating to ``self.test_correctness()``.
    """
    for _ in range(5):
        # New sample and a new, empty summary object for each round.
        self.table = np.random.randn(self.data_num)
        self.quantile_summaries = QuantileSummaries(
            compress_thres=10000,
            head_size=5000,
            error=self.error,
        )
        self.test_correctness()
def setUp(self):
    """Prepare the default fixture used by the tests.

    Sets the percentile list to ``[90]``, the sample size to 1000, the
    error to 0.001, draws the sample with ``np.random.randn`` and creates
    a ``QuantileSummaries`` with a compress threshold of 1000 and a head
    size of 500.
    """
    # Plain configuration values.
    self.percentile_rate = [90]
    self.data_num = 1000
    self.error = 0.001

    # Random sample that the summary object is meant to be checked against.
    self.table = np.random.randn(self.data_num)

    self.quantile_summaries = QuantileSummaries(
        compress_thres=1000,
        head_size=500,
        error=self.error,
    )
def static_summaries_in_partition(data_instances, cols_dict, abnormal_list, error):
    """
    Build one QuantileSummaries per selected column through one traversal.

    Parameters
    ----------
    data_instances : iterable of (key, instance) pairs
        The input data. Each instance is either an ``Instance`` (its
        ``features`` attribute is read) or an indexable container of
        feature values that is used as-is. The key is ignored.

    cols_dict : dict
        Specify which column(s) need to apply statistic. Maps a column
        name to the index of that column in the feature container.

    abnormal_list : list
        Specify which values are not permitted. Passed unchanged to every
        ``QuantileSummaries`` as ``abnormal_list``.

    error :
        Passed unchanged to every ``QuantileSummaries`` as ``error``.

    Returns
    -------
    dict
        Maps each column name in ``cols_dict`` to the ``QuantileSummaries``
        object that received that column's values. Empty when
        ``cols_dict`` is empty.
    """
    # One independent summary object per requested column.
    summary_dict = {
        col_name: QuantileSummaries(abnormal_list=abnormal_list, error=error)
        for col_name in cols_dict
    }

    for _, instances in data_instances:
        # Rows may arrive wrapped in an Instance or as a bare feature container.
        features = instances.features if isinstance(instances, Instance) else instances
        for col_name, col_index in cols_dict.items():
            summary_dict[col_name].insert(features[col_index])

    return summary_dict