def test_metrics_and_post_aggs_tree(self):
    """Resolve a post-aggregator dependency tree down to raw metrics.

    Metrics A-K are registered as post-aggregators and m1-m9 as plain
    aggregations, with a dependency graph wired between them.  Asking
    for A, B, m1 and m2 must surface exactly the nine raw aggregations
    and all eleven post-aggregators.
    """
    requested = ["A", "B", "m1", "m2"]
    metrics_dict = {}
    # Letters A..K become post-aggregators (True); m1..m9 plain
    # aggregations (False).
    for code in range(ord("A"), ord("K") + 1):
        emplace(metrics_dict, chr(code), True)
    for n in range(1, 10):
        emplace(metrics_dict, "m" + str(n), False)

    def depends_on(index, fields):
        # Accept either a single field name or a list of them.
        dependents = fields if isinstance(fields, list) else [fields]
        metrics_dict[index].json_obj = {"fieldNames": dependents}

    depends_on("A", ["m1", "D", "C"])
    depends_on("B", ["B", "C", "E", "F", "m3"])
    depends_on("C", ["H", "I"])
    depends_on("D", ["m2", "m5", "G", "C"])
    depends_on("E", ["H", "I", "J"])
    depends_on("F", ["J", "m5"])
    depends_on("G", ["m4", "m7", "m6", "A"])
    depends_on("H", ["A", "m4", "I"])
    depends_on("I", ["H", "K"])
    depends_on("J", "K")
    depends_on("K", ["m8", "m9"])

    aggs, postaggs = DruidDatasource.metrics_and_post_aggs(
        requested, metrics_dict)

    # Exactly m1..m9 must appear as raw aggregations.
    leftover = set(aggs.keys())
    self.assertEqual(9, len(aggs))
    for n in range(1, 10):
        leftover.remove("m" + str(n))
    self.assertEqual(0, len(leftover))

    # Every post-aggregator A..K must be present, and nothing else.
    self.assertEqual(11, len(postaggs))
    for code in range(ord("A"), ord("K") + 1):
        del postaggs[chr(code)]
    self.assertEqual(0, len(postaggs))
def test_metrics_and_post_aggs_tree(self):
    """Walk a tree of dependent post-aggregators back to raw metrics.

    Post-aggregators 'A'..'K' depend on each other and on plain
    aggregations 'm1'..'m9'; requesting a subset must pull in every
    transitive dependency on both sides.
    """
    metrics = ['A', 'B', 'm1', 'm2']
    metrics_dict = {}
    letters = [chr(i) for i in range(ord('A'), ord('K') + 1)]
    numbered = ['m' + str(i) for i in range(1, 10)]
    # Post-aggregators are flagged True, plain aggregations False.
    for name in letters:
        emplace(metrics_dict, name, True)
    for name in numbered:
        emplace(metrics_dict, name, False)

    def depends_on(index, fields):
        # A bare string means a single dependency.
        dependents = fields if isinstance(fields, list) else [fields]
        metrics_dict[index].json_obj = {'fieldNames': dependents}

    depends_on('A', ['m1', 'D', 'C'])
    depends_on('B', ['B', 'C', 'E', 'F', 'm3'])
    depends_on('C', ['H', 'I'])
    depends_on('D', ['m2', 'm5', 'G', 'C'])
    depends_on('E', ['H', 'I', 'J'])
    depends_on('F', ['J', 'm5'])
    depends_on('G', ['m4', 'm7', 'm6', 'A'])
    depends_on('H', ['A', 'm4', 'I'])
    depends_on('I', ['H', 'K'])
    depends_on('J', 'K')
    depends_on('K', ['m8', 'm9'])

    aggs, postaggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)

    # The raw aggregations must be exactly m1..m9.
    remaining = set(aggs.keys())
    self.assertEqual(9, len(aggs))
    for name in numbered:
        remaining.remove(name)
    self.assertEqual(0, len(remaining))

    # The post-aggregations must be exactly A..K.
    self.assertEqual(11, len(postaggs))
    for name in letters:
        del postaggs[name]
    self.assertEqual(0, len(postaggs))
def test_metrics_and_post_aggs_tree(self):
    """Resolve a dependency tree of post-aggregators to raw metrics.

    Same graph as the dict-based variant, but this API returns the raw
    metric names as a list (``all_metrics``) rather than an agg dict.
    """
    metrics = ['A', 'B', 'm1', 'm2']
    metrics_dict = {}
    # 'A'..'K' are post-aggregators, 'm1'..'m9' plain aggregations.
    for idx in range(ord('A'), ord('K') + 1):
        emplace(metrics_dict, chr(idx), True)
    for idx in range(1, 10):
        emplace(metrics_dict, 'm' + str(idx), False)

    def depends_on(index, fields):
        # Normalize a lone field name into a one-element list.
        dependents = fields if isinstance(fields, list) else [fields]
        metrics_dict[index].json_obj = {'fieldNames': dependents}

    depends_on('A', ['m1', 'D', 'C'])
    depends_on('B', ['B', 'C', 'E', 'F', 'm3'])
    depends_on('C', ['H', 'I'])
    depends_on('D', ['m2', 'm5', 'G', 'C'])
    depends_on('E', ['H', 'I', 'J'])
    depends_on('F', ['J', 'm5'])
    depends_on('G', ['m4', 'm7', 'm6', 'A'])
    depends_on('H', ['A', 'm4', 'I'])
    depends_on('I', ['H', 'K'])
    depends_on('J', 'K')
    depends_on('K', ['m8', 'm9'])

    all_metrics, postaggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)

    # Raw metrics resolve to exactly m1..m9, with no duplicates.
    unchecked = set(all_metrics)
    self.assertEqual(9, len(all_metrics))
    for idx in range(1, 10):
        unchecked.remove('m' + str(idx))
    self.assertEqual(0, len(unchecked))

    # All eleven post-aggregators A..K appear, and nothing else.
    self.assertEqual(11, len(postaggs))
    for idx in range(ord('A'), ord('K') + 1):
        del postaggs[chr(idx)]
    self.assertEqual(0, len(postaggs))
def test_metrics_and_post_aggs(self):
    """
    Test generation of metrics and post-aggregations from an initial
    list of superset metrics (which may include the results of either).
    This primarily tests that specifying a post-aggregator metric will
    also require the raw aggregation of the associated druid metric
    column.
    """
    metrics_dict = {
        "unused_count": DruidMetric(
            metric_name="unused_count",
            verbose_name="COUNT(*)",
            metric_type="count",
            json=json.dumps({"type": "count", "name": "unused_count"}),
        ),
        "some_sum": DruidMetric(
            metric_name="some_sum",
            verbose_name="SUM(*)",
            metric_type="sum",
            json=json.dumps({"type": "sum", "name": "sum"}),
        ),
        "a_histogram": DruidMetric(
            metric_name="a_histogram",
            verbose_name="APPROXIMATE_HISTOGRAM(*)",
            metric_type="approxHistogramFold",
            json=json.dumps(
                {"type": "approxHistogramFold", "name": "a_histogram"},
            ),
        ),
        "aCustomMetric": DruidMetric(
            metric_name="aCustomMetric",
            verbose_name="MY_AWESOME_METRIC(*)",
            metric_type="aCustomType",
            json=json.dumps(
                {"type": "customMetric", "name": "aCustomMetric"},
            ),
        ),
        "quantile_p95": DruidMetric(
            metric_name="quantile_p95",
            verbose_name="P95(*)",
            metric_type="postagg",
            json=json.dumps({
                "type": "quantile",
                "probability": 0.95,
                "name": "p95",
                "fieldName": "a_histogram",
            }),
        ),
        "aCustomPostAgg": DruidMetric(
            metric_name="aCustomPostAgg",
            verbose_name="CUSTOM_POST_AGG(*)",
            metric_type="postagg",
            json=json.dumps({
                "type": "customPostAgg",
                "name": "aCustomPostAgg",
                "field": {"type": "fieldAccess", "fieldName": "aCustomMetric"},
            }),
        ),
    }
    adhoc_metric = {
        "expressionType": "SIMPLE",
        "column": {"type": "DOUBLE", "column_name": "value"},
        "aggregate": "SUM",
        "label": "My Adhoc Metric",
    }

    def resolve(requested):
        # Shorthand for the classmethod under test.
        return DruidDatasource.metrics_and_post_aggs(requested, metrics_dict)

    # A plain saved aggregation resolves to itself, no post-aggs.
    saved_metrics, post_aggs = resolve(["some_sum"])
    assert set(saved_metrics.keys()) == {"some_sum"}
    assert post_aggs == {}

    # An adhoc metric is keyed by its label.
    saved_metrics, post_aggs = resolve([adhoc_metric])
    assert set(saved_metrics.keys()) == {adhoc_metric["label"]}
    assert post_aggs == {}

    # Saved and adhoc metrics can be mixed.
    saved_metrics, post_aggs = resolve(["some_sum", adhoc_metric])
    assert set(saved_metrics.keys()) == {"some_sum", adhoc_metric["label"]}
    assert post_aggs == {}

    # A post-agg pulls in the raw aggregation it depends on.
    saved_metrics, post_aggs = resolve(["quantile_p95"])
    assert set(saved_metrics.keys()) == {"a_histogram"}
    assert set(post_aggs.keys()) == {"quantile_p95"}

    # Same for a custom post-agg referencing a custom metric.
    saved_metrics, post_aggs = resolve(["aCustomPostAgg"])
    assert set(saved_metrics.keys()) == {"aCustomMetric"}
    assert set(post_aggs.keys()) == {"aCustomPostAgg"}
def test_metrics_and_post_aggs(self):
    """
    Test generation of metrics and post-aggregations from an initial
    list of superset metrics (which may include the results of either).
    This primarily tests that specifying a post-aggregator metric will
    also require the raw aggregation of the associated druid metric
    column.
    """
    metrics_dict = {
        'unused_count': DruidMetric(
            metric_name='unused_count',
            verbose_name='COUNT(*)',
            metric_type='count',
            json=json.dumps({'type': 'count', 'name': 'unused_count'}),
        ),
        'some_sum': DruidMetric(
            metric_name='some_sum',
            verbose_name='SUM(*)',
            metric_type='sum',
            json=json.dumps({'type': 'sum', 'name': 'sum'}),
        ),
        'a_histogram': DruidMetric(
            metric_name='a_histogram',
            verbose_name='APPROXIMATE_HISTOGRAM(*)',
            metric_type='approxHistogramFold',
            json=json.dumps(
                {'type': 'approxHistogramFold', 'name': 'a_histogram'},
            ),
        ),
        'aCustomMetric': DruidMetric(
            metric_name='aCustomMetric',
            verbose_name='MY_AWESOME_METRIC(*)',
            metric_type='aCustomType',
            json=json.dumps(
                {'type': 'customMetric', 'name': 'aCustomMetric'},
            ),
        ),
        'quantile_p95': DruidMetric(
            metric_name='quantile_p95',
            verbose_name='P95(*)',
            metric_type='postagg',
            json=json.dumps({
                'type': 'quantile',
                'probability': 0.95,
                'name': 'p95',
                'fieldName': 'a_histogram',
            }),
        ),
        'aCustomPostAgg': DruidMetric(
            metric_name='aCustomPostAgg',
            verbose_name='CUSTOM_POST_AGG(*)',
            metric_type='postagg',
            json=json.dumps({
                'type': 'customPostAgg',
                'name': 'aCustomPostAgg',
                'field': {
                    'type': 'fieldAccess',
                    'fieldName': 'aCustomMetric',
                },
            }),
        ),
    }
    adhoc_metric = {
        'expressionType': 'SIMPLE',
        'column': {'type': 'DOUBLE', 'column_name': 'value'},
        'aggregate': 'SUM',
        'label': 'My Adhoc Metric',
    }

    # Case 1: a saved aggregation resolves to itself with no post-aggs.
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        ['some_sum'], metrics_dict)
    assert set(saved_metrics.keys()) == {'some_sum'}
    assert post_aggs == {}

    # Case 2: an adhoc metric is keyed by its label.
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        [adhoc_metric], metrics_dict)
    assert set(saved_metrics.keys()) == {adhoc_metric['label']}
    assert post_aggs == {}

    # Case 3: saved and adhoc metrics combine.
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        ['some_sum', adhoc_metric], metrics_dict)
    assert set(saved_metrics.keys()) == {'some_sum', adhoc_metric['label']}
    assert post_aggs == {}

    # Case 4: a post-agg requires its underlying raw aggregation.
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        ['quantile_p95'], metrics_dict)
    assert set(saved_metrics.keys()) == {'a_histogram'}
    assert set(post_aggs.keys()) == {'quantile_p95'}

    # Case 5: same rule for a custom post-agg on a custom metric.
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        ['aCustomPostAgg'], metrics_dict)
    assert set(saved_metrics.keys()) == {'aCustomMetric'}
    assert set(post_aggs.keys()) == {'aCustomPostAgg'}
def test_metrics_and_post_aggs(self):
    """
    Test generation of metrics and post-aggregations from an initial
    list of superset metrics (which may include the results of either).
    This primarily tests that specifying a post-aggregator metric will
    also require the raw aggregation of the associated druid metric
    column.
    """
    metrics_dict = {
        'unused_count': DruidMetric(
            metric_name='unused_count',
            verbose_name='COUNT(*)',
            metric_type='count',
            json=json.dumps({'type': 'count', 'name': 'unused_count'}),
        ),
        'some_sum': DruidMetric(
            metric_name='some_sum',
            verbose_name='SUM(*)',
            metric_type='sum',
            json=json.dumps({'type': 'sum', 'name': 'sum'}),
        ),
        'a_histogram': DruidMetric(
            metric_name='a_histogram',
            verbose_name='APPROXIMATE_HISTOGRAM(*)',
            metric_type='approxHistogramFold',
            json=json.dumps(
                {'type': 'approxHistogramFold', 'name': 'a_histogram'},
            ),
        ),
        'aCustomMetric': DruidMetric(
            metric_name='aCustomMetric',
            verbose_name='MY_AWESOME_METRIC(*)',
            metric_type='aCustomType',
            json=json.dumps(
                {'type': 'customMetric', 'name': 'aCustomMetric'},
            ),
        ),
        'quantile_p95': DruidMetric(
            metric_name='quantile_p95',
            verbose_name='P95(*)',
            metric_type='postagg',
            json=json.dumps({
                'type': 'quantile',
                'probability': 0.95,
                'name': 'p95',
                'fieldName': 'a_histogram',
            }),
        ),
        'aCustomPostAgg': DruidMetric(
            metric_name='aCustomPostAgg',
            verbose_name='CUSTOM_POST_AGG(*)',
            metric_type='postagg',
            json=json.dumps({
                'type': 'customPostAgg',
                'name': 'aCustomPostAgg',
                'field': {
                    'type': 'fieldAccess',
                    'fieldName': 'aCustomMetric',
                },
            }),
        ),
    }

    def resolve(requested):
        # Shorthand for the classmethod under test.
        return DruidDatasource.metrics_and_post_aggs(requested, metrics_dict)

    # A plain aggregation resolves to itself with no post-aggs.
    all_metrics, post_aggs = resolve(['some_sum'])
    assert all_metrics == ['some_sum']
    assert post_aggs == {}

    # A post-agg pulls in the raw aggregation it references.
    all_metrics, post_aggs = resolve(['quantile_p95'])
    assert all_metrics == ['a_histogram']
    assert set(post_aggs.keys()) == {'quantile_p95'}

    # Same for a custom post-agg referencing a custom metric.
    all_metrics, post_aggs = resolve(['aCustomPostAgg'])
    assert all_metrics == ['aCustomMetric']
    assert set(post_aggs.keys()) == {'aCustomPostAgg'}