def test_get_filters_handles_arrays_for_string_types(self):
    filtr = {'col': 'A', 'op': '==', 'val': ['a', 'b']}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('a', res.filter['filter']['value'])
    filtr = {'col': 'A', 'op': '==', 'val': []}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('', res.filter['filter']['value'])
def test_get_filters_converts_strings_to_num(self):
    filtr = {'col': 'A', 'op': 'in', 'val': ['6']}
    res = DruidDatasource.get_filters([filtr], ['A'])
    self.assertEqual(6, res.filter['filter']['value'])
    filtr = {'col': 'A', 'op': '==', 'val': '6'}
    res = DruidDatasource.get_filters([filtr], ['A'])
    self.assertEqual(6, res.filter['filter']['value'])
def test_get_filters_converts_strings_to_num(self):
    filtr = {'col': 'A', 'op': 'in', 'val': ['6']}
    col = DruidColumn(column_name='A')
    column_dict = {'A': col}
    res = DruidDatasource.get_filters([filtr], ['A'], column_dict)
    self.assertEqual(6, res.filter['filter']['value'])
    filtr = {'col': 'A', 'op': '==', 'val': '6'}
    res = DruidDatasource.get_filters([filtr], ['A'], column_dict)
    self.assertEqual(6, res.filter['filter']['value'])
def test_get_filters_handles_arrays_for_string_types(self):
    filtr = {'col': 'A', 'op': '==', 'val': ['a', 'b']}
    col = DruidColumn(column_name='A')
    column_dict = {'A': col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual('a', res.filter['filter']['value'])
    filtr = {'col': 'A', 'op': '==', 'val': []}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertIsNone(res.filter['filter']['value'])
def test_get_filters_constructs_equals_for_in_not_in_single_value(self):
    filtr = {'col': 'A', 'op': 'in', 'val': ['a']}
    cola = DruidColumn(column_name='A')
    colb = DruidColumn(column_name='B')
    column_dict = {'A': cola, 'B': colb}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual('selector', res.filter['filter']['type'])
def test_get_filters_ignores_in_not_in_with_empty_value(self):
    filtr1 = {'col': 'A', 'op': 'in', 'val': []}
    filtr2 = {'col': 'A', 'op': 'not in', 'val': []}
    col = DruidColumn(column_name='A')
    column_dict = {'A': col}
    res = DruidDatasource.get_filters([filtr1, filtr2], [], column_dict)
    self.assertIsNone(res)
def test_import_druid_override_identical(self):
    datasource = self.create_druid_datasource(
        'copy_cat', id=10004,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_id = DruidDatasource.import_obj(
        datasource, import_time=1993)
    copy_datasource = self.create_druid_datasource(
        'copy_cat', id=10004,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_id_copy = DruidDatasource.import_obj(
        copy_datasource, import_time=1994)
    self.assertEquals(imported_id, imported_id_copy)
    self.assert_datasource_equals(
        copy_datasource, self.get_datasource(imported_id))
def test_metrics_and_post_aggs_tree(self):
    metrics = ['A', 'B', 'm1', 'm2']
    metrics_dict = {}
    for i in range(ord('A'), ord('K') + 1):
        emplace(metrics_dict, chr(i), True)
    for i in range(1, 10):
        emplace(metrics_dict, 'm' + str(i), False)

    def depends_on(index, fields):
        dependents = fields if isinstance(fields, list) else [fields]
        metrics_dict[index].json_obj = {'fieldNames': dependents}

    depends_on('A', ['m1', 'D', 'C'])
    depends_on('B', ['B', 'C', 'E', 'F', 'm3'])
    depends_on('C', ['H', 'I'])
    depends_on('D', ['m2', 'm5', 'G', 'C'])
    depends_on('E', ['H', 'I', 'J'])
    depends_on('F', ['J', 'm5'])
    depends_on('G', ['m4', 'm7', 'm6', 'A'])
    depends_on('H', ['A', 'm4', 'I'])
    depends_on('I', ['H', 'K'])
    depends_on('J', 'K')
    depends_on('K', ['m8', 'm9'])
    all_metrics, postaggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    expected_metrics = set(all_metrics)
    self.assertEqual(9, len(all_metrics))
    for i in range(1, 10):
        expected_metrics.remove('m' + str(i))
    self.assertEqual(0, len(expected_metrics))
    self.assertEqual(11, len(postaggs))
    for i in range(ord('A'), ord('K') + 1):
        del postaggs[chr(i)]
    self.assertEqual(0, len(postaggs))
def test_get_filters_constructs_filter_in(self):
    filtr = {'col': 'A', 'op': 'in', 'val': ['a', 'b', 'c']}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertIn('filter', res.filter)
    self.assertIn('fields', res.filter['filter'])
    self.assertEqual('or', res.filter['filter']['type'])
    self.assertEqual(3, len(res.filter['filter']['fields']))
def test_get_filters_constructs_regex_filter(self):
    filtr = {'col': 'A', 'op': 'regex', 'val': '[abc]'}
    col = DruidColumn(column_name='A')
    column_dict = {'A': col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual('regex', res.filter['filter']['type'])
    self.assertEqual('[abc]', res.filter['filter']['pattern'])
    self.assertEqual('A', res.filter['filter']['dimension'])
def test_get_filters_constructs_filter_equals(self):
    filtr = {'col': 'A', 'op': '==', 'val': 'h'}
    col = DruidColumn(column_name='A')
    column_dict = {'A': col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual('selector', res.filter['filter']['type'])
    self.assertEqual('A', res.filter['filter']['dimension'])
    self.assertEqual('h', res.filter['filter']['value'])
def test_get_filters_constructs_bounds_filter(self):
    filtr = {'col': 'A', 'op': '>=', 'val': 'h'}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertFalse(res.filter['filter']['lowerStrict'])
    self.assertEqual('A', res.filter['filter']['dimension'])
    self.assertEqual('h', res.filter['filter']['lower'])
    self.assertFalse(res.filter['filter']['alphaNumeric'])
    filtr['op'] = '>'
    res = DruidDatasource.get_filters([filtr], [])
    self.assertTrue(res.filter['filter']['lowerStrict'])
    filtr['op'] = '<='
    res = DruidDatasource.get_filters([filtr], [])
    self.assertFalse(res.filter['filter']['upperStrict'])
    self.assertEqual('h', res.filter['filter']['upper'])
    filtr['op'] = '<'
    res = DruidDatasource.get_filters([filtr], [])
    self.assertTrue(res.filter['filter']['upperStrict'])
def test_get_filters_constructs_filter_not_equals(self):
    filtr = {'col': 'A', 'op': '!=', 'val': 'h'}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('not', res.filter['filter']['type'])
    self.assertEqual(
        'h',
        res.filter['filter']['field'].filter['filter']['value'],
    )
def test_import_druid_no_metadata(self):
    datasource, dict_datasource = self.create_druid_datasource(
        'pure_druid', id=ID_PREFIX + 1)
    imported_cluster = DruidDatasource.import_from_dict(
        db.session, dict_datasource)
    db.session.commit()
    imported = self.get_datasource(imported_cluster.id)
    self.assert_datasource_equals(datasource, imported)
def test_import_druid_2_col_2_met(self):
    datasource = self.create_druid_datasource(
        'druid_2_col_2_met', id=10003,
        cols_names=['c1', 'c2'],
        metric_names=['m1', 'm2'])
    imported_id = DruidDatasource.import_obj(
        datasource, import_time=1991)
    imported = self.get_datasource(imported_id)
    self.assert_datasource_equals(datasource, imported)
def test_get_aggregations(self):
    ds = DruidDatasource(datasource_name='datasource')
    metrics_dict = {
        'sum1': DruidMetric(
            metric_name='sum1',
            metric_type='doubleSum',
            json=json.dumps({'type': 'doubleSum', 'name': 'sum1'}),
        ),
        'sum2': DruidMetric(
            metric_name='sum2',
            metric_type='doubleSum',
            json=json.dumps({'type': 'doubleSum', 'name': 'sum2'}),
        ),
        'div1': DruidMetric(
            metric_name='div1',
            metric_type='postagg',
            json=json.dumps({
                'fn': '/',
                'type': 'arithmetic',
                'name': 'div1',
                'fields': [
                    {
                        'fieldName': 'sum1',
                        'type': 'fieldAccess',
                    },
                    {
                        'fieldName': 'sum2',
                        'type': 'fieldAccess',
                    },
                ],
            }),
        ),
    }
    metric_names = ['sum1', 'sum2']
    aggs = ds.get_aggregations(metrics_dict, metric_names)
    expected_agg = {name: metrics_dict[name].json_obj for name in metric_names}
    self.assertEqual(expected_agg, aggs)
    metric_names = ['sum1', 'col1']
    self.assertRaises(
        SupersetException, ds.get_aggregations, metrics_dict, metric_names)
    metric_names = ['sum1', 'div1']
    self.assertRaises(
        SupersetException, ds.get_aggregations, metrics_dict, metric_names)
def test_import_druid_2_col_2_met(self):
    datasource, dict_datasource = self.create_druid_datasource(
        'druid_2_col_2_met', id=ID_PREFIX + 3,
        cols_names=['c1', 'c2'],
        metric_names=['m1', 'm2'])
    imported_cluster = DruidDatasource.import_from_dict(
        db.session, dict_datasource)
    db.session.commit()
    imported = self.get_datasource(imported_cluster.id)
    self.assert_datasource_equals(datasource, imported)
def test_get_filters_composes_multiple_filters(self):
    filtr1 = {'col': 'A', 'op': '!=', 'val': 'y'}
    filtr2 = {'col': 'B', 'op': 'in', 'val': ['a', 'b', 'c']}
    cola = DruidColumn(column_name='A')
    colb = DruidColumn(column_name='B')
    column_dict = {'A': cola, 'B': colb}
    res = DruidDatasource.get_filters([filtr1, filtr2], [], column_dict)
    self.assertEqual('and', res.filter['filter']['type'])
    self.assertEqual(2, len(res.filter['filter']['fields']))
def test_import_druid_override_identical(self):
    datasource, dict_datasource = self.create_druid_datasource(
        'copy_cat', id=ID_PREFIX + 4,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported = DruidDatasource.import_from_dict(
        session=db.session, dict_rep=dict_datasource)
    db.session.commit()
    copy_datasource, dict_cp_datasource = self.create_druid_datasource(
        'copy_cat', id=ID_PREFIX + 4,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_copy = DruidDatasource.import_from_dict(
        db.session, dict_cp_datasource)
    db.session.commit()
    self.assertEquals(imported.id, imported_copy.id)
    self.assert_datasource_equals(
        copy_datasource, self.get_datasource(imported.id))
def test_import_druid_override(self):
    datasource = self.create_druid_datasource(
        'druid_override', id=10004,
        cols_names=['col1'],
        metric_names=['m1'])
    imported_id = DruidDatasource.import_obj(
        datasource, import_time=1991)
    table_over = self.create_druid_datasource(
        'druid_override', id=10004,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_over_id = DruidDatasource.import_obj(
        table_over, import_time=1992)
    imported_over = self.get_datasource(imported_over_id)
    self.assertEquals(imported_id, imported_over.id)
    expected_datasource = self.create_druid_datasource(
        'druid_override', id=10004,
        metric_names=['new_metric1', 'm1'],
        cols_names=['col1', 'new_col1', 'col2', 'col3'])
    self.assert_datasource_equals(expected_datasource, imported_over)
def test_get_filters_constructs_filter_not_equals(self):
    filtr = {'col': 'A', 'op': '!=', 'val': 'h'}
    col = DruidColumn(column_name='A')
    column_dict = {'A': col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual('not', res.filter['filter']['type'])
    self.assertEqual(
        'h',
        res.filter['filter']['field'].filter['filter']['value'],
    )
def test_import_druid_1_col_1_met(self):
    datasource = self.create_druid_datasource(
        'druid_1_col_1_met', id=10002,
        cols_names=["col1"],
        metric_names=["metric1"])
    imported_id = DruidDatasource.import_obj(
        datasource, import_time=1990)
    imported = self.get_datasource(imported_id)
    self.assert_datasource_equals(datasource, imported)
    self.assertEquals(
        {'remote_id': 10002, 'import_time': 1990,
         'database_name': 'druid_test'},
        json.loads(imported.params))
def test_get_filters_constructs_filter_not_in(self):
    filtr = {'col': 'A', 'op': 'not in', 'val': ['a', 'b', 'c']}
    col = DruidColumn(column_name='A')
    column_dict = {'A': col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertIn('filter', res.filter)
    self.assertIn('type', res.filter['filter'])
    self.assertEqual('not', res.filter['filter']['type'])
    self.assertIn('field', res.filter['filter'])
    self.assertEqual(
        3,
        len(res.filter['filter']['field'].filter['filter']['fields']),
    )
def test_import_druid_override_append(self):
    datasource, dict_datasource = self.create_druid_datasource(
        'druid_override', id=ID_PREFIX + 3,
        cols_names=['col1'],
        metric_names=['m1'])
    imported_cluster = DruidDatasource.import_from_dict(
        db.session, dict_datasource)
    db.session.commit()
    table_over, table_over_dict = self.create_druid_datasource(
        'druid_override', id=ID_PREFIX + 3,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_over_cluster = DruidDatasource.import_from_dict(
        db.session, table_over_dict)
    db.session.commit()
    imported_over = self.get_datasource(imported_over_cluster.id)
    self.assertEquals(imported_cluster.id, imported_over.id)
    expected_datasource, _ = self.create_druid_datasource(
        'druid_override', id=ID_PREFIX + 3,
        metric_names=['new_metric1', 'm1'],
        cols_names=['col1', 'new_col1', 'col2', 'col3'])
    self.assert_datasource_equals(expected_datasource, imported_over)
def test_set_perm_druid_datasource(self):
    self.create_druid_test_objects()
    session = db.session
    druid_cluster = (
        session.query(DruidCluster)
        .filter_by(cluster_name="druid_test")
        .one()
    )
    datasource = DruidDatasource(
        datasource_name="tmp_datasource",
        cluster=druid_cluster,
        cluster_id=druid_cluster.id,
    )
    session.add(datasource)
    session.commit()

    # store without a schema
    stored_datasource = (
        session.query(DruidDatasource)
        .filter_by(datasource_name="tmp_datasource")
        .one()
    )
    self.assertEqual(
        stored_datasource.perm,
        f"[druid_test].[tmp_datasource](id:{stored_datasource.id})",
    )
    self.assertIsNotNone(
        security_manager.find_permission_view_menu(
            "datasource_access", stored_datasource.perm))
    self.assertIsNone(stored_datasource.schema_perm)

    # store with a schema
    stored_datasource.datasource_name = "tmp_schema.tmp_datasource"
    session.commit()
    self.assertEqual(
        stored_datasource.perm,
        f"[druid_test].[tmp_schema.tmp_datasource](id:{stored_datasource.id})",
    )
    self.assertIsNotNone(
        security_manager.find_permission_view_menu(
            "datasource_access", stored_datasource.perm))
    self.assertEqual(
        stored_datasource.schema_perm, "[druid_test].[tmp_schema]")
    self.assertIsNotNone(
        security_manager.find_permission_view_menu(
            "schema_access", stored_datasource.schema_perm))
    session.delete(stored_datasource)
    session.commit()
def test_import_druid_1_col_1_met(self):
    datasource, dict_datasource = self.create_druid_datasource(
        "druid_1_col_1_met",
        id=ID_PREFIX + 2,
        cols_names=["col1"],
        metric_names=["metric1"],
    )
    imported_cluster = DruidDatasource.import_from_dict(
        db.session, dict_datasource)
    db.session.commit()
    imported = self.get_datasource(imported_cluster.id)
    self.assert_datasource_equals(datasource, imported)
    self.assertEqual(
        {DBREF: ID_PREFIX + 2, "database_name": "druid_test"},
        json.loads(imported.params),
    )
def test_get_filters_extraction_fn_map(self):
    filters = [{"col": "deviceName", "val": ["iPhone X"], "op": "in"}]
    dimension_spec = {
        "type": "extraction",
        "dimension": "device",
        "outputName": "deviceName",
        "outputType": "STRING",
        "extractionFn": {
            "type": "lookup",
            "dimension": "dimensionName",
            "outputName": "dimensionOutputName",
            "replaceMissingValueWith": "missing_value",
            "retainMissingValue": False,
            "lookup": {
                "type": "map",
                "map": {
                    "iPhone10,1": "iPhone 8",
                    "iPhone10,4": "iPhone 8",
                    "iPhone10,2": "iPhone 8 Plus",
                    "iPhone10,5": "iPhone 8 Plus",
                    "iPhone10,3": "iPhone X",
                    "iPhone10,6": "iPhone X",
                },
                "isOneToOne": False,
            },
        },
    }
    spec_json = json.dumps(dimension_spec)
    col = DruidColumn(column_name="deviceName",
                      dimension_spec_json=spec_json)
    column_dict = {"deviceName": col}
    f = DruidDatasource.get_filters(filters, [], column_dict)
    assert isinstance(f.extraction_function, MapLookupExtraction)
    dim_ext_fn = dimension_spec["extractionFn"]
    f_ext_fn = f.extraction_function
    self.assertEqual(dim_ext_fn["lookup"]["map"], f_ext_fn._mapping)
    self.assertEqual(dim_ext_fn["lookup"]["isOneToOne"],
                     f_ext_fn._injective)
    self.assertEqual(dim_ext_fn["replaceMissingValueWith"],
                     f_ext_fn._replace_missing_values)
    self.assertEqual(dim_ext_fn["retainMissingValue"],
                     f_ext_fn._retain_missing_values)
def test_run_query_multiple_groupby(self):
    client = Mock()
    from_dttm = Mock()
    to_dttm = Mock()
    from_dttm.replace = Mock(return_value=from_dttm)
    to_dttm.replace = Mock(return_value=to_dttm)
    from_dttm.isoformat = Mock(return_value='from')
    to_dttm.isoformat = Mock(return_value='to')
    timezone = 'timezone'
    from_dttm.tzname = Mock(return_value=timezone)
    ds = DruidDatasource(datasource_name='datasource')
    metric1 = DruidMetric(metric_name='metric1')
    metric2 = DruidMetric(metric_name='metric2')
    ds.metrics = [metric1, metric2]
    col1 = DruidColumn(column_name='col1')
    col2 = DruidColumn(column_name='col2')
    ds.columns = [col1, col2]
    all_metrics = []
    post_aggs = ['some_agg']
    ds._metrics_and_post_aggs = Mock(return_value=(all_metrics, post_aggs))
    groupby = ['col1', 'col2']
    metrics = ['metric1']
    ds.get_having_filters = Mock(return_value=[])
    client.query_builder = Mock()
    client.query_builder.last_query = Mock()
    client.query_builder.last_query.query_dict = {'mock': 0}
    # multiple groupby columns call client.groupby
    ds.run_query(
        groupby, metrics, None, from_dttm, to_dttm,
        client=client, row_limit=100, filter=[],
    )
    self.assertEqual(0, len(client.topn.call_args_list))
    self.assertEqual(1, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    # check that the dimensions entry carries both groupby columns
    called_args = client.groupby.call_args_list[0][1]
    self.assertIn('dimensions', called_args)
    self.assertEqual(['col1', 'col2'], called_args['dimensions'])
def test_run_query_with_adhoc_metric(self):
    client = Mock()
    from_dttm = Mock()
    to_dttm = Mock()
    from_dttm.replace = Mock(return_value=from_dttm)
    to_dttm.replace = Mock(return_value=to_dttm)
    from_dttm.isoformat = Mock(return_value='from')
    to_dttm.isoformat = Mock(return_value='to')
    timezone = 'timezone'
    from_dttm.tzname = Mock(return_value=timezone)
    ds = DruidDatasource(datasource_name='datasource')
    metric1 = DruidMetric(metric_name='metric1')
    metric2 = DruidMetric(metric_name='metric2')
    ds.metrics = [metric1, metric2]
    col1 = DruidColumn(column_name='col1')
    col2 = DruidColumn(column_name='col2')
    ds.columns = [col1, col2]
    all_metrics = []
    post_aggs = ['some_agg']
    ds._metrics_and_post_aggs = Mock(return_value=(all_metrics, post_aggs))
    groupby = []
    metrics = [{
        'expressionType': 'SIMPLE',
        'column': {'type': 'DOUBLE', 'column_name': 'col1'},
        'aggregate': 'SUM',
        'label': 'My Adhoc Metric',
    }]
    ds.get_having_filters = Mock(return_value=[])
    client.query_builder = Mock()
    client.query_builder.last_query = Mock()
    client.query_builder.last_query.query_dict = {'mock': 0}
    # no groupby calls client.timeseries
    ds.run_query(
        groupby, metrics, None, from_dttm, to_dttm,
        client=client, filter=[], row_limit=100,
    )
    self.assertEqual(0, len(client.topn.call_args_list))
    self.assertEqual(0, len(client.groupby.call_args_list))
    self.assertEqual(1, len(client.timeseries.call_args_list))
    # check that there is no dimensions entry
    called_args = client.timeseries.call_args_list[0][1]
    self.assertNotIn('dimensions', called_args)
    self.assertIn('post_aggregations', called_args)
def create_druid_datasource(self, name, id=0, cols_names=[],
                            metric_names=[]):
    params = {
        'remote_id': id,
        'database_name': 'druid_test',
        'import-export-test': True,
    }
    datasource = DruidDatasource(
        id=id,
        datasource_name=name,
        cluster_name='druid_test',
        params=json.dumps(params),
    )
    for col_name in cols_names:
        datasource.columns.append(DruidColumn(column_name=col_name))
    for metric_name in metric_names:
        datasource.metrics.append(DruidMetric(metric_name=metric_name))
    return datasource
def test_get_filters_extraction_fn_regex(self):
    filters = [{"col": "buildPrefix", "val": ["22B"], "op": "in"}]
    dimension_spec = {
        "type": "extraction",
        "dimension": "build",
        "outputName": "buildPrefix",
        "outputType": "STRING",
        "extractionFn": {
            "type": "regex",
            "expr": "(^[0-9A-Za-z]{3})",
        },
    }
    spec_json = json.dumps(dimension_spec)
    col = DruidColumn(column_name="buildPrefix",
                      dimension_spec_json=spec_json)
    column_dict = {"buildPrefix": col}
    f = DruidDatasource.get_filters(filters, [], column_dict)
    assert isinstance(f.extraction_function, RegexExtraction)
    dim_ext_fn = dimension_spec["extractionFn"]
    f_ext_fn = f.extraction_function
    self.assertEqual(dim_ext_fn["expr"], f_ext_fn._expr)
def test_get_filters_extraction_fn_registered_lookup_extraction(self):
    filters = [{"col": "country", "val": ["Spain"], "op": "in"}]
    dimension_spec = {
        "type": "extraction",
        "dimension": "country_name",
        "outputName": "country",
        "outputType": "STRING",
        "extractionFn": {
            "type": "registeredLookup",
            "lookup": "country_name",
        },
    }
    spec_json = json.dumps(dimension_spec)
    col = DruidColumn(column_name="country", dimension_spec_json=spec_json)
    column_dict = {"country": col}
    f = DruidDatasource.get_filters(filters, [], column_dict)
    assert isinstance(f.extraction_function, RegisteredLookupExtraction)
    dim_ext_fn = dimension_spec["extractionFn"]
    self.assertEqual(dim_ext_fn["type"],
                     f.extraction_function.extraction_type)
    self.assertEqual(dim_ext_fn["lookup"], f.extraction_function._lookup)
def create_druid_datasource(self, name, id=0, cols_names=[],
                            metric_names=[]):
    params = {
        "remote_id": id,
        "database_name": "druid_test",
        "import-export-test": True,
    }
    datasource = DruidDatasource(
        id=id,
        datasource_name=name,
        cluster_name="druid_test",
        params=json.dumps(params),
    )
    for col_name in cols_names:
        datasource.columns.append(DruidColumn(column_name=col_name))
    for metric_name in metric_names:
        datasource.metrics.append(
            DruidMetric(metric_name=metric_name, json="{}"))
    return datasource
def create_druid_datasource(self, name, id=0, cols_names=[],
                            metric_names=[]):
    cluster_name = "druid_test"
    cluster = self.get_or_create(
        DruidCluster, {"cluster_name": cluster_name})
    params = {"remote_id": id, "database_name": cluster_name}
    datasource = DruidDatasource(
        id=id,
        datasource_name=name,
        cluster_id=cluster.id,
        params=json.dumps(params),
    )
    for col_name in cols_names:
        datasource.columns.append(DruidColumn(column_name=col_name))
    for metric_name in metric_names:
        datasource.metrics.append(
            DruidMetric(metric_name=metric_name, json="{}"))
    return datasource
def test_get_filters_extraction_fn_time_format(self):
    filters = [{"col": "dayOfMonth", "val": ["1", "20"], "op": "in"}]
    dimension_spec = {
        "type": "extraction",
        "dimension": "__time",
        "outputName": "dayOfMonth",
        "extractionFn": {
            "type": "timeFormat",
            "format": "d",
            "timeZone": "Asia/Kolkata",
            "locale": "en",
        },
    }
    spec_json = json.dumps(dimension_spec)
    col = DruidColumn(column_name="dayOfMonth",
                      dimension_spec_json=spec_json)
    column_dict = {"dayOfMonth": col}
    f = DruidDatasource.get_filters(filters, [], column_dict)
    assert isinstance(f.extraction_function, TimeFormatExtraction)
    dim_ext_fn = dimension_spec["extractionFn"]
    self.assertEqual(dim_ext_fn["type"],
                     f.extraction_function.extraction_type)
    self.assertEqual(dim_ext_fn["format"], f.extraction_function._format)
    self.assertEqual(dim_ext_fn["timeZone"],
                     f.extraction_function._time_zone)
    self.assertEqual(dim_ext_fn["locale"], f.extraction_function._locale)
def create_druid_datasource(self, name, id=0, cols_names=[],
                            metric_names=[]):
    cluster_name = "druid_test"
    cluster = self.get_or_create(
        DruidCluster, {"cluster_name": cluster_name}, db.session)
    name = "{0}{1}".format(NAME_PREFIX, name)
    params = {DBREF: id, "database_name": cluster_name}
    dict_rep = {
        "cluster_id": cluster.id,
        "datasource_name": name,
        "id": id,
        "params": json.dumps(params),
        "columns": [{"column_name": c} for c in cols_names],
        "metrics": [{"metric_name": c, "json": "{}"} for c in metric_names],
    }
    datasource = DruidDatasource(
        id=id,
        datasource_name=name,
        cluster_id=cluster.id,
        params=json.dumps(params),
    )
    for col_name in cols_names:
        datasource.columns.append(DruidColumn(column_name=col_name))
    for metric_name in metric_names:
        datasource.metrics.append(DruidMetric(metric_name=metric_name))
    return datasource, dict_rep
def test_recursive_get_fields(self):
    conf = {
        "type": "quantile",
        "fieldName": "f1",
        "field": {
            "type": "custom",
            "fields": [
                {"type": "fieldAccess", "fieldName": "f2"},
                {"type": "fieldAccess", "fieldName": "f3"},
                {
                    "type": "quantiles",
                    "fieldName": "f4",
                    "field": {"type": "custom"},
                },
                {
                    "type": "custom",
                    "fields": [
                        {"type": "fieldAccess", "fieldName": "f5"},
                        {
                            "type": "fieldAccess",
                            "fieldName": "f2",
                            "fields": [
                                {"type": "fieldAccess", "fieldName": "f3"},
                                {"type": "fieldIgnoreMe", "fieldName": "f6"},
                            ],
                        },
                    ],
                },
            ],
        },
    }
    fields = DruidDatasource.recursive_get_fields(conf)
    expected = set(["f1", "f2", "f3", "f4", "f5"])
    self.assertEqual(5, len(fields))
    for field in fields:
        expected.remove(field)
    self.assertEqual(0, len(expected))
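# For reference, a minimal sketch of the traversal the test above exercises.
# This is an assumption reconstructed from the expected output, not the
# production implementation: fieldName is collected only for types known to
# reference a metric, nested 'field'/'fields' entries are walked recursively,
# and unknown types such as 'fieldIgnoreMe' contribute no fieldName of their
# own.
def recursive_get_fields_sketch(conf):
    field_names = []
    if conf.get("type") in ("fieldAccess", "quantile", "quantiles"):
        field_names.append(conf["fieldName"])
    if "field" in conf:
        field_names += recursive_get_fields_sketch(conf["field"])
    for sub_conf in conf.get("fields", []):
        field_names += recursive_get_fields_sketch(sub_conf)
    # deduplicate: the same metric may be referenced from several branches
    return list(set(field_names))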
def decode_dashboards(  # pylint: disable=too-many-return-statements
    o: Dict[str, Any]
) -> Any:
    """
    Function to be passed into the json.loads object_hook parameter.
    Recreates the dashboard object from a json representation.
    """
    # pylint: disable=import-outside-toplevel
    from superset.connectors.druid.models import (
        DruidCluster,
        DruidColumn,
        DruidDatasource,
        DruidMetric,
    )

    if "__Dashboard__" in o:
        return Dashboard(**o["__Dashboard__"])
    if "__Slice__" in o:
        return Slice(**o["__Slice__"])
    if "__TableColumn__" in o:
        return TableColumn(**o["__TableColumn__"])
    if "__SqlaTable__" in o:
        return SqlaTable(**o["__SqlaTable__"])
    if "__SqlMetric__" in o:
        return SqlMetric(**o["__SqlMetric__"])
    if "__DruidCluster__" in o:
        return DruidCluster(**o["__DruidCluster__"])
    if "__DruidColumn__" in o:
        return DruidColumn(**o["__DruidColumn__"])
    if "__DruidDatasource__" in o:
        return DruidDatasource(**o["__DruidDatasource__"])
    if "__DruidMetric__" in o:
        return DruidMetric(**o["__DruidMetric__"])
    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    return o
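# A minimal usage sketch (not part of the module): decode_dashboards is meant
# to be supplied as the object_hook of json.loads, so that tagged dicts in an
# exported dashboard payload are rebuilt as model objects. The payload below
# is hypothetical.
import json

exported = '{"__TableColumn__": {"column_name": "ds"}}'
column = json.loads(exported, object_hook=decode_dashboards)
assert isinstance(column, TableColumn)  # rebuilt as a model, not a plain dict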
def test_druid_type_from_adhoc_metric(self):
    druid_type = DruidDatasource.druid_type_from_adhoc_metric({
        "column": {"type": "DOUBLE", "column_name": "value"},
        "aggregate": "SUM",
        "label": "My Adhoc Metric",
    })
    assert druid_type == "doubleSum"
    druid_type = DruidDatasource.druid_type_from_adhoc_metric({
        "column": {"type": "LONG", "column_name": "value"},
        "aggregate": "MAX",
        "label": "My Adhoc Metric",
    })
    assert druid_type == "longMax"
    druid_type = DruidDatasource.druid_type_from_adhoc_metric({
        "column": {"type": "VARCHAR(255)", "column_name": "value"},
        "aggregate": "COUNT",
        "label": "My Adhoc Metric",
    })
    assert druid_type == "count"
    druid_type = DruidDatasource.druid_type_from_adhoc_metric({
        "column": {"type": "VARCHAR(255)", "column_name": "value"},
        "aggregate": "COUNT_DISTINCT",
        "label": "My Adhoc Metric",
    })
    assert druid_type == "cardinality"
    druid_type = DruidDatasource.druid_type_from_adhoc_metric({
        "column": {"type": "hyperUnique", "column_name": "value"},
        "aggregate": "COUNT_DISTINCT",
        "label": "My Adhoc Metric",
    })
    assert druid_type == "hyperUnique"
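# For reference, a sketch of the mapping the cases above pin down. This is an
# assumption reconstructed from the assertions, not the production code:
# COUNT maps to Druid's 'count', COUNT_DISTINCT maps to 'cardinality' unless
# the column is already hyperUnique, and other aggregates concatenate the
# lower-cased column type with the capitalized aggregate (e.g. 'doubleSum').
def druid_type_from_adhoc_metric_sketch(adhoc_metric):
    column_type = adhoc_metric["column"]["type"].lower()
    aggregate = adhoc_metric["aggregate"].lower()
    if aggregate == "count":
        return "count"
    if aggregate == "count_distinct":
        return "hyperUnique" if column_type == "hyperunique" else "cardinality"
    return column_type + aggregate.capitalize()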
def test_metrics_and_post_aggs(self):
    """
    Test generation of metrics and post-aggregations from an initial list
    of superset metrics (which may include the results of either). This
    primarily tests that specifying a post-aggregator metric will also
    require the raw aggregation of the associated druid metric column.
    """
    metrics_dict = {
        "unused_count": DruidMetric(
            metric_name="unused_count",
            verbose_name="COUNT(*)",
            metric_type="count",
            json=json.dumps({"type": "count", "name": "unused_count"}),
        ),
        "some_sum": DruidMetric(
            metric_name="some_sum",
            verbose_name="SUM(*)",
            metric_type="sum",
            json=json.dumps({"type": "sum", "name": "sum"}),
        ),
        "a_histogram": DruidMetric(
            metric_name="a_histogram",
            verbose_name="APPROXIMATE_HISTOGRAM(*)",
            metric_type="approxHistogramFold",
            json=json.dumps(
                {"type": "approxHistogramFold", "name": "a_histogram"}),
        ),
        "aCustomMetric": DruidMetric(
            metric_name="aCustomMetric",
            verbose_name="MY_AWESOME_METRIC(*)",
            metric_type="aCustomType",
            json=json.dumps(
                {"type": "customMetric", "name": "aCustomMetric"}),
        ),
        "quantile_p95": DruidMetric(
            metric_name="quantile_p95",
            verbose_name="P95(*)",
            metric_type="postagg",
            json=json.dumps({
                "type": "quantile",
                "probability": 0.95,
                "name": "p95",
                "fieldName": "a_histogram",
            }),
        ),
        "aCustomPostAgg": DruidMetric(
            metric_name="aCustomPostAgg",
            verbose_name="CUSTOM_POST_AGG(*)",
            metric_type="postagg",
            json=json.dumps({
                "type": "customPostAgg",
                "name": "aCustomPostAgg",
                "field": {
                    "type": "fieldAccess",
                    "fieldName": "aCustomMetric",
                },
            }),
        ),
    }
    adhoc_metric = {
        "expressionType": "SIMPLE",
        "column": {"type": "DOUBLE", "column_name": "value"},
        "aggregate": "SUM",
        "label": "My Adhoc Metric",
    }

    metrics = ["some_sum"]
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    assert set(saved_metrics.keys()) == {"some_sum"}
    assert post_aggs == {}

    metrics = [adhoc_metric]
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    assert set(saved_metrics.keys()) == set([adhoc_metric["label"]])
    assert post_aggs == {}

    metrics = ["some_sum", adhoc_metric]
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    assert set(saved_metrics.keys()) == {"some_sum", adhoc_metric["label"]}
    assert post_aggs == {}

    metrics = ["quantile_p95"]
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    result_postaggs = set(["quantile_p95"])
    assert set(saved_metrics.keys()) == {"a_histogram"}
    assert set(post_aggs.keys()) == result_postaggs

    metrics = ["aCustomPostAgg"]
    saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    result_postaggs = set(["aCustomPostAgg"])
    assert set(saved_metrics.keys()) == {"aCustomMetric"}
    assert set(post_aggs.keys()) == result_postaggs
def test_get_filters_composes_multiple_filters(self):
    filtr1 = {'col': 'A', 'op': '!=', 'val': 'y'}
    filtr2 = {'col': 'B', 'op': 'in', 'val': ['a', 'b', 'c']}
    res = DruidDatasource.get_filters([filtr1, filtr2], [])
    self.assertEqual('and', res.filter['filter']['type'])
    self.assertEqual(2, len(res.filter['filter']['fields']))
def test_run_query_single_groupby(self):
    client = Mock()
    from_dttm = Mock()
    to_dttm = Mock()
    from_dttm.replace = Mock(return_value=from_dttm)
    to_dttm.replace = Mock(return_value=to_dttm)
    from_dttm.isoformat = Mock(return_value="from")
    to_dttm.isoformat = Mock(return_value="to")
    timezone = "timezone"
    from_dttm.tzname = Mock(return_value=timezone)
    ds = DruidDatasource(datasource_name="datasource")
    metric1 = DruidMetric(metric_name="metric1")
    metric2 = DruidMetric(metric_name="metric2")
    ds.metrics = [metric1, metric2]
    col1 = DruidColumn(column_name="col1")
    col2 = DruidColumn(column_name="col2")
    ds.columns = [col1, col2]
    aggs = ["metric1"]
    post_aggs = ["some_agg"]
    ds._metrics_and_post_aggs = Mock(return_value=(aggs, post_aggs))
    groupby = ["col1"]
    metrics = ["metric1"]
    ds.get_having_filters = Mock(return_value=[])
    client.query_builder.last_query.query_dict = {"mock": 0}
    # client.topn is called twice
    ds.run_query(
        groupby,
        metrics,
        None,
        from_dttm,
        to_dttm,
        timeseries_limit=100,
        client=client,
        order_desc=True,
        filter=[],
    )
    self.assertEqual(2, len(client.topn.call_args_list))
    self.assertEqual(0, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    # check that there is no dimensions entry
    called_args_pre = client.topn.call_args_list[0][1]
    self.assertNotIn("dimensions", called_args_pre)
    self.assertIn("dimension", called_args_pre)
    called_args = client.topn.call_args_list[1][1]
    self.assertIn("dimension", called_args)
    self.assertEqual("col1", called_args["dimension"])
    # not order_desc
    client = Mock()
    client.query_builder.last_query.query_dict = {"mock": 0}
    ds.run_query(
        groupby,
        metrics,
        None,
        from_dttm,
        to_dttm,
        client=client,
        order_desc=False,
        filter=[],
        row_limit=100,
    )
    self.assertEqual(0, len(client.topn.call_args_list))
    self.assertEqual(1, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    self.assertIn("dimensions", client.groupby.call_args_list[0][1])
    self.assertEqual(["col1"],
                     client.groupby.call_args_list[0][1]["dimensions"])
    # order_desc but timeseries and dimension spec
    # calls topn with single dimension spec 'dimension'
    spec = {"outputName": "hello", "dimension": "matcho"}
    spec_json = json.dumps(spec)
    col3 = DruidColumn(column_name="col3", dimension_spec_json=spec_json)
    ds.columns.append(col3)
    groupby = ["col3"]
    client = Mock()
    client.query_builder.last_query.query_dict = {"mock": 0}
    ds.run_query(
        groupby,
        metrics,
        None,
        from_dttm,
        to_dttm,
        client=client,
        order_desc=True,
        timeseries_limit=5,
        filter=[],
        row_limit=100,
    )
    self.assertEqual(2, len(client.topn.call_args_list))
    self.assertEqual(0, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    self.assertIn("dimension", client.topn.call_args_list[0][1])
    self.assertIn("dimension", client.topn.call_args_list[1][1])
    # uses dimension for pre query and full spec for final query
    self.assertEqual("matcho", client.topn.call_args_list[0][1]["dimension"])
    self.assertEqual(spec, client.topn.call_args_list[1][1]["dimension"])
def test_get_filters_keeps_trailing_spaces(self):
    filtr = {"col": "A", "op": "in", "val": ["a "]}
    col = DruidColumn(column_name="A")
    column_dict = {"A": col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual("a ", res.filter["filter"]["value"])
def test_get_filters_extracts_values_in_quotes(self):
    filtr = {"col": "A", "op": "in", "val": ['"a"']}
    col = DruidColumn(column_name="A")
    column_dict = {"A": col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual("a", res.filter["filter"]["value"])
def test_get_filters_handles_none_for_string_types(self):
    filtr = {"col": "A", "op": "==", "val": None}
    col = DruidColumn(column_name="A")
    column_dict = {"A": col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertIsNone(res)
def test_get_filters_ignores_in_not_in_with_empty_value(self):
    filtr1 = {'col': 'A', 'op': 'in', 'val': []}
    filtr2 = {'col': 'A', 'op': 'not in', 'val': []}
    res = DruidDatasource.get_filters([filtr1, filtr2], [])
    self.assertEqual(None, res)
def test_import_druid_no_metadata(self):
    datasource = self.create_druid_datasource('pure_druid', id=10001)
    imported_id = DruidDatasource.import_obj(datasource, import_time=1989)
    imported = self.get_datasource(imported_id)
    self.assert_datasource_equals(datasource, imported)
def test_get_filters_extracts_values_in_quotes(self):
    filtr = {'col': 'A', 'op': 'in', 'val': [' "a" ']}
    col = DruidColumn(column_name='A')
    column_dict = {'A': col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual('a', res.filter['filter']['value'])
def test_run_query_order_by_metrics(self):
    client = Mock()
    client.query_builder.last_query.query_dict = {"mock": 0}
    from_dttm = Mock()
    to_dttm = Mock()
    ds = DruidDatasource(datasource_name="datasource")
    ds.get_having_filters = Mock(return_value=[])
    dim1 = DruidColumn(column_name="dim1")
    dim2 = DruidColumn(column_name="dim2")
    metrics_dict = {
        "count1": DruidMetric(
            metric_name="count1",
            metric_type="count",
            json=json.dumps({"type": "count", "name": "count1"}),
        ),
        "sum1": DruidMetric(
            metric_name="sum1",
            metric_type="doubleSum",
            json=json.dumps({"type": "doubleSum", "name": "sum1"}),
        ),
        "sum2": DruidMetric(
            metric_name="sum2",
            metric_type="doubleSum",
            json=json.dumps({"type": "doubleSum", "name": "sum2"}),
        ),
        "div1": DruidMetric(
            metric_name="div1",
            metric_type="postagg",
            json=json.dumps({
                "fn": "/",
                "type": "arithmetic",
                "name": "div1",
                "fields": [
                    {"fieldName": "sum1", "type": "fieldAccess"},
                    {"fieldName": "sum2", "type": "fieldAccess"},
                ],
            }),
        ),
    }
    ds.columns = [dim1, dim2]
    ds.metrics = list(metrics_dict.values())

    groupby = ["dim1"]
    metrics = ["count1"]
    granularity = "all"
    # get the counts of the top 5 'dim1's, order by 'sum1'
    ds.run_query(
        groupby,
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        timeseries_limit=5,
        timeseries_limit_metric="sum1",
        client=client,
        order_desc=True,
        filter=[],
    )
    qry_obj = client.topn.call_args_list[0][1]
    self.assertEqual("dim1", qry_obj["dimension"])
    self.assertEqual("sum1", qry_obj["metric"])
    aggregations = qry_obj["aggregations"]
    post_aggregations = qry_obj["post_aggregations"]
    self.assertEqual({"count1", "sum1"}, set(aggregations.keys()))
    self.assertEqual(set(), set(post_aggregations.keys()))

    # get the counts of the top 5 'dim1's, order by 'div1'
    ds.run_query(
        groupby,
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        timeseries_limit=5,
        timeseries_limit_metric="div1",
        client=client,
        order_desc=True,
        filter=[],
    )
    qry_obj = client.topn.call_args_list[1][1]
    self.assertEqual("dim1", qry_obj["dimension"])
    self.assertEqual("div1", qry_obj["metric"])
    aggregations = qry_obj["aggregations"]
    post_aggregations = qry_obj["post_aggregations"]
    self.assertEqual({"count1", "sum1", "sum2"}, set(aggregations.keys()))
    self.assertEqual({"div1"}, set(post_aggregations.keys()))

    groupby = ["dim1", "dim2"]
    # get the counts of the top 5 ['dim1', 'dim2']s, order by 'sum1'
    ds.run_query(
        groupby,
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        timeseries_limit=5,
        timeseries_limit_metric="sum1",
        client=client,
        order_desc=True,
        filter=[],
    )
    qry_obj = client.groupby.call_args_list[0][1]
    self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
    self.assertEqual("sum1", qry_obj["limit_spec"]["columns"][0]["dimension"])
    aggregations = qry_obj["aggregations"]
    post_aggregations = qry_obj["post_aggregations"]
    self.assertEqual({"count1", "sum1"}, set(aggregations.keys()))
    self.assertEqual(set(), set(post_aggregations.keys()))

    # get the counts of the top 5 ['dim1', 'dim2']s, order by 'div1'
    ds.run_query(
        groupby,
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        timeseries_limit=5,
        timeseries_limit_metric="div1",
        client=client,
        order_desc=True,
        filter=[],
    )
    qry_obj = client.groupby.call_args_list[1][1]
    self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
    self.assertEqual("div1", qry_obj["limit_spec"]["columns"][0]["dimension"])
    aggregations = qry_obj["aggregations"]
    post_aggregations = qry_obj["post_aggregations"]
    self.assertEqual({"count1", "sum1", "sum2"}, set(aggregations.keys()))
    self.assertEqual({"div1"}, set(post_aggregations.keys()))