def test_get_filters_handles_arrays_for_string_types(self):
     filtr = {'col': 'A', 'op': '==', 'val': ['a', 'b']}
     res = DruidDatasource.get_filters([filtr], [])
     self.assertEqual('a', res.filter['filter']['value'])
     filtr = {'col': 'A', 'op': '==', 'val': []}
     res = DruidDatasource.get_filters([filtr], [])
     self.assertEqual('', res.filter['filter']['value'])
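These assertions (and the filter tests that follow) inspect pydruid's Filter wrapper, whose .filter attribute nests the raw Druid filter spec one level deep. A rough illustration of that shape, assuming pydruid's Filter class:

from pydruid.utils.filters import Filter

f = Filter(type='selector', dimension='A', value='a')
# f.filter is roughly
# {'filter': {'type': 'selector', 'dimension': 'A', 'value': 'a'}},
# which is why the tests index res.filter['filter']['value'].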
Example #2
    def test_druid_type_from_adhoc_metric(self):

        druid_type = DruidDatasource.druid_type_from_adhoc_metric({
            'column': {'type': 'DOUBLE', 'column_name': 'value'},
            'aggregate': 'SUM',
            'label': 'My Adhoc Metric',
        })
        assert(druid_type == 'doubleSum')

        druid_type = DruidDatasource.druid_type_from_adhoc_metric({
            'column': {'type': 'LONG', 'column_name': 'value'},
            'aggregate': 'MAX',
            'label': 'My Adhoc Metric',
        })
        assert(druid_type == 'longMax')

        druid_type = DruidDatasource.druid_type_from_adhoc_metric({
            'column': {'type': 'VARCHAR(255)', 'column_name': 'value'},
            'aggregate': 'COUNT',
            'label': 'My Adhoc Metric',
        })
        assert(druid_type == 'count')

        druid_type = DruidDatasource.druid_type_from_adhoc_metric({
            'column': {'type': 'VARCHAR(255)', 'column_name': 'value'},
            'aggregate': 'COUNT_DISTINCT',
            'label': 'My Adhoc Metric',
        })
        assert(druid_type == 'cardinality')
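Taken together, these assertions (including the hyperUnique case covered by the longer variant of this test further down) imply a simple mapping from the adhoc metric's aggregate and column type to a Druid aggregator type. A minimal sketch of that mapping, inferred only from the expected values above and not necessarily the real Superset implementation:

def druid_type_from_adhoc_metric_sketch(adhoc_metric):
    column_type = adhoc_metric['column']['type'].lower()
    aggregate = adhoc_metric['aggregate'].lower()
    if aggregate == 'count':
        return 'count'
    if aggregate == 'count_distinct':
        # hyperUnique columns keep their own aggregator; everything else
        # falls back to the approximate cardinality aggregator
        return 'hyperUnique' if column_type == 'hyperunique' else 'cardinality'
    # e.g. DOUBLE + SUM -> doubleSum, LONG + MAX -> longMax
    return column_type + aggregate.capitalize()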
 def test_get_filters_converts_strings_to_num(self):
     filtr = {'col': 'A', 'op': 'in', 'val': ['6']}
     res = DruidDatasource.get_filters([filtr], ['A'])
     self.assertEqual(6, res.filter['filter']['value'])
     filtr = {'col': 'A', 'op': '==', 'val': '6'}
     res = DruidDatasource.get_filters([filtr], ['A'])
     self.assertEqual(6, res.filter['filter']['value'])
Example #4
 def test_get_filters_converts_strings_to_num(self):
     filtr = {'col': 'A', 'op': 'in', 'val': ['6']}
     col = DruidColumn(column_name='A')
     column_dict = {'A': col}
     res = DruidDatasource.get_filters([filtr], ['A'], column_dict)
     self.assertEqual(6, res.filter['filter']['value'])
     filtr = {'col': 'A', 'op': '==', 'val': '6'}
     res = DruidDatasource.get_filters([filtr], ['A'], column_dict)
     self.assertEqual(6, res.filter['filter']['value'])
Example #5
    def test_get_filters_handles_arrays_for_string_types(self):
        filtr = {'col': 'A', 'op': '==', 'val': ['a', 'b']}
        col = DruidColumn(column_name='A')
        column_dict = {'A': col}
        res = DruidDatasource.get_filters([filtr], [], column_dict)
        self.assertEqual('a', res.filter['filter']['value'])

        filtr = {'col': 'A', 'op': '==', 'val': []}
        res = DruidDatasource.get_filters([filtr], [], column_dict)
        self.assertIsNone(res.filter['filter']['value'])
Example #6
 def test_get_filters_constructs_equals_for_in_not_in_single_value(self):
     filtr = {'col': 'A', 'op': 'in', 'val': ['a']}
     cola = DruidColumn(column_name='A')
     colb = DruidColumn(column_name='B')
     column_dict = {'A': cola, 'B': colb}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertEqual('selector', res.filter['filter']['type'])
Example #7
 def test_get_filters_ignores_in_not_in_with_empty_value(self):
     filtr1 = {'col': 'A', 'op': 'in', 'val': []}
     filtr2 = {'col': 'A', 'op': 'not in', 'val': []}
     col = DruidColumn(column_name='A')
     column_dict = {'A': col}
     res = DruidDatasource.get_filters([filtr1, filtr2], [], column_dict)
     self.assertIsNone(res)
    def test_import_druid_override_identical(self):
        datasource = self.create_druid_datasource(
            'copy_cat', id=10004, cols_names=['new_col1', 'col2', 'col3'],
            metric_names=['new_metric1'])
        imported_id = DruidDatasource.import_obj(
            datasource, import_time=1993)

        copy_datasource = self.create_druid_datasource(
            'copy_cat', id=10004, cols_names=['new_col1', 'col2', 'col3'],
            metric_names=['new_metric1'])
        imported_id_copy = DruidDatasource.import_obj(
            copy_datasource, import_time=1994)

        self.assertEquals(imported_id, imported_id_copy)
        self.assert_datasource_equals(
            copy_datasource, self.get_datasource(imported_id))
    def test_metrics_and_post_aggs_tree(self):
        metrics = ['A', 'B', 'm1', 'm2']
        metrics_dict = {}
        for i in range(ord('A'), ord('K') + 1):
            emplace(metrics_dict, chr(i), True)
        for i in range(1, 10):
            emplace(metrics_dict, 'm' + str(i), False)

        def depends_on(index, fields):
            dependents = fields if isinstance(fields, list) else [fields]
            metrics_dict[index].json_obj = {'fieldNames': dependents}

        depends_on('A', ['m1', 'D', 'C'])
        depends_on('B', ['B', 'C', 'E', 'F', 'm3'])
        depends_on('C', ['H', 'I'])
        depends_on('D', ['m2', 'm5', 'G', 'C'])
        depends_on('E', ['H', 'I', 'J'])
        depends_on('F', ['J', 'm5'])
        depends_on('G', ['m4', 'm7', 'm6', 'A'])
        depends_on('H', ['A', 'm4', 'I'])
        depends_on('I', ['H', 'K'])
        depends_on('J', 'K')
        depends_on('K', ['m8', 'm9'])
        all_metrics, postaggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)
        expected_metrics = set(all_metrics)
        self.assertEqual(9, len(all_metrics))
        for i in range(1, 10):
            expected_metrics.remove('m' + str(i))
        self.assertEqual(0, len(expected_metrics))
        self.assertEqual(11, len(postaggs))
        for i in range(ord('A'), ord('K') + 1):
            del postaggs[chr(i)]
        self.assertEqual(0, len(postaggs))
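The emplace helper used above is defined elsewhere in the test module. A plausible minimal definition, inferred from how the test uses it (the boolean flag marks the A..K entries as post-aggregations and m1..m9 as plain aggregations), might look like:

def emplace(metrics_dict, metric_name, is_postagg=False):
    # Register a DruidMetric under metric_name; entries flagged as
    # post-aggregations get metric_type 'postagg', the rest a plain type.
    metrics_dict[metric_name] = DruidMetric(
        metric_name=metric_name,
        metric_type='postagg' if is_postagg else 'metric',
        json=json.dumps({'type': 'some_metric', 'name': metric_name}),
    )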
 def test_get_filters_constructs_filter_in(self):
     filtr = {'col': 'A', 'op': 'in', 'val': ['a', 'b', 'c']}
     res = DruidDatasource.get_filters([filtr], [])
     self.assertIn('filter', res.filter)
     self.assertIn('fields', res.filter['filter'])
     self.assertEqual('or', res.filter['filter']['type'])
     self.assertEqual(3, len(res.filter['filter']['fields']))
Example #11
 def test_get_filters_constructs_regex_filter(self):
     filtr = {'col': 'A', 'op': 'regex', 'val': '[abc]'}
     col = DruidColumn(column_name='A')
     column_dict = {'A': col}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertEqual('regex', res.filter['filter']['type'])
     self.assertEqual('[abc]', res.filter['filter']['pattern'])
     self.assertEqual('A', res.filter['filter']['dimension'])
Example #12
 def test_get_filters_constructs_filter_equals(self):
     filtr = {'col': 'A', 'op': '==', 'val': 'h'}
     col = DruidColumn(column_name='A')
     column_dict = {'A': col}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertEqual('selector', res.filter['filter']['type'])
     self.assertEqual('A', res.filter['filter']['dimension'])
     self.assertEqual('h', res.filter['filter']['value'])
 def test_get_filters_constructs_bounds_filter(self):
     filtr = {'col': 'A', 'op': '>=', 'val': 'h'}
     res = DruidDatasource.get_filters([filtr], [])
     self.assertFalse(res.filter['filter']['lowerStrict'])
     self.assertEqual('A', res.filter['filter']['dimension'])
     self.assertEqual('h', res.filter['filter']['lower'])
     self.assertFalse(res.filter['filter']['alphaNumeric'])
     filtr['op'] = '>'
     res = DruidDatasource.get_filters([filtr], [])
     self.assertTrue(res.filter['filter']['lowerStrict'])
     filtr['op'] = '<='
     res = DruidDatasource.get_filters([filtr], [])
     self.assertFalse(res.filter['filter']['upperStrict'])
     self.assertEqual('h', res.filter['filter']['upper'])
     filtr['op'] = '<'
     res = DruidDatasource.get_filters([filtr], [])
     self.assertTrue(res.filter['filter']['upperStrict'])
 def test_get_filters_constructs_filter_not_equals(self):
     filtr = {'col': 'A', 'op': '!=', 'val': 'h'}
     res = DruidDatasource.get_filters([filtr], [])
     self.assertEqual('not', res.filter['filter']['type'])
     self.assertEqual(
         'h',
         res.filter['filter']['field'].filter['filter']['value'],
     )
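The chained lookup res.filter['filter']['field'].filter['filter']['value'] exists because a 'not' filter wraps another Filter object under 'field'. Roughly, and again assuming pydruid's Filter wrapper:

from pydruid.utils.filters import Filter

inner = Filter(type='selector', dimension='A', value='h')
outer = Filter(type='not', field=inner)
# outer.filter['filter']['field'] is the inner Filter object, so its value
# is reached through a second .filter['filter'] lookup, as asserted above.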
 def test_import_druid_no_metadata(self):
     datasource, dict_datasource = self.create_druid_datasource(
         'pure_druid', id=ID_PREFIX + 1)
     imported_cluster = DruidDatasource.import_from_dict(db.session,
                                                         dict_datasource)
     db.session.commit()
     imported = self.get_datasource(imported_cluster.id)
     self.assert_datasource_equals(datasource, imported)
 def test_import_druid_2_col_2_met(self):
     datasource = self.create_druid_datasource(
         'druid_2_col_2_met', id=10003, cols_names=['c1', 'c2'],
         metric_names=['m1', 'm2'])
     imported_id = DruidDatasource.import_obj(
         datasource, import_time=1991)
     imported = self.get_datasource(imported_id)
     self.assert_datasource_equals(datasource, imported)
Example #17
    def test_get_aggregations(self):
        ds = DruidDatasource(datasource_name='datasource')
        metrics_dict = {
            'sum1': DruidMetric(
                metric_name='sum1',
                metric_type='doubleSum',
                json=json.dumps({'type': 'doubleSum', 'name': 'sum1'}),
            ),
            'sum2': DruidMetric(
                metric_name='sum2',
                metric_type='doubleSum',
                json=json.dumps({'type': 'doubleSum', 'name': 'sum2'}),
            ),
            'div1': DruidMetric(
                metric_name='div1',
                metric_type='postagg',
                json=json.dumps({
                    'fn': '/',
                    'type': 'arithmetic',
                    'name': 'div1',
                    'fields': [
                        {
                            'fieldName': 'sum1',
                            'type': 'fieldAccess',
                        },
                        {
                            'fieldName': 'sum2',
                            'type': 'fieldAccess',
                        },
                    ],
                }),
            ),
        }
        metric_names = ['sum1', 'sum2']
        aggs = ds.get_aggregations(metrics_dict, metric_names)
        expected_agg = {name: metrics_dict[name].json_obj for name in metric_names}
        self.assertEqual(expected_agg, aggs)

        metric_names = ['sum1', 'col1']
        self.assertRaises(
            SupersetException, ds.get_aggregations, metrics_dict, metric_names)

        metric_names = ['sum1', 'div1']
        self.assertRaises(
            SupersetException, ds.get_aggregations, metrics_dict, metric_names)
 def test_import_druid_2_col_2_met(self):
     datasource, dict_datasource = self.create_druid_datasource(
         'druid_2_col_2_met', id=ID_PREFIX + 3, cols_names=['c1', 'c2'],
         metric_names=['m1', 'm2'])
     imported_cluster = DruidDatasource.import_from_dict(db.session,
                                                         dict_datasource)
     db.session.commit()
     imported = self.get_datasource(imported_cluster.id)
     self.assert_datasource_equals(datasource, imported)
Example #19
 def test_get_filters_composes_multiple_filters(self):
     filtr1 = {'col': 'A', 'op': '!=', 'val': 'y'}
     filtr2 = {'col': 'B', 'op': 'in', 'val': ['a', 'b', 'c']}
     cola = DruidColumn(column_name='A')
     colb = DruidColumn(column_name='B')
     column_dict = {'A': cola, 'B': colb}
     res = DruidDatasource.get_filters([filtr1, filtr2], [], column_dict)
     self.assertEqual('and', res.filter['filter']['type'])
     self.assertEqual(2, len(res.filter['filter']['fields']))
    def test_import_druid_override_identical(self):
        datasource, dict_datasource = self.create_druid_datasource(
            'copy_cat', id=ID_PREFIX + 4,
            cols_names=['new_col1', 'col2', 'col3'],
            metric_names=['new_metric1'])
        imported = DruidDatasource.import_from_dict(session=db.session,
                                                    dict_rep=dict_datasource)
        db.session.commit()
        copy_datasource, dict_cp_datasource = self.create_druid_datasource(
            'copy_cat', id=ID_PREFIX + 4,
            cols_names=['new_col1', 'col2', 'col3'],
            metric_names=['new_metric1'])
        imported_copy = DruidDatasource.import_from_dict(db.session,
                                                         dict_cp_datasource)
        db.session.commit()

        self.assertEquals(imported.id, imported_copy.id)
        self.assert_datasource_equals(
            copy_datasource, self.get_datasource(imported.id))
    def test_import_druid_override(self):
        datasource = self.create_druid_datasource(
            'druid_override', id=10004, cols_names=['col1'],
            metric_names=['m1'])
        imported_id = DruidDatasource.import_obj(
            datasource, import_time=1991)
        table_over = self.create_druid_datasource(
            'druid_override', id=10004,
            cols_names=['new_col1', 'col2', 'col3'],
            metric_names=['new_metric1'])
        imported_over_id = DruidDatasource.import_obj(
            table_over, import_time=1992)

        imported_over = self.get_datasource(imported_over_id)
        self.assertEquals(imported_id, imported_over.id)
        expected_datasource = self.create_druid_datasource(
            'druid_override', id=10004, metric_names=['new_metric1', 'm1'],
            cols_names=['col1', 'new_col1', 'col2', 'col3'])
        self.assert_datasource_equals(expected_datasource, imported_over)
Example #22
 def test_get_filters_constructs_filter_not_equals(self):
     filtr = {'col': 'A', 'op': '!=', 'val': 'h'}
     col = DruidColumn(column_name='A')
     column_dict = {'A': col}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertEqual('not', res.filter['filter']['type'])
     self.assertEqual(
         'h',
         res.filter['filter']['field'].filter['filter']['value'],
     )
 def test_import_druid_1_col_1_met(self):
     datasource, dict_datasource = self.create_druid_datasource(
         'druid_1_col_1_met', id=ID_PREFIX + 2,
         cols_names=['col1'], metric_names=['metric1'])
     imported_cluster = DruidDatasource.import_from_dict(db.session,
                                                         dict_datasource)
     db.session.commit()
     imported = self.get_datasource(imported_cluster.id)
     self.assert_datasource_equals(datasource, imported)
     self.assertEquals(
         {DBREF: ID_PREFIX + 2, 'database_name': 'druid_test'},
         json.loads(imported.params))
 def test_import_druid_1_col_1_met(self):
     datasource = self.create_druid_datasource(
         'druid_1_col_1_met', id=10002,
         cols_names=["col1"], metric_names=["metric1"])
     imported_id = DruidDatasource.import_obj(
         datasource, import_time=1990)
     imported = self.get_datasource(imported_id)
     self.assert_datasource_equals(datasource, imported)
     self.assertEquals(
         {'remote_id': 10002, 'import_time': 1990,
          'database_name': 'druid_test'},
         json.loads(imported.params))
Example #25
 def test_get_filters_constructs_filter_not_in(self):
     filtr = {'col': 'A', 'op': 'not in', 'val': ['a', 'b', 'c']}
     col = DruidColumn(column_name='A')
     column_dict = {'A': col}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertIn('filter', res.filter)
     self.assertIn('type', res.filter['filter'])
     self.assertEqual('not', res.filter['filter']['type'])
     self.assertIn('field', res.filter['filter'])
     self.assertEqual(
         3,
         len(res.filter['filter']['field'].filter['filter']['fields']),
     )
 def test_import_druid_override_append(self):
     datasource, dict_datasource = self.create_druid_datasource(
         'druid_override', id=ID_PREFIX + 3, cols_names=['col1'],
         metric_names=['m1'])
     imported_cluster = DruidDatasource.import_from_dict(db.session,
                                                         dict_datasource)
     db.session.commit()
     table_over, table_over_dict = self.create_druid_datasource(
         'druid_override', id=ID_PREFIX + 3,
         cols_names=['new_col1', 'col2', 'col3'],
         metric_names=['new_metric1'])
     imported_over_cluster = DruidDatasource.import_from_dict(
         db.session,
         table_over_dict)
     db.session.commit()
     imported_over = self.get_datasource(imported_over_cluster.id)
     self.assertEquals(imported_cluster.id, imported_over.id)
     expected_datasource, _ = self.create_druid_datasource(
         'druid_override', id=ID_PREFIX + 3,
         metric_names=['new_metric1', 'm1'],
         cols_names=['col1', 'new_col1', 'col2', 'col3'])
     self.assert_datasource_equals(expected_datasource, imported_over)
Example #27
 def test_get_filters_extraction_fn_map(self):
     filters = [{'col': 'deviceName', 'val': ['iPhone X'], 'op': 'in'}]
     dimension_spec = {
         'type': 'extraction',
         'dimension': 'device',
         'outputName': 'deviceName',
         'outputType': 'STRING',
         'extractionFn': {
             'type': 'lookup',
             'dimension': 'dimensionName',
             'outputName': 'dimensionOutputName',
             'replaceMissingValueWith': 'missing_value',
             'retainMissingValue': False,
             'lookup': {
                 'type': 'map',
                 'map': {
                     'iPhone10,1': 'iPhone 8',
                     'iPhone10,4': 'iPhone 8',
                     'iPhone10,2': 'iPhone 8 Plus',
                     'iPhone10,5': 'iPhone 8 Plus',
                     'iPhone10,3': 'iPhone X',
                     'iPhone10,6': 'iPhone X',
                 },
                 'isOneToOne': False,
             },
         },
     }
     spec_json = json.dumps(dimension_spec)
     col = DruidColumn(column_name='deviceName', dimension_spec_json=spec_json)
     column_dict = {'deviceName': col}
     f = DruidDatasource.get_filters(filters, [], column_dict)
     assert isinstance(f.extraction_function, MapLookupExtraction)
     dim_ext_fn = dimension_spec['extractionFn']
     f_ext_fn = f.extraction_function
     self.assertEqual(dim_ext_fn['lookup']['map'], f_ext_fn._mapping)
     self.assertEqual(dim_ext_fn['lookup']['isOneToOne'], f_ext_fn._injective)
     self.assertEqual(
         dim_ext_fn['replaceMissingValueWith'],
         f_ext_fn._replace_missing_values,
     )
     self.assertEqual(
         dim_ext_fn['retainMissingValue'],
         f_ext_fn._retain_missing_values,
     )
 def test_recursive_get_fields(self):
     conf = {
         'type': 'quantile',
         'fieldName': 'f1',
         'field': {
             'type': 'custom',
             'fields': [{
                 'type': 'fieldAccess',
                 'fieldName': 'f2',
             }, {
                 'type': 'fieldAccess',
                 'fieldName': 'f3',
             }, {
                 'type': 'quantiles',
                 'fieldName': 'f4',
                 'field': {
                     'type': 'custom',
                 },
             }, {
                 'type': 'custom',
                 'fields': [{
                     'type': 'fieldAccess',
                     'fieldName': 'f5',
                 }, {
                     'type': 'fieldAccess',
                     'fieldName': 'f2',
                     'fields': [{
                         'type': 'fieldAccess',
                         'fieldName': 'f3',
                     }, {
                         'type': 'fieldIgnoreMe',
                         'fieldName': 'f6',
                     }],
                 }],
             }],
         },
     }
     fields = DruidDatasource.recursive_get_fields(conf)
     expected = set(['f1', 'f2', 'f3', 'f4', 'f5'])
     self.assertEqual(5, len(fields))
     for field in fields:
         expected.remove(field)
     self.assertEqual(0, len(expected))
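The behaviour asserted here (fieldName values collected recursively from nested field/fields entries, but only for recognised field-referencing types, which is why f6 under fieldIgnoreMe is dropped) can be sketched as follows. This is an illustration of the contract, not necessarily the exact Superset implementation:

def recursive_get_fields_sketch(conf):
    # Collect fieldName only for node types that actually reference a field,
    # then recurse into nested 'field' / 'fields' structures.
    field_names = []
    if conf.get('type') in ('fieldAccess', 'quantile', 'quantiles'):
        field_names.append(conf.get('fieldName', ''))
    if conf.get('field'):
        field_names += recursive_get_fields_sketch(conf['field'])
    for nested in conf.get('fields', []):
        field_names += recursive_get_fields_sketch(nested)
    return list(set(field_names))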
    def test_set_perm_druid_datasource(self):
        self.create_druid_test_objects()
        session = db.session
        druid_cluster = (session.query(DruidCluster).filter_by(
            cluster_name="druid_test").one())
        datasource = DruidDatasource(
            datasource_name="tmp_datasource",
            cluster=druid_cluster,
            cluster_id=druid_cluster.id,
        )
        session.add(datasource)
        session.commit()

        # store without a schema
        stored_datasource = (session.query(DruidDatasource).filter_by(
            datasource_name="tmp_datasource").one())
        self.assertEqual(
            stored_datasource.perm,
            f"[druid_test].[tmp_datasource](id:{stored_datasource.id})",
        )
        self.assertIsNotNone(
            security_manager.find_permission_view_menu("datasource_access",
                                                       stored_datasource.perm))
        self.assertIsNone(stored_datasource.schema_perm)

        # store with a schema
        stored_datasource.datasource_name = "tmp_schema.tmp_datasource"
        session.commit()
        self.assertEqual(
            stored_datasource.perm,
            f"[druid_test].[tmp_schema.tmp_datasource](id:{stored_datasource.id})",
        )
        self.assertIsNotNone(
            security_manager.find_permission_view_menu("datasource_access",
                                                       stored_datasource.perm))
        self.assertEqual(stored_datasource.schema_perm,
                         "[druid_test].[tmp_schema]")
        self.assertIsNotNone(
            security_manager.find_permission_view_menu(
                "schema_access", stored_datasource.schema_perm))

        session.delete(stored_datasource)
        session.commit()
 def test_import_druid_1_col_1_met(self):
     datasource, dict_datasource = self.create_druid_datasource(
         "druid_1_col_1_met",
         id=ID_PREFIX + 2,
         cols_names=["col1"],
         metric_names=["metric1"],
     )
     imported_cluster = DruidDatasource.import_from_dict(
         db.session, dict_datasource)
     db.session.commit()
     imported = self.get_datasource(imported_cluster.id)
     self.assert_datasource_equals(datasource, imported)
     self.assertEqual(
         {
             DBREF: ID_PREFIX + 2,
             "database_name": "druid_test"
         },
         json.loads(imported.params),
     )
 def test_get_filters_extraction_fn_map(self):
     filters = [{"col": "deviceName", "val": ["iPhone X"], "op": "in"}]
     dimension_spec = {
         "type": "extraction",
         "dimension": "device",
         "outputName": "deviceName",
         "outputType": "STRING",
         "extractionFn": {
             "type": "lookup",
             "dimension": "dimensionName",
             "outputName": "dimensionOutputName",
             "replaceMissingValueWith": "missing_value",
             "retainMissingValue": False,
             "lookup": {
                 "type": "map",
                 "map": {
                     "iPhone10,1": "iPhone 8",
                     "iPhone10,4": "iPhone 8",
                     "iPhone10,2": "iPhone 8 Plus",
                     "iPhone10,5": "iPhone 8 Plus",
                     "iPhone10,3": "iPhone X",
                     "iPhone10,6": "iPhone X",
                 },
                 "isOneToOne": False,
             },
         },
     }
     spec_json = json.dumps(dimension_spec)
     col = DruidColumn(column_name="deviceName",
                       dimension_spec_json=spec_json)
     column_dict = {"deviceName": col}
     f = DruidDatasource.get_filters(filters, [], column_dict)
     assert isinstance(f.extraction_function, MapLookupExtraction)
     dim_ext_fn = dimension_spec["extractionFn"]
     f_ext_fn = f.extraction_function
     self.assertEqual(dim_ext_fn["lookup"]["map"], f_ext_fn._mapping)
     self.assertEqual(dim_ext_fn["lookup"]["isOneToOne"],
                      f_ext_fn._injective)
     self.assertEqual(dim_ext_fn["replaceMissingValueWith"],
                      f_ext_fn._replace_missing_values)
     self.assertEqual(dim_ext_fn["retainMissingValue"],
                      f_ext_fn._retain_missing_values)
Example #33
 def test_run_query_multiple_groupby(self):
     client = Mock()
     from_dttm = Mock()
     to_dttm = Mock()
     from_dttm.replace = Mock(return_value=from_dttm)
     to_dttm.replace = Mock(return_value=to_dttm)
     from_dttm.isoformat = Mock(return_value='from')
     to_dttm.isoformat = Mock(return_value='to')
     timezone = 'timezone'
     from_dttm.tzname = Mock(return_value=timezone)
     ds = DruidDatasource(datasource_name='datasource')
     metric1 = DruidMetric(metric_name='metric1')
     metric2 = DruidMetric(metric_name='metric2')
     ds.metrics = [metric1, metric2]
     col1 = DruidColumn(column_name='col1')
     col2 = DruidColumn(column_name='col2')
     ds.columns = [col1, col2]
     all_metrics = []
     post_aggs = ['some_agg']
     ds._metrics_and_post_aggs = Mock(return_value=(all_metrics, post_aggs))
     groupby = ['col1', 'col2']
     metrics = ['metric1']
     ds.get_having_filters = Mock(return_value=[])
     client.query_builder = Mock()
     client.query_builder.last_query = Mock()
     client.query_builder.last_query.query_dict = {'mock': 0}
      # multiple groupby columns call client.groupby
     ds.run_query(
         groupby, metrics, None, from_dttm,
         to_dttm, client=client, row_limit=100,
         filter=[],
     )
     self.assertEqual(0, len(client.topn.call_args_list))
     self.assertEqual(1, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
      # check that the 'dimensions' entry is present and correct
     called_args = client.groupby.call_args_list[0][1]
     self.assertIn('dimensions', called_args)
     self.assertEqual(['col1', 'col2'], called_args['dimensions'])
Example #34
 def test_get_filters_extraction_fn_regex(self):
     filters = [{'col': 'buildPrefix', 'val': ['22B'], 'op': 'in'}]
     dimension_spec = {
         'type': 'extraction',
         'dimension': 'build',
         'outputName': 'buildPrefix',
         'outputType': 'STRING',
         'extractionFn': {
             'type': 'regex',
             'expr': '(^[0-9A-Za-z]{3})',
         },
     }
     spec_json = json.dumps(dimension_spec)
     col = DruidColumn(column_name='buildPrefix', dimension_spec_json=spec_json)
     column_dict = {'buildPrefix': col}
     f = DruidDatasource.get_filters(filters, [], column_dict)
     assert isinstance(f.extraction_function, RegexExtraction)
     dim_ext_fn = dimension_spec['extractionFn']
     f_ext_fn = f.extraction_function
     self.assertEqual(dim_ext_fn['expr'], f_ext_fn._expr)
Example #35
    def test_run_query_with_adhoc_metric(self):
        client = Mock()
        from_dttm = Mock()
        to_dttm = Mock()
        from_dttm.replace = Mock(return_value=from_dttm)
        to_dttm.replace = Mock(return_value=to_dttm)
        from_dttm.isoformat = Mock(return_value='from')
        to_dttm.isoformat = Mock(return_value='to')
        timezone = 'timezone'
        from_dttm.tzname = Mock(return_value=timezone)
        ds = DruidDatasource(datasource_name='datasource')
        metric1 = DruidMetric(metric_name='metric1')
        metric2 = DruidMetric(metric_name='metric2')
        ds.metrics = [metric1, metric2]
        col1 = DruidColumn(column_name='col1')
        col2 = DruidColumn(column_name='col2')
        ds.columns = [col1, col2]
        all_metrics = []
        post_aggs = ['some_agg']
        ds._metrics_and_post_aggs = Mock(return_value=(all_metrics, post_aggs))
        groupby = []
        metrics = [{
            'expressionType': 'SIMPLE',
            'column': {'type': 'DOUBLE', 'column_name': 'col1'},
            'aggregate': 'SUM',
            'label': 'My Adhoc Metric',
        }]

        ds.get_having_filters = Mock(return_value=[])
        client.query_builder = Mock()
        client.query_builder.last_query = Mock()
        client.query_builder.last_query.query_dict = {'mock': 0}
        # no groupby calls client.timeseries
        ds.run_query(
            groupby, metrics, None, from_dttm,
            to_dttm, client=client, filter=[], row_limit=100,
        )
        self.assertEqual(0, len(client.topn.call_args_list))
        self.assertEqual(0, len(client.groupby.call_args_list))
        self.assertEqual(1, len(client.timeseries.call_args_list))
        # check that there is no dimensions entry
        called_args = client.timeseries.call_args_list[0][1]
        self.assertNotIn('dimensions', called_args)
        self.assertIn('post_aggregations', called_args)
 def create_druid_datasource(self,
                             name,
                             id=0,
                             cols_names=[],
                             metric_names=[]):
     params = {
         'remote_id': id,
         'database_name': 'druid_test',
         'import-export-test': True
     }
     datasource = DruidDatasource(
         id=id,
         datasource_name=name,
         cluster_name='druid_test',
         params=json.dumps(params),
     )
     for col_name in cols_names:
         datasource.columns.append(DruidColumn(column_name=col_name))
     for metric_name in metric_names:
         datasource.metrics.append(DruidMetric(metric_name=metric_name))
     return datasource
 def test_get_filters_extraction_fn_regex(self):
     filters = [{"col": "buildPrefix", "val": ["22B"], "op": "in"}]
     dimension_spec = {
         "type": "extraction",
         "dimension": "build",
         "outputName": "buildPrefix",
         "outputType": "STRING",
         "extractionFn": {
             "type": "regex",
             "expr": "(^[0-9A-Za-z]{3})"
         },
     }
     spec_json = json.dumps(dimension_spec)
     col = DruidColumn(column_name="buildPrefix",
                       dimension_spec_json=spec_json)
     column_dict = {"buildPrefix": col}
     f = DruidDatasource.get_filters(filters, [], column_dict)
     assert isinstance(f.extraction_function, RegexExtraction)
     dim_ext_fn = dimension_spec["extractionFn"]
     f_ext_fn = f.extraction_function
     self.assertEqual(dim_ext_fn["expr"], f_ext_fn._expr)
 def test_get_filters_extraction_fn_registered_lookup_extraction(self):
     filters = [{"col": "country", "val": ["Spain"], "op": "in"}]
     dimension_spec = {
         "type": "extraction",
         "dimension": "country_name",
         "outputName": "country",
         "outputType": "STRING",
         "extractionFn": {
             "type": "registeredLookup",
             "lookup": "country_name"
         },
     }
     spec_json = json.dumps(dimension_spec)
     col = DruidColumn(column_name="country", dimension_spec_json=spec_json)
     column_dict = {"country": col}
     f = DruidDatasource.get_filters(filters, [], column_dict)
     assert isinstance(f.extraction_function, RegisteredLookupExtraction)
     dim_ext_fn = dimension_spec["extractionFn"]
     self.assertEqual(dim_ext_fn["type"],
                      f.extraction_function.extraction_type)
     self.assertEqual(dim_ext_fn["lookup"], f.extraction_function._lookup)
 def test_get_filters_extraction_fn_regex(self):
     filters = [{'col': 'buildPrefix', 'val': ['22B'], 'op': 'in'}]
     dimension_spec = {
         'type': 'extraction',
         'dimension': 'build',
         'outputName': 'buildPrefix',
         'outputType': 'STRING',
         'extractionFn': {
             'type': 'regex',
             'expr': '(^[0-9A-Za-z]{3})',
         },
     }
     spec_json = json.dumps(dimension_spec)
     col = DruidColumn(column_name='buildPrefix',
                       dimension_spec_json=spec_json)
     column_dict = {'buildPrefix': col}
     f = DruidDatasource.get_filters(filters, [], column_dict)
     assert isinstance(f.extraction_function, RegexExtraction)
     dim_ext_fn = dimension_spec['extractionFn']
     f_ext_fn = f.extraction_function
     self.assertEqual(dim_ext_fn['expr'], f_ext_fn._expr)
Example #40
 def create_druid_datasource(self,
                             name,
                             id=0,
                             cols_names=[],
                             metric_names=[]):
     params = {
         "remote_id": id,
         "database_name": "druid_test",
         "import-export-test": True,
     }
     datasource = DruidDatasource(
         id=id,
         datasource_name=name,
         cluster_name="druid_test",
         params=json.dumps(params),
     )
     for col_name in cols_names:
         datasource.columns.append(DruidColumn(column_name=col_name))
     for metric_name in metric_names:
         datasource.metrics.append(
             DruidMetric(metric_name=metric_name, json="{}"))
     return datasource
    def create_druid_datasource(self,
                                name,
                                id=0,
                                cols_names=[],
                                metric_names=[]):
        cluster_name = "druid_test"
        cluster = self.get_or_create(DruidCluster,
                                     {"cluster_name": cluster_name})

        params = {"remote_id": id, "database_name": cluster_name}
        datasource = DruidDatasource(
            id=id,
            datasource_name=name,
            cluster_id=cluster.id,
            params=json.dumps(params),
        )
        for col_name in cols_names:
            datasource.columns.append(DruidColumn(column_name=col_name))
        for metric_name in metric_names:
            datasource.metrics.append(
                DruidMetric(metric_name=metric_name, json="{}"))
        return datasource
 def test_get_filters_extraction_fn_time_format(self):
     filters = [{"col": "dayOfMonth", "val": ["1", "20"], "op": "in"}]
     dimension_spec = {
         "type": "extraction",
         "dimension": "__time",
         "outputName": "dayOfMonth",
         "extractionFn": {
             "type": "timeFormat",
             "format": "d",
             "timeZone": "Asia/Kolkata",
             "locale": "en",
         },
     }
     spec_json = json.dumps(dimension_spec)
     col = DruidColumn(column_name="dayOfMonth", dimension_spec_json=spec_json)
     column_dict = {"dayOfMonth": col}
     f = DruidDatasource.get_filters(filters, [], column_dict)
     assert isinstance(f.extraction_function, TimeFormatExtraction)
     dim_ext_fn = dimension_spec["extractionFn"]
     self.assertEqual(dim_ext_fn["type"], f.extraction_function.extraction_type)
     self.assertEqual(dim_ext_fn["format"], f.extraction_function._format)
     self.assertEqual(dim_ext_fn["timeZone"], f.extraction_function._time_zone)
     self.assertEqual(dim_ext_fn["locale"], f.extraction_function._locale)
    def create_druid_datasource(self,
                                name,
                                id=0,
                                cols_names=[],
                                metric_names=[]):
        cluster_name = "druid_test"
        cluster = self.get_or_create(DruidCluster,
                                     {"cluster_name": cluster_name},
                                     db.session)

        name = "{0}{1}".format(NAME_PREFIX, name)
        params = {DBREF: id, "database_name": cluster_name}
        dict_rep = {
            "cluster_id": cluster.id,
            "datasource_name": name,
            "id": id,
            "params": json.dumps(params),
            "columns": [{
                "column_name": c
            } for c in cols_names],
            "metrics": [{
                "metric_name": c,
                "json": "{}"
            } for c in metric_names],
        }

        datasource = DruidDatasource(
            id=id,
            datasource_name=name,
            cluster_id=cluster.id,
            params=json.dumps(params),
        )
        for col_name in cols_names:
            datasource.columns.append(DruidColumn(column_name=col_name))
        for metric_name in metric_names:
            datasource.metrics.append(DruidMetric(metric_name=metric_name))
        return datasource, dict_rep
 def test_recursive_get_fields(self):
     conf = {
         "type": "quantile",
         "fieldName": "f1",
         "field": {
             "type": "custom",
             "fields": [
                 {"type": "fieldAccess", "fieldName": "f2"},
                 {"type": "fieldAccess", "fieldName": "f3"},
                 {
                     "type": "quantiles",
                     "fieldName": "f4",
                     "field": {"type": "custom"},
                 },
                 {
                     "type": "custom",
                     "fields": [
                         {"type": "fieldAccess", "fieldName": "f5"},
                         {
                             "type": "fieldAccess",
                             "fieldName": "f2",
                             "fields": [
                                 {"type": "fieldAccess", "fieldName": "f3"},
                                 {"type": "fieldIgnoreMe", "fieldName": "f6"},
                             ],
                         },
                     ],
                 },
             ],
         },
     }
     fields = DruidDatasource.recursive_get_fields(conf)
     expected = set(["f1", "f2", "f3", "f4", "f5"])
     self.assertEqual(5, len(fields))
     for field in fields:
         expected.remove(field)
     self.assertEqual(0, len(expected))
Example #45
def decode_dashboards(  # pylint: disable=too-many-return-statements
        o: Dict[str, Any]) -> Any:
    """
    Function to be passed into json.loads obj_hook parameter
    Recreates the dashboard object from a json representation.
    """
    # pylint: disable=import-outside-toplevel
    from superset.connectors.druid.models import (
        DruidCluster,
        DruidColumn,
        DruidDatasource,
        DruidMetric,
    )

    if "__Dashboard__" in o:
        return Dashboard(**o["__Dashboard__"])
    if "__Slice__" in o:
        return Slice(**o["__Slice__"])
    if "__TableColumn__" in o:
        return TableColumn(**o["__TableColumn__"])
    if "__SqlaTable__" in o:
        return SqlaTable(**o["__SqlaTable__"])
    if "__SqlMetric__" in o:
        return SqlMetric(**o["__SqlMetric__"])
    if "__DruidCluster__" in o:
        return DruidCluster(**o["__DruidCluster__"])
    if "__DruidColumn__" in o:
        return DruidColumn(**o["__DruidColumn__"])
    if "__DruidDatasource__" in o:
        return DruidDatasource(**o["__DruidDatasource__"])
    if "__DruidMetric__" in o:
        return DruidMetric(**o["__DruidMetric__"])
    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")

    return o
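decode_dashboards is meant to be passed as the object_hook when parsing an exported dashboard JSON payload; an illustrative call (the file name here is hypothetical):

with open('dashboard_export.json') as fp:
    # Each {"__Dashboard__": {...}}-style wrapper in the payload is rebuilt
    # into the corresponding ORM object as json.loads walks the document.
    imported_objects = json.loads(fp.read(), object_hook=decode_dashboards)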
    def test_druid_type_from_adhoc_metric(self):

        druid_type = DruidDatasource.druid_type_from_adhoc_metric({
            "column": {
                "type": "DOUBLE",
                "column_name": "value"
            },
            "aggregate":
            "SUM",
            "label":
            "My Adhoc Metric",
        })
        assert druid_type == "doubleSum"

        druid_type = DruidDatasource.druid_type_from_adhoc_metric({
            "column": {
                "type": "LONG",
                "column_name": "value"
            },
            "aggregate":
            "MAX",
            "label":
            "My Adhoc Metric",
        })
        assert druid_type == "longMax"

        druid_type = DruidDatasource.druid_type_from_adhoc_metric({
            "column": {
                "type": "VARCHAR(255)",
                "column_name": "value"
            },
            "aggregate":
            "COUNT",
            "label":
            "My Adhoc Metric",
        })
        assert druid_type == "count"

        druid_type = DruidDatasource.druid_type_from_adhoc_metric({
            "column": {
                "type": "VARCHAR(255)",
                "column_name": "value"
            },
            "aggregate":
            "COUNT_DISTINCT",
            "label":
            "My Adhoc Metric",
        })
        assert druid_type == "cardinality"

        druid_type = DruidDatasource.druid_type_from_adhoc_metric({
            "column": {
                "type": "hyperUnique",
                "column_name": "value"
            },
            "aggregate":
            "COUNT_DISTINCT",
            "label":
            "My Adhoc Metric",
        })
        assert druid_type == "hyperUnique"
    def test_metrics_and_post_aggs(self):
        """
        Test generation of metrics and post-aggregations from an initial list
        of superset metrics (which may include the results of either). This
        primarily tests that specifying a post-aggregator metric will also
        require the raw aggregation of the associated druid metric column.
        """
        metrics_dict = {
            "unused_count":
            DruidMetric(
                metric_name="unused_count",
                verbose_name="COUNT(*)",
                metric_type="count",
                json=json.dumps({
                    "type": "count",
                    "name": "unused_count"
                }),
            ),
            "some_sum":
            DruidMetric(
                metric_name="some_sum",
                verbose_name="SUM(*)",
                metric_type="sum",
                json=json.dumps({
                    "type": "sum",
                    "name": "sum"
                }),
            ),
            "a_histogram":
            DruidMetric(
                metric_name="a_histogram",
                verbose_name="APPROXIMATE_HISTOGRAM(*)",
                metric_type="approxHistogramFold",
                json=json.dumps({
                    "type": "approxHistogramFold",
                    "name": "a_histogram"
                }),
            ),
            "aCustomMetric":
            DruidMetric(
                metric_name="aCustomMetric",
                verbose_name="MY_AWESOME_METRIC(*)",
                metric_type="aCustomType",
                json=json.dumps({
                    "type": "customMetric",
                    "name": "aCustomMetric"
                }),
            ),
            "quantile_p95":
            DruidMetric(
                metric_name="quantile_p95",
                verbose_name="P95(*)",
                metric_type="postagg",
                json=json.dumps({
                    "type": "quantile",
                    "probability": 0.95,
                    "name": "p95",
                    "fieldName": "a_histogram",
                }),
            ),
            "aCustomPostAgg":
            DruidMetric(
                metric_name="aCustomPostAgg",
                verbose_name="CUSTOM_POST_AGG(*)",
                metric_type="postagg",
                json=json.dumps({
                    "type": "customPostAgg",
                    "name": "aCustomPostAgg",
                    "field": {
                        "type": "fieldAccess",
                        "fieldName": "aCustomMetric"
                    },
                }),
            ),
        }

        adhoc_metric = {
            "expressionType": "SIMPLE",
            "column": {
                "type": "DOUBLE",
                "column_name": "value"
            },
            "aggregate": "SUM",
            "label": "My Adhoc Metric",
        }

        metrics = ["some_sum"]
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        assert set(saved_metrics.keys()) == {"some_sum"}
        assert post_aggs == {}

        metrics = [adhoc_metric]
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        assert set(saved_metrics.keys()) == set([adhoc_metric["label"]])
        assert post_aggs == {}

        metrics = ["some_sum", adhoc_metric]
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        assert set(saved_metrics.keys()) == {"some_sum", adhoc_metric["label"]}
        assert post_aggs == {}

        metrics = ["quantile_p95"]
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        result_postaggs = set(["quantile_p95"])
        assert set(saved_metrics.keys()) == {"a_histogram"}
        assert set(post_aggs.keys()) == result_postaggs

        metrics = ["aCustomPostAgg"]
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        result_postaggs = set(["aCustomPostAgg"])
        assert set(saved_metrics.keys()) == {"aCustomMetric"}
        assert set(post_aggs.keys()) == result_postaggs
Example #48
 def test_get_filters_composes_multiple_filters(self):
     filtr1 = {'col': 'A', 'op': '!=', 'val': 'y'}
     filtr2 = {'col': 'B', 'op': 'in', 'val': ['a', 'b', 'c']}
     res = DruidDatasource.get_filters([filtr1, filtr2], [])
     self.assertEqual('and', res.filter['filter']['type'])
     self.assertEqual(2, len(res.filter['filter']['fields']))
 def test_run_query_single_groupby(self):
     client = Mock()
     from_dttm = Mock()
     to_dttm = Mock()
     from_dttm.replace = Mock(return_value=from_dttm)
     to_dttm.replace = Mock(return_value=to_dttm)
     from_dttm.isoformat = Mock(return_value="from")
     to_dttm.isoformat = Mock(return_value="to")
     timezone = "timezone"
     from_dttm.tzname = Mock(return_value=timezone)
     ds = DruidDatasource(datasource_name="datasource")
     metric1 = DruidMetric(metric_name="metric1")
     metric2 = DruidMetric(metric_name="metric2")
     ds.metrics = [metric1, metric2]
     col1 = DruidColumn(column_name="col1")
     col2 = DruidColumn(column_name="col2")
     ds.columns = [col1, col2]
     aggs = ["metric1"]
     post_aggs = ["some_agg"]
     ds._metrics_and_post_aggs = Mock(return_value=(aggs, post_aggs))
     groupby = ["col1"]
     metrics = ["metric1"]
     ds.get_having_filters = Mock(return_value=[])
     client.query_builder.last_query.query_dict = {"mock": 0}
     # client.topn is called twice
     ds.run_query(
         groupby,
         metrics,
         None,
         from_dttm,
         to_dttm,
         timeseries_limit=100,
         client=client,
         order_desc=True,
         filter=[],
     )
     self.assertEqual(2, len(client.topn.call_args_list))
     self.assertEqual(0, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     # check that there is no dimensions entry
     called_args_pre = client.topn.call_args_list[0][1]
     self.assertNotIn("dimensions", called_args_pre)
     self.assertIn("dimension", called_args_pre)
     called_args = client.topn.call_args_list[1][1]
     self.assertIn("dimension", called_args)
     self.assertEqual("col1", called_args["dimension"])
     # not order_desc
     client = Mock()
     client.query_builder.last_query.query_dict = {"mock": 0}
     ds.run_query(
         groupby,
         metrics,
         None,
         from_dttm,
         to_dttm,
         client=client,
         order_desc=False,
         filter=[],
         row_limit=100,
     )
     self.assertEqual(0, len(client.topn.call_args_list))
     self.assertEqual(1, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     self.assertIn("dimensions", client.groupby.call_args_list[0][1])
     self.assertEqual(["col1"],
                      client.groupby.call_args_list[0][1]["dimensions"])
     # order_desc but timeseries and dimension spec
     # calls topn with single dimension spec 'dimension'
     spec = {"outputName": "hello", "dimension": "matcho"}
     spec_json = json.dumps(spec)
     col3 = DruidColumn(column_name="col3", dimension_spec_json=spec_json)
     ds.columns.append(col3)
     groupby = ["col3"]
     client = Mock()
     client.query_builder.last_query.query_dict = {"mock": 0}
     ds.run_query(
         groupby,
         metrics,
         None,
         from_dttm,
         to_dttm,
         client=client,
         order_desc=True,
         timeseries_limit=5,
         filter=[],
         row_limit=100,
     )
     self.assertEqual(2, len(client.topn.call_args_list))
     self.assertEqual(0, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     self.assertIn("dimension", client.topn.call_args_list[0][1])
     self.assertIn("dimension", client.topn.call_args_list[1][1])
     # uses dimension for pre query and full spec for final query
     self.assertEqual("matcho",
                      client.topn.call_args_list[0][1]["dimension"])
     self.assertEqual(spec, client.topn.call_args_list[1][1]["dimension"])
 def test_get_filters_keeps_trailing_spaces(self):
     filtr = {"col": "A", "op": "in", "val": ["a "]}
     col = DruidColumn(column_name="A")
     column_dict = {"A": col}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertEqual("a ", res.filter["filter"]["value"])
 def test_get_filters_extracts_values_in_quotes(self):
     filtr = {"col": "A", "op": "in", "val": ['"a"']}
     col = DruidColumn(column_name="A")
     column_dict = {"A": col}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertEqual("a", res.filter["filter"]["value"])
 def test_get_filters_handles_none_for_string_types(self):
     filtr = {"col": "A", "op": "==", "val": None}
     col = DruidColumn(column_name="A")
     column_dict = {"A": col}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertIsNone(res)
Example #53
 def test_get_filters_ignores_in_not_in_with_empty_value(self):
     filtr1 = {'col': 'A', 'op': 'in', 'val': []}
     filtr2 = {'col': 'A', 'op': 'not in', 'val': []}
     res = DruidDatasource.get_filters([filtr1, filtr2], [])
     self.assertEqual(None, res)
 def test_import_druid_no_metadata(self):
     datasource = self.create_druid_datasource('pure_druid', id=10001)
     imported_id = DruidDatasource.import_obj(datasource, import_time=1989)
     imported = self.get_datasource(imported_id)
     self.assert_datasource_equals(datasource, imported)
 def test_get_filters_handles_none_for_string_types(self):
     filtr = {'col': 'A', 'op': '==', 'val': None}
     col = DruidColumn(column_name='A')
     column_dict = {'A': col}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertIsNone(res)
    def test_metrics_and_post_aggs(self):
        """
        Test generation of metrics and post-aggregations from an initial list
        of superset metrics (which may include the results of either). This
        primarily tests that specifying a post-aggregator metric will also
        require the raw aggregation of the associated druid metric column.
        """
        metrics_dict = {
            'unused_count':
            DruidMetric(
                metric_name='unused_count',
                verbose_name='COUNT(*)',
                metric_type='count',
                json=json.dumps({
                    'type': 'count',
                    'name': 'unused_count'
                }),
            ),
            'some_sum':
            DruidMetric(
                metric_name='some_sum',
                verbose_name='SUM(*)',
                metric_type='sum',
                json=json.dumps({
                    'type': 'sum',
                    'name': 'sum'
                }),
            ),
            'a_histogram':
            DruidMetric(
                metric_name='a_histogram',
                verbose_name='APPROXIMATE_HISTOGRAM(*)',
                metric_type='approxHistogramFold',
                json=json.dumps(
                    {
                        'type': 'approxHistogramFold',
                        'name': 'a_histogram'
                    }, ),
            ),
            'aCustomMetric':
            DruidMetric(
                metric_name='aCustomMetric',
                verbose_name='MY_AWESOME_METRIC(*)',
                metric_type='aCustomType',
                json=json.dumps(
                    {
                        'type': 'customMetric',
                        'name': 'aCustomMetric'
                    }, ),
            ),
            'quantile_p95':
            DruidMetric(
                metric_name='quantile_p95',
                verbose_name='P95(*)',
                metric_type='postagg',
                json=json.dumps({
                    'type': 'quantile',
                    'probability': 0.95,
                    'name': 'p95',
                    'fieldName': 'a_histogram',
                }),
            ),
            'aCustomPostAgg':
            DruidMetric(
                metric_name='aCustomPostAgg',
                verbose_name='CUSTOM_POST_AGG(*)',
                metric_type='postagg',
                json=json.dumps({
                    'type': 'customPostAgg',
                    'name': 'aCustomPostAgg',
                    'field': {
                        'type': 'fieldAccess',
                        'fieldName': 'aCustomMetric',
                    },
                }),
            ),
        }

        adhoc_metric = {
            'expressionType': 'SIMPLE',
            'column': {
                'type': 'DOUBLE',
                'column_name': 'value'
            },
            'aggregate': 'SUM',
            'label': 'My Adhoc Metric',
        }

        metrics = ['some_sum']
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        assert set(saved_metrics.keys()) == {'some_sum'}
        assert post_aggs == {}

        metrics = [adhoc_metric]
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        assert set(saved_metrics.keys()) == set([adhoc_metric['label']])
        assert post_aggs == {}

        metrics = ['some_sum', adhoc_metric]
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        assert set(saved_metrics.keys()) == {'some_sum', adhoc_metric['label']}
        assert post_aggs == {}

        metrics = ['quantile_p95']
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        result_postaggs = set(['quantile_p95'])
        assert set(saved_metrics.keys()) == {'a_histogram'}
        assert set(post_aggs.keys()) == result_postaggs

        metrics = ['aCustomPostAgg']
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        result_postaggs = set(['aCustomPostAgg'])
        assert set(saved_metrics.keys()) == {'aCustomMetric'}
        assert set(post_aggs.keys()) == result_postaggs
 def test_run_query_single_groupby(self):
     client = Mock()
     from_dttm = Mock()
     to_dttm = Mock()
     from_dttm.replace = Mock(return_value=from_dttm)
     to_dttm.replace = Mock(return_value=to_dttm)
     from_dttm.isoformat = Mock(return_value='from')
     to_dttm.isoformat = Mock(return_value='to')
     timezone = 'timezone'
     from_dttm.tzname = Mock(return_value=timezone)
     ds = DruidDatasource(datasource_name='datasource')
     metric1 = DruidMetric(metric_name='metric1')
     metric2 = DruidMetric(metric_name='metric2')
     ds.metrics = [metric1, metric2]
     col1 = DruidColumn(column_name='col1')
     col2 = DruidColumn(column_name='col2')
     ds.columns = [col1, col2]
     aggs = ['metric1']
     post_aggs = ['some_agg']
     ds._metrics_and_post_aggs = Mock(return_value=(aggs, post_aggs))
     groupby = ['col1']
     metrics = ['metric1']
     ds.get_having_filters = Mock(return_value=[])
     client.query_builder.last_query.query_dict = {'mock': 0}
     # with a timeseries limit and order_desc=True, client.topn is called twice:
     # a pre-query and the final query
     ds.run_query(
         groupby,
         metrics,
         None,
         from_dttm,
         to_dttm,
         timeseries_limit=100,
         client=client,
         order_desc=True,
         filter=[],
     )
     self.assertEqual(2, len(client.topn.call_args_list))
     self.assertEqual(0, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     # the pre-query should pass a single 'dimension', not a 'dimensions' list
     called_args_pre = client.topn.call_args_list[0][1]
     self.assertNotIn('dimensions', called_args_pre)
     self.assertIn('dimension', called_args_pre)
     called_args = client.topn.call_args_list[1][1]
     self.assertIn('dimension', called_args)
     self.assertEqual('col1', called_args['dimension'])
     # order_desc=False falls back to a single groupby query
     client = Mock()
     client.query_builder.last_query.query_dict = {'mock': 0}
     ds.run_query(
         groupby,
         metrics,
         None,
         from_dttm,
         to_dttm,
         client=client,
         order_desc=False,
         filter=[],
         row_limit=100,
     )
     self.assertEqual(0, len(client.topn.call_args_list))
     self.assertEqual(1, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     self.assertIn('dimensions', client.groupby.call_args_list[0][1])
     self.assertEqual(['col1'],
                      client.groupby.call_args_list[0][1]['dimensions'])
     # order_desc with a timeseries limit and a column that carries a dimension spec:
     # topn is still used, with the single dimension spec passed as 'dimension'
     spec = {'outputName': 'hello', 'dimension': 'matcho'}
     spec_json = json.dumps(spec)
     col3 = DruidColumn(column_name='col3', dimension_spec_json=spec_json)
     ds.columns.append(col3)
     groupby = ['col3']
     client = Mock()
     client.query_builder.last_query.query_dict = {'mock': 0}
     ds.run_query(
         groupby,
         metrics,
         None,
         from_dttm,
         to_dttm,
         client=client,
         order_desc=True,
         timeseries_limit=5,
         filter=[],
         row_limit=100,
     )
     self.assertEqual(2, len(client.topn.call_args_list))
     self.assertEqual(0, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     self.assertIn('dimension', client.topn.call_args_list[0][1])
     self.assertIn('dimension', client.topn.call_args_list[1][1])
     # uses dimension for pre query and full spec for final query
     self.assertEqual('matcho',
                      client.topn.call_args_list[0][1]['dimension'])
     self.assertEqual(spec, client.topn.call_args_list[1][1]['dimension'])
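
# A sketch (assumptions, not DruidDatasource.run_query itself) of the dispatch the test
# above pins down: a single group-by column combined with a timeseries limit and
# descending order goes through client.topn twice (a pre-query plus the final query),
# anything else falls back to client.groupby with a 'dimensions' list. A column carrying
# a dimension_spec_json uses the spec's inner 'dimension' string for the pre-query and
# the full spec dict for the final query. choose_query is a hypothetical helper.
import json


def choose_query(client, columns, groupby, timeseries_limit=None,
                 order_desc=True, **query):
    specs = {c.column_name: getattr(c, 'dimension_spec_json', None) for c in columns}
    if len(groupby) == 1 and timeseries_limit and order_desc:
        spec_json = specs.get(groupby[0])
        spec = json.loads(spec_json) if spec_json else None
        # pre-query: a plain dimension string, even when a full spec exists
        client.topn(dimension=spec['dimension'] if spec else groupby[0],
                    threshold=timeseries_limit, **query)
        # final query: the full dimension spec when one is defined
        client.topn(dimension=spec or groupby[0], **query)
    else:
        client.groupby(dimensions=list(groupby), **query)
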
 def test_get_filters_extracts_values_in_quotes(self):
     filtr = {'col': 'A', 'op': 'in', 'val': ['  "a" ']}
     col = DruidColumn(column_name='A')
     column_dict = {'A': col}
     res = DruidDatasource.get_filters([filtr], [], column_dict)
     self.assertEqual('a', res.filter['filter']['value'])
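
# A sketch of the value clean-up the assertion above implies: surrounding whitespace and
# double quotes are stripped before the filter value is used ('  "a" ' becomes 'a').
# strip_quotes is a hypothetical helper, not the actual get_filters code path.
def strip_quotes(value):
    value = value.strip()
    if value.startswith('"') and value.endswith('"'):
        value = value[1:-1].strip()
    return value


assert strip_quotes('  "a" ') == 'a'
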
    def test_run_query_order_by_metrics(self):
        client = Mock()
        client.query_builder.last_query.query_dict = {"mock": 0}
        from_dttm = Mock()
        to_dttm = Mock()
        ds = DruidDatasource(datasource_name="datasource")
        ds.get_having_filters = Mock(return_value=[])
        dim1 = DruidColumn(column_name="dim1")
        dim2 = DruidColumn(column_name="dim2")
        metrics_dict = {
            "count1":
            DruidMetric(
                metric_name="count1",
                metric_type="count",
                json=json.dumps({
                    "type": "count",
                    "name": "count1"
                }),
            ),
            "sum1":
            DruidMetric(
                metric_name="sum1",
                metric_type="doubleSum",
                json=json.dumps({
                    "type": "doubleSum",
                    "name": "sum1"
                }),
            ),
            "sum2":
            DruidMetric(
                metric_name="sum2",
                metric_type="doubleSum",
                json=json.dumps({
                    "type": "doubleSum",
                    "name": "sum2"
                }),
            ),
            "div1":
            DruidMetric(
                metric_name="div1",
                metric_type="postagg",
                json=json.dumps({
                    "fn":
                    "/",
                    "type":
                    "arithmetic",
                    "name":
                    "div1",
                    "fields": [
                        {
                            "fieldName": "sum1",
                            "type": "fieldAccess"
                        },
                        {
                            "fieldName": "sum2",
                            "type": "fieldAccess"
                        },
                    ],
                }),
            ),
        }
        ds.columns = [dim1, dim2]
        ds.metrics = list(metrics_dict.values())

        groupby = ["dim1"]
        metrics = ["count1"]
        granularity = "all"
        # get the counts of the top 5 'dim1's, order by 'sum1'
        ds.run_query(
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            timeseries_limit=5,
            timeseries_limit_metric="sum1",
            client=client,
            order_desc=True,
            filter=[],
        )
        qry_obj = client.topn.call_args_list[0][1]
        self.assertEqual("dim1", qry_obj["dimension"])
        self.assertEqual("sum1", qry_obj["metric"])
        aggregations = qry_obj["aggregations"]
        post_aggregations = qry_obj["post_aggregations"]
        self.assertEqual({"count1", "sum1"}, set(aggregations.keys()))
        self.assertEqual(set(), set(post_aggregations.keys()))

        # get the counts of the top 5 'dim1's, order by 'div1'
        ds.run_query(
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            timeseries_limit=5,
            timeseries_limit_metric="div1",
            client=client,
            order_desc=True,
            filter=[],
        )
        qry_obj = client.topn.call_args_list[1][1]
        self.assertEqual("dim1", qry_obj["dimension"])
        self.assertEqual("div1", qry_obj["metric"])
        aggregations = qry_obj["aggregations"]
        post_aggregations = qry_obj["post_aggregations"]
        self.assertEqual({"count1", "sum1", "sum2"}, set(aggregations.keys()))
        self.assertEqual({"div1"}, set(post_aggregations.keys()))

        groupby = ["dim1", "dim2"]
        # get the counts of the top 5 ['dim1', 'dim2']s, order by 'sum1'
        ds.run_query(
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            timeseries_limit=5,
            timeseries_limit_metric="sum1",
            client=client,
            order_desc=True,
            filter=[],
        )
        qry_obj = client.groupby.call_args_list[0][1]
        self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
        self.assertEqual("sum1",
                         qry_obj["limit_spec"]["columns"][0]["dimension"])
        aggregations = qry_obj["aggregations"]
        post_aggregations = qry_obj["post_aggregations"]
        self.assertEqual({"count1", "sum1"}, set(aggregations.keys()))
        self.assertEqual(set(), set(post_aggregations.keys()))

        # get the counts of the top 5 ['dim1', 'dim2']s, order by 'div1'
        ds.run_query(
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            timeseries_limit=5,
            timeseries_limit_metric="div1",
            client=client,
            order_desc=True,
            filter=[],
        )
        qry_obj = client.groupby.call_args_list[1][1]
        self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
        self.assertEqual("div1",
                         qry_obj["limit_spec"]["columns"][0]["dimension"])
        aggregations = qry_obj["aggregations"]
        post_aggregations = qry_obj["post_aggregations"]
        self.assertEqual({"count1", "sum1", "sum2"}, set(aggregations.keys()))
        self.assertEqual({"div1"}, set(post_aggregations.keys()))