Example #1
0
 def test_get_filters_handles_none_for_string_types(self):
     """An equality filter whose value is None should produce no filter at all."""
     column = DruidColumn(column_name="A")
     none_filter = {"col": "A", "op": "==", "val": None}
     result = DruidDatasource.get_filters([none_filter], [], {"A": column})
     self.assertIsNone(result)
Example #2
0
 def test_get_filters_extracts_values_in_quotes(self):
     """Double quotes wrapping an 'in' filter value are stripped off."""
     column = DruidColumn(column_name="A")
     quoted_filter = {"col": "A", "op": "in", "val": ['"a"']}
     result = DruidDatasource.get_filters([quoted_filter], [], {"A": column})
     self.assertEqual("a", result.filter["filter"]["value"])
Example #3
0
 def test_run_query_single_groupby(self):
     """A single-column groupby must be dispatched to Druid topn queries.

     Covers three scenarios: (1) order_desc with a timeseries limit ->
     two chained topn calls (pre-query + final query); (2) order_desc
     False -> a single groupby call using 'dimensions'; (3) a column
     carrying a dimension_spec_json -> the pre-query uses the bare
     dimension name and the final query uses the full spec dict.
     """
     client = Mock()
     from_dttm = Mock()
     to_dttm = Mock()
     # The datetime mocks return themselves from .replace() so any
     # timezone normalization inside run_query is a no-op, and they
     # serialize to fixed strings for the query interval.
     from_dttm.replace = Mock(return_value=from_dttm)
     to_dttm.replace = Mock(return_value=to_dttm)
     from_dttm.isoformat = Mock(return_value="from")
     to_dttm.isoformat = Mock(return_value="to")
     timezone = "timezone"
     from_dttm.tzname = Mock(return_value=timezone)
     ds = DruidDatasource(datasource_name="datasource")
     metric1 = DruidMetric(metric_name="metric1")
     metric2 = DruidMetric(metric_name="metric2")
     ds.metrics = [metric1, metric2]
     col1 = DruidColumn(column_name="col1")
     col2 = DruidColumn(column_name="col2")
     ds.columns = [col1, col2]
     aggs = ["metric1"]
     post_aggs = ["some_agg"]
     # Stub out aggregation resolution so the test only exercises the
     # query-type dispatch logic, not metric parsing.
     ds._metrics_and_post_aggs = Mock(return_value=(aggs, post_aggs))
     groupby = ["col1"]
     metrics = ["metric1"]
     ds.get_having_filters = Mock(return_value=[])
     client.query_builder.last_query.query_dict = {"mock": 0}
     # client.topn is called twice
     ds.run_query(
         metrics,
         None,
         from_dttm,
         to_dttm,
         groupby=groupby,
         timeseries_limit=100,
         client=client,
         order_desc=True,
         filter=[],
     )
     self.assertEqual(2, len(client.topn.call_args_list))
     self.assertEqual(0, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     # check that there is no dimensions entry
     called_args_pre = client.topn.call_args_list[0][1]
     self.assertNotIn("dimensions", called_args_pre)
     self.assertIn("dimension", called_args_pre)
     called_args = client.topn.call_args_list[1][1]
     self.assertIn("dimension", called_args)
     self.assertEqual("col1", called_args["dimension"])
     # not order_desc
     client = Mock()
     client.query_builder.last_query.query_dict = {"mock": 0}
     ds.run_query(
         metrics,
         None,
         from_dttm,
         to_dttm,
         groupby=groupby,
         client=client,
         order_desc=False,
         filter=[],
         row_limit=100,
     )
     # Without order_desc a groupby query (plural 'dimensions') is used.
     self.assertEqual(0, len(client.topn.call_args_list))
     self.assertEqual(1, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     self.assertIn("dimensions", client.groupby.call_args_list[0][1])
     self.assertEqual(["col1"],
                      client.groupby.call_args_list[0][1]["dimensions"])
     # order_desc but timeseries and dimension spec
     # calls topn with single dimension spec 'dimension'
     spec = {"outputName": "hello", "dimension": "matcho"}
     spec_json = json.dumps(spec)
     col3 = DruidColumn(column_name="col3", dimension_spec_json=spec_json)
     ds.columns.append(col3)
     groupby = ["col3"]
     client = Mock()
     client.query_builder.last_query.query_dict = {"mock": 0}
     ds.run_query(
         metrics,
         None,
         from_dttm,
         to_dttm,
         groupby=groupby,
         client=client,
         order_desc=True,
         timeseries_limit=5,
         filter=[],
         row_limit=100,
     )
     self.assertEqual(2, len(client.topn.call_args_list))
     self.assertEqual(0, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     self.assertIn("dimension", client.topn.call_args_list[0][1])
     self.assertIn("dimension", client.topn.call_args_list[1][1])
     # uses dimension for pre query and full spec for final query
     self.assertEqual("matcho",
                      client.topn.call_args_list[0][1]["dimension"])
     self.assertEqual(spec, client.topn.call_args_list[1][1]["dimension"])
Example #4
0
    def test_run_query_order_by_metrics(self):
        """Ordering by a metric must pull in exactly its aggregation deps.

        Verifies that for both topn (single dimension) and groupby (two
        dimensions) queries, ordering by a plain aggregation ('sum1')
        includes only the queried and ordering aggregations, while
        ordering by a post-aggregation ('div1') additionally includes
        the field aggregations it depends on ('sum1', 'sum2') plus the
        post-aggregation itself.
        """
        client = Mock()
        client.query_builder.last_query.query_dict = {"mock": 0}
        from_dttm = Mock()
        to_dttm = Mock()
        ds = DruidDatasource(datasource_name="datasource")
        ds.get_having_filters = Mock(return_value=[])
        dim1 = DruidColumn(column_name="dim1")
        dim2 = DruidColumn(column_name="dim2")
        # Metric fixtures: one count, two doubleSum aggregations, and a
        # post-aggregation dividing sum1 by sum2.
        metrics_dict = {
            "count1":
            DruidMetric(
                metric_name="count1",
                metric_type="count",
                json=json.dumps({
                    "type": "count",
                    "name": "count1"
                }),
            ),
            "sum1":
            DruidMetric(
                metric_name="sum1",
                metric_type="doubleSum",
                json=json.dumps({
                    "type": "doubleSum",
                    "name": "sum1"
                }),
            ),
            "sum2":
            DruidMetric(
                metric_name="sum2",
                metric_type="doubleSum",
                json=json.dumps({
                    "type": "doubleSum",
                    "name": "sum2"
                }),
            ),
            "div1":
            DruidMetric(
                metric_name="div1",
                metric_type="postagg",
                json=json.dumps({
                    "fn":
                    "/",
                    "type":
                    "arithmetic",
                    "name":
                    "div1",
                    "fields": [
                        {
                            "fieldName": "sum1",
                            "type": "fieldAccess"
                        },
                        {
                            "fieldName": "sum2",
                            "type": "fieldAccess"
                        },
                    ],
                }),
            ),
        }
        ds.columns = [dim1, dim2]
        ds.metrics = list(metrics_dict.values())

        columns = ["dim1"]
        metrics = ["count1"]
        granularity = "all"
        # get the counts of the top 5 'dim1's, order by 'sum1'
        ds.run_query(
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            groupby=columns,
            timeseries_limit=5,
            timeseries_limit_metric="sum1",
            client=client,
            order_desc=True,
            filter=[],
        )
        # Single dimension + order_desc -> topn query; ordering by a
        # plain aggregation adds no post-aggregations.
        qry_obj = client.topn.call_args_list[0][1]
        self.assertEqual("dim1", qry_obj["dimension"])
        self.assertEqual("sum1", qry_obj["metric"])
        aggregations = qry_obj["aggregations"]
        post_aggregations = qry_obj["post_aggregations"]
        self.assertEqual({"count1", "sum1"}, set(aggregations.keys()))
        self.assertEqual(set(), set(post_aggregations.keys()))

        # get the counts of the top 5 'dim1's, order by 'div1'
        ds.run_query(
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            groupby=columns,
            timeseries_limit=5,
            timeseries_limit_metric="div1",
            client=client,
            order_desc=True,
            filter=[],
        )
        # Ordering by the post-agg pulls in its fieldAccess dependencies.
        qry_obj = client.topn.call_args_list[1][1]
        self.assertEqual("dim1", qry_obj["dimension"])
        self.assertEqual("div1", qry_obj["metric"])
        aggregations = qry_obj["aggregations"]
        post_aggregations = qry_obj["post_aggregations"]
        self.assertEqual({"count1", "sum1", "sum2"}, set(aggregations.keys()))
        self.assertEqual({"div1"}, set(post_aggregations.keys()))

        columns = ["dim1", "dim2"]
        # get the counts of the top 5 ['dim1', 'dim2']s, order by 'sum1'
        ds.run_query(
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            groupby=columns,
            timeseries_limit=5,
            timeseries_limit_metric="sum1",
            client=client,
            order_desc=True,
            filter=[],
        )
        # Two dimensions -> groupby query with a limit_spec ordering.
        qry_obj = client.groupby.call_args_list[0][1]
        self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
        self.assertEqual("sum1",
                         qry_obj["limit_spec"]["columns"][0]["dimension"])
        aggregations = qry_obj["aggregations"]
        post_aggregations = qry_obj["post_aggregations"]
        self.assertEqual({"count1", "sum1"}, set(aggregations.keys()))
        self.assertEqual(set(), set(post_aggregations.keys()))

        # get the counts of the top 5 ['dim1', 'dim2']s, order by 'div1'
        ds.run_query(
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            groupby=columns,
            timeseries_limit=5,
            timeseries_limit_metric="div1",
            client=client,
            order_desc=True,
            filter=[],
        )
        qry_obj = client.groupby.call_args_list[1][1]
        self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
        self.assertEqual("div1",
                         qry_obj["limit_spec"]["columns"][0]["dimension"])
        aggregations = qry_obj["aggregations"]
        post_aggregations = qry_obj["post_aggregations"]
        self.assertEqual({"count1", "sum1", "sum2"}, set(aggregations.keys()))
        self.assertEqual({"div1"}, set(post_aggregations.keys()))
Example #5
0
 def test_get_filters_keeps_trailing_spaces(self):
     """Trailing whitespace in an unquoted 'in' value must survive untouched."""
     column = DruidColumn(column_name="A")
     spaced_filter = {"col": "A", "op": "in", "val": ["a "]}
     result = DruidDatasource.get_filters([spaced_filter], [], {"A": column})
     self.assertEqual("a ", result.filter["filter"]["value"])
 def test_run_query_single_groupby(self):
     """Single-column groupby dispatch test (positional-argument variant).

     NOTE(review): this version passes groupby/metrics positionally as
     the first two run_query arguments — assumes an older run_query
     signature where groupby precedes metrics; confirm against the
     datasource implementation under test.
     Scenarios: order_desc + timeseries limit -> two topn calls;
     order_desc False -> one groupby call; a dimension-spec column ->
     pre-query uses the bare dimension, final query the full spec.
     """
     client = Mock()
     from_dttm = Mock()
     to_dttm = Mock()
     # The datetime mocks echo themselves from .replace() and serialize
     # to fixed strings, keeping the query interval deterministic.
     from_dttm.replace = Mock(return_value=from_dttm)
     to_dttm.replace = Mock(return_value=to_dttm)
     from_dttm.isoformat = Mock(return_value='from')
     to_dttm.isoformat = Mock(return_value='to')
     timezone = 'timezone'
     from_dttm.tzname = Mock(return_value=timezone)
     ds = DruidDatasource(datasource_name='datasource')
     metric1 = DruidMetric(metric_name='metric1')
     metric2 = DruidMetric(metric_name='metric2')
     ds.metrics = [metric1, metric2]
     col1 = DruidColumn(column_name='col1')
     col2 = DruidColumn(column_name='col2')
     ds.columns = [col1, col2]
     all_metrics = ['metric1']
     post_aggs = ['some_agg']
     # Stub aggregation resolution; this test targets query dispatch only.
     ds._metrics_and_post_aggs = Mock(return_value=(all_metrics, post_aggs))
     groupby = ['col1']
     metrics = ['metric1']
     ds.get_having_filters = Mock(return_value=[])
     client.query_builder.last_query.query_dict = {'mock': 0}
     # client.topn is called twice
     ds.run_query(
         groupby, metrics, None, from_dttm, to_dttm, timeseries_limit=100,
         client=client, order_desc=True, filter=[],
     )
     self.assertEqual(2, len(client.topn.call_args_list))
     self.assertEqual(0, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     # check that there is no dimensions entry
     called_args_pre = client.topn.call_args_list[0][1]
     self.assertNotIn('dimensions', called_args_pre)
     self.assertIn('dimension', called_args_pre)
     called_args = client.topn.call_args_list[1][1]
     self.assertIn('dimension', called_args)
     self.assertEqual('col1', called_args['dimension'])
     # not order_desc
     client = Mock()
     client.query_builder.last_query.query_dict = {'mock': 0}
     ds.run_query(
         groupby, metrics, None, from_dttm, to_dttm, client=client,
         order_desc=False, filter=[], row_limit=100,
     )
     # Without order_desc a groupby query (plural 'dimensions') is used.
     self.assertEqual(0, len(client.topn.call_args_list))
     self.assertEqual(1, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     self.assertIn('dimensions', client.groupby.call_args_list[0][1])
     self.assertEqual(['col1'], client.groupby.call_args_list[0][1]['dimensions'])
     # order_desc but timeseries and dimension spec
     # calls topn with single dimension spec 'dimension'
     spec = {'outputName': 'hello', 'dimension': 'matcho'}
     spec_json = json.dumps(spec)
     col3 = DruidColumn(column_name='col3', dimension_spec_json=spec_json)
     ds.columns.append(col3)
     groupby = ['col3']
     client = Mock()
     client.query_builder.last_query.query_dict = {'mock': 0}
     ds.run_query(
         groupby, metrics, None, from_dttm, to_dttm,
         client=client, order_desc=True, timeseries_limit=5,
         filter=[], row_limit=100,
     )
     self.assertEqual(2, len(client.topn.call_args_list))
     self.assertEqual(0, len(client.groupby.call_args_list))
     self.assertEqual(0, len(client.timeseries.call_args_list))
     self.assertIn('dimension', client.topn.call_args_list[0][1])
     self.assertIn('dimension', client.topn.call_args_list[1][1])
     # uses dimension for pre query and full spec for final query
     self.assertEqual('matcho', client.topn.call_args_list[0][1]['dimension'])
     self.assertEqual(spec, client.topn.call_args_list[1][1]['dimension'])
     def test_run_query_order_by_metrics(self):
         """Metric-ordering dependency test (positional-argument variant).

         NOTE(review): groupby is passed positionally before metrics —
         assumes an older run_query signature; confirm against the
         datasource implementation under test.
         Checks that ordering by a plain aggregation ('sum1') includes
         only the queried and ordering aggregations, while ordering by a
         post-aggregation ('div1') also pulls in its fieldAccess
         dependencies ('sum1', 'sum2'), for both topn (one dimension)
         and groupby (two dimensions) queries.
         """
         client = Mock()
         client.query_builder.last_query.query_dict = {'mock': 0}
         from_dttm = Mock()
         to_dttm = Mock()
         ds = DruidDatasource(datasource_name='datasource')
         ds.get_having_filters = Mock(return_value=[])
         dim1 = DruidColumn(column_name='dim1')
         dim2 = DruidColumn(column_name='dim2')
         # Metric fixtures: one count, two doubleSum aggregations, and a
         # post-aggregation dividing sum1 by sum2.
         metrics_dict = {
             'count1':
             DruidMetric(
                 metric_name='count1',
                 metric_type='count',
                 json=json.dumps({
                     'type': 'count',
                     'name': 'count1'
                 }),
             ),
             'sum1':
             DruidMetric(
                 metric_name='sum1',
                 metric_type='doubleSum',
                 json=json.dumps({
                     'type': 'doubleSum',
                     'name': 'sum1'
                 }),
             ),
             'sum2':
             DruidMetric(
                 metric_name='sum2',
                 metric_type='doubleSum',
                 json=json.dumps({
                     'type': 'doubleSum',
                     'name': 'sum2'
                 }),
             ),
             'div1':
             DruidMetric(
                 metric_name='div1',
                 metric_type='postagg',
                 json=json.dumps({
                     'fn':
                     '/',
                     'type':
                     'arithmetic',
                     'name':
                     'div1',
                     'fields': [
                         {
                             'fieldName': 'sum1',
                             'type': 'fieldAccess',
                         },
                         {
                             'fieldName': 'sum2',
                             'type': 'fieldAccess',
                         },
                     ],
                 }),
             ),
         }
         ds.columns = [dim1, dim2]
         ds.metrics = list(metrics_dict.values())

         groupby = ['dim1']
         metrics = ['count1']
         granularity = 'all'
         # get the counts of the top 5 'dim1's, order by 'sum1'
         ds.run_query(
             groupby,
             metrics,
             granularity,
             from_dttm,
             to_dttm,
             timeseries_limit=5,
             timeseries_limit_metric='sum1',
             client=client,
             order_desc=True,
             filter=[],
         )
         # Single dimension + order_desc -> topn; plain aggregation
         # ordering adds no post-aggregations.
         qry_obj = client.topn.call_args_list[0][1]
         self.assertEqual('dim1', qry_obj['dimension'])
         self.assertEqual('sum1', qry_obj['metric'])
         aggregations = qry_obj['aggregations']
         post_aggregations = qry_obj['post_aggregations']
         self.assertEqual({'count1', 'sum1'}, set(aggregations.keys()))
         self.assertEqual(set(), set(post_aggregations.keys()))

         # get the counts of the top 5 'dim1's, order by 'div1'
         ds.run_query(
             groupby,
             metrics,
             granularity,
             from_dttm,
             to_dttm,
             timeseries_limit=5,
             timeseries_limit_metric='div1',
             client=client,
             order_desc=True,
             filter=[],
         )
         # Post-agg ordering pulls in its fieldAccess dependencies.
         qry_obj = client.topn.call_args_list[1][1]
         self.assertEqual('dim1', qry_obj['dimension'])
         self.assertEqual('div1', qry_obj['metric'])
         aggregations = qry_obj['aggregations']
         post_aggregations = qry_obj['post_aggregations']
         self.assertEqual({'count1', 'sum1', 'sum2'}, set(aggregations.keys()))
         self.assertEqual({'div1'}, set(post_aggregations.keys()))

         groupby = ['dim1', 'dim2']
         # get the counts of the top 5 ['dim1', 'dim2']s, order by 'sum1'
         ds.run_query(
             groupby,
             metrics,
             granularity,
             from_dttm,
             to_dttm,
             timeseries_limit=5,
             timeseries_limit_metric='sum1',
             client=client,
             order_desc=True,
             filter=[],
         )
         # Two dimensions -> groupby query with a limit_spec ordering.
         qry_obj = client.groupby.call_args_list[0][1]
         self.assertEqual({'dim1', 'dim2'}, set(qry_obj['dimensions']))
         self.assertEqual('sum1',
                          qry_obj['limit_spec']['columns'][0]['dimension'])
         aggregations = qry_obj['aggregations']
         post_aggregations = qry_obj['post_aggregations']
         self.assertEqual({'count1', 'sum1'}, set(aggregations.keys()))
         self.assertEqual(set(), set(post_aggregations.keys()))

         # get the counts of the top 5 ['dim1', 'dim2']s, order by 'div1'
         ds.run_query(
             groupby,
             metrics,
             granularity,
             from_dttm,
             to_dttm,
             timeseries_limit=5,
             timeseries_limit_metric='div1',
             client=client,
             order_desc=True,
             filter=[],
         )
         qry_obj = client.groupby.call_args_list[1][1]
         self.assertEqual({'dim1', 'dim2'}, set(qry_obj['dimensions']))
         self.assertEqual('div1',
                          qry_obj['limit_spec']['columns'][0]['dimension'])
         aggregations = qry_obj['aggregations']
         post_aggregations = qry_obj['post_aggregations']
         self.assertEqual({'count1', 'sum1', 'sum2'}, set(aggregations.keys()))
         self.assertEqual({'div1'}, set(post_aggregations.keys()))
 def test_get_filters_extracts_values_in_quotes(self):
     """Surrounding whitespace and double quotes are both stripped from 'in' values."""
     column = DruidColumn(column_name='A')
     padded_quoted = {'col': 'A', 'op': 'in', 'val': ['  "a" ']}
     result = DruidDatasource.get_filters([padded_quoted], [], {'A': column})
     self.assertEqual('a', result.filter['filter']['value'])
 def test_get_filters_handles_none_for_string_types(self):
     """A None value in an equality filter should yield no filter object."""
     column = DruidColumn(column_name='A')
     none_filter = {'col': 'A', 'op': '==', 'val': None}
     result = DruidDatasource.get_filters([none_filter], [], {'A': column})
     self.assertIsNone(result)