def test_run_query_with_adhoc_metric(self):
        """Run a query whose only metric is an adhoc (SIMPLE/SUM) metric.

        No groupby columns are passed, and the assertions require a single
        ``client.timeseries`` call (and no ``topn``/``groupby`` calls) whose
        keyword arguments contain ``post_aggregations`` but no
        ``dimensions``.
        """
        druid_client = Mock()
        druid_client.query_builder = Mock()
        druid_client.query_builder.last_query = Mock()
        druid_client.query_builder.last_query.query_dict = {'mock': 0}

        # Mocked time bounds: replace() hands back the very same mock.
        start, end = Mock(), Mock()
        start.replace = Mock(return_value=start)
        end.replace = Mock(return_value=end)
        start.isoformat = Mock(return_value='from')
        end.isoformat = Mock(return_value='to')
        start.tzname = Mock(return_value='timezone')

        datasource = DruidDatasource(datasource_name='datasource')
        datasource.metrics = [
            DruidMetric(metric_name=name) for name in ('metric1', 'metric2')
        ]
        datasource.columns = [
            DruidColumn(column_name=name) for name in ('col1', 'col2')
        ]
        # Replace the datasource helpers with mocks returning fixed values.
        datasource._metrics_and_post_aggs = Mock(
            return_value=([], ['some_agg']),
        )
        datasource.get_having_filters = Mock(return_value=[])

        adhoc_metric = {
            'expressionType': 'SIMPLE',
            'column': {'type': 'DOUBLE', 'column_name': 'col1'},
            'aggregate': 'SUM',
            'label': 'My Adhoc Metric',
        }
        # An empty groupby list is passed; timeseries is the expected call.
        datasource.run_query(
            [],
            [adhoc_metric],
            None,
            start,
            end,
            client=druid_client,
            filter=[],
            row_limit=100,
        )

        expected_counts = (
            (druid_client.topn, 0),
            (druid_client.groupby, 0),
            (druid_client.timeseries, 1),
        )
        for endpoint, count in expected_counts:
            self.assertEqual(count, len(endpoint.call_args_list))

        # The timeseries keyword arguments must not carry dimensions.
        query_kwargs = druid_client.timeseries.call_args_list[0][1]
        self.assertNotIn('dimensions', query_kwargs)
        self.assertIn('post_aggregations', query_kwargs)
    def test_run_query_with_adhoc_metric(self):
        """Check that an adhoc metric without groupby uses ``timeseries``.

        The test passes one SIMPLE/SUM adhoc metric and an empty groupby
        list, then asserts that only ``client.timeseries`` was called and
        that its keyword arguments have ``post_aggregations`` but no
        ``dimensions``.
        """
        def fake_dttm(iso_value):
            # Datetime stand-in: replace() returns itself, isoformat() is fixed.
            dttm = Mock()
            dttm.replace = Mock(return_value=dttm)
            dttm.isoformat = Mock(return_value=iso_value)
            return dttm

        query_client = Mock()
        query_client.query_builder = Mock()
        query_client.query_builder.last_query = Mock()
        query_client.query_builder.last_query.query_dict = {"mock": 0}

        since = fake_dttm("from")
        until = fake_dttm("to")
        since.tzname = Mock(return_value="timezone")

        source = DruidDatasource(datasource_name="datasource")
        first_metric = DruidMetric(metric_name="metric1")
        second_metric = DruidMetric(metric_name="metric2")
        source.metrics = [first_metric, second_metric]
        first_col = DruidColumn(column_name="col1")
        second_col = DruidColumn(column_name="col2")
        source.columns = [first_col, second_col]
        source._metrics_and_post_aggs = Mock(return_value=([], ["some_agg"]))
        source.get_having_filters = Mock(return_value=[])

        adhoc_metrics = [
            {
                "expressionType": "SIMPLE",
                "column": {"type": "DOUBLE", "column_name": "col1"},
                "aggregate": "SUM",
                "label": "My Adhoc Metric",
            },
        ]
        no_groupby = []
        source.run_query(
            no_groupby, adhoc_metrics, None, since, until,
            client=query_client, filter=[], row_limit=100,
        )

        self.assertEqual(0, len(query_client.topn.call_args_list))
        self.assertEqual(0, len(query_client.groupby.call_args_list))
        self.assertEqual(1, len(query_client.timeseries.call_args_list))
        # Keyword arguments of the single timeseries call.
        _, sent_kwargs = query_client.timeseries.call_args_list[0]
        self.assertNotIn("dimensions", sent_kwargs)
        self.assertIn("post_aggregations", sent_kwargs)
 def test_run_query_multiple_groupby(self):
     """Run a query grouped by two columns.

     The assertions require exactly one ``client.groupby`` call, no
     ``topn`` or ``timeseries`` calls, and a ``dimensions`` keyword
     argument equal to the two requested columns.
     """
     druid_client = Mock()
     druid_client.query_builder = Mock()
     druid_client.query_builder.last_query = Mock()
     druid_client.query_builder.last_query.query_dict = {"mock": 0}

     # Mocked time bounds: replace() hands back the very same mock.
     start, end = Mock(), Mock()
     start.replace = Mock(return_value=start)
     end.replace = Mock(return_value=end)
     start.isoformat = Mock(return_value="from")
     end.isoformat = Mock(return_value="to")
     start.tzname = Mock(return_value="timezone")

     datasource = DruidDatasource(datasource_name="datasource")
     datasource.metrics = [
         DruidMetric(metric_name=name) for name in ("metric1", "metric2")
     ]
     datasource.columns = [
         DruidColumn(column_name=name) for name in ("col1", "col2")
     ]
     # Replace the datasource helpers with mocks returning fixed values.
     datasource._metrics_and_post_aggs = Mock(
         return_value=([], ["some_agg"]),
     )
     datasource.get_having_filters = Mock(return_value=[])

     # Two groupby columns: client.groupby is the expected call.
     datasource.run_query(
         ["col1", "col2"],
         ["metric1"],
         None,
         start,
         end,
         client=druid_client,
         row_limit=100,
         filter=[],
     )

     expected_counts = (
         (druid_client.topn, 0),
         (druid_client.groupby, 1),
         (druid_client.timeseries, 0),
     )
     for endpoint, count in expected_counts:
         self.assertEqual(count, len(endpoint.call_args_list))

     # The groupby call must list both columns as dimensions.
     query_kwargs = druid_client.groupby.call_args_list[0][1]
     self.assertIn("dimensions", query_kwargs)
     self.assertEqual(["col1", "col2"], query_kwargs["dimensions"])
 def test_run_query_multiple_groupby(self):
     """Check that grouping by two columns goes through ``client.groupby``.

     After the query the test asserts one ``groupby`` call, zero ``topn``
     and ``timeseries`` calls, and ``dimensions == ['col1', 'col2']`` in
     the keyword arguments of the groupby call.
     """
     def fake_dttm(iso_value):
         # Datetime stand-in: replace() returns itself, isoformat() is fixed.
         dttm = Mock()
         dttm.replace = Mock(return_value=dttm)
         dttm.isoformat = Mock(return_value=iso_value)
         return dttm

     query_client = Mock()
     query_client.query_builder = Mock()
     query_client.query_builder.last_query = Mock()
     query_client.query_builder.last_query.query_dict = {'mock': 0}

     since = fake_dttm('from')
     until = fake_dttm('to')
     since.tzname = Mock(return_value='timezone')

     source = DruidDatasource(datasource_name='datasource')
     first_metric = DruidMetric(metric_name='metric1')
     second_metric = DruidMetric(metric_name='metric2')
     source.metrics = [first_metric, second_metric]
     first_col = DruidColumn(column_name='col1')
     second_col = DruidColumn(column_name='col2')
     source.columns = [first_col, second_col]
     source._metrics_and_post_aggs = Mock(return_value=([], ['some_agg']))
     source.get_having_filters = Mock(return_value=[])

     group_columns = ['col1', 'col2']
     requested_metrics = ['metric1']
     # More than one groupby column is passed here.
     source.run_query(
         group_columns, requested_metrics, None, since, until,
         client=query_client, row_limit=100, filter=[],
     )

     self.assertEqual(0, len(query_client.topn.call_args_list))
     self.assertEqual(1, len(query_client.groupby.call_args_list))
     self.assertEqual(0, len(query_client.timeseries.call_args_list))
     # Both columns must be forwarded as the dimensions argument.
     _, sent_kwargs = query_client.groupby.call_args_list[0]
     self.assertIn('dimensions', sent_kwargs)
     self.assertEqual(['col1', 'col2'], sent_kwargs['dimensions'])
Example #5
0
    def test_run_query_with_adhoc_metric(self):
        """Query with a single adhoc metric and an empty groupby list.

        Asserts that ``client.timeseries`` is the only client method
        called (once), and that the call carries ``post_aggregations`` but
        no ``dimensions`` keyword argument.
        """
        # Client mock with a fixed query_dict on the last built query.
        mock_client = Mock()
        mock_client.query_builder = Mock()
        mock_client.query_builder.last_query = Mock()
        mock_client.query_builder.last_query.query_dict = {'mock': 0}

        # Time-range mocks; replace() yields the same object back.
        begin = Mock()
        finish = Mock()
        begin.replace = Mock(return_value=begin)
        finish.replace = Mock(return_value=finish)
        begin.isoformat = Mock(return_value='from')
        finish.isoformat = Mock(return_value='to')
        tz_name = 'timezone'
        begin.tzname = Mock(return_value=tz_name)

        druid_ds = DruidDatasource(datasource_name='datasource')
        druid_ds.metrics = [
            DruidMetric(metric_name='metric1'),
            DruidMetric(metric_name='metric2'),
        ]
        druid_ds.columns = [
            DruidColumn(column_name='col1'),
            DruidColumn(column_name='col2'),
        ]
        stubbed_result = ([], ['some_agg'])
        druid_ds._metrics_and_post_aggs = Mock(return_value=stubbed_result)
        druid_ds.get_having_filters = Mock(return_value=[])

        adhoc = {
            'expressionType': 'SIMPLE',
            'column': {'type': 'DOUBLE', 'column_name': 'col1'},
            'aggregate': 'SUM',
            'label': 'My Adhoc Metric',
        }
        druid_ds.run_query(
            [],
            [adhoc],
            None,
            begin,
            finish,
            client=mock_client,
            filter=[],
            row_limit=100,
        )

        topn_calls = mock_client.topn.call_args_list
        groupby_calls = mock_client.groupby.call_args_list
        timeseries_calls = mock_client.timeseries.call_args_list
        self.assertEqual(0, len(topn_calls))
        self.assertEqual(0, len(groupby_calls))
        self.assertEqual(1, len(timeseries_calls))
        # Inspect the keyword arguments of the timeseries call.
        timeseries_kwargs = timeseries_calls[0][1]
        self.assertNotIn('dimensions', timeseries_kwargs)
        self.assertIn('post_aggregations', timeseries_kwargs)
 def test_run_query_multiple_groupby(self):
     """Query grouped by ``col1`` and ``col2``.

     Asserts that ``client.groupby`` is the only client method called
     (once) and that its ``dimensions`` keyword argument equals the two
     groupby columns.
     """
     # Client mock with a fixed query_dict on the last built query.
     mock_client = Mock()
     mock_client.query_builder = Mock()
     mock_client.query_builder.last_query = Mock()
     mock_client.query_builder.last_query.query_dict = {'mock': 0}

     # Time-range mocks; replace() yields the same object back.
     begin = Mock()
     finish = Mock()
     begin.replace = Mock(return_value=begin)
     finish.replace = Mock(return_value=finish)
     begin.isoformat = Mock(return_value='from')
     finish.isoformat = Mock(return_value='to')
     tz_name = 'timezone'
     begin.tzname = Mock(return_value=tz_name)

     druid_ds = DruidDatasource(datasource_name='datasource')
     druid_ds.metrics = [
         DruidMetric(metric_name='metric1'),
         DruidMetric(metric_name='metric2'),
     ]
     druid_ds.columns = [
         DruidColumn(column_name='col1'),
         DruidColumn(column_name='col2'),
     ]
     stubbed_result = ([], ['some_agg'])
     druid_ds._metrics_and_post_aggs = Mock(return_value=stubbed_result)
     druid_ds.get_having_filters = Mock(return_value=[])

     # Two groupby columns are passed: groupby is the expected client call.
     druid_ds.run_query(
         ['col1', 'col2'],
         ['metric1'],
         None,
         begin,
         finish,
         client=mock_client,
         row_limit=100,
         filter=[],
     )

     topn_calls = mock_client.topn.call_args_list
     groupby_calls = mock_client.groupby.call_args_list
     timeseries_calls = mock_client.timeseries.call_args_list
     self.assertEqual(0, len(topn_calls))
     self.assertEqual(1, len(groupby_calls))
     self.assertEqual(0, len(timeseries_calls))
     # The dimensions entry must be present and hold both columns.
     groupby_kwargs = groupby_calls[0][1]
     self.assertIn('dimensions', groupby_kwargs)
     self.assertEqual(['col1', 'col2'], groupby_kwargs['dimensions'])
    def test_run_query_order_by_metrics(self):
        """Check the aggregations sent when ordering by a limit metric.

        Four queries are run (one or two groupby columns, ordered by
        ``sum1`` or by the post-aggregation ``div1``). For each, the test
        inspects the keyword arguments given to ``client.topn`` (single
        column) or ``client.groupby`` (two columns) and asserts which
        aggregations and post-aggregations were included.
        """
        client = Mock()
        client.query_builder.last_query.query_dict = {"mock": 0}
        start = Mock()
        end = Mock()
        datasource = DruidDatasource(datasource_name="datasource")
        datasource.get_having_filters = Mock(return_value=[])
        dim1 = DruidColumn(column_name="dim1")
        dim2 = DruidColumn(column_name="dim2")

        count1 = DruidMetric(
            metric_name="count1",
            metric_type="count",
            json=json.dumps({"type": "count", "name": "count1"}),
        )
        sum1 = DruidMetric(
            metric_name="sum1",
            metric_type="doubleSum",
            json=json.dumps({"type": "doubleSum", "name": "sum1"}),
        )
        sum2 = DruidMetric(
            metric_name="sum2",
            metric_type="doubleSum",
            json=json.dumps({"type": "doubleSum", "name": "sum2"}),
        )
        # Post-aggregation dividing sum1 by sum2.
        div1 = DruidMetric(
            metric_name="div1",
            metric_type="postagg",
            json=json.dumps({
                "fn": "/",
                "type": "arithmetic",
                "name": "div1",
                "fields": [
                    {"fieldName": "sum1", "type": "fieldAccess"},
                    {"fieldName": "sum2", "type": "fieldAccess"},
                ],
            }),
        )
        datasource.columns = [dim1, dim2]
        datasource.metrics = [count1, sum1, sum2, div1]

        metrics = ["count1"]
        granularity = "all"

        def run_top5(groupby_cols, limit_metric):
            # Top-5 query over groupby_cols, ordered by limit_metric (desc).
            datasource.run_query(
                groupby_cols,
                metrics,
                granularity,
                start,
                end,
                timeseries_limit=5,
                timeseries_limit_metric=limit_metric,
                client=client,
                order_desc=True,
                filter=[],
            )

        def check_aggs(query, expected_aggs, expected_post_aggs):
            # Compare the aggregation / post-aggregation names in the query.
            aggregations = query["aggregations"]
            post_aggregations = query["post_aggregations"]
            self.assertEqual(expected_aggs, set(aggregations.keys()))
            self.assertEqual(expected_post_aggs, set(post_aggregations.keys()))

        one_dim = ["dim1"]

        # counts of the top 5 'dim1's, ordered by 'sum1'
        run_top5(one_dim, "sum1")
        query = client.topn.call_args_list[0][1]
        self.assertEqual("dim1", query["dimension"])
        self.assertEqual("sum1", query["metric"])
        check_aggs(query, {"count1", "sum1"}, set())

        # counts of the top 5 'dim1's, ordered by 'div1'
        run_top5(one_dim, "div1")
        query = client.topn.call_args_list[1][1]
        self.assertEqual("dim1", query["dimension"])
        self.assertEqual("div1", query["metric"])
        check_aggs(query, {"count1", "sum1", "sum2"}, {"div1"})

        two_dims = ["dim1", "dim2"]

        # counts of the top 5 ['dim1', 'dim2']s, ordered by 'sum1'
        run_top5(two_dims, "sum1")
        query = client.groupby.call_args_list[0][1]
        self.assertEqual({"dim1", "dim2"}, set(query["dimensions"]))
        self.assertEqual(
            "sum1", query["limit_spec"]["columns"][0]["dimension"])
        check_aggs(query, {"count1", "sum1"}, set())

        # counts of the top 5 ['dim1', 'dim2']s, ordered by 'div1'
        run_top5(two_dims, "div1")
        query = client.groupby.call_args_list[1][1]
        self.assertEqual({"dim1", "dim2"}, set(query["dimensions"]))
        self.assertEqual(
            "div1", query["limit_spec"]["columns"][0]["dimension"])
        check_aggs(query, {"count1", "sum1", "sum2"}, {"div1"})
 def test_run_query_single_groupby(self):
     """Run single-column groupby queries under three configurations.

     1. ``order_desc=True`` with a timeseries limit: two ``topn`` calls.
     2. ``order_desc=False``: one ``groupby`` call with ``dimensions``.
     3. ``order_desc=True`` on a column with a dimension spec: two
        ``topn`` calls, the first with the spec's plain dimension and the
        second with the full spec.
     """
     def assert_call_counts(mocked, topn, groupby_calls, timeseries):
         # Compare how often each client query method was invoked.
         self.assertEqual(topn, len(mocked.topn.call_args_list))
         self.assertEqual(groupby_calls, len(mocked.groupby.call_args_list))
         self.assertEqual(timeseries, len(mocked.timeseries.call_args_list))

     def new_client():
         # Fresh client mock with a fixed query_dict on the last query.
         mocked = Mock()
         mocked.query_builder.last_query.query_dict = {"mock": 0}
         return mocked

     # Mocked time bounds: replace() hands back the very same mock.
     start, end = Mock(), Mock()
     start.replace = Mock(return_value=start)
     end.replace = Mock(return_value=end)
     start.isoformat = Mock(return_value="from")
     end.isoformat = Mock(return_value="to")
     start.tzname = Mock(return_value="timezone")

     datasource = DruidDatasource(datasource_name="datasource")
     datasource.metrics = [
         DruidMetric(metric_name=name) for name in ("metric1", "metric2")
     ]
     datasource.columns = [
         DruidColumn(column_name=name) for name in ("col1", "col2")
     ]
     datasource._metrics_and_post_aggs = Mock(
         return_value=(["metric1"], ["some_agg"]),
     )
     datasource.get_having_filters = Mock(return_value=[])

     groupby = ["col1"]
     metrics = ["metric1"]

     # Scenario 1: order_desc with a limit -> client.topn is called twice
     client = new_client()
     datasource.run_query(
         groupby, metrics, None, start, end,
         timeseries_limit=100, client=client, order_desc=True, filter=[],
     )
     assert_call_counts(client, 2, 0, 0)
     # the first (pre) call carries 'dimension', not 'dimensions'
     pre_kwargs = client.topn.call_args_list[0][1]
     self.assertNotIn("dimensions", pre_kwargs)
     self.assertIn("dimension", pre_kwargs)
     final_kwargs = client.topn.call_args_list[1][1]
     self.assertIn("dimension", final_kwargs)
     self.assertEqual("col1", final_kwargs["dimension"])

     # Scenario 2: not order_desc -> a single client.groupby call
     client = new_client()
     datasource.run_query(
         groupby, metrics, None, start, end,
         client=client, order_desc=False, filter=[], row_limit=100,
     )
     assert_call_counts(client, 0, 1, 0)
     groupby_kwargs = client.groupby.call_args_list[0][1]
     self.assertIn("dimensions", groupby_kwargs)
     self.assertEqual(
         ["col1"], client.groupby.call_args_list[0][1]["dimensions"])

     # Scenario 3: order_desc with timeseries limit and a dimension spec
     # -> topn is called with the single dimension spec as 'dimension'
     spec = {"outputName": "hello", "dimension": "matcho"}
     datasource.columns.append(
         DruidColumn(
             column_name="col3", dimension_spec_json=json.dumps(spec)),
     )
     groupby = ["col3"]
     client = new_client()
     datasource.run_query(
         groupby, metrics, None, start, end,
         client=client, order_desc=True, timeseries_limit=5,
         filter=[], row_limit=100,
     )
     assert_call_counts(client, 2, 0, 0)
     self.assertIn("dimension", client.topn.call_args_list[0][1])
     self.assertIn("dimension", client.topn.call_args_list[1][1])
     # uses dimension for pre query and full spec for final query
     self.assertEqual(
         "matcho", client.topn.call_args_list[0][1]["dimension"])
     self.assertEqual(spec, client.topn.call_args_list[1][1]["dimension"])
    def test_run_query_order_by_metrics(self):
        """Verify aggregations selected when a limit metric drives ordering.

        Runs top-5 queries ordered by ``sum1`` and by the post-aggregation
        ``div1``, first grouped by one column (inspecting ``client.topn``)
        and then by two columns (inspecting ``client.groupby``), asserting
        the aggregation and post-aggregation names passed each time.
        """
        druid_client = Mock()
        druid_client.query_builder.last_query.query_dict = {'mock': 0}
        since = Mock()
        until = Mock()
        source = DruidDatasource(datasource_name='datasource')
        source.get_having_filters = Mock(return_value=[])
        first_dim = DruidColumn(column_name='dim1')
        second_dim = DruidColumn(column_name='dim2')

        # (metric name, metric type, JSON definition) for every metric.
        metric_defs = (
            ('count1', 'count', {'type': 'count', 'name': 'count1'}),
            ('sum1', 'doubleSum', {'type': 'doubleSum', 'name': 'sum1'}),
            ('sum2', 'doubleSum', {'type': 'doubleSum', 'name': 'sum2'}),
            ('div1', 'postagg', {
                'fn': '/',
                'type': 'arithmetic',
                'name': 'div1',
                'fields': [
                    {'fieldName': 'sum1', 'type': 'fieldAccess'},
                    {'fieldName': 'sum2', 'type': 'fieldAccess'},
                ],
            }),
        )
        source.columns = [first_dim, second_dim]
        source.metrics = [
            DruidMetric(
                metric_name=metric_name,
                metric_type=metric_type,
                json=json.dumps(definition),
            )
            for metric_name, metric_type, definition in metric_defs
        ]

        requested = ['count1']

        def query_top5(columns, order_metric):
            # Issue a descending top-5 query limited by order_metric.
            source.run_query(
                columns,
                requested,
                'all',
                since,
                until,
                timeseries_limit=5,
                timeseries_limit_metric=order_metric,
                client=druid_client,
                order_desc=True,
                filter=[],
            )

        def assert_agg_names(sent, agg_names, post_agg_names):
            # Both entries are read before either assertion runs.
            aggs = sent['aggregations']
            post_aggs = sent['post_aggregations']
            self.assertEqual(agg_names, set(aggs.keys()))
            self.assertEqual(post_agg_names, set(post_aggs.keys()))

        # Single groupby column: the topn calls are inspected.
        single = ['dim1']
        topn_cases = (
            ('sum1', {'count1', 'sum1'}, set()),
            ('div1', {'count1', 'sum1', 'sum2'}, {'div1'}),
        )
        for index, (order_metric, aggs, post_aggs) in enumerate(topn_cases):
            query_top5(single, order_metric)
            sent = druid_client.topn.call_args_list[index][1]
            self.assertEqual('dim1', sent['dimension'])
            self.assertEqual(order_metric, sent['metric'])
            assert_agg_names(sent, aggs, post_aggs)

        # Two groupby columns: the groupby calls are inspected.
        pair = ['dim1', 'dim2']
        groupby_cases = (
            ('sum1', {'count1', 'sum1'}, set()),
            ('div1', {'count1', 'sum1', 'sum2'}, {'div1'}),
        )
        for index, (order_metric, aggs, post_aggs) in enumerate(groupby_cases):
            query_top5(pair, order_metric)
            sent = druid_client.groupby.call_args_list[index][1]
            self.assertEqual({'dim1', 'dim2'}, set(sent['dimensions']))
            self.assertEqual(
                order_metric,
                sent['limit_spec']['columns'][0]['dimension'],
            )
            assert_agg_names(sent, aggs, post_aggs)
 def test_run_query_single_groupby(self):
     """Exercise ``run_query`` with exactly one groupby column.

     Three runs are checked: descending order with a timeseries limit
     (two ``topn`` calls), non-descending order (one ``groupby`` call),
     and descending order on a column carrying a dimension spec (two
     ``topn`` calls using the spec).
     """
     def fake_dttm(iso_value):
         # Datetime stand-in: replace() returns itself, isoformat() is fixed.
         dttm = Mock()
         dttm.replace = Mock(return_value=dttm)
         dttm.isoformat = Mock(return_value=iso_value)
         return dttm

     def count_calls(mocked):
         # (topn, groupby, timeseries) call counts for a client mock.
         return (
             len(mocked.topn.call_args_list),
             len(mocked.groupby.call_args_list),
             len(mocked.timeseries.call_args_list),
         )

     since = fake_dttm('from')
     until = fake_dttm('to')
     since.tzname = Mock(return_value='timezone')

     source = DruidDatasource(datasource_name='datasource')
     first_metric = DruidMetric(metric_name='metric1')
     second_metric = DruidMetric(metric_name='metric2')
     source.metrics = [first_metric, second_metric]
     first_col = DruidColumn(column_name='col1')
     second_col = DruidColumn(column_name='col2')
     source.columns = [first_col, second_col]
     source._metrics_and_post_aggs = Mock(
         return_value=(['metric1'], ['some_agg']),
     )
     source.get_having_filters = Mock(return_value=[])

     columns = ['col1']
     requested = ['metric1']

     # --- descending order with a limit: client.topn is called twice
     druid_client = Mock()
     druid_client.query_builder.last_query.query_dict = {'mock': 0}
     source.run_query(
         columns,
         requested,
         None,
         since,
         until,
         timeseries_limit=100,
         client=druid_client,
         order_desc=True,
         filter=[],
     )
     topn_count, groupby_count, timeseries_count = count_calls(druid_client)
     self.assertEqual(2, topn_count)
     self.assertEqual(0, groupby_count)
     self.assertEqual(0, timeseries_count)
     # the first topn call has a 'dimension' but no 'dimensions' entry
     _, first_kwargs = druid_client.topn.call_args_list[0]
     self.assertNotIn('dimensions', first_kwargs)
     self.assertIn('dimension', first_kwargs)
     _, second_kwargs = druid_client.topn.call_args_list[1]
     self.assertIn('dimension', second_kwargs)
     self.assertEqual('col1', second_kwargs['dimension'])

     # --- not order_desc: one client.groupby call
     druid_client = Mock()
     druid_client.query_builder.last_query.query_dict = {'mock': 0}
     source.run_query(
         columns,
         requested,
         None,
         since,
         until,
         client=druid_client,
         order_desc=False,
         filter=[],
         row_limit=100,
     )
     topn_count, groupby_count, timeseries_count = count_calls(druid_client)
     self.assertEqual(0, topn_count)
     self.assertEqual(1, groupby_count)
     self.assertEqual(0, timeseries_count)
     self.assertIn('dimensions', druid_client.groupby.call_args_list[0][1])
     self.assertEqual(
         ['col1'],
         druid_client.groupby.call_args_list[0][1]['dimensions'],
     )

     # --- order_desc but timeseries and dimension spec:
     # calls topn with single dimension spec 'dimension'
     spec = {'outputName': 'hello', 'dimension': 'matcho'}
     spec_column = DruidColumn(
         column_name='col3',
         dimension_spec_json=json.dumps(spec),
     )
     source.columns.append(spec_column)
     columns = ['col3']
     druid_client = Mock()
     druid_client.query_builder.last_query.query_dict = {'mock': 0}
     source.run_query(
         columns,
         requested,
         None,
         since,
         until,
         client=druid_client,
         order_desc=True,
         timeseries_limit=5,
         filter=[],
         row_limit=100,
     )
     topn_count, groupby_count, timeseries_count = count_calls(druid_client)
     self.assertEqual(2, topn_count)
     self.assertEqual(0, groupby_count)
     self.assertEqual(0, timeseries_count)
     self.assertIn('dimension', druid_client.topn.call_args_list[0][1])
     self.assertIn('dimension', druid_client.topn.call_args_list[1][1])
     # uses dimension for pre query and full spec for final query
     self.assertEqual(
         'matcho',
         druid_client.topn.call_args_list[0][1]['dimension'],
     )
     self.assertEqual(
         spec,
         druid_client.topn.call_args_list[1][1]['dimension'],
     )
 def test_run_query_single_groupby(self):
     """Single groupby column: check which client method gets called.

     Covers descending order with a timeseries limit (``topn`` twice),
     ascending order (``groupby`` once), and descending order on a column
     with a dimension spec (``topn`` twice, plain dimension first and the
     full spec second).
     """
     # Time-range mocks; replace() yields the same object back.
     begin = Mock()
     finish = Mock()
     begin.replace = Mock(return_value=begin)
     finish.replace = Mock(return_value=finish)
     begin.isoformat = Mock(return_value='from')
     finish.isoformat = Mock(return_value='to')
     tz_name = 'timezone'
     begin.tzname = Mock(return_value=tz_name)

     druid_ds = DruidDatasource(datasource_name='datasource')
     druid_ds.metrics = [
         DruidMetric(metric_name='metric1'),
         DruidMetric(metric_name='metric2'),
     ]
     druid_ds.columns = [
         DruidColumn(column_name='col1'),
         DruidColumn(column_name='col2'),
     ]
     stubbed_result = (['metric1'], ['some_agg'])
     druid_ds._metrics_and_post_aggs = Mock(return_value=stubbed_result)
     druid_ds.get_having_filters = Mock(return_value=[])

     by_columns = ['col1']
     wanted_metrics = ['metric1']

     # First run: client.topn is called twice
     mock_client = Mock()
     mock_client.query_builder.last_query.query_dict = {'mock': 0}
     druid_ds.run_query(
         by_columns,
         wanted_metrics,
         None,
         begin,
         finish,
         timeseries_limit=100,
         client=mock_client,
         order_desc=True,
         filter=[],
     )
     topn_calls = mock_client.topn.call_args_list
     self.assertEqual(2, len(topn_calls))
     self.assertEqual(0, len(mock_client.groupby.call_args_list))
     self.assertEqual(0, len(mock_client.timeseries.call_args_list))
     # the pre-query call has 'dimension' and no 'dimensions' entry
     pre_kwargs = topn_calls[0][1]
     self.assertNotIn('dimensions', pre_kwargs)
     self.assertIn('dimension', pre_kwargs)
     final_kwargs = topn_calls[1][1]
     self.assertIn('dimension', final_kwargs)
     self.assertEqual('col1', final_kwargs['dimension'])

     # Second run: not order_desc
     mock_client = Mock()
     mock_client.query_builder.last_query.query_dict = {'mock': 0}
     druid_ds.run_query(
         by_columns,
         wanted_metrics,
         None,
         begin,
         finish,
         client=mock_client,
         order_desc=False,
         filter=[],
         row_limit=100,
     )
     groupby_calls = mock_client.groupby.call_args_list
     self.assertEqual(0, len(mock_client.topn.call_args_list))
     self.assertEqual(1, len(groupby_calls))
     self.assertEqual(0, len(mock_client.timeseries.call_args_list))
     self.assertIn('dimensions', groupby_calls[0][1])
     self.assertEqual(['col1'], groupby_calls[0][1]['dimensions'])

     # Third run: order_desc but timeseries and dimension spec;
     # calls topn with single dimension spec 'dimension'
     spec = {'outputName': 'hello', 'dimension': 'matcho'}
     encoded_spec = json.dumps(spec)
     druid_ds.columns.append(
         DruidColumn(column_name='col3', dimension_spec_json=encoded_spec),
     )
     by_columns = ['col3']
     mock_client = Mock()
     mock_client.query_builder.last_query.query_dict = {'mock': 0}
     druid_ds.run_query(
         by_columns,
         wanted_metrics,
         None,
         begin,
         finish,
         client=mock_client,
         order_desc=True,
         timeseries_limit=5,
         filter=[],
         row_limit=100,
     )
     topn_calls = mock_client.topn.call_args_list
     self.assertEqual(2, len(topn_calls))
     self.assertEqual(0, len(mock_client.groupby.call_args_list))
     self.assertEqual(0, len(mock_client.timeseries.call_args_list))
     self.assertIn('dimension', topn_calls[0][1])
     self.assertIn('dimension', topn_calls[1][1])
     # uses dimension for pre query and full spec for final query
     self.assertEqual('matcho', topn_calls[0][1]['dimension'])
     self.assertEqual(spec, topn_calls[1][1]['dimension'])
Exemple #12
0
    def test_run_query_order_by_metrics(self):
        """Check the aggregations sent to Druid when ordering by a limit metric.

        ``count1`` is always the only requested metric, while the ordering
        metric (``timeseries_limit_metric``) is either the plain aggregation
        ``sum1`` or the post-aggregation ``div1`` (``sum1 / sum2``).  The test
        expects the ordering metric, plus whatever it depends on, to be added
        to the query.  It inspects ``client.topn`` for a single groupby
        dimension and ``client.groupby`` for two dimensions.
        """
        druid_client = Mock()
        druid_client.query_builder.last_query.query_dict = {'mock': 0}
        start, end = Mock(), Mock()
        datasource = DruidDatasource(datasource_name='datasource')
        datasource.get_having_filters = Mock(return_value=[])
        columns = [DruidColumn(column_name=name) for name in ('dim1', 'dim2')]

        def aggregator(name, druid_type):
            # Plain Druid aggregator whose JSON spec mirrors its name and type.
            return DruidMetric(
                metric_name=name,
                metric_type=druid_type,
                json=json.dumps({'type': druid_type, 'name': name}),
            )

        def field_access(name):
            # Post-aggregation operand referring to an existing aggregator.
            return {'fieldName': name, 'type': 'fieldAccess'}

        count_metric = aggregator('count1', 'count')
        first_sum = aggregator('sum1', 'doubleSum')
        second_sum = aggregator('sum2', 'doubleSum')
        ratio_metric = DruidMetric(
            metric_name='div1',
            metric_type='postagg',
            json=json.dumps({
                'fn': '/',
                'type': 'arithmetic',
                'name': 'div1',
                'fields': [field_access('sum1'), field_access('sum2')],
            }),
        )
        datasource.columns = columns
        datasource.metrics = [count_metric, first_sum, second_sum, ratio_metric]

        requested_metrics = ['count1']

        def run_top_five(dimensions, order_metric):
            # Top 5 rows for ``dimensions`` at 'all' granularity, ordered
            # (descending) by ``order_metric``.
            datasource.run_query(
                dimensions, requested_metrics, 'all', start, end,
                timeseries_limit=5, timeseries_limit_metric=order_metric,
                client=druid_client, order_desc=True, filter=[],
            )

        def check_aggregations(query_kwargs, expected_aggs, expected_post_aggs):
            # Look up both entries before asserting on either of them.
            aggs = query_kwargs['aggregations']
            post_aggs = query_kwargs['post_aggregations']
            self.assertEqual(expected_aggs, set(aggs.keys()))
            self.assertEqual(expected_post_aggs, set(post_aggs.keys()))

        # (ordering metric, expected aggregation names, expected post-agg names)
        scenarios = (
            ('sum1', {'count1', 'sum1'}, set()),
            ('div1', {'count1', 'sum1', 'sum2'}, {'div1'}),
        )

        # counts of the top 5 'dim1's: inspected through client.topn
        one_dimension = ['dim1']
        for position, (order_metric, aggs, post_aggs) in enumerate(scenarios):
            run_top_five(one_dimension, order_metric)
            topn_kwargs = druid_client.topn.call_args_list[position][1]
            self.assertEqual('dim1', topn_kwargs['dimension'])
            self.assertEqual(order_metric, topn_kwargs['metric'])
            check_aggregations(topn_kwargs, aggs, post_aggs)

        # counts of the top 5 ['dim1', 'dim2']s: inspected through client.groupby
        two_dimensions = ['dim1', 'dim2']
        for position, (order_metric, aggs, post_aggs) in enumerate(scenarios):
            run_top_five(two_dimensions, order_metric)
            groupby_kwargs = druid_client.groupby.call_args_list[position][1]
            self.assertEqual({'dim1', 'dim2'}, set(groupby_kwargs['dimensions']))
            ordering_column = groupby_kwargs['limit_spec']['columns'][0]
            self.assertEqual(order_metric, ordering_column['dimension'])
            check_aggregations(groupby_kwargs, aggs, post_aggs)