def test_get_filters_handles_none_for_string_types(self):
    filtr = {"col": "A", "op": "==", "val": None}
    col = DruidColumn(column_name="A")
    column_dict = {"A": col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertIsNone(res)

def test_get_filters_extracts_values_in_quotes(self):
    filtr = {"col": "A", "op": "in", "val": ['"a"']}
    col = DruidColumn(column_name="A")
    column_dict = {"A": col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual("a", res.filter["filter"]["value"])

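# The assertion above walks res.filter["filter"]["value"], i.e. get_filters
# returns a pydruid-style Filter whose .filter attribute wraps Druid's native
# filter JSON. A minimal sketch of the structure being probed; the "selector"
# type shown here is an assumption for illustration (a single-value "in"
# filter), not something this test asserts:
_example_filter_payload = {  # hypothetical payload, for illustration only
    "filter": {"type": "selector", "dimension": "A", "value": "a"}
}
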
def test_run_query_single_groupby(self):
    client = Mock()
    from_dttm = Mock()
    to_dttm = Mock()
    from_dttm.replace = Mock(return_value=from_dttm)
    to_dttm.replace = Mock(return_value=to_dttm)
    from_dttm.isoformat = Mock(return_value="from")
    to_dttm.isoformat = Mock(return_value="to")
    timezone = "timezone"
    from_dttm.tzname = Mock(return_value=timezone)
    ds = DruidDatasource(datasource_name="datasource")
    metric1 = DruidMetric(metric_name="metric1")
    metric2 = DruidMetric(metric_name="metric2")
    ds.metrics = [metric1, metric2]
    col1 = DruidColumn(column_name="col1")
    col2 = DruidColumn(column_name="col2")
    ds.columns = [col1, col2]
    aggs = ["metric1"]
    post_aggs = ["some_agg"]
    ds._metrics_and_post_aggs = Mock(return_value=(aggs, post_aggs))
    groupby = ["col1"]
    metrics = ["metric1"]
    ds.get_having_filters = Mock(return_value=[])
    client.query_builder.last_query.query_dict = {"mock": 0}
    # client.topn is called twice
    ds.run_query(
        metrics,
        None,
        from_dttm,
        to_dttm,
        groupby=groupby,
        timeseries_limit=100,
        client=client,
        order_desc=True,
        filter=[],
    )
    self.assertEqual(2, len(client.topn.call_args_list))
    self.assertEqual(0, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    # check that there is no dimensions entry
    called_args_pre = client.topn.call_args_list[0][1]
    self.assertNotIn("dimensions", called_args_pre)
    self.assertIn("dimension", called_args_pre)
    called_args = client.topn.call_args_list[1][1]
    self.assertIn("dimension", called_args)
    self.assertEqual("col1", called_args["dimension"])
    # not order_desc
    client = Mock()
    client.query_builder.last_query.query_dict = {"mock": 0}
    ds.run_query(
        metrics,
        None,
        from_dttm,
        to_dttm,
        groupby=groupby,
        client=client,
        order_desc=False,
        filter=[],
        row_limit=100,
    )
    self.assertEqual(0, len(client.topn.call_args_list))
    self.assertEqual(1, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    self.assertIn("dimensions", client.groupby.call_args_list[0][1])
    self.assertEqual(["col1"], client.groupby.call_args_list[0][1]["dimensions"])
    # order_desc but timeseries and dimension spec
    # calls topn with single dimension spec 'dimension'
    spec = {"outputName": "hello", "dimension": "matcho"}
    spec_json = json.dumps(spec)
    col3 = DruidColumn(column_name="col3", dimension_spec_json=spec_json)
    ds.columns.append(col3)
    groupby = ["col3"]
    client = Mock()
    client.query_builder.last_query.query_dict = {"mock": 0}
    ds.run_query(
        metrics,
        None,
        from_dttm,
        to_dttm,
        groupby=groupby,
        client=client,
        order_desc=True,
        timeseries_limit=5,
        filter=[],
        row_limit=100,
    )
    self.assertEqual(2, len(client.topn.call_args_list))
    self.assertEqual(0, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    self.assertIn("dimension", client.topn.call_args_list[0][1])
    self.assertIn("dimension", client.topn.call_args_list[1][1])
    # uses dimension for pre query and full spec for final query
    self.assertEqual("matcho", client.topn.call_args_list[0][1]["dimension"])
    self.assertEqual(spec, client.topn.call_args_list[1][1]["dimension"])

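# A rough sketch (hypothetical helper, not DruidDatasource's real code) of the
# dispatch rule the assertions above encode: a single groupby dimension with
# order_desc and a series limit goes through client.topn twice (a pre-query to
# find the top dimension values, then the final query), while order_desc=False
# falls back to a single client.groupby call; client.timeseries is never used
# once a groupby column is present.
def _expected_client_calls(num_groupby_cols, order_desc, timeseries_limit):
    if num_groupby_cols == 1 and order_desc and timeseries_limit:
        return {"topn": 2, "groupby": 0, "timeseries": 0}
    return {"topn": 0, "groupby": 1, "timeseries": 0}
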
def test_run_query_order_by_metrics(self):
    client = Mock()
    client.query_builder.last_query.query_dict = {"mock": 0}
    from_dttm = Mock()
    to_dttm = Mock()
    ds = DruidDatasource(datasource_name="datasource")
    ds.get_having_filters = Mock(return_value=[])
    dim1 = DruidColumn(column_name="dim1")
    dim2 = DruidColumn(column_name="dim2")
    metrics_dict = {
        "count1": DruidMetric(
            metric_name="count1",
            metric_type="count",
            json=json.dumps({"type": "count", "name": "count1"}),
        ),
        "sum1": DruidMetric(
            metric_name="sum1",
            metric_type="doubleSum",
            json=json.dumps({"type": "doubleSum", "name": "sum1"}),
        ),
        "sum2": DruidMetric(
            metric_name="sum2",
            metric_type="doubleSum",
            json=json.dumps({"type": "doubleSum", "name": "sum2"}),
        ),
        "div1": DruidMetric(
            metric_name="div1",
            metric_type="postagg",
            json=json.dumps(
                {
                    "fn": "/",
                    "type": "arithmetic",
                    "name": "div1",
                    "fields": [
                        {"fieldName": "sum1", "type": "fieldAccess"},
                        {"fieldName": "sum2", "type": "fieldAccess"},
                    ],
                }
            ),
        ),
    }
    ds.columns = [dim1, dim2]
    ds.metrics = list(metrics_dict.values())
    columns = ["dim1"]
    metrics = ["count1"]
    granularity = "all"
    # get the counts of the top 5 'dim1's, order by 'sum1'
    ds.run_query(
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        groupby=columns,
        timeseries_limit=5,
        timeseries_limit_metric="sum1",
        client=client,
        order_desc=True,
        filter=[],
    )
    qry_obj = client.topn.call_args_list[0][1]
    self.assertEqual("dim1", qry_obj["dimension"])
    self.assertEqual("sum1", qry_obj["metric"])
    aggregations = qry_obj["aggregations"]
    post_aggregations = qry_obj["post_aggregations"]
    self.assertEqual({"count1", "sum1"}, set(aggregations.keys()))
    self.assertEqual(set(), set(post_aggregations.keys()))
    # get the counts of the top 5 'dim1's, order by 'div1'
    ds.run_query(
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        groupby=columns,
        timeseries_limit=5,
        timeseries_limit_metric="div1",
        client=client,
        order_desc=True,
        filter=[],
    )
    qry_obj = client.topn.call_args_list[1][1]
    self.assertEqual("dim1", qry_obj["dimension"])
    self.assertEqual("div1", qry_obj["metric"])
    aggregations = qry_obj["aggregations"]
    post_aggregations = qry_obj["post_aggregations"]
    self.assertEqual({"count1", "sum1", "sum2"}, set(aggregations.keys()))
    self.assertEqual({"div1"}, set(post_aggregations.keys()))
    columns = ["dim1", "dim2"]
    # get the counts of the top 5 ['dim1', 'dim2']s, order by 'sum1'
    ds.run_query(
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        groupby=columns,
        timeseries_limit=5,
        timeseries_limit_metric="sum1",
        client=client,
        order_desc=True,
        filter=[],
    )
    qry_obj = client.groupby.call_args_list[0][1]
    self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
    self.assertEqual("sum1", qry_obj["limit_spec"]["columns"][0]["dimension"])
    aggregations = qry_obj["aggregations"]
    post_aggregations = qry_obj["post_aggregations"]
    self.assertEqual({"count1", "sum1"}, set(aggregations.keys()))
    self.assertEqual(set(), set(post_aggregations.keys()))
    # get the counts of the top 5 ['dim1', 'dim2']s, order by 'div1'
    ds.run_query(
        metrics,
        granularity,
        from_dttm,
        to_dttm,
        groupby=columns,
        timeseries_limit=5,
        timeseries_limit_metric="div1",
        client=client,
        order_desc=True,
        filter=[],
    )
    qry_obj = client.groupby.call_args_list[1][1]
    self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
    self.assertEqual("div1", qry_obj["limit_spec"]["columns"][0]["dimension"])
    aggregations = qry_obj["aggregations"]
    post_aggregations = qry_obj["post_aggregations"]
    self.assertEqual({"count1", "sum1", "sum2"}, set(aggregations.keys()))
    self.assertEqual({"div1"}, set(post_aggregations.keys()))

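# A minimal sketch of the dependency resolution the 'div1' cases above rely
# on: ordering by an arithmetic post-aggregation must pull its fieldAccess
# inputs into the plain aggregations dict, which is why the query ends up with
# {'count1', 'sum1', 'sum2'} as aggregations and {'div1'} as the only
# post-aggregation. Hypothetical helper for illustration; the real resolution
# lives inside DruidDatasource.
def _postagg_dependencies(postagg_json):
    fields = json.loads(postagg_json).get("fields", [])
    return {f["fieldName"] for f in fields if f["type"] == "fieldAccess"}

# e.g. _postagg_dependencies(metrics_dict["div1"].json) == {"sum1", "sum2"}
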
def test_get_filters_keeps_trailing_spaces(self):
    filtr = {"col": "A", "op": "in", "val": ["a "]}
    col = DruidColumn(column_name="A")
    column_dict = {"A": col}
    res = DruidDatasource.get_filters([filtr], [], column_dict)
    self.assertEqual("a ", res.filter["filter"]["value"])

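# Together with test_get_filters_extracts_values_in_quotes above, this pins
# down the intended normalization: surrounding double quotes are stripped,
# while whitespace in the raw value is preserved. A plain-Python sketch of
# that assumed rule (not the actual parser inside get_filters):
def _normalize_filter_value(raw):
    if raw.startswith('"') and raw.endswith('"'):
        return raw[1:-1]
    return raw

# _normalize_filter_value('"a"') -> "a"; _normalize_filter_value("a ") -> "a "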