def test_druid_returns_html_error(self, mock_urlopen):
    # given
    message = textwrap.dedent("""
        <html>
        <head>
        <meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1"/>
        <title>Error 500 </title>
        </head>
        <body>
        <h2>HTTP ERROR: 500</h2>
        <p>Problem accessing /druid/v2/. Reason:
        <pre> javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded</pre></p>
        <hr /><a href="http://eclipse.org/jetty">Powered by Jetty:// 9.3.19.v20170502</a><hr/>
        </body>
        </html>
    """).strip()
    mock_urlopen.side_effect = _http_error(500, 'Internal Server Error', message)
    client = create_client()

    # when / then
    with pytest.raises(IOError) as e:
        client.topn(
            datasource="testdatasource",
            granularity="all",
            intervals="2015-12-29/pt1h",
            aggregations={"count": doublesum("count")},
            dimension="user_name",
            metric="count",
            filter=Dimension("user_lang") == "en",
            threshold=1,
            context={"timeout": 1000})

    assert str(e.value) == textwrap.dedent("""
        HTTP Error 500: Internal Server Error
        Druid Error: javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded
        Query is: {
            "aggregations": [
                {
                    "fieldName": "count",
                    "name": "count",
                    "type": "doubleSum"
                }
            ],
            "context": {
                "timeout": 1000
            },
            "dataSource": "testdatasource",
            "dimension": "user_name",
            "filter": {
                "dimension": "user_lang",
                "type": "selector",
                "value": "en"
            },
            "granularity": "all",
            "intervals": "2015-12-29/pt1h",
            "metric": "count",
            "queryType": "topN",
            "threshold": 1
        }
    """).strip()
def get(self, request, cid):
    campaign_id = cid
    query = create_druid_client()
    start_date = '2017-06-27'
    query_result = query.groupby(
        datasource='celtra3',
        granularity='all',
        dimensions=['adId'],
        intervals=["{0}/p1d".format(start_date)],
        aggregations={
            'swipes': doublesum('swipes'),
            'interactions': doublesum('interactions'),
            'impressions': doublesum('impressions')
        },
        filter=(Dimension('campaignId') == campaign_id))
    return Response(query_result.result)
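# create_druid_client() is referenced by the view snippets here but not
# defined in this collection. A minimal sketch of what such a factory might
# look like, assuming a Druid broker at localhost:8082 (the URL is an
# assumption, not from the original code):
from pydruid.client import PyDruid


def create_druid_client():
    # PyDruid takes the broker base URL and the query endpoint path.
    return PyDruid('http://localhost:8082', 'druid/v2')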
def test_build_aggregators(self):
    agg_input = {
        'agg1': aggregators.count('metric1'),
        'agg2': aggregators.longsum('metric2'),
        'agg3': aggregators.doublesum('metric3'),
        'agg4': aggregators.doublemin('metric4'),
        'agg5': aggregators.doublemax('metric5'),
        'agg6': aggregators.hyperunique('metric6'),
        'agg7': aggregators.cardinality('dim1'),
        'agg8': aggregators.cardinality(['dim1', 'dim2'], by_row=True),
        'agg9': aggregators.thetasketch('dim1'),
        'agg10': aggregators.thetasketch('metric7'),
        'agg11': aggregators.thetasketch('metric8', isinputthetasketch=True, size=8192)
    }
    built_agg = aggregators.build_aggregators(agg_input)
    expected = [
        {'name': 'agg1', 'type': 'count', 'fieldName': 'metric1'},
        {'name': 'agg2', 'type': 'longSum', 'fieldName': 'metric2'},
        {'name': 'agg3', 'type': 'doubleSum', 'fieldName': 'metric3'},
        {'name': 'agg4', 'type': 'doubleMin', 'fieldName': 'metric4'},
        {'name': 'agg5', 'type': 'doubleMax', 'fieldName': 'metric5'},
        {'name': 'agg6', 'type': 'hyperUnique', 'fieldName': 'metric6'},
        {'name': 'agg7', 'type': 'cardinality', 'fieldNames': ['dim1'], 'byRow': False},
        {'name': 'agg8', 'type': 'cardinality', 'fieldNames': ['dim1', 'dim2'], 'byRow': True},
        {'name': 'agg9', 'type': 'thetaSketch', 'fieldName': 'dim1',
         'isInputThetaSketch': False, 'size': 16384},
        {'name': 'agg10', 'type': 'thetaSketch', 'fieldName': 'metric7',
         'isInputThetaSketch': False, 'size': 16384},
        {'name': 'agg11', 'type': 'thetaSketch', 'fieldName': 'metric8',
         'isInputThetaSketch': True, 'size': 8192}
    ]
    assert (sorted(built_agg, key=itemgetter('name')) ==
            sorted(expected, key=itemgetter('name')))
def test_druid_returns_results(self, mock_urlopen):
    # given
    response = Mock()
    response.read.return_value = """
        [ {
            "timestamp" : "2015-12-30T14:14:49.000Z",
            "result" : [ {
                "dimension" : "aaaa",
                "metric" : 100
            } ]
        } ]
    """.encode("utf-8")
    mock_urlopen.return_value = response
    client = create_client()

    # when
    top = client.topn(
        datasource="testdatasource",
        granularity="all",
        intervals="2015-12-29/pt1h",
        aggregations={"count": doublesum("count")},
        dimension="user_name",
        metric="count",
        filter=Dimension("user_lang") == "en",
        threshold=1,
        context={"timeout": 1000})

    # then
    assert top is not None
    assert len(top.result) == 1
    assert len(top.result[0]['result']) == 1
def test_client_allows_to_export_last_query(self, mock_urlopen):
    # given
    response = Mock()
    response.read.return_value = """
        [ {
            "timestamp" : "2015-12-30T14:14:49.000Z",
            "result" : [ {
                "dimension" : "aaaa",
                "metric" : 100
            } ]
        } ]
    """.encode("utf-8")
    mock_urlopen.return_value = response
    client = create_client()
    client.topn(
        datasource="testdatasource",
        granularity="all",
        intervals="2015-12-29/pt1h",
        aggregations={"count": doublesum("count")},
        dimension="user_name",
        metric="count",
        filter=Dimension("user_lang") == "en",
        threshold=1,
        context={"timeout": 1000})

    # when / then
    # assert that last_query.export_tsv method was called
    # (it should throw an exception, given an empty path)
    with pytest.raises(TypeError):
        client.export_tsv(None)
def test_build_filtered_aggregator(self):
    filter_ = filters.Filter(dimension="dim", value="val")
    agg_input = {
        "agg1": aggregators.filtered(filter_, aggregators.count("metric1")),
        "agg2": aggregators.filtered(filter_, aggregators.longsum("metric2")),
        "agg3": aggregators.filtered(filter_, aggregators.doublesum("metric3")),
        "agg4": aggregators.filtered(filter_, aggregators.min("metric4")),
        "agg5": aggregators.filtered(filter_, aggregators.max("metric5")),
        "agg6": aggregators.filtered(filter_, aggregators.hyperunique("metric6")),
        "agg7": aggregators.filtered(filter_, aggregators.cardinality("dim1")),
        "agg8": aggregators.filtered(filter_, aggregators.cardinality(["dim1", "dim2"], by_row=True)),
    }
    base = {"type": "filtered",
            "filter": {"type": "selector", "dimension": "dim", "value": "val"}}
    aggs = [
        {"name": "agg1", "type": "count", "fieldName": "metric1"},
        {"name": "agg2", "type": "longSum", "fieldName": "metric2"},
        {"name": "agg3", "type": "doubleSum", "fieldName": "metric3"},
        {"name": "agg4", "type": "min", "fieldName": "metric4"},
        {"name": "agg5", "type": "max", "fieldName": "metric5"},
        {"name": "agg6", "type": "hyperUnique", "fieldName": "metric6"},
        {"name": "agg7", "type": "cardinality", "fieldNames": ["dim1"], "byRow": False},
        {"name": "agg8", "type": "cardinality", "fieldNames": ["dim1", "dim2"], "byRow": True},
    ]
    expected = []
    for agg in aggs:
        exp = deepcopy(base)
        exp.update({"aggregator": agg})
        expected.append(exp)

    built_agg = aggregators.build_aggregators(agg_input)
    # Sort both lists by aggregator name so the comparison is order-independent.
    actual = sorted(built_agg,
                    key=lambda k: itemgetter("name")(itemgetter("aggregator")(k)))
    expected = sorted(expected,
                      key=lambda k: itemgetter("name")(itemgetter("aggregator")(k)))
    assert actual == expected
def test_filtered_aggregator(self):
    filter_ = filters.Filter(dimension="dim", value="val")
    aggs = [
        aggregators.count("metric1"),
        aggregators.longsum("metric2"),
        aggregators.doublesum("metric3"),
        aggregators.doublemin("metric4"),
        aggregators.doublemax("metric5"),
        aggregators.hyperunique("metric6"),
        aggregators.cardinality("dim1"),
        aggregators.cardinality(["dim1", "dim2"], by_row=True),
        aggregators.thetasketch("dim1"),
        aggregators.thetasketch("metric7"),
        aggregators.thetasketch("metric8", isinputthetasketch=True, size=8192),
    ]
    for agg in aggs:
        expected = {
            "type": "filtered",
            "filter": {
                "type": "selector",
                "dimension": "dim",
                "value": "val"
            },
            "aggregator": agg,
        }
        actual = aggregators.filtered(filter_, agg)
        assert actual == expected
def druid_timeseries_query_args(self):
    return {
        'datasource': 'pageviews-hourly',
        'granularity': self._granularity,
        'intervals': self._interval,
        'aggregations': {
            'pageviews': doublesum('view_count')
        },
        'filter': self.druid_filter()
    }
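# The dict returned by druid_timeseries_query_args() mirrors the keyword
# arguments of PyDruid.timeseries, so a caller can splat it straight into
# the client. A self-contained sketch of that pattern; the broker URL,
# interval, and granularity literals are assumptions:
from pydruid.client import PyDruid
from pydruid.utils.aggregators import doublesum

client = PyDruid('http://localhost:8082', 'druid/v2')
query_args = {
    'datasource': 'pageviews-hourly',
    'granularity': 'hour',
    'intervals': '2017-01-01/2017-01-02',
    'aggregations': {'pageviews': doublesum('view_count')},
}
ts = client.timeseries(**query_args)  # the call the method above prepares arguments for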
def player_score_count():
    """Route to retrieve count of each strike type per player"""
    group = druid_client.groupby(
        datasource='denormalized_strike_events',
        granularity='all',
        intervals='2015-01-01/2018-01-01',
        dimensions=["first_name", "last_name", "strike_type"],
        aggregations={"count": doublesum("count")})
    return group.result_json
def __init__(self, dimension, field, aggregation_suffix=''):
    self.sum_key = '%s%s' % (field, aggregation_suffix)
    self.dimension = dimension
    self.dimension_filter = Filter(dimension=dimension, value=field)
    aggregations = {
        self.sum_key: filtered_aggregator(filter=self.dimension_filter,
                                          agg=doublesum('sum'))
    }
    super(SumCalculation, self).__init__(aggregations=aggregations)
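# filtered_aggregator above is presumably pydruid.utils.aggregators.filtered
# imported under another name (an assumption based on usage). Per the
# aggregator tests in this collection, it wraps the base aggregator in a
# selector-filtered dict; 'region' and 'population' are illustrative values:
from pydruid.utils.aggregators import doublesum, filtered
from pydruid.utils.filters import Filter

dimension_filter = Filter(dimension='region', value='population')  # hypothetical values
agg = filtered(dimension_filter, doublesum('sum'))
# agg == {'type': 'filtered',
#         'filter': {'type': 'selector', 'dimension': 'region', 'value': 'population'},
#         'aggregator': {'type': 'doubleSum', 'fieldName': 'sum'}}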
def __init__(self, query_client=None):
    '''
    Class to query pydruid and return the data as a pandas dataframe.
    Pivoted to contain
    '''
    self.datasource = DATASOURCE.name
    self.granularity = 'month'
    self.intervals = '%s/%s' % (START_DATE_STR, TODAY_DATE_STR)
    self.dimensions = []
    self.field_dimension = DEFAULT_FIELD
    self.filter = DEFAULT_FILTER
    self.agg_alias = 'sum'
    self.aggregations = {
        self.agg_alias: doublesum('sum'),
        'count': count('count')
    }
    self.query_client = query_client or DruidQueryClient
def test_cube_query(self):
    query = PyDruid("http://pipeline.qiniu.com", 'v2/stream/cubes/query')
    query.set_qiniu("", "")
    top = query.topn(
        datasource='domain_top_statics',
        granularity='all',
        intervals='2019-08-13/pt1h',
        aggregations={'count': doublesum('count')},
        metric='count',
        dimension='Country',
        threshold=10)
    df = query.export_pandas()
    print(df)
    top.export_tsv('top.tsv')
def _parse_metric(self):
    if self._metric == 'uv':
        return {"aggregations": {"result": cardinality(self._field)}}
    elif self._metric == 'pv':
        return {"aggregations": {"result": count(self._field)}}
    elif self._metric == 'longsum':
        return {"aggregations": {"result": longsum(self._field)}}
    elif self._metric == 'doublesum':
        return {"aggregations": {"result": doublesum(self._field)}}
    else:
        raise ParseArgException("Parse metric failed")
def test_druid_returns_error(self):
    # given
    client = AsyncPyDruid("http://localhost:%s" % (self.get_http_port(),),
                          "druid/v2/fail_request")

    # when / then
    with pytest.raises(IOError):
        yield client.topn(
            datasource="testdatasource",
            granularity="all",
            intervals="2015-12-29/pt1h",
            aggregations={"count": doublesum("count")},
            dimension="user_name",
            metric="count",
            filter=Dimension("user_lang") == "en",
            threshold=1,
            context={"timeout": 1000})
def test_druid_returns_error(self, mock_urlopen):
    # given
    mock_urlopen.side_effect = _http_error(500, "Druid error")
    client = create_client()

    # when / then
    with pytest.raises(IOError):
        client.topn(
            datasource="testdatasource",
            granularity="all",
            intervals="2015-12-29/pt1h",
            aggregations={"count": doublesum("count")},
            dimension="user_name",
            metric="count",
            filter=Dimension("user_lang") == "en",
            threshold=1,
            context={"timeout": 1000})
def __init__(self, dimension, field, weight_field):
    super(WeightedAverageCalculation, self).__init__(dimension, field, self.SUFFIX)
    self.weight_key = '%s%s' % (weight_field, self.SUFFIX)
    self.weight_filter = Filter(dimension=dimension, value=weight_field)
    weight_aggregation = {
        self.weight_key: filtered_aggregator(filter=self.weight_filter,
                                             agg=doublesum('sum'))
    }
    self.add_aggregations(weight_aggregation)
    weighted_avg = '%s / %s' % (self.sum_key, self.weight_key)
    self.add_post_aggregation_from_formula(field, weighted_avg)
def strike_type_count():
    """Route to retrieve count of each strike type."""
    group = druid_client.groupby(
        datasource='denormalized_strike_events',
        granularity='all',
        intervals='2015-01-01/2018-01-01',
        dimensions=["strike_type"],
        aggregations={"count": doublesum("count")})
    strike_type_count = {}
    for entry in group.result:
        key = entry["event"]["strike_type"]
        value = int(entry["event"]["count"])
        strike_type_count[key] = value
    return strike_type_count
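# groupby results arrive as one row per group, each carrying an "event"
# dict keyed by dimension and aggregator names, which is why the loop above
# reads entry["event"]["strike_type"]. A representative row, with
# illustrative values:
#
# {
#     "version": "v1",
#     "timestamp": "2015-01-01T00:00:00.000Z",
#     "event": {"strike_type": "jab", "count": 42.0}
# }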
def get(self, request):
    query = create_druid_client()
    last_week_start_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d")
    today_date = datetime.now().strftime("%Y-%m-%d")
    query_result = query.groupby(
        datasource='celtra3',
        granularity='week',
        dimensions=['adId'],
        intervals=["{0}/{1}".format(last_week_start_date, today_date)],
        aggregations={
            'user': hyperunique('user'),
            'impressions': doublesum('impressions')
        },
    )
    return Response(query_result.result)
def strikes_of_a_type(strike_event):
    """Returns total count of a type of a selected strike event."""
    group = druid_client.groupby(
        datasource='denormalized_strike_events',
        granularity='all',
        intervals='2015-01-01/2018-01-01',
        dimensions=["strike_type"],
        filter=Dimension("strike_type") == escape(strike_event),
        aggregations={"count": doublesum("count")})
    strike_type_count = {}
    for entry in group.result:
        key = entry["event"]["strike_type"]
        value = int(entry["event"]["count"])
        strike_type_count[key] = value
    return strike_type_count
def test_client_allows_to_export_last_query(self):
    # given
    client = AsyncPyDruid("http://localhost:%s" % (self.get_http_port(),),
                          "druid/v2/return_results")
    yield client.topn(
        datasource="testdatasource",
        granularity="all",
        intervals="2015-12-29/pt1h",
        aggregations={"count": doublesum("count")},
        dimension="user_name",
        metric="count",
        filter=Dimension("user_lang") == "en",
        threshold=1,
        context={"timeout": 1000})

    # when / then
    # assert that last_query.export_tsv method was called
    # (it should throw an exception, given an empty path)
    with pytest.raises(TypeError):
        client.export_tsv(None)
def test_druid_returns_error(self, mock_urlopen):
    # given
    ex = urllib.error.HTTPError(None, 500, "Druid error", None, None)
    mock_urlopen.side_effect = ex
    client = create_client()

    # when / then
    with pytest.raises(IOError):
        client.topn(
            datasource="testdatasource",
            granularity="all",
            intervals="2015-12-29/pt1h",
            aggregations={"count": doublesum("count")},
            dimension="user_name",
            metric="count",
            filter=Dimension("user_lang") == "en",
            threshold=1,
            context={"timeout": 1000})
def test_client_allows_passing_default_parameters(self):
    # given
    client = AsyncPyDruid("http://localhost:%s" % (self.get_http_port(),),
                          "druid/v2/return_results",
                          defaults=dict(request_timeout=120))
    top = yield client.topn(
        datasource="testdatasource",
        granularity="all",
        intervals="2015-12-29/pt1h",
        aggregations={"count": doublesum("count")},
        dimension="user_name",
        metric="count",
        filter=Dimension("user_lang") == "en",
        threshold=1,
        context={"timeout": 1000})

    # then
    self.assertIsNotNone(top)
    self.assertEqual(len(top.result), 1)
    self.assertEqual(len(top.result[0]['result']), 1)
def test_build_aggregators(self):
    agg_input = {
        'agg1': aggregators.count('metric1'),
        'agg2': aggregators.longsum('metric2'),
        'agg3': aggregators.doublesum('metric3'),
        'agg4': aggregators.min('metric4'),
        'agg5': aggregators.max('metric5'),
        'agg6': aggregators.hyperunique('metric6')
    }
    built_agg = aggregators.build_aggregators(agg_input)
    expected = [
        {'name': 'agg1', 'type': 'count', 'fieldName': 'metric1'},
        {'name': 'agg2', 'type': 'longSum', 'fieldName': 'metric2'},
        {'name': 'agg3', 'type': 'doubleSum', 'fieldName': 'metric3'},
        {'name': 'agg4', 'type': 'min', 'fieldName': 'metric4'},
        {'name': 'agg5', 'type': 'max', 'fieldName': 'metric5'},
        {'name': 'agg6', 'type': 'hyperUnique', 'fieldName': 'metric6'},
    ]
    assert (sorted(built_agg, key=itemgetter('name')) ==
            sorted(expected, key=itemgetter('name')))
def test_filtered_aggregator(self):
    filter_ = filters.Filter(dimension="dim", value="val")
    aggs = [
        aggregators.count("metric1"),
        aggregators.longsum("metric2"),
        aggregators.doublesum("metric3"),
        aggregators.min("metric4"),
        aggregators.max("metric5"),
        aggregators.hyperunique("metric6"),
        aggregators.cardinality("dim1"),
        aggregators.cardinality(["dim1", "dim2"], by_row=True),
    ]
    for agg in aggs:
        expected = {
            "type": "filtered",
            "filter": {"type": "selector", "dimension": "dim", "value": "val"},
            "aggregator": agg,
        }
        actual = aggregators.filtered(filter_, agg)
        assert actual == expected
def positions_delta(self, product_name, min_num_employees, start_dt, end_dt):
    """
    :type product_name: Union[str,unicode]
    :type min_num_employees: int
    :type start_dt: datetime
    :type end_dt: datetime
    """
    query = self.client.timeseries(
        datasource=TABLE_NAME,
        granularity='month',
        intervals=[start_dt.strftime(YMD_FORMAT) + '/' + end_dt.strftime(YMD_FORMAT)],
        filter=((Dimension('product_name') == product_name) &
                (Dimension('customer_num_employees') > min_num_employees)),
        aggregations={"qty": doublesum("qty")},
    )
    print(query.result)
    delta = 0
    for item in query.result:
        delta += item['result']['qty']
    return delta
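# Note: the accumulation loop in positions_delta can be collapsed into a
# single generator expression over the result rows:
#
#     delta = sum(item['result']['qty'] for item in query.result)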
def test_filtered_aggregator(self):
    filter_ = filters.Filter(dimension='dim', value='val')
    aggs = [aggregators.count('metric1'),
            aggregators.longsum('metric2'),
            aggregators.doublesum('metric3'),
            aggregators.min('metric4'),
            aggregators.max('metric5'),
            aggregators.hyperunique('metric6')]
    for agg in aggs:
        expected = {
            'type': 'filtered',
            'filter': {
                'type': 'selector',
                'dimension': 'dim',
                'value': 'val'
            },
            'aggregator': agg
        }
        actual = aggregators.filtered(filter_, agg)
        assert actual == expected
def test_druid_returns_results(self):
    # given
    client = AsyncPyDruid("http://localhost:%s" % (self.get_http_port(),),
                          "druid/v2/return_results")

    # when
    top = yield client.topn(
        datasource="testdatasource",
        granularity="all",
        intervals="2015-12-29/pt1h",
        aggregations={"count": doublesum("count")},
        dimension="user_name",
        metric="count",
        filter=Dimension("user_lang") == "en",
        threshold=1,
        context={"timeout": 1000})

    # then
    self.assertIsNotNone(top)
    self.assertEqual(len(top.result), 1)
    self.assertEqual(len(top.result[0]['result']), 1)
def test_build_aggregators(self):
    agg_input = {
        "agg1": aggregators.count("metric1"),
        "agg2": aggregators.longsum("metric2"),
        "agg3": aggregators.doublesum("metric3"),
        "agg4": aggregators.min("metric4"),
        "agg5": aggregators.max("metric5"),
        "agg6": aggregators.hyperunique("metric6"),
        "agg7": aggregators.cardinality("dim1"),
        "agg8": aggregators.cardinality(["dim1", "dim2"], by_row=True),
    }
    built_agg = aggregators.build_aggregators(agg_input)
    expected = [
        {"name": "agg1", "type": "count", "fieldName": "metric1"},
        {"name": "agg2", "type": "longSum", "fieldName": "metric2"},
        {"name": "agg3", "type": "doubleSum", "fieldName": "metric3"},
        {"name": "agg4", "type": "min", "fieldName": "metric4"},
        {"name": "agg5", "type": "max", "fieldName": "metric5"},
        {"name": "agg6", "type": "hyperUnique", "fieldName": "metric6"},
        {"name": "agg7", "type": "cardinality", "fieldNames": ["dim1"], "byRow": False},
        {"name": "agg8", "type": "cardinality", "fieldNames": ["dim1", "dim2"], "byRow": True},
    ]
    assert sorted(built_agg, key=itemgetter("name")) == sorted(expected, key=itemgetter("name"))
def test_build_filtered_aggregator(self):
    filter_ = filters.Filter(dimension='dim', value='val')
    agg_input = {
        'agg1': aggregators.filtered(filter_, aggregators.count('metric1')),
        'agg2': aggregators.filtered(filter_, aggregators.longsum('metric2')),
        'agg3': aggregators.filtered(filter_, aggregators.doublesum('metric3')),
        'agg4': aggregators.filtered(filter_, aggregators.doublemin('metric4')),
        'agg5': aggregators.filtered(filter_, aggregators.doublemax('metric5')),
        'agg6': aggregators.filtered(filter_, aggregators.hyperunique('metric6')),
        'agg7': aggregators.filtered(filter_, aggregators.cardinality('dim1')),
        'agg8': aggregators.filtered(filter_, aggregators.cardinality(['dim1', 'dim2'], by_row=True)),
        'agg9': aggregators.filtered(filter_, aggregators.thetasketch('dim1')),
        'agg10': aggregators.filtered(filter_, aggregators.thetasketch('metric7')),
        'agg11': aggregators.filtered(filter_, aggregators.thetasketch('metric8', isinputthetasketch=True, size=8192)),
    }
    base = {
        'type': 'filtered',
        'filter': {
            'type': 'selector',
            'dimension': 'dim',
            'value': 'val'
        }
    }
    aggs = [
        {'name': 'agg1', 'type': 'count', 'fieldName': 'metric1'},
        {'name': 'agg2', 'type': 'longSum', 'fieldName': 'metric2'},
        {'name': 'agg3', 'type': 'doubleSum', 'fieldName': 'metric3'},
        {'name': 'agg4', 'type': 'doubleMin', 'fieldName': 'metric4'},
        {'name': 'agg5', 'type': 'doubleMax', 'fieldName': 'metric5'},
        {'name': 'agg6', 'type': 'hyperUnique', 'fieldName': 'metric6'},
        {'name': 'agg7', 'type': 'cardinality', 'fieldNames': ['dim1'], 'byRow': False},
        {'name': 'agg8', 'type': 'cardinality', 'fieldNames': ['dim1', 'dim2'], 'byRow': True},
        {'name': 'agg9', 'type': 'thetaSketch', 'fieldName': 'dim1',
         'isInputThetaSketch': False, 'size': 16384},
        {'name': 'agg10', 'type': 'thetaSketch', 'fieldName': 'metric7',
         'isInputThetaSketch': False, 'size': 16384},
        {'name': 'agg11', 'type': 'thetaSketch', 'fieldName': 'metric8',
         'isInputThetaSketch': True, 'size': 8192}
    ]
    expected = []
    for agg in aggs:
        exp = deepcopy(base)
        exp.update({'aggregator': agg})
        expected.append(exp)

    built_agg = aggregators.build_aggregators(agg_input)
    # Sort both lists by aggregator name so the comparison is order-independent.
    actual = sorted(built_agg,
                    key=lambda k: itemgetter('name')(itemgetter('aggregator')(k)))
    expected = sorted(expected,
                      key=lambda k: itemgetter('name')(itemgetter('aggregator')(k)))
    assert actual == expected
from pydruid.client import *
from pydruid.utils.aggregators import doublesum

query = PyDruid("http://localhost:32769", 'druid/v2')

ts = query.topn(
    datasource='demo',
    granularity='all',
    intervals='2016-10-02/p10w',
    aggregations={'value': doublesum('value')},
    dimension='gdp',
    metric='value',
    threshold=10)

print(ts.result_json)
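# After a query, the same client can also hand back a pandas DataFrame for
# the last result set (pandas must be installed), as other snippets in this
# collection do:
df = query.export_pandas()
print(df.head())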
def test_druid_returns_html_error(self, mock_urlopen):
    # given
    message = textwrap.dedent("""
        <html>
        <head>
        <meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1"/>
        <title>Error 500 </title>
        </head>
        <body>
        <h2>HTTP ERROR: 500</h2>
        <p>Problem accessing /druid/v2/. Reason:
        <pre> javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded</pre></p>
        <hr /><a href="http://eclipse.org/jetty">Powered by Jetty:// 9.3.19.v20170502</a><hr/>
        </body>
        </html>
    """).strip()
    ex = urllib.error.HTTPError(None, 500, message, None, None)
    mock_urlopen.side_effect = ex
    client = create_client()

    # when / then
    with pytest.raises(IOError) as e:
        client.topn(
            datasource="testdatasource",
            granularity="all",
            intervals="2015-12-29/pt1h",
            aggregations={"count": doublesum("count")},
            dimension="user_name",
            metric="count",
            filter=Dimension("user_lang") == "en",
            threshold=1,
            context={"timeout": 1000})

    assert str(e.value) == textwrap.dedent("""
        HTTP Error 500: <html>
        <head>
        <meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1"/>
        <title>Error 500 </title>
        </head>
        <body>
        <h2>HTTP ERROR: 500</h2>
        <p>Problem accessing /druid/v2/. Reason:
        <pre> javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded</pre></p>
        <hr /><a href="http://eclipse.org/jetty">Powered by Jetty:// 9.3.19.v20170502</a><hr/>
        </body>
        </html>
        Druid Error: javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded
        Query is: {
            "aggregations": [
                {
                    "fieldName": "count",
                    "name": "count",
                    "type": "doubleSum"
                }
            ],
            "context": {
                "timeout": 1000
            },
            "dataSource": "testdatasource",
            "dimension": "user_name",
            "filter": {
                "dimension": "user_lang",
                "type": "selector",
                "value": "en"
            },
            "granularity": "all",
            "intervals": "2015-12-29/pt1h",
            "metric": "count",
            "queryType": "topN",
            "threshold": 1
        }
    """).strip()
def sum_calculation(cls, dimension, field, interval_creator):
    dimension_filter = Filter(dimension=dimension, value=field)
    base_aggregation = filtered_aggregator(filter=dimension_filter,
                                           agg=doublesum('sum'))
    return cls(field, base_aggregation, interval_creator)
# draw and show it
ax.relim()
ax.autoscale_view(True, True, True)
fig.canvas.draw()
plt.show(block=False)

# loop to update the data
while True:
    try:
        query = PyDruid("http://localhost:8082", 'druid/v2')
        ts = query.timeseries(
            datasource=datasource,
            granularity='minute',
            intervals='2019-10-29/p4w',
            aggregations={'count': doublesum('count')},
        )
        df = query.export_pandas()
        x = df['timestamp'].map(lambda x: x[8:16])
        y = df['count']

        # set the new data
        if not li:
            li, = ax.plot(x, y)
        else:
            li.set_ydata(y)

        fig.canvas.draw()
        time.sleep(0.01)
    except KeyboardInterrupt:
        # assumed handler; the original snippet is truncated before the except clause
        break