Example #1
0
    def test_druid_returns_html_error(self, mock_urlopen):
        """An HTTP 500 carrying an HTML error page must surface as an IOError.

        The message asserted at the end is made of three parts: the HTTP
        status line, a "Druid Error" line holding the text that appears in the
        page's ``<pre>`` element, and the submitted query as indented JSON.
        """
        # given
        # HTML body of the simulated 500 response.
        message = textwrap.dedent("""
            <html>
            <head>
            <meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1"/>
            <title>Error 500 </title>
            </head>
            <body>
            <h2>HTTP ERROR: 500</h2>
            <p>Problem accessing /druid/v2/. Reason:
            <pre>    javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded</pre></p>
            <hr /><a href="http://eclipse.org/jetty">Powered by Jetty:// 9.3.19.v20170502</a><hr/>
            </body>
            </html>
        """).strip()
        mock_urlopen.side_effect = _http_error(500, 'Internal Server Error',
                                               message)
        client = create_client()

        # when / then
        with pytest.raises(IOError) as e:
            client.topn(datasource="testdatasource",
                        granularity="all",
                        intervals="2015-12-29/pt1h",
                        aggregations={"count": doublesum("count")},
                        dimension="user_name",
                        metric="count",
                        filter=Dimension("user_lang") == "en",
                        threshold=1,
                        context={"timeout": 1000})

        # NOTE: the first two lines of the expected text end with a trailing
        # space; it is part of the asserted message, so keep it when editing.
        assert str(e.value) == textwrap.dedent("""
            HTTP Error 500: Internal Server Error 
             Druid Error: javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded 
             Query is: {
                "aggregations": [
                    {
                        "fieldName": "count",
                        "name": "count",
                        "type": "doubleSum"
                    }
                ],
                "context": {
                    "timeout": 1000
                },
                "dataSource": "testdatasource",
                "dimension": "user_name",
                "filter": {
                    "dimension": "user_lang",
                    "type": "selector",
                    "value": "en"
                },
                "granularity": "all",
                "intervals": "2015-12-29/pt1h",
                "metric": "count",
                "queryType": "topN",
                "threshold": 1
            }
        """).strip()
Example #2
0
 def get(self, request, cid):
     """Return per-ad swipe, interaction and impression sums for one campaign.

     Runs a Druid group-by on ``adId`` over the one-day interval starting at
     the hard-coded date below, filtered to ``campaignId == cid``, and wraps
     the raw result rows in a ``Response``.
     """
     druid = create_druid_client()
     day = '2017-06-27'
     metric_names = ('swipes', 'interactions', 'impressions')
     group_by_args = dict(
         datasource='celtra3',
         granularity='all',
         dimensions=['adId'],
         intervals=["{0}/p1d".format(day)],
         # Each metric is summed under its own name.
         aggregations={metric: doublesum(metric) for metric in metric_names},
         filter=(Dimension('campaignId') == cid),
     )
     rows = druid.groupby(**group_by_args).result
     return Response(rows)
    def test_build_aggregators(self):
        """Each built aggregator is expected to carry its dict key as ``name``.

        Covers count, sum, min/max, hyperUnique, cardinality and thetaSketch
        specs. Both sides are sorted by name, so the comparison does not
        depend on the order in which the aggregators are returned.
        """
        by_name = itemgetter('name')
        specs = {
            'agg1': aggregators.count('metric1'),
            'agg2': aggregators.longsum('metric2'),
            'agg3': aggregators.doublesum('metric3'),
            'agg4': aggregators.doublemin('metric4'),
            'agg5': aggregators.doublemax('metric5'),
            'agg6': aggregators.hyperunique('metric6'),
            'agg7': aggregators.cardinality('dim1'),
            'agg8': aggregators.cardinality(['dim1', 'dim2'], by_row=True),
            'agg9': aggregators.thetasketch('dim1'),
            'agg10': aggregators.thetasketch('metric7'),
            'agg11': aggregators.thetasketch(
                'metric8', isinputthetasketch=True, size=8192),
        }

        built = aggregators.build_aggregators(specs)

        wanted = [
            dict(name='agg1', type='count', fieldName='metric1'),
            dict(name='agg2', type='longSum', fieldName='metric2'),
            dict(name='agg3', type='doubleSum', fieldName='metric3'),
            dict(name='agg4', type='doubleMin', fieldName='metric4'),
            dict(name='agg5', type='doubleMax', fieldName='metric5'),
            dict(name='agg6', type='hyperUnique', fieldName='metric6'),
            dict(name='agg7', type='cardinality', fieldNames=['dim1'],
                 byRow=False),
            dict(name='agg8', type='cardinality',
                 fieldNames=['dim1', 'dim2'], byRow=True),
            # thetaSketch specs without overrides are expected with
            # isInputThetaSketch False and size 16384.
            dict(name='agg9', type='thetaSketch', fieldName='dim1',
                 isInputThetaSketch=False, size=16384),
            dict(name='agg10', type='thetaSketch', fieldName='metric7',
                 isInputThetaSketch=False, size=16384),
            dict(name='agg11', type='thetaSketch', fieldName='metric8',
                 isInputThetaSketch=True, size=8192),
        ]
        assert sorted(built, key=by_name) == sorted(wanted, key=by_name)
Example #4
0
    def test_druid_returns_results(self, mock_urlopen):
        # given
        response = Mock()
        response.read.return_value = """
            [ {
  "timestamp" : "2015-12-30T14:14:49.000Z",
  "result" : [ {
    "dimension" : "aaaa",
    "metric" : 100
  } ]
            } ]
        """.encode("utf-8")
        mock_urlopen.return_value = response
        client = create_client()

        # when
        top = client.topn(datasource="testdatasource",
                          granularity="all",
                          intervals="2015-12-29/pt1h",
                          aggregations={"count": doublesum("count")},
                          dimension="user_name",
                          metric="count",
                          filter=Dimension("user_lang") == "en",
                          threshold=1,
                          context={"timeout": 1000})

        # then
        assert top is not None
        assert len(top.result) == 1
        assert len(top.result[0]['result']) == 1
Example #5
0
    def test_client_allows_to_export_last_query(self, mock_urlopen):
        # given
        response = Mock()
        response.read.return_value = """
            [ {
  "timestamp" : "2015-12-30T14:14:49.000Z",
  "result" : [ {
    "dimension" : "aaaa",
    "metric" : 100
  } ]
            } ]
        """.encode("utf-8")
        mock_urlopen.return_value = response
        client = create_client()
        client.topn(
                datasource="testdatasource",
                granularity="all",
                intervals="2015-12-29/pt1h",
                aggregations={"count": doublesum("count")},
                dimension="user_name",
                metric="count",
                filter=Dimension("user_lang") == "en",
                threshold=1,
                context={"timeout": 1000})

        # when / then
        # assert that last_query.export_tsv method was called (it should throw an exception, given empty path)
        with pytest.raises(TypeError):
            client.export_tsv(None)
Example #6
0
    def test_druid_returns_results(self, mock_urlopen):
        # given
        response = Mock()
        response.read.return_value = """
            [ {
  "timestamp" : "2015-12-30T14:14:49.000Z",
  "result" : [ {
    "dimension" : "aaaa",
    "metric" : 100
  } ]
            } ]
        """.encode("utf-8")
        mock_urlopen.return_value = response
        client = create_client()

        # when
        top = client.topn(
                datasource="testdatasource",
                granularity="all",
                intervals="2015-12-29/pt1h",
                aggregations={"count": doublesum("count")},
                dimension="user_name",
                metric="count",
                filter=Dimension("user_lang") == "en",
                threshold=1,
                context={"timeout": 1000})

        # then
        assert top is not None
        assert len(top.result) == 1
        assert len(top.result[0]['result']) == 1
Example #7
0
    def test_client_allows_to_export_last_query(self, mock_urlopen):
        # given
        response = Mock()
        response.read.return_value = """
            [ {
  "timestamp" : "2015-12-30T14:14:49.000Z",
  "result" : [ {
    "dimension" : "aaaa",
    "metric" : 100
  } ]
            } ]
        """.encode("utf-8")
        mock_urlopen.return_value = response
        client = create_client()
        client.topn(datasource="testdatasource",
                    granularity="all",
                    intervals="2015-12-29/pt1h",
                    aggregations={"count": doublesum("count")},
                    dimension="user_name",
                    metric="count",
                    filter=Dimension("user_lang") == "en",
                    threshold=1,
                    context={"timeout": 1000})

        # when / then
        # assert that last_query.export_tsv method was called (it should throw an exception, given empty path)
        with pytest.raises(TypeError):
            client.export_tsv(None)
Example #8
0
    def test_build_filtered_aggregator(self):
        """Filtered specs built from a dict name the wrapped aggregator after its key.

        Every expected entry is the shared filtered/selector wrapper plus the
        inner aggregator carrying ``name``. The built list and the expected
        list are each sorted by that inner name before being compared.
        """
        filter_ = filters.Filter(dimension="dim", value="val")
        agg_input = {
            "agg1": aggregators.filtered(filter_, aggregators.count("metric1")),
            "agg2": aggregators.filtered(filter_, aggregators.longsum("metric2")),
            "agg3": aggregators.filtered(filter_, aggregators.doublesum("metric3")),
            "agg4": aggregators.filtered(filter_, aggregators.min("metric4")),
            "agg5": aggregators.filtered(filter_, aggregators.max("metric5")),
            "agg6": aggregators.filtered(filter_, aggregators.hyperunique("metric6")),
            "agg7": aggregators.filtered(filter_, aggregators.cardinality("dim1")),
            "agg8": aggregators.filtered(filter_, aggregators.cardinality(["dim1", "dim2"], by_row=True)),
        }
        base = {"type": "filtered", "filter": {"type": "selector", "dimension": "dim", "value": "val"}}

        aggs = [
            {"name": "agg1", "type": "count", "fieldName": "metric1"},
            {"name": "agg2", "type": "longSum", "fieldName": "metric2"},
            {"name": "agg3", "type": "doubleSum", "fieldName": "metric3"},
            {"name": "agg4", "type": "min", "fieldName": "metric4"},
            {"name": "agg5", "type": "max", "fieldName": "metric5"},
            {"name": "agg6", "type": "hyperUnique", "fieldName": "metric6"},
            {"name": "agg7", "type": "cardinality", "fieldNames": ["dim1"], "byRow": False},
            {"name": "agg8", "type": "cardinality", "fieldNames": ["dim1", "dim2"], "byRow": True},
        ]
        expected = []
        for agg in aggs:
            exp = deepcopy(base)
            exp.update({"aggregator": agg})
            expected.append(exp)

        def inner_name(entry):
            # Sort key: the name of the aggregator wrapped by the filter.
            return entry["aggregator"]["name"]

        built_agg = aggregators.build_aggregators(agg_input)
        # Sort the built output and the hand-written expectation separately.
        # The earlier version derived both sides from built_agg, so the
        # assertion could never fail.
        actual = sorted(built_agg, key=inner_name)
        expected = sorted(expected, key=inner_name)
        assert expected == actual
Example #9
0
 def test_filtered_aggregator(self):
     """``aggregators.filtered`` wraps each aggregator with the built selector filter."""
     filter_ = filters.Filter(dimension="dim", value="val")
     selector = {"type": "selector", "dimension": "dim", "value": "val"}
     inner_aggs = (
         aggregators.count("metric1"),
         aggregators.longsum("metric2"),
         aggregators.doublesum("metric3"),
         aggregators.doublemin("metric4"),
         aggregators.doublemax("metric5"),
         aggregators.hyperunique("metric6"),
         aggregators.cardinality("dim1"),
         aggregators.cardinality(["dim1", "dim2"], by_row=True),
         aggregators.thetasketch("dim1"),
         aggregators.thetasketch("metric7"),
         aggregators.thetasketch("metric8", isinputthetasketch=True, size=8192),
     )
     for inner in inner_aggs:
         # The wrapped aggregator is expected to be passed through unchanged.
         assert aggregators.filtered(filter_, inner) == {
             "type": "filtered",
             "filter": selector,
             "aggregator": inner,
         }
 def test_filtered_aggregator(self):
     """Wrapping any aggregator with ``filtered`` yields a filtered/selector spec."""
     filter_ = filters.Filter(dimension='dim', value='val')
     selector_spec = dict(type='selector', dimension='dim', value='val')
     candidates = [
         aggregators.count('metric1'),
         aggregators.longsum('metric2'),
         aggregators.doublesum('metric3'),
         aggregators.doublemin('metric4'),
         aggregators.doublemax('metric5'),
         aggregators.hyperunique('metric6'),
         aggregators.cardinality('dim1'),
         aggregators.cardinality(['dim1', 'dim2'], by_row=True),
         aggregators.thetasketch('dim1'),
         aggregators.thetasketch('metric7'),
         aggregators.thetasketch('metric8', isinputthetasketch=True, size=8192),
     ]
     for candidate in candidates:
         wanted = dict(type='filtered', filter=selector_spec,
                       aggregator=candidate)
         got = aggregators.filtered(filter_, candidate)
         assert got == wanted
Example #11
0
 def test_build_aggregators(self):
     """Built aggregators are expected to carry their dict key as ``name``."""
     by_name = itemgetter('name')
     specs = {
         'agg1': aggregators.count('metric1'),
         'agg2': aggregators.longsum('metric2'),
         'agg3': aggregators.doublesum('metric3'),
         'agg4': aggregators.min('metric4'),
         'agg5': aggregators.max('metric5'),
         'agg6': aggregators.hyperunique('metric6'),
         'agg7': aggregators.cardinality('dim1'),
         'agg8': aggregators.cardinality(['dim1', 'dim2'], by_row=True),
     }

     built = aggregators.build_aggregators(specs)

     wanted = [
         {'name': 'agg1', 'type': 'count', 'fieldName': 'metric1'},
         {'name': 'agg2', 'type': 'longSum', 'fieldName': 'metric2'},
         {'name': 'agg3', 'type': 'doubleSum', 'fieldName': 'metric3'},
         {'name': 'agg4', 'type': 'min', 'fieldName': 'metric4'},
         {'name': 'agg5', 'type': 'max', 'fieldName': 'metric5'},
         {'name': 'agg6', 'type': 'hyperUnique', 'fieldName': 'metric6'},
         {'name': 'agg7', 'type': 'cardinality', 'fieldNames': ['dim1'],
          'byRow': False},
         {'name': 'agg8', 'type': 'cardinality',
          'fieldNames': ['dim1', 'dim2'], 'byRow': True},
     ]
     # Sort both sides so the check is independent of the returned order.
     assert sorted(built, key=by_name) == sorted(wanted, key=by_name)
Example #12
0
 def druid_timeseries_query_args(self):
     """Return the keyword arguments for a query on the 'pageviews-hourly' datasource.

     Granularity and interval are read from the instance, the filter comes
     from ``self.druid_filter()``, and ``view_count`` is summed as
     ``pageviews``.
     """
     args = {}
     args['datasource'] = 'pageviews-hourly'
     args['granularity'] = self._granularity
     args['intervals'] = self._interval
     args['aggregations'] = {'pageviews': doublesum('view_count')}
     args['filter'] = self.druid_filter()
     return args
Example #13
0
    def player_score_count():
        """Route to retrieve count of each strike type per player.

        Groups the strike events by first name, last name and strike type and
        returns the query's ``result_json``.
        """
        query_args = dict(
            datasource='denormalized_strike_events',
            granularity='all',
            intervals='2015-01-01/2018-01-01',
            dimensions=["first_name", "last_name", "strike_type"],
            aggregations={"count": doublesum("count")},
        )
        return druid_client.groupby(**query_args).result_json
Example #14
0
    def __init__(self, dimension, field, aggregation_suffix=''):
        """Register a filtered double-sum aggregation for one dimension value.

        :param dimension: dimension the filter matches on
        :param field: value of ``dimension`` to select; also the prefix of
            the aggregation key
        :param aggregation_suffix: text appended to ``field`` to form
            ``self.sum_key``
        """
        self.sum_key = '%s%s' % (field, aggregation_suffix)
        self.dimension = dimension
        self.dimension_filter = Filter(dimension=dimension, value=field)

        # Sum the 'sum' metric only over rows matching the dimension filter.
        filtered_sum = filtered_aggregator(filter=self.dimension_filter,
                                           agg=doublesum('sum'))
        super(SumCalculation, self).__init__(
            aggregations={self.sum_key: filtered_sum})
Example #15
0
 def __init__(self, query_client=None):
     """Initialise the default query settings.

     NOTE(review): the previous docstring described the class as querying
     pydruid and returning the data as a pandas dataframe, and then ended
     mid-sentence ("Pivoted to contain"). Complete the description once the
     intent is confirmed.

     :param query_client: client used to run queries; ``DruidQueryClient``
         is used when this is falsy.
     """
     self.datasource = DATASOURCE.name
     self.granularity = 'month'
     self.intervals = '%s/%s' % (START_DATE_STR, TODAY_DATE_STR)
     self.dimensions = []
     self.field_dimension = DEFAULT_FIELD
     self.filter = DEFAULT_FILTER
     self.agg_alias = 'sum'

     # Sum of the 'sum' metric under the alias, plus a plain count.
     default_aggregations = {self.agg_alias: doublesum('sum')}
     default_aggregations['count'] = count('count')
     self.aggregations = default_aggregations

     if query_client:
         self.query_client = query_client
     else:
         self.query_client = DruidQueryClient
Example #16
0
    def test_build_filtered_aggregator(self):
        """Filtered specs built from a dict name the wrapped aggregator after its key.

        Every expected entry is the shared filtered/selector wrapper plus the
        inner aggregator carrying ``name``. The built list and the expected
        list are each sorted by that inner name before being compared.
        """
        filter_ = filters.Filter(dimension='dim', value='val')
        agg_input = {
            'agg1': aggregators.filtered(filter_,
                                         aggregators.count('metric1')),
            'agg2': aggregators.filtered(filter_,
                                         aggregators.longsum('metric2')),
            'agg3': aggregators.filtered(filter_,
                                         aggregators.doublesum('metric3')),
            'agg4': aggregators.filtered(filter_,
                                         aggregators.min('metric4')),
            'agg5': aggregators.filtered(filter_,
                                         aggregators.max('metric5')),
            'agg6': aggregators.filtered(filter_,
                                         aggregators.hyperunique('metric6')),
            'agg7': aggregators.filtered(filter_,
                                         aggregators.cardinality('dim1')),
            'agg8': aggregators.filtered(filter_,
                                         aggregators.cardinality(['dim1', 'dim2'], by_row=True)),
        }
        base = {
            'type': 'filtered',
            'filter': {
                'type': 'selector',
                'dimension': 'dim',
                'value': 'val'
            }
        }

        aggs = [
            {'name': 'agg1', 'type': 'count', 'fieldName': 'metric1'},
            {'name': 'agg2', 'type': 'longSum', 'fieldName': 'metric2'},
            {'name': 'agg3', 'type': 'doubleSum', 'fieldName': 'metric3'},
            {'name': 'agg4', 'type': 'min', 'fieldName': 'metric4'},
            {'name': 'agg5', 'type': 'max', 'fieldName': 'metric5'},
            {'name': 'agg6', 'type': 'hyperUnique', 'fieldName': 'metric6'},
            {'name': 'agg7', 'type': 'cardinality', 'fieldNames': ['dim1'], 'byRow': False},
            {'name': 'agg8', 'type': 'cardinality', 'fieldNames': ['dim1', 'dim2'], 'byRow': True},
        ]
        expected = []
        for agg in aggs:
            exp = deepcopy(base)
            exp.update({'aggregator': agg})
            expected.append(exp)

        def inner_name(entry):
            # Sort key: the name of the aggregator wrapped by the filter.
            return entry['aggregator']['name']

        built_agg = aggregators.build_aggregators(agg_input)
        # Sort the built output and the hand-written expectation separately.
        # The earlier version derived both sides from built_agg, so the
        # assertion could never fail.
        actual = sorted(built_agg, key=inner_name)
        expected = sorted(expected, key=inner_name)
        assert expected == actual
Example #17
0
 def test_cube_query(self):
     """Run a top-N query against the remote endpoint and export the result.

     Makes no assertions: it prints the pandas export and writes the result
     to ``top.tsv`` in the current working directory.
     """
     client = PyDruid("http://pipeline.qiniu.com", 'v2/stream/cubes/query')
     client.set_qiniu("", "")
     topn_args = dict(
         datasource='domain_top_statics',
         granularity='all',
         # NOTE(review): this line used to be annotated "utc time of 2014
         # oscars", which does not match the 2019 date -- confirm the
         # intended interval.
         intervals='2019-08-13/pt1h',
         aggregations={'count': doublesum('count')},
         metric='count',
         dimension='Country',
         threshold=10,
     )
     result = client.topn(**topn_args)
     frame = client.export_pandas()
     print(frame)
     result.export_tsv('top.tsv')
Example #18
0
    def _parse_metric(self):
        """Translate ``self._metric`` into a one-entry aggregations mapping.

        'uv' maps to cardinality, 'pv' to count, 'longsum' to longsum and
        'doublesum' to doublesum, each applied to ``self._field`` and stored
        under the key "result".

        :raises ParseArgException: if ``self._metric`` is none of the above
        """
        builders = (
            ('uv', cardinality),
            ('pv', count),
            ('longsum', longsum),
            ('doublesum', doublesum),
        )
        for metric_name, build in builders:
            if self._metric == metric_name:
                return {"aggregations": {"result": build(self._field)}}

        raise ParseArgException("Parse metric failed")
Example #19
0
    def test_druid_returns_error(self):
        """A top-N query sent to the ``fail_request`` endpoint raises IOError.

        NOTE(review): the ``yield`` presumably relies on a coroutine test
        decorator applied outside this view -- confirm.
        """
        # given
        base_url = "http://localhost:%s" % (self.get_http_port(), )
        client = AsyncPyDruid(base_url, "druid/v2/fail_request")
        query = dict(
            datasource="testdatasource",
            granularity="all",
            intervals="2015-12-29/pt1h",
            aggregations={"count": doublesum("count")},
            dimension="user_name",
            metric="count",
            filter=Dimension("user_lang") == "en",
            threshold=1,
            context={"timeout": 1000},
        )

        # when / then
        with pytest.raises(IOError):
            yield client.topn(**query)
Example #20
0
    def test_druid_returns_error(self, mock_urlopen):
        """An HTTP 500 from the mocked transport is reported as IOError."""
        # given
        mock_urlopen.side_effect = _http_error(500, "Druid error")
        client = create_client()
        query = {
            "datasource": "testdatasource",
            "granularity": "all",
            "intervals": "2015-12-29/pt1h",
            "aggregations": {"count": doublesum("count")},
            "dimension": "user_name",
            "metric": "count",
            "filter": Dimension("user_lang") == "en",
            "threshold": 1,
            "context": {"timeout": 1000},
        }

        # when / then
        with pytest.raises(IOError):
            client.topn(**query)
Example #21
0
    def __init__(self, dimension, field, weight_field):
        """Set up a weighted average as ``<field sum> / <weight sum>``.

        The parent initialiser is called with ``self.SUFFIX``; a second
        filtered double-sum is then added for ``weight_field`` and the
        quotient is registered as a post-aggregation named ``field``.

        :param dimension: dimension both filters match on
        :param field: dimension value whose sum is the numerator
        :param weight_field: dimension value whose sum is the denominator
        """
        parent = super(WeightedAverageCalculation, self)
        parent.__init__(dimension, field, self.SUFFIX)

        self.weight_key = '%s%s' % (weight_field, self.SUFFIX)
        self.weight_filter = Filter(dimension=dimension, value=weight_field)

        weight_sum = filtered_aggregator(filter=self.weight_filter,
                                         agg=doublesum('sum'))
        self.add_aggregations({self.weight_key: weight_sum})

        formula = '%s / %s' % (self.sum_key, self.weight_key)
        self.add_post_aggregation_from_formula(field, formula)
Example #22
0
    def strike_type_count():
        """Route to retrieve count of each strike type.

        Returns a dict mapping each strike type to its count as an int.
        """
        query_args = dict(
            datasource='denormalized_strike_events',
            granularity='all',
            intervals='2015-01-01/2018-01-01',
            dimensions=["strike_type"],
            aggregations={"count": doublesum("count")},
        )
        group = druid_client.groupby(**query_args)

        counts = {}
        for row in group.result:
            event = row["event"]
            # The count is coerced to int before being stored.
            counts[event["strike_type"]] = int(event["count"])
        return counts
Example #23
0
    def test_druid_returns_error(self):
        """The async client raises IOError when the endpoint fails the request.

        NOTE(review): the ``yield`` presumably relies on a coroutine test
        decorator applied outside this view -- confirm.
        """
        # given
        port = self.get_http_port()
        client = AsyncPyDruid("http://localhost:%s" % (port, ),
                              "druid/v2/fail_request")
        topn_args = {
            "datasource": "testdatasource",
            "granularity": "all",
            "intervals": "2015-12-29/pt1h",
            "aggregations": {"count": doublesum("count")},
            "dimension": "user_name",
            "metric": "count",
            "filter": Dimension("user_lang") == "en",
            "threshold": 1,
            "context": {"timeout": 1000},
        }

        # when / then
        with pytest.raises(IOError):
            yield client.topn(**topn_args)
Example #24
0
    def get(self, request):
        """Return per-ad unique users and impression sums for the past seven days.

        Runs a week-granularity group-by on ``adId`` over the interval from
        seven days ago to today and wraps the result rows in a ``Response``.
        """
        day_format = "%Y-%m-%d"
        druid = create_druid_client()
        week_ago = (datetime.now() - timedelta(days=7)).strftime(day_format)
        today = datetime.now().strftime(day_format)

        group_by_args = {
            'datasource': 'celtra3',
            'granularity': 'week',
            'dimensions': ['adId'],
            'intervals': ["{0}/{1}".format(week_ago, today)],
            'aggregations': {
                'user': hyperunique('user'),
                'impressions': doublesum('impressions'),
            },
        }
        rows = druid.groupby(**group_by_args).result
        return Response(rows)
Example #25
0
    def strikes_of_a_type(strike_event):
        """Returns total count of a type of a selected strike event.

        The result is a dict mapping strike type to its count as an int,
        restricted to rows whose ``strike_type`` equals the escaped
        ``strike_event``.
        """
        wanted_type = Dimension("strike_type") == escape(strike_event)
        group = druid_client.groupby(
            datasource='denormalized_strike_events',
            granularity='all',
            intervals='2015-01-01/2018-01-01',
            dimensions=["strike_type"],
            filter=wanted_type,
            aggregations={"count": doublesum("count")},
        )

        counts = {}
        for row in group.result:
            event = row["event"]
            counts[event["strike_type"]] = int(event["count"])
        return counts
Example #26
0
    def test_client_allows_to_export_last_query(self):
        """After an async query, ``client.export_tsv(None)`` raises TypeError.

        NOTE(review): the ``yield`` presumably relies on a coroutine test
        decorator applied outside this view -- confirm.
        """
        # given
        base_url = "http://localhost:%s" % (self.get_http_port(), )
        client = AsyncPyDruid(base_url, "druid/v2/return_results")
        query = dict(
            datasource="testdatasource",
            granularity="all",
            intervals="2015-12-29/pt1h",
            aggregations={"count": doublesum("count")},
            dimension="user_name",
            metric="count",
            filter=Dimension("user_lang") == "en",
            threshold=1,
            context={"timeout": 1000},
        )
        yield client.topn(**query)

        # when / then
        # The TypeError for the empty path shows that last_query.export_tsv
        # was actually called.
        with pytest.raises(TypeError):
            client.export_tsv(None)
Example #27
0
    def test_druid_returns_error(self, mock_urlopen):
        """An ``HTTPError`` raised by the mocked urlopen is reported as IOError."""
        # given
        mock_urlopen.side_effect = urllib.error.HTTPError(
            None, 500, "Druid error", None, None)
        client = create_client()
        query = dict(
            datasource="testdatasource",
            granularity="all",
            intervals="2015-12-29/pt1h",
            aggregations={"count": doublesum("count")},
            dimension="user_name",
            metric="count",
            filter=Dimension("user_lang") == "en",
            threshold=1,
            context={"timeout": 1000},
        )

        # when / then
        with pytest.raises(IOError):
            client.topn(**query)
Example #28
0
    def test_client_allows_to_export_last_query(self):
        """The async client exposes ``export_tsv`` for the most recent query.

        NOTE(review): the ``yield`` presumably relies on a coroutine test
        decorator applied outside this view -- confirm.
        """
        # given
        port = self.get_http_port()
        client = AsyncPyDruid("http://localhost:%s" % (port, ),
                              "druid/v2/return_results")
        topn_args = {
            "datasource": "testdatasource",
            "granularity": "all",
            "intervals": "2015-12-29/pt1h",
            "aggregations": {"count": doublesum("count")},
            "dimension": "user_name",
            "metric": "count",
            "filter": Dimension("user_lang") == "en",
            "threshold": 1,
            "context": {"timeout": 1000},
        }
        yield client.topn(**topn_args)

        # when / then
        # Reaching last_query.export_tsv is shown by the TypeError it raises
        # for an empty path.
        with pytest.raises(TypeError):
            client.export_tsv(None)
Example #29
0
    def test_client_allows_passing_default_parameters(self):
        """A client built with ``defaults`` still returns the single top-N row.

        NOTE(review): the ``yield`` presumably relies on a coroutine test
        decorator applied outside this view -- confirm.
        """
        # given
        base_url = "http://localhost:%s" % (self.get_http_port(), )
        client = AsyncPyDruid(base_url,
                              "druid/v2/return_results",
                              defaults={"request_timeout": 120})
        query = dict(
            datasource="testdatasource",
            granularity="all",
            intervals="2015-12-29/pt1h",
            aggregations={"count": doublesum("count")},
            dimension="user_name",
            metric="count",
            filter=Dimension("user_lang") == "en",
            threshold=1,
            context={"timeout": 1000},
        )
        top = yield client.topn(**query)

        # then
        self.assertIsNotNone(top)
        rows = top.result
        self.assertEqual(len(rows), 1)
        self.assertEqual(len(rows[0]['result']), 1)
Example #30
0
 def test_build_aggregators(self):
     """Built aggregators are expected to carry their dict key as ``name``."""
     by_name = itemgetter('name')
     specs = {
         'agg1': aggregators.count('metric1'),
         'agg2': aggregators.longsum('metric2'),
         'agg3': aggregators.doublesum('metric3'),
         'agg4': aggregators.min('metric4'),
         'agg5': aggregators.max('metric5'),
         'agg6': aggregators.hyperunique('metric6'),
     }

     built = aggregators.build_aggregators(specs)

     wanted = [
         dict(name='agg1', type='count', fieldName='metric1'),
         dict(name='agg2', type='longSum', fieldName='metric2'),
         dict(name='agg3', type='doubleSum', fieldName='metric3'),
         dict(name='agg4', type='min', fieldName='metric4'),
         dict(name='agg5', type='max', fieldName='metric5'),
         dict(name='agg6', type='hyperUnique', fieldName='metric6'),
     ]
     # Sorted on both sides so the returned order does not matter.
     assert sorted(built, key=by_name) == sorted(wanted, key=by_name)
Example #31
0
 def test_filtered_aggregator(self):
     """``aggregators.filtered`` wraps each aggregator with the built selector filter."""
     filter_ = filters.Filter(dimension="dim", value="val")
     selector = {"type": "selector", "dimension": "dim", "value": "val"}
     inner_aggs = (
         aggregators.count("metric1"),
         aggregators.longsum("metric2"),
         aggregators.doublesum("metric3"),
         aggregators.min("metric4"),
         aggregators.max("metric5"),
         aggregators.hyperunique("metric6"),
         aggregators.cardinality("dim1"),
         aggregators.cardinality(["dim1", "dim2"], by_row=True),
     )
     for inner in inner_aggs:
         # The wrapped aggregator is expected to be passed through unchanged.
         assert aggregators.filtered(filter_, inner) == {
             "type": "filtered",
             "filter": selector,
             "aggregator": inner,
         }
Example #32
0
 def positions_delta(self, product_name, min_num_employees, start_dt, end_dt):
     """
     Sum the monthly ``qty`` totals for one product over a date range.

     Runs a month-granularity timeseries query filtered to ``product_name``
     and to customers with more than ``min_num_employees`` employees, prints
     the raw result rows, and returns the sum of ``qty`` across those rows
     (0 when the query returns no rows).

     :type product_name: Union[str,unicode]
     :type min_num_employees: int
     :type start_dt: datetime
     :type end_dt: datetime
     """
     interval = start_dt.strftime(YMD_FORMAT) + '/' + end_dt.strftime(YMD_FORMAT)
     query = self.client.timeseries(
         datasource=TABLE_NAME,
         granularity='month',
         intervals=[interval],
         filter=((Dimension('product_name') == product_name) &
                 (Dimension('customer_num_employees') > min_num_employees)),
         aggregations={"qty": doublesum("qty")},
     )
     # Parenthesised so this line is valid on Python 3 (function call) and
     # still prints the same text on Python 2 (print statement).
     print(query.result)
     return sum(item['result']['qty'] for item in query.result)
Example #33
0
 def test_filtered_aggregator(self):
     """Wrapping any aggregator with ``filtered`` yields a filtered/selector spec."""
     filter_ = filters.Filter(dimension='dim', value='val')
     selector_spec = dict(type='selector', dimension='dim', value='val')
     candidates = [
         aggregators.count('metric1'),
         aggregators.longsum('metric2'),
         aggregators.doublesum('metric3'),
         aggregators.min('metric4'),
         aggregators.max('metric5'),
         aggregators.hyperunique('metric6'),
     ]
     for candidate in candidates:
         wanted = dict(type='filtered', filter=selector_spec,
                       aggregator=candidate)
         got = aggregators.filtered(filter_, candidate)
         assert got == wanted
Example #34
0
    def test_druid_returns_results(self):
        """The async client returns one row holding one top-N entry.

        NOTE(review): the ``yield`` presumably relies on a coroutine test
        decorator applied outside this view -- confirm.
        """
        # given
        port = self.get_http_port()
        client = AsyncPyDruid("http://localhost:%s" % (port, ),
                              "druid/v2/return_results")
        topn_args = {
            "datasource": "testdatasource",
            "granularity": "all",
            "intervals": "2015-12-29/pt1h",
            "aggregations": {"count": doublesum("count")},
            "dimension": "user_name",
            "metric": "count",
            "filter": Dimension("user_lang") == "en",
            "threshold": 1,
            "context": {"timeout": 1000},
        }

        # when
        top = yield client.topn(**topn_args)

        # then
        self.assertIsNotNone(top)
        self.assertEqual(len(top.result), 1)
        first_row = top.result[0]
        self.assertEqual(len(first_row['result']), 1)
Example #35
0
 def test_build_aggregators(self):
     """Check that ``build_aggregators`` stamps each dict key in as "name".

     Output order is not asserted: both sides are sorted by "name"
     before comparison.
     """
     by_name = itemgetter("name")
     named_aggregators = dict(
         agg1=aggregators.count("metric1"),
         agg2=aggregators.longsum("metric2"),
         agg3=aggregators.doublesum("metric3"),
         agg4=aggregators.min("metric4"),
         agg5=aggregators.max("metric5"),
         agg6=aggregators.hyperunique("metric6"),
         agg7=aggregators.cardinality("dim1"),
         agg8=aggregators.cardinality(["dim1", "dim2"], by_row=True),
     )
     built = aggregators.build_aggregators(named_aggregators)
     wanted = [
         dict(name="agg1", type="count", fieldName="metric1"),
         dict(name="agg2", type="longSum", fieldName="metric2"),
         dict(name="agg3", type="doubleSum", fieldName="metric3"),
         dict(name="agg4", type="min", fieldName="metric4"),
         dict(name="agg5", type="max", fieldName="metric5"),
         dict(name="agg6", type="hyperUnique", fieldName="metric6"),
         dict(name="agg7", type="cardinality", fieldNames=["dim1"], byRow=False),
         dict(name="agg8", type="cardinality", fieldNames=["dim1", "dim2"], byRow=True),
     ]
     assert sorted(built, key=by_name) == sorted(wanted, key=by_name)
Example #36
0
 def test_build_aggregators(self):
     """Verify ``aggregators.build_aggregators`` on a name -> aggregator mapping.

     Every built entry must be the original aggregator dict plus a 'name'
     entry equal to its mapping key. The comparison is order-insensitive
     because both lists are sorted by 'name' first.
     """
     name_of = itemgetter('name')

     inputs = {}
     inputs['agg1'] = aggregators.count('metric1')
     inputs['agg2'] = aggregators.longsum('metric2')
     inputs['agg3'] = aggregators.doublesum('metric3')
     inputs['agg4'] = aggregators.min('metric4')
     inputs['agg5'] = aggregators.max('metric5')
     inputs['agg6'] = aggregators.hyperunique('metric6')
     inputs['agg7'] = aggregators.cardinality('dim1')
     inputs['agg8'] = aggregators.cardinality(['dim1', 'dim2'], by_row=True)

     produced = aggregators.build_aggregators(inputs)

     reference = [
         {'name': 'agg1', 'type': 'count', 'fieldName': 'metric1'},
         {'name': 'agg2', 'type': 'longSum', 'fieldName': 'metric2'},
         {'name': 'agg3', 'type': 'doubleSum', 'fieldName': 'metric3'},
         {'name': 'agg4', 'type': 'min', 'fieldName': 'metric4'},
         {'name': 'agg5', 'type': 'max', 'fieldName': 'metric5'},
         {'name': 'agg6', 'type': 'hyperUnique', 'fieldName': 'metric6'},
         {'name': 'agg7', 'type': 'cardinality',
          'fieldNames': ['dim1'], 'byRow': False},
         {'name': 'agg8', 'type': 'cardinality',
          'fieldNames': ['dim1', 'dim2'], 'byRow': True},
     ]

     produced_sorted = sorted(produced, key=name_of)
     reference_sorted = sorted(reference, key=name_of)
     assert produced_sorted == reference_sorted
    def test_build_filtered_aggregator(self):
        """Check ``build_aggregators`` on aggregators wrapped by ``filtered``.

        Each expected entry is a 'filtered' dict carrying the selector
        filter on dim/val and the inner aggregator with its mapping key
        as 'name'. Both sides are sorted by the inner aggregator's name so
        the comparison does not depend on output order.

        The built output is compared against the hand-written expectation.
        Previously the expectation was overwritten with the built output,
        so the assertion compared the result with itself.
        """
        filter_ = filters.Filter(dimension='dim', value='val')
        agg_input = {
            'agg1': aggregators.filtered(filter_,
                                         aggregators.count('metric1')),
            'agg2': aggregators.filtered(filter_,
                                         aggregators.longsum('metric2')),
            'agg3': aggregators.filtered(filter_,
                                         aggregators.doublesum('metric3')),
            'agg4': aggregators.filtered(filter_,
                                         aggregators.doublemin('metric4')),
            'agg5': aggregators.filtered(filter_,
                                         aggregators.doublemax('metric5')),
            'agg6': aggregators.filtered(filter_,
                                         aggregators.hyperunique('metric6')),
            'agg7': aggregators.filtered(filter_,
                                         aggregators.cardinality('dim1')),
            'agg8': aggregators.filtered(
                filter_,
                aggregators.cardinality(['dim1', 'dim2'], by_row=True)),
            'agg9': aggregators.filtered(filter_,
                                         aggregators.thetasketch('dim1')),
            'agg10': aggregators.filtered(filter_,
                                          aggregators.thetasketch('metric7')),
            'agg11': aggregators.filtered(
                filter_,
                aggregators.thetasketch('metric8',
                                        isinputthetasketch=True,
                                        size=8192)),
        }

        aggs = [
            {'name': 'agg1', 'type': 'count', 'fieldName': 'metric1'},
            {'name': 'agg2', 'type': 'longSum', 'fieldName': 'metric2'},
            {'name': 'agg3', 'type': 'doubleSum', 'fieldName': 'metric3'},
            {'name': 'agg4', 'type': 'doubleMin', 'fieldName': 'metric4'},
            {'name': 'agg5', 'type': 'doubleMax', 'fieldName': 'metric5'},
            {'name': 'agg6', 'type': 'hyperUnique', 'fieldName': 'metric6'},
            {'name': 'agg7', 'type': 'cardinality', 'fieldNames': ['dim1'], 'byRow': False},
            {'name': 'agg8', 'type': 'cardinality', 'fieldNames': ['dim1', 'dim2'], 'byRow': True},
            {'name': 'agg9', 'type': 'thetaSketch', 'fieldName': 'dim1', 'isInputThetaSketch': False, 'size': 16384},
            {'name': 'agg10', 'type': 'thetaSketch', 'fieldName': 'metric7', 'isInputThetaSketch': False, 'size': 16384},
            {'name': 'agg11', 'type': 'thetaSketch', 'fieldName': 'metric8', 'isInputThetaSketch': True, 'size': 8192},
        ]
        # A fresh wrapper (and fresh filter dict) per aggregator, so no
        # expected entry shares mutable state with another.
        expected = [
            {
                'type': 'filtered',
                'filter': {
                    'type': 'selector',
                    'dimension': 'dim',
                    'value': 'val',
                },
                'aggregator': agg,
            }
            for agg in aggs
        ]

        def inner_name(spec):
            # Sort key: name of the aggregator nested inside the wrapper.
            return spec['aggregator']['name']

        actual = aggregators.build_aggregators(agg_input)
        assert sorted(actual, key=inner_name) == sorted(expected, key=inner_name)
from pydruid.client import *
from pydruid.utils.aggregators import doublesum

# Client for http://localhost:32769 using the 'druid/v2' endpoint.
query = PyDruid("http://localhost:32769", "druid/v2")

# topN over datasource 'demo': top 10 'gdp' values ranked by the
# doublesum of 'value', with a single 'all' granularity bucket.
ts = query.topn(
    datasource="demo",
    granularity="all",
    intervals="2016-10-02/p10w",
    aggregations={"value": doublesum("value")},
    dimension="gdp",
    metric="value",
    threshold=10,
)

# Dump the result's result_json attribute.
print(ts.result_json)
Example #39
0
    def test_druid_returns_html_error(self, mock_urlopen):
        """Check the IOError text produced when urlopen fails with an HTML 500 body.

        ``mock_urlopen`` is made to raise an ``HTTPError`` whose message is a
        Jetty-style HTML error page. The topN call must raise ``IOError``
        whose string form is: the "HTTP Error 500: <message>" line, a
        "Druid Error:" line carrying the text of the page's ``<pre>``
        element, and the pretty-printed query JSON.
        """
        # given
        message = textwrap.dedent("""
            <html>
            <head>
            <meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1"/>
            <title>Error 500 </title>
            </head>
            <body>
            <h2>HTTP ERROR: 500</h2>
            <p>Problem accessing /druid/v2/. Reason:
            <pre>    javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded</pre></p>
            <hr /><a href="http://eclipse.org/jetty">Powered by Jetty:// 9.3.19.v20170502</a><hr/>
            </body>
            </html>
        """).strip()
        # HTTPError(url, code, msg, hdrs, fp): the HTML page is passed as msg.
        ex = urllib.error.HTTPError(None, 500, message, None, None)
        mock_urlopen.side_effect = ex
        client = create_client()

        # when / then
        with pytest.raises(IOError) as e:
            client.topn(
                    datasource="testdatasource",
                    granularity="all",
                    intervals="2015-12-29/pt1h",
                    aggregations={"count": doublesum("count")},
                    dimension="user_name",
                    metric="count",
                    filter=Dimension("user_lang") == "en",
                    threshold=1,
                    context={"timeout": 1000})

        # NOTE: the expected text below contains significant trailing spaces
        # (after "</html>" and after the "Druid Error: ..." line).
        assert str(e.value) == textwrap.dedent("""
            HTTP Error 500: <html>
            <head>
            <meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1"/>
            <title>Error 500 </title>
            </head>
            <body>
            <h2>HTTP ERROR: 500</h2>
            <p>Problem accessing /druid/v2/. Reason:
            <pre>    javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded</pre></p>
            <hr /><a href="http://eclipse.org/jetty">Powered by Jetty:// 9.3.19.v20170502</a><hr/>
            </body>
            </html> 
             Druid Error: javax.servlet.ServletException: java.lang.OutOfMemoryError: GC overhead limit exceeded 
             Query is: {
                "aggregations": [
                    {
                        "fieldName": "count",
                        "name": "count",
                        "type": "doubleSum"
                    }
                ],
                "context": {
                    "timeout": 1000
                },
                "dataSource": "testdatasource",
                "dimension": "user_name",
                "filter": {
                    "dimension": "user_lang",
                    "type": "selector",
                    "value": "en"
                },
                "granularity": "all",
                "intervals": "2015-12-29/pt1h",
                "metric": "count",
                "queryType": "topN",
                "threshold": 1
            }
        """).strip()
Example #40
0
 def sum_calculation(cls, dimension, field, interval_creator):
     """Alternate constructor: a doublesum of 'sum' restricted by a filter.

     Builds ``Filter(dimension=dimension, value=field)``, wraps
     ``doublesum('sum')`` with it via ``filtered_aggregator``, and returns
     ``cls(field, <that aggregation>, interval_creator)``.
     """
     return cls(
         field,
         filtered_aggregator(
             filter=Filter(dimension=dimension, value=field),
             agg=doublesum('sum'),
         ),
         interval_creator,
     )
Example #41
0
    def test_build_filtered_aggregator(self):
        """Check ``build_aggregators`` on aggregators wrapped by ``filtered``.

        Each expected entry is a "filtered" dict carrying the selector
        filter on dim/val and the inner aggregator with its mapping key
        as "name". Both sides are sorted by the inner aggregator's name so
        the comparison does not depend on output order.

        The built output is compared against the hand-written expectation.
        Previously the expectation was overwritten with the built output,
        so the assertion compared the result with itself.
        """
        filter_ = filters.Filter(dimension="dim", value="val")
        agg_input = {
            "agg1":
            aggregators.filtered(filter_, aggregators.count("metric1")),
            "agg2":
            aggregators.filtered(filter_, aggregators.longsum("metric2")),
            "agg3":
            aggregators.filtered(filter_, aggregators.doublesum("metric3")),
            "agg4":
            aggregators.filtered(filter_, aggregators.doublemin("metric4")),
            "agg5":
            aggregators.filtered(filter_, aggregators.doublemax("metric5")),
            "agg6":
            aggregators.filtered(filter_, aggregators.hyperunique("metric6")),
            "agg7":
            aggregators.filtered(filter_, aggregators.cardinality("dim1")),
            "agg8":
            aggregators.filtered(
                filter_, aggregators.cardinality(["dim1", "dim2"],
                                                 by_row=True)),
            "agg9":
            aggregators.filtered(filter_, aggregators.thetasketch("dim1")),
            "agg10":
            aggregators.filtered(filter_, aggregators.thetasketch("metric7")),
            "agg11":
            aggregators.filtered(
                filter_,
                aggregators.thetasketch("metric8",
                                        isinputthetasketch=True,
                                        size=8192),
            ),
        }

        aggs = [
            {
                "name": "agg1",
                "type": "count",
                "fieldName": "metric1"
            },
            {
                "name": "agg2",
                "type": "longSum",
                "fieldName": "metric2"
            },
            {
                "name": "agg3",
                "type": "doubleSum",
                "fieldName": "metric3"
            },
            {
                "name": "agg4",
                "type": "doubleMin",
                "fieldName": "metric4"
            },
            {
                "name": "agg5",
                "type": "doubleMax",
                "fieldName": "metric5"
            },
            {
                "name": "agg6",
                "type": "hyperUnique",
                "fieldName": "metric6"
            },
            {
                "name": "agg7",
                "type": "cardinality",
                "fieldNames": ["dim1"],
                "byRow": False,
            },
            {
                "name": "agg8",
                "type": "cardinality",
                "fieldNames": ["dim1", "dim2"],
                "byRow": True,
            },
            {
                "name": "agg9",
                "type": "thetaSketch",
                "fieldName": "dim1",
                "isInputThetaSketch": False,
                "size": 16384,
            },
            {
                "name": "agg10",
                "type": "thetaSketch",
                "fieldName": "metric7",
                "isInputThetaSketch": False,
                "size": 16384,
            },
            {
                "name": "agg11",
                "type": "thetaSketch",
                "fieldName": "metric8",
                "isInputThetaSketch": True,
                "size": 8192,
            },
        ]
        # A fresh wrapper (and fresh filter dict) per aggregator, so no
        # expected entry shares mutable state with another.
        expected = [
            {
                "type": "filtered",
                "filter": {
                    "type": "selector",
                    "dimension": "dim",
                    "value": "val"
                },
                "aggregator": agg,
            }
            for agg in aggs
        ]

        def inner_name(spec):
            # Sort key: name of the aggregator nested inside the wrapper.
            return spec["aggregator"]["name"]

        actual = aggregators.build_aggregators(agg_input)
        assert sorted(actual, key=inner_name) == sorted(expected,
                                                        key=inner_name)
Example #42
0
 def test_build_aggregators(self):
     """Check ``build_aggregators`` across all aggregator kinds used here.

     Covers count, longsum, doublesum, doublemin, doublemax, hyperunique,
     cardinality (single and multi-field) and thetasketch (default and
     explicit options). Each built entry must equal the expected dict
     whose "name" is the mapping key; both lists are sorted by "name"
     before comparison.
     """
     by_name = itemgetter("name")

     agg_input = {
         "agg1": aggregators.count("metric1"),
         "agg2": aggregators.longsum("metric2"),
         "agg3": aggregators.doublesum("metric3"),
         "agg4": aggregators.doublemin("metric4"),
         "agg5": aggregators.doublemax("metric5"),
         "agg6": aggregators.hyperunique("metric6"),
         "agg7": aggregators.cardinality("dim1"),
         "agg8": aggregators.cardinality(["dim1", "dim2"], by_row=True),
         "agg9": aggregators.thetasketch("dim1"),
         "agg10": aggregators.thetasketch("metric7"),
         "agg11": aggregators.thetasketch(
             "metric8", isinputthetasketch=True, size=8192
         ),
     }
     produced = aggregators.build_aggregators(agg_input)

     wanted = [
         {"name": "agg1", "type": "count", "fieldName": "metric1"},
         {"name": "agg2", "type": "longSum", "fieldName": "metric2"},
         {"name": "agg3", "type": "doubleSum", "fieldName": "metric3"},
         {"name": "agg4", "type": "doubleMin", "fieldName": "metric4"},
         {"name": "agg5", "type": "doubleMax", "fieldName": "metric5"},
         {"name": "agg6", "type": "hyperUnique", "fieldName": "metric6"},
         {"name": "agg7", "type": "cardinality",
          "fieldNames": ["dim1"], "byRow": False},
         {"name": "agg8", "type": "cardinality",
          "fieldNames": ["dim1", "dim2"], "byRow": True},
         {"name": "agg9", "type": "thetaSketch", "fieldName": "dim1",
          "isInputThetaSketch": False, "size": 16384},
         {"name": "agg10", "type": "thetaSketch", "fieldName": "metric7",
          "isInputThetaSketch": False, "size": 16384},
         {"name": "agg11", "type": "thetaSketch", "fieldName": "metric8",
          "isInputThetaSketch": True, "size": 8192},
     ]

     produced_sorted = sorted(produced, key=by_name)
     wanted_sorted = sorted(wanted, key=by_name)
     assert produced_sorted == wanted_sorted
Example #43
0
# draw and show it
ax.relim()
ax.autoscale_view(True, True, True)
fig.canvas.draw()
plt.show(block=False)

# loop to update the data
while True:
    try:
        query = PyDruid("http://localhost:8082", 'druid/v2')
        ts = query.timeseries(
            datasource=datasource,
            granularity='minute',
            intervals='2019-10-29/p4w',
            aggregations={'count': doublesum('count')},
        )
        df = query.export_pandas()
        x = df['timestamp'].map(lambda x: x[8:16])

        y = df['count']

        # set the new data
        if not li:
            li, = ax.plot(x, y)
        else:
            li.set_ydata(y)

        fig.canvas.draw()

        time.sleep(0.01)