Ejemplo n.º 1
0
    def test_build_query_none_type(self):
        """Build a query with a ``None`` query type and compare its dict.

        Afterwards ``dimension``, ``having`` and ``filter`` are set to ``None``
        one after another (cumulatively) and the query is rebuilt each time to
        check that ``None`` is accepted for those keys.
        """
        # given
        query_builder = QueryBuilder()
        wanted = {
            "queryType": None,
            "dataSource": "things",
            "aggregations": [
                {"fieldName": "thing", "name": "count", "type": "count"},
            ],
            "filter": {"dimension": "one", "type": "selector", "value": 1},
            "having": {"aggregation": "sum", "type": "greaterThan", "value": 1},
            "dimension": "dim1",
        }

        # when
        build_args = {
            "datasource": "things",
            "aggregations": {"count": aggregators.count("thing")},
            "filter": filters.Dimension("one") == 1,
            "having": having.Aggregation("sum") > 1,
            "dimension": "dim1",
        }
        built = query_builder.build_query(None, build_args)

        # then
        assert built.query_dict == wanted

        # you should be able to pass `None` to dimension/having/filter
        for nullable_key in ("dimension", "having", "filter"):
            build_args[nullable_key] = None
            wanted[nullable_key] = None

            rebuilt = query_builder.build_query(None, build_args)

            assert rebuilt.query_dict == wanted
Ejemplo n.º 2
0
 def test_ne_dimension(self):
     """Check the filter built from ``Dimension != value``.

     The expected result is a ``not`` filter whose ``field`` is the
     selector for the same dimension and value.
     """
     selector = {'dimension': 'dim', 'type': 'selector', 'value': 'val'}
     wanted = {'field': selector, 'type': 'not'}

     dim = filters.Dimension('dim')
     built = filters.Filter.build_filter(dim != 'val')

     assert built == wanted
Ejemplo n.º 3
0
 def test_ne_dimension(self):
     """Check that ``!=`` on a Dimension builds a negated selector filter."""
     not_equal = filters.Dimension("dim") != "val"
     built = filters.Filter.build_filter(not_equal)

     inner_selector = {"dimension": "dim", "type": "selector", "value": "val"}
     assert built == {"field": inner_selector, "type": "not"}
Ejemplo n.º 4
0
    def test_build_query_none_type(self):
        """Build a query whose query type is ``None`` and verify its dict.

        The second half sets ``dimension``, ``having`` and ``filter`` to
        ``None`` one at a time (keeping earlier ones ``None``) and rebuilds the
        query to verify that ``None`` is accepted for each of them.
        """
        # given
        count_aggregation = {
            'fieldName': 'thing',
            'name': 'count',
            'type': 'count',
        }
        selector_filter = {'dimension': 'one', 'type': 'selector', 'value': 1}
        having_clause = {
            'aggregation': 'sum',
            'type': 'greaterThan',
            'value': 1,
        }
        wanted = {
            'queryType': None,
            'dataSource': 'things',
            'aggregations': [count_aggregation],
            'filter': selector_filter,
            'having': having_clause,
            'dimension': 'dim1',
        }

        query_builder = QueryBuilder()

        # when
        build_args = {
            'datasource': 'things',
            'aggregations': {'count': aggregators.count('thing')},
            'filter': filters.Dimension('one') == 1,
            'having': having.Aggregation('sum') > 1,
            'dimension': 'dim1',
        }
        built = query_builder.build_query(None, build_args)

        # then
        assert built.query_dict == wanted

        # you should be able to pass `None` to dimension/having/filter
        for nullable_key in ('dimension', 'having', 'filter'):
            wanted[nullable_key] = None
            build_args[nullable_key] = None

            rebuilt = query_builder.build_query(None, build_args)

            assert rebuilt.query_dict == wanted
Ejemplo n.º 5
0
    def test_build_query(self):
        """Build a query with a ``None`` query type from a full argument dict.

        The arguments cover aggregations, post aggregations, a paging spec, a
        filter, a having clause, an arbitrary extra key and virtual columns;
        the built ``query_dict`` is compared against the expected dict.
        """
        # given
        avg_post_aggregation = {
            'fields': [
                {'fieldName': 'sum', 'type': 'fieldAccess'},
                {'fieldName': 'count', 'type': 'fieldAccess'},
            ],
            'fn': '/',
            'name': 'avg',
            'type': 'arithmetic',
        }
        foo_virtual_column = {
            'type': 'expression',
            'name': 'foo',
            'expression': "concat('foo' + page)",
            'outputType': 'STRING',
        }
        wanted = {
            'queryType': None,
            'dataSource': 'things',
            'aggregations': [
                {'fieldName': 'thing', 'name': 'count', 'type': 'count'},
            ],
            'postAggregations': [avg_post_aggregation],
            'pagingSpec': {'pagingIdentifies': {}, 'threshold': 1},
            'filter': {'dimension': 'one', 'type': 'selector', 'value': 1},
            'having': {'aggregation': 'sum', 'type': 'greaterThan', 'value': 1},
            'new_key': 'value',
            'virtualColumns': [foo_virtual_column],
        }

        query_builder = QueryBuilder()

        # when
        build_args = {
            'datasource': 'things',
            'aggregations': {'count': aggregators.count('thing')},
            'post_aggregations': {
                'avg': postaggregator.Field('sum') / postaggregator.Field('count'),
            },
            'paging_spec': {'pagingIdentifies': {}, 'threshold': 1},
            'filter': filters.Dimension('one') == 1,
            'having': having.Aggregation('sum') > 1,
            'new_key': 'value',
            'virtualColumns': [
                VirtualColumn(
                    type='expression',
                    name='foo',
                    expression="concat('foo' + page)",
                    outputType='STRING',
                ),
            ],
        }
        built = query_builder.build_query(None, build_args)

        # then
        assert built.query_dict == wanted
Ejemplo n.º 6
0
    def test_build_query(self):
        """Build a query with a ``None`` query type and compare its dict.

        The arguments cover aggregations, post aggregations, a paging spec, a
        filter, a having clause and one arbitrary extra key.
        """
        # given
        field_accessors = [
            {"fieldName": "sum", "type": "fieldAccess"},
            {"fieldName": "count", "type": "fieldAccess"},
        ]
        avg_post_aggregation = {
            "fields": field_accessors,
            "fn": "/",
            "name": "avg",
            "type": "arithmetic",
        }
        wanted = {
            "queryType": None,
            "dataSource": "things",
            "aggregations": [
                {"fieldName": "thing", "name": "count", "type": "count"},
            ],
            "postAggregations": [avg_post_aggregation],
            "pagingSpec": {"pagingIdentifies": {}, "threshold": 1},
            "filter": {"dimension": "one", "type": "selector", "value": 1},
            "having": {"aggregation": "sum", "type": "greaterThan", "value": 1},
            "new_key": "value",
        }

        query_builder = QueryBuilder()

        # when
        average = postaggregator.Field("sum") / postaggregator.Field("count")
        build_args = {
            "datasource": "things",
            "aggregations": {"count": aggregators.count("thing")},
            "post_aggregations": {"avg": average},
            "paging_spec": {"pagingIdentifies": {}, "threshold": 1},
            "filter": filters.Dimension("one") == 1,
            "having": having.Aggregation("sum") > 1,
            "new_key": "value",
        }
        built = query_builder.build_query(None, build_args)

        # then
        assert built.query_dict == wanted
Ejemplo n.º 7
0
    def test_build_subquery(self):
        """Build a subquery and compare the returned dict.

        The expected dict has ``type`` set to ``query`` and carries the inner
        ``groupBy`` query under the ``query`` key.
        """
        # given
        avg_post_aggregation = {
            "fields": [
                {"fieldName": "sum", "type": "fieldAccess"},
                {"fieldName": "count", "type": "fieldAccess"},
            ],
            "fn": "/",
            "name": "avg",
            "type": "arithmetic",
        }
        inner_query = {
            "queryType": "groupBy",
            "dataSource": "things",
            "aggregations": [
                {"fieldName": "thing", "name": "count", "type": "count"},
            ],
            "postAggregations": [avg_post_aggregation],
            "filter": {"dimension": "one", "type": "selector", "value": 1},
            "having": {"aggregation": "sum", "type": "greaterThan", "value": 1},
        }
        wanted = {"query": inner_query, "type": "query"}

        query_builder = QueryBuilder()

        # when
        average = postaggregator.Field("sum") / postaggregator.Field("count")
        subquery_args = {
            "datasource": "things",
            "aggregations": {"count": aggregators.count("thing")},
            "post_aggregations": {"avg": average},
            "filter": filters.Dimension("one") == 1,
            "having": having.Aggregation("sum") > 1,
        }
        built = query_builder.subquery(subquery_args)

        # then
        assert built == wanted
Ejemplo n.º 8
0
def druid_simple_groupby(dimensions, filter_list=None, filter_type="and",
                         datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Runs a groupBy query without aggregations against Druid, returns the result
    Inputs:
        dimensions => list of dimensions to group by
        filter_list => list of list of things to filter with e.g.
                        filter_list=[['target_area_id', operator.ne, 1],
                                     ['sprayable', operator.eq, "true"],
                                     ['dimension', operator, "value"]])
                       None or an empty list means no filter is applied
        filter_type => type of Druid filter to perform
        datasource => Druid datasource to query
    Returns:
        the `result` of the groupby request, or an empty list when the
        request raises OSError
    """
    # NOTE: filter_list defaults to None rather than [] so that no mutable
    # object is shared between calls; both are falsy and skip the filter.
    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        limit_spec={
            "type": "default",
            "limit": 50000,
        }
    )
    params['dimensions'] = dimensions
    if filter_list:
        # Each entry is [dimension_name, comparison_operator, value]; applying
        # the operator (e.g. operator.eq) to a Dimension yields a filter.
        fields = [
            this_filter[1](filters.Dimension(this_filter[0]), this_filter[2])
            for this_filter in filter_list
        ]
        params['filter'] = filters.Filter(
            type=filter_type,
            fields=fields
        )

    try:
        request = query.groupby(**params)
    except OSError:
        # Best effort: a failed request is reported as "no data".
        return []
    return request.result
Ejemplo n.º 9
0
    def test_build_subquery(self):
        """Build a subquery and verify the dict that ``subquery`` returns.

        The expected dict wraps a ``groupBy`` query under ``query`` and has
        ``type`` set to ``query``.
        """
        # given
        field_accessors = [
            {'fieldName': 'sum', 'type': 'fieldAccess'},
            {'fieldName': 'count', 'type': 'fieldAccess'},
        ]
        inner_query = {
            'queryType': 'groupBy',
            'dataSource': 'things',
            'aggregations': [
                {'fieldName': 'thing', 'name': 'count', 'type': 'count'},
            ],
            'postAggregations': [
                {
                    'fields': field_accessors,
                    'fn': '/',
                    'name': 'avg',
                    'type': 'arithmetic',
                },
            ],
            'filter': {'dimension': 'one', 'type': 'selector', 'value': 1},
            'having': {'aggregation': 'sum', 'type': 'greaterThan', 'value': 1},
        }
        wanted = {'query': inner_query, 'type': 'query'}

        query_builder = QueryBuilder()

        # when
        subquery_args = {
            'datasource': 'things',
            'aggregations': {'count': aggregators.count('thing')},
            'post_aggregations': {
                'avg': postaggregator.Field('sum') / postaggregator.Field('count'),
            },
            'filter': filters.Dimension('one') == 1,
            'having': having.Aggregation('sum') > 1,
        }
        built = query_builder.subquery(subquery_args)

        # then
        assert built == wanted
Ejemplo n.º 10
0
def get_druid_data(dimensions=None, filter_list=None, filter_type="and",
                   order_by=("target_area_name",),
                   datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Runs a query against Druid, returns data with metrics
    Inputs:
        dimensions => list of dimensions to group by; when None the query
                      groups by target_area_id, target_area_name and
                      target_area_structures
        filter_list => list of list of things to filter with e.g.
                        filter_list=[['target_area_id', operator.ne, 1],
                                     ['sprayable', operator.eq, "true"],
                                     ['dimension', operator, "value"]])
                       None or an empty list means no filter is applied
        filter_type => type of Druid filter to perform,
        order_by => field(s) to order the data by
        datasource => Druid datasource to query
    Returns:
        the `result` of the groupby request, or an empty list when the
        request raises OSError
    """
    # NOTE: the defaults are immutable (None / tuple) so that no mutable
    # object is shared between calls or handed to the query parameters.

    def dim_is(name, value):
        # Equality filter on a single dimension.
        return filters.Dimension(name) == value

    def all_of(*conditions):
        # 'and' filter combining the given filters.
        return filters.Filter(type='and', fields=list(conditions))

    def count_where(condition):
        # Long sum of 'count' restricted by the given filter.
        return aggregators.filtered(condition, aggregators.longsum('count'))

    was_sprayed = settings.MSPRAY_WAS_SPRAYED_VALUE
    was_not_sprayed = settings.MSPRAY_WAS_NOT_SPRAYED_VALUE

    # A list/tuple is copied into a fresh list; anything else (e.g. None)
    # is passed through exactly as the caller supplied it.
    if isinstance(order_by, (list, tuple)):
        order_columns = list(order_by)
    else:
        order_columns = order_by

    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        aggregations={
            'num_not_sprayable': count_where(
                all_of(dim_is('sprayable', 'false'))
            ),
            'num_not_sprayed': count_where(
                all_of(dim_is('sprayable', 'true'),
                       dim_is('sprayed', was_not_sprayed))
            ),
            'num_sprayed': count_where(dim_is('sprayed', was_sprayed)),
            'num_new': count_where(dim_is('is_new', 'true')),
            'num_new_no_duplicates': count_where(
                all_of(dim_is('is_duplicate', 'false'),
                       dim_is('is_new', 'true'))
            ),
            'num_duplicate': count_where(dim_is('is_duplicate', 'true')),
            'num_sprayed_no_duplicates': count_where(
                all_of(dim_is('is_duplicate', 'false'),
                       dim_is('sprayed', was_sprayed))
            ),
            'num_not_sprayed_no_duplicates': count_where(
                all_of(dim_is('is_duplicate', 'false'),
                       dim_is('sprayable', 'true'),
                       dim_is('sprayed', was_not_sprayed))
            ),
            'num_sprayed_duplicates': count_where(
                all_of(dim_is('is_duplicate', 'true'),
                       dim_is('sprayable', 'true'),
                       dim_is('sprayed', was_sprayed))
            ),
            'num_not_sprayable_no_duplicates': count_where(
                all_of(dim_is('is_duplicate', 'false'),
                       dim_is('sprayable', 'false'))
            ),
            'num_refused': count_where(
                all_of(dim_is('is_duplicate', 'false'),
                       dim_is('is_refused', 'true'),
                       dim_is('sprayed', was_not_sprayed))
            ),
        },
        post_aggregations={
            'num_found': Field('num_sprayed_no_duplicates') +
            Field('num_sprayed_duplicates') +
            Field('num_not_sprayed_no_duplicates')
        },
        limit_spec={
            "type": "default",
            "limit": 50000,
            "columns": order_columns
        }
    )
    if filter_list:
        # Each entry is [dimension_name, comparison_operator, value]; applying
        # the operator (e.g. operator.eq) to a Dimension yields a filter.
        fields = [
            this_filter[1](filters.Dimension(this_filter[0]), this_filter[2])
            for this_filter in filter_list
        ]
        params['filter'] = filters.Filter(
            type=filter_type,
            fields=fields
        )

    if dimensions is None:
        params['dimensions'] = ['target_area_id', 'target_area_name',
                                'target_area_structures']
    else:
        params['dimensions'] = dimensions

    try:
        request = query.groupby(**params)
    except OSError:
        # Best effort: a failed request is reported as "no data".
        return []
    return request.result
Ejemplo n.º 11
0
 def test_dimension(self):
     """Check that ``Dimension == value`` builds a selector filter dict."""
     equals_val = filters.Dimension('dim') == 'val'
     built = filters.Filter.build_filter(equals_val)

     wanted = {'type': 'selector', 'dimension': 'dim', 'value': 'val'}
     assert built == wanted
Ejemplo n.º 12
0
 def test_dimension(self):
     """Check the filter dict built from an equality test on a Dimension."""
     wanted = {"type": "selector", "dimension": "dim", "value": "val"}

     dim = filters.Dimension("dim")
     built = filters.Filter.build_filter(dim == "val")

     assert built == wanted