예제 #1
0
 def test_or_filter_multiple(self):
     """An explicit ``or`` filter builds each of its three fields, in order."""
     pairs = [("dim1", "val1"), ("dim2", "val2"), ("dim3", "val3")]
     selectors = [
         filters.Filter(dimension=dim, value=val) for dim, val in pairs
     ]
     or_filter = filters.Filter(type="or", fields=selectors)
     actual = filters.Filter.build_filter(or_filter)
     expected = {
         "type": "or",
         "fields": [
             {"type": "selector", "dimension": dim, "value": val}
             for dim, val in pairs
         ],
     }
     assert actual == expected
예제 #2
0
    def test_search_filter(self):
        """Search filters build a ``contains`` query with a caseSensitive flag."""

        def search_spec(case_sensitive):
            # Expected built filter; only the caseSensitive value varies.
            return {
                'type': 'search',
                'dimension': 'dim',
                'query': {
                    'type': 'contains',
                    'caseSensitive': case_sensitive,
                    'value': 'val',
                },
            }

        # caseSensitive omitted by the caller: the built query says 'false'.
        default_search = filters.Filter(
            type="search", dimension="dim", value='val')
        actual = filters.Filter.build_filter(default_search)
        assert actual == search_spec('false')

        # caseSensitive given explicitly: the value is carried through.
        explicit_search = filters.Filter(
            type="search", dimension="dim", value='val', caseSensitive='true')
        actual = filters.Filter.build_filter(explicit_search)
        assert actual == search_spec('true')
예제 #3
0
 def test_nested_filtered_aggregator(self):
     """Nested ``filtered`` aggregators put 'name' on the innermost one."""
     outer_filter = filters.Filter(dimension='dim1', value='val')
     inner_filter = filters.Filter(dimension='dim2', value='val')
     inner_agg = aggregators.filtered(inner_filter,
                                      aggregators.count('metric1'))
     outer_agg = aggregators.filtered(outer_filter, inner_agg)
     actual = aggregators.build_aggregators({'agg_name': outer_agg})

     # Expectation is assembled inside-out; only the count carries 'name'.
     expected_count = {
         'type': 'count',
         'fieldName': 'metric1',
         'name': 'agg_name',
     }
     expected_inner = {
         'type': 'filtered',
         'filter': {'type': 'selector', 'dimension': 'dim2', 'value': 'val'},
         'aggregator': expected_count,
     }
     expected = [{
         'type': 'filtered',
         'filter': {'type': 'selector', 'dimension': 'dim1', 'value': 'val'},
         'aggregator': expected_inner,
     }]
     assert expected == actual
예제 #4
0
 def test_nested_filtered_aggregator(self):
     """A doubly nested ``filtered`` aggregator is named at its innermost level."""

     def selector(dimension):
         # Expected built form of a selector filter on `dimension`.
         return {"type": "selector", "dimension": dimension, "value": "val"}

     dim1_filter = filters.Filter(dimension="dim1", value="val")
     dim2_filter = filters.Filter(dimension="dim2", value="val")
     nested = aggregators.filtered(
         dim1_filter,
         aggregators.filtered(dim2_filter, aggregators.count("metric1")),
     )
     actual = aggregators.build_aggregators({"agg_name": nested})
     # the innermost aggregation must have 'agg_name'
     expected = [{
         "type": "filtered",
         "filter": selector("dim1"),
         "aggregator": {
             "type": "filtered",
             "filter": selector("dim2"),
             "aggregator": {
                 "type": "count",
                 "fieldName": "metric1",
                 "name": "agg_name",
             },
         },
     }]
     assert expected == actual
예제 #5
0
    def test_search_filter(self):
        """Search filters build a ``contains`` query, with or without caseSensitive."""
        cases = [
            # (extra keyword arguments, expected caseSensitive value)
            ({}, "false"),  # param omitted -> built query says "false"
            ({"caseSensitive": "true"}, "true"),
        ]
        for extra_kwargs, case_sensitive in cases:
            search = filters.Filter(
                type="search", dimension="dim", value="val", **extra_kwargs)
            actual = filters.Filter.build_filter(search)
            expected = {
                "type": "search",
                "dimension": "dim",
                "query": {
                    "type": "contains",
                    "caseSensitive": case_sensitive,
                    "value": "val",
                },
            }
            assert actual == expected
예제 #6
0
 def test_nested_not_or_filter(self):
     """Negating an ``or`` of two selectors wraps the ``or`` in a ``not``."""
     left = filters.Filter(dimension='dim1', value='val1')
     right = filters.Filter(dimension='dim2', value='val2')
     negated = ~(left | right)
     actual = filters.Filter.build_filter(negated)
     expected_or = {
         'type': 'or',
         'fields': [
             {'type': 'selector', 'dimension': 'dim1', 'value': 'val1'},
             {'type': 'selector', 'dimension': 'dim2', 'value': 'val2'},
         ],
     }
     expected = {'type': 'not', 'field': expected_or}
     assert actual == expected
예제 #7
0
 def test_and_filter(self):
     """The ``&`` operator combines two selectors into an ``and`` filter."""
     first = filters.Filter(dimension='dim1', value='val1')
     second = filters.Filter(dimension='dim2', value='val2')
     combined = first & second
     actual = filters.Filter.build_filter(combined)
     expected_fields = [
         {'type': 'selector', 'dimension': 'dim1', 'value': 'val1'},
         {'type': 'selector', 'dimension': 'dim2', 'value': 'val2'},
     ]
     expected = {'type': 'and', 'fields': expected_fields}
     assert actual == expected
예제 #8
0
 def test_nested_mix_filter(self):
     """Mixed ``&``, ``|`` and ``~`` operators build the expected nested filter."""

     def selector(dim, val):
         # Expected built form of a plain selector filter.
         return {'type': 'selector', 'dimension': dim, 'value': val}

     def negated(dim, val):
         # Expected built form of a negated selector filter.
         return {'type': 'not', 'field': selector(dim, val)}

     f1 = filters.Filter(dimension='dim1', value='val1')
     f2 = filters.Filter(dimension='dim2', value='val2')
     f3 = filters.Filter(dimension='dim3', value='val3')
     f4 = filters.Filter(dimension='dim4', value='val4')
     f5 = filters.Filter(dimension='dim5', value='val5')
     f6 = filters.Filter(dimension='dim6', value='val6')
     f7 = filters.Filter(dimension='dim7', value='val7')
     f8 = filters.Filter(dimension='dim8', value='val8')

     # Left operand first, then the parenthesised group, then the final `&`.
     and_part = f1 & ~f2 & f3
     or_part = f4 | ~f5 | f6 | (f7 & ~f8)
     actual = filters.Filter.build_filter(and_part & or_part)

     expected = {
         'type': 'and',
         'fields': [
             selector('dim1', 'val1'),
             negated('dim2', 'val2'),
             selector('dim3', 'val3'),
             {
                 'type': 'or',
                 'fields': [
                     selector('dim4', 'val4'),
                     negated('dim5', 'val5'),
                     selector('dim6', 'val6'),
                     {
                         'type': 'and',
                         'fields': [
                             selector('dim7', 'val7'),
                             negated('dim8', 'val8'),
                         ],
                     },
                 ],
             },
         ],
     }
     assert actual == expected
예제 #9
0
 def test_and_filter_multiple(self):
     """An explicit ``and`` filter builds each of its three fields, in order."""
     pairs = [('dim1', 'val1'), ('dim2', 'val2'), ('dim3', 'val3')]
     selectors = [
         filters.Filter(dimension=dim, value=val) for dim, val in pairs
     ]
     and_filter = filters.Filter(type='and', fields=selectors)
     actual = filters.Filter.build_filter(and_filter)
     expected = {
         'type': 'and',
         'fields': [
             {'type': 'selector', 'dimension': dim, 'value': val}
             for dim, val in pairs
         ],
     }
     assert actual == expected
예제 #10
0
 def test_in_filter(self):
     """An ``in`` filter is built with its dimension and list of values."""
     in_filter = filters.Filter(
         type='in',
         dimension='dim',
         values=['val1', 'val2', 'val3'],
     )
     actual = filters.Filter.build_filter(in_filter)
     expected = {
         'type': 'in',
         'dimension': 'dim',
         'values': ['val1', 'val2', 'val3'],
     }
     assert actual == expected
예제 #11
0
 def test_filtered_aggregator(self):
     """``filtered`` pairs the built selector filter with each aggregator as given."""
     filter_ = filters.Filter(dimension='dim', value='val')
     aggs = [
         aggregators.count('metric1'),
         aggregators.longsum('metric2'),
         aggregators.doublesum('metric3'),
         aggregators.doublemin('metric4'),
         aggregators.doublemax('metric5'),
         aggregators.hyperunique('metric6'),
         aggregators.cardinality('dim1'),
         aggregators.cardinality(['dim1', 'dim2'], by_row=True),
         aggregators.thetasketch('dim1'),
         aggregators.thetasketch('metric7'),
         aggregators.thetasketch('metric8',
                                 isinputthetasketch=True,
                                 size=8192),
     ]
     for agg in aggs:
         expected_filter = {
             'type': 'selector',
             'dimension': 'dim',
             'value': 'val',
         }
         expected = {
             'type': 'filtered',
             'filter': expected_filter,
             'aggregator': agg,
         }
         actual = aggregators.filtered(filter_, agg)
         assert actual == expected
예제 #12
0
 def test_filtered_aggregator(self):
     """Every aggregator type is wrapped by ``filtered`` together with the built filter."""

     def wrapped(aggregator):
         # Expected output of `filtered` for the selector filter used below.
         return {
             "type": "filtered",
             "filter": {
                 "type": "selector",
                 "dimension": "dim",
                 "value": "val",
             },
             "aggregator": aggregator,
         }

     filter_ = filters.Filter(dimension="dim", value="val")
     candidates = [
         aggregators.count("metric1"),
         aggregators.longsum("metric2"),
         aggregators.doublesum("metric3"),
         aggregators.doublemin("metric4"),
         aggregators.doublemax("metric5"),
         aggregators.hyperunique("metric6"),
         aggregators.cardinality("dim1"),
         aggregators.cardinality(["dim1", "dim2"], by_row=True),
         aggregators.thetasketch("dim1"),
         aggregators.thetasketch("metric7"),
         aggregators.thetasketch(
             "metric8", isinputthetasketch=True, size=8192),
     ]
     for agg in candidates:
         expected = wrapped(agg)
         actual = aggregators.filtered(filter_, agg)
         assert actual == expected
예제 #13
0
 def test_selector_filter_extraction_fn(self):
     """A selector filter with a regex extraction function emits 'extractionFn'."""
     regex_fn = dimensions.RegexExtraction('([a-b])')
     selector = filters.Filter(
         dimension='dim',
         value='v',
         extraction_function=regex_fn,
     )
     actual = filters.Filter.build_filter(selector)
     expected = {
         'type': 'selector',
         'dimension': 'dim',
         'value': 'v',
         'extractionFn': {'type': 'regex', 'expr': '([a-b])'},
     }
     assert actual == expected
예제 #14
0
 def test_extraction_filter(self):
     """An ``extraction`` filter with a partial extraction emits 'extractionFn'."""
     partial_fn = dimensions.PartialExtraction('([a-b])')
     extraction = filters.Filter(
         type='extraction',
         dimension='dim',
         value='v',
         extraction_function=partial_fn,
     )
     actual = filters.Filter.build_filter(extraction)
     expected = {
         'type': 'extraction',
         'dimension': 'dim',
         'value': 'v',
         'extractionFn': {'type': 'partial', 'expr': '([a-b])'},
     }
     assert actual == expected
예제 #15
0
 def test_not_filter(self):
     """The ``~`` operator wraps a selector in a ``not`` filter."""
     selector = filters.Filter(dimension='dim', value='val')
     negated = ~selector
     actual = filters.Filter.build_filter(negated)
     expected_selector = {
         'type': 'selector',
         'dimension': 'dim',
         'value': 'val',
     }
     expected = {'type': 'not', 'field': expected_selector}
     assert actual == expected
예제 #16
0
 def test_javascript_filter(self):
     """A ``javascript`` filter is built with its dimension and function source."""
     js_filter = filters.Filter(
         type="javascript",
         dimension="dim",
         function="function(x){return true}",
     )
     actual = filters.Filter.build_filter(js_filter)
     expected = dict(
         type="javascript",
         dimension="dim",
         function="function(x){return true}",
     )
     assert actual == expected
예제 #17
0
 def test_in_filter(self):
     """Building an ``in`` filter keeps the dimension and the values list."""
     in_filter = filters.Filter(
         type="in", dimension="dim", values=["val1", "val2", "val3"])
     actual = filters.Filter.build_filter(in_filter)
     expected = dict(
         type="in",
         dimension="dim",
         values=["val1", "val2", "val3"],
     )
     assert actual == expected
예제 #18
0
 def test_columnComparison_filter(self):
     """columnComparison takes a plain name and a DimensionSpec as dimensions."""
     compared = ['dim1', dimensions.DimensionSpec('dim2', 'dim2')]
     comparison = filters.Filter(type='columnComparison',
                                 dimensions=compared)
     actual = filters.Filter.build_filter(comparison)
     expected = {
         'type': 'columnComparison',
         'dimensions': [
             'dim1',
             {'type': 'default', 'dimension': 'dim2', 'outputName': 'dim2'},
         ],
     }
     assert actual == expected
예제 #19
0
 def test_not_filter(self):
     """Building the same ``not`` filter twice yields the expected result."""
     negated = ~filters.Filter(dimension='dim', value='val')
     # The first build's result is deliberately discarded: the assertion runs
     # on the second build to make sure `build_filter` does not change the
     # filter object passed to it.
     filters.Filter.build_filter(negated)
     actual = filters.Filter.build_filter(negated)
     expected_selector = {
         'type': 'selector',
         'dimension': 'dim',
         'value': 'val',
     }
     expected = {'type': 'not', 'field': expected_selector}
     assert actual == expected
예제 #20
0
 def test_javascript_filter(self):
     """Building a ``javascript`` filter keeps dimension and function as given."""
     js_source = 'function(x){return true}'
     js_filter = filters.Filter(type='javascript',
                                dimension='dim',
                                function=js_source)
     actual = filters.Filter.build_filter(js_filter)
     expected = {
         'type': 'javascript',
         'dimension': 'dim',
         'function': js_source,
     }
     assert actual == expected
예제 #21
0
 def test_or_filter(self):
     """The ``|`` operator combines two selectors into an ``or`` filter."""
     first = filters.Filter(dimension="dim1", value="val1")
     second = filters.Filter(dimension="dim2", value="val2")
     either = first | second
     actual = filters.Filter.build_filter(either)
     expected_fields = [
         {"type": "selector", "dimension": "dim1", "value": "val1"},
         {"type": "selector", "dimension": "dim2", "value": "val2"},
     ]
     expected = {"type": "or", "fields": expected_fields}
     assert actual == expected
예제 #22
0
 def test_not_filter(self):
     """A negated selector builds to the same ``not`` filter on a repeated build."""
     negated = ~filters.Filter(dimension="dim", value="val")
     # Build once and drop the result, then build again: the assertion on the
     # second build checks that `build_filter` did not change `negated`.
     filters.Filter.build_filter(negated)
     actual = filters.Filter.build_filter(negated)
     expected = {
         "type": "not",
         "field": {"type": "selector", "dimension": "dim", "value": "val"},
     }
     assert actual == expected
예제 #23
0
 def test_selector_filter_extraction_fn(self):
     """A regex extraction function on a selector is built as 'extractionFn'."""
     pattern = "([a-b])"
     selector = filters.Filter(
         dimension="dim",
         value="v",
         extraction_function=dimensions.RegexExtraction(pattern),
     )
     actual = filters.Filter.build_filter(selector)
     expected_fn = {"type": "regex", "expr": pattern}
     expected = {
         "type": "selector",
         "dimension": "dim",
         "value": "v",
         "extractionFn": expected_fn,
     }
     assert actual == expected
예제 #24
0
 def test_extraction_filter(self):
     """A partial extraction function on an ``extraction`` filter is built as 'extractionFn'."""
     pattern = "([a-b])"
     extraction = filters.Filter(
         type="extraction",
         dimension="dim",
         value="v",
         extraction_function=dimensions.PartialExtraction(pattern),
     )
     actual = filters.Filter.build_filter(extraction)
     expected_fn = {"type": "partial", "expr": pattern}
     expected = {
         "type": "extraction",
         "dimension": "dim",
         "value": "v",
         "extractionFn": expected_fn,
     }
     assert actual == expected
예제 #25
0
 def test_columnComparison_filter(self):
     """A DimensionSpec inside columnComparison is built as a 'default' spec dict."""
     dim2_spec = dimensions.DimensionSpec("dim2", "dim2")
     comparison = filters.Filter(
         type="columnComparison", dimensions=["dim1", dim2_spec])
     actual = filters.Filter.build_filter(comparison)
     expected_spec = {
         "type": "default",
         "dimension": "dim2",
         "outputName": "dim2",
     }
     expected = {
         "type": "columnComparison",
         "dimensions": ["dim1", expected_spec],
     }
     assert actual == expected
예제 #26
0
파일: druid.py 프로젝트: Frazerbesa/mspray
def druid_simple_groupby(dimensions, filter_list=None, filter_type="and",
                         datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Run a Druid groupBy query over the given dimensions, without aggregations.

    Inputs:
        dimensions => list of dimensions to group by
        filter_list => optional list of [dimension, operator, value] lists
                       to filter with; None or empty means no filter, e.g.
                        filter_list=[['target_area_id', operator.ne, 1],
                                     ['sprayable', operator.eq, "true"],
                                     ['dimension', operator, "value"]]
        filter_type => type of Druid filter that combines the entries
        datasource => Druid datasource to query

    Returns:
        The query's ``result``, or an empty list when the query raises
        OSError.
    """
    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        limit_spec={
            "type": "default",
            "limit": 50000,
        }
    )
    params['dimensions'] = dimensions
    if filter_list:
        # Each entry is [dimension name, comparison operator, value]; the
        # operator (e.g. operator.eq) is applied to a Dimension and the value.
        fields = [
            this_filter[1](filters.Dimension(this_filter[0]), this_filter[2])
            for this_filter in filter_list
        ]
        params['filter'] = filters.Filter(
            type=filter_type,
            fields=fields
        )

    try:
        request = query.groupby(**params)
    except OSError:
        # Deliberate best-effort: an OSError from the query (presumably a
        # connection problem - confirm) yields no rows instead of raising.
        return []
    return request.result
예제 #27
0
파일: druid.py 프로젝트: Frazerbesa/mspray
def get_druid_data(dimensions=None, filter_list=None, filter_type="and",
                   order_by=["target_area_name"],
                   datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Runs a groupBy query against Druid, returns data with metrics

    Every metric is a long sum of ``count`` restricted by a filter; the
    ``num_found`` post-aggregation adds three of those metrics together.

    Inputs:
        dimensions => list of dimensions to group by; when None, groups by
                      target_area_id, target_area_name and
                      target_area_structures
        filter_list => optional list of [dimension, operator, value] lists
                       to filter with; None or empty means no filter, e.g.
                        filter_list=[['target_area_id', operator.ne, 1],
                                     ['sprayable', operator.eq, "true"],
                                     ['dimension', operator, "value"]]
        filter_type => type of Druid filter that combines the entries
        order_by => field(s) to order the data by
        datasource => Druid datasource to query

    Returns:
        The query's ``result``, or an empty list when the query raises
        OSError.
    """
    # NOTE: the `order_by` default list is shared between calls but is only
    # read here. None is not used as a sentinel for it because an explicit
    # None is currently forwarded to the limit spec as-is.
    sprayed_value = settings.MSPRAY_WAS_SPRAYED_VALUE
    not_sprayed_value = settings.MSPRAY_WAS_NOT_SPRAYED_VALUE

    def dim_is(name, value):
        # Equality filter on one dimension; a new object on every call so no
        # filter instance is shared between aggregations.
        return filters.Dimension(name) == value

    def all_of(*fields):
        # 'and' filter over the given filters.
        return filters.Filter(type='and', fields=list(fields))

    def count_where(filter_):
        # Long sum of the 'count' metric over rows matching `filter_`.
        return aggregators.filtered(filter_, aggregators.longsum('count'))

    aggregations = {
        'num_not_sprayable': count_where(all_of(
            dim_is('sprayable', 'false'))),
        'num_not_sprayed': count_where(all_of(
            dim_is('sprayable', 'true'),
            dim_is('sprayed', not_sprayed_value))),
        'num_sprayed': count_where(dim_is('sprayed', sprayed_value)),
        'num_new': count_where(dim_is('is_new', 'true')),
        'num_new_no_duplicates': count_where(all_of(
            dim_is('is_duplicate', 'false'),
            dim_is('is_new', 'true'))),
        'num_duplicate': count_where(dim_is('is_duplicate', 'true')),
        'num_sprayed_no_duplicates': count_where(all_of(
            dim_is('is_duplicate', 'false'),
            dim_is('sprayed', sprayed_value))),
        'num_not_sprayed_no_duplicates': count_where(all_of(
            dim_is('is_duplicate', 'false'),
            dim_is('sprayable', 'true'),
            dim_is('sprayed', not_sprayed_value))),
        'num_sprayed_duplicates': count_where(all_of(
            dim_is('is_duplicate', 'true'),
            dim_is('sprayable', 'true'),
            dim_is('sprayed', sprayed_value))),
        'num_not_sprayable_no_duplicates': count_where(all_of(
            dim_is('is_duplicate', 'false'),
            dim_is('sprayable', 'false'))),
        'num_refused': count_where(all_of(
            dim_is('is_duplicate', 'false'),
            dim_is('is_refused', 'true'),
            dim_is('sprayed', not_sprayed_value))),
    }

    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        aggregations=aggregations,
        post_aggregations={
            'num_found': Field('num_sprayed_no_duplicates') +
            Field('num_sprayed_duplicates') +
            Field('num_not_sprayed_no_duplicates')
        },
        limit_spec={
            "type": "default",
            "limit": 50000,
            "columns": order_by
        }
    )
    if filter_list:
        # Each entry is [dimension name, comparison operator, value]; the
        # operator (e.g. operator.eq) is applied to a Dimension and the value.
        fields = [
            this_filter[1](filters.Dimension(this_filter[0]), this_filter[2])
            for this_filter in filter_list
        ]
        params['filter'] = filters.Filter(
            type=filter_type,
            fields=fields
        )

    if dimensions is None:
        params['dimensions'] = ['target_area_id', 'target_area_name',
                                'target_area_structures']
    else:
        params['dimensions'] = dimensions

    try:
        request = query.groupby(**params)
    except OSError:
        # Deliberate best-effort: an OSError from the query (presumably a
        # connection problem - confirm) yields no rows instead of raising.
        return []
    return request.result
예제 #28
0
 def test_invalid_filter(self):
     """Constructing a filter with an unknown type raises NotImplementedError."""
     invalid_kwargs = {'type': 'invalid', 'dimension': 'dim', 'value': 'val'}
     with pytest.raises(NotImplementedError):
         filters.Filter(**invalid_kwargs)
예제 #29
0
 def test_selector_filter(self):
     """A filter created without a type is built as a ``selector`` filter."""
     selector = filters.Filter(dimension='dim', value='val')
     actual = filters.Filter.build_filter(selector)
     expected = {
         'type': 'selector',
         'dimension': 'dim',
         'value': 'val',
     }
     assert actual == expected
예제 #30
0
 def test_like_filter(self):
     """A ``like`` filter is built with its dimension and pattern."""
     like_filter = filters.Filter(
         type="like",
         dimension="dim",
         pattern="%val%",
     )
     actual = filters.Filter.build_filter(like_filter)
     expected = {
         'type': 'like',
         'dimension': 'dim',
         'pattern': '%val%',
     }
     assert actual == expected