def test_or_filter_multiple(self):
    """An explicit ``or`` filter over three selectors serializes each field in order."""

    def selector(dimension, value):
        # Expected serialized form of a plain selector filter.
        return {"type": "selector", "dimension": dimension, "value": value}

    members = [
        filters.Filter(dimension="dim1", value="val1"),
        filters.Filter(dimension="dim2", value="val2"),
        filters.Filter(dimension="dim3", value="val3"),
    ]
    or_filter = filters.Filter(type="or", fields=members)

    actual = filters.Filter.build_filter(or_filter)

    expected = {
        "type": "or",
        "fields": [
            selector("dim1", "val1"),
            selector("dim2", "val2"),
            selector("dim3", "val3"),
        ],
    }
    assert actual == expected
def test_search_filter(self):
    """Search filters serialize to a ``contains`` query carrying ``caseSensitive``."""

    def contains_query(case_sensitive):
        # Expected serialized search filter for dimension 'dim' / value 'val'.
        return {
            'type': 'search',
            'dimension': 'dim',
            'query': {
                'type': 'contains',
                'caseSensitive': case_sensitive,
                'value': 'val',
            },
        }

    # caseSensitive omitted: the built query is expected to carry 'false'
    implicit = filters.Filter(type="search", dimension="dim", value='val')
    actual = filters.Filter.build_filter(implicit)
    expected = contains_query('false')
    assert actual == expected

    # caseSensitive supplied explicitly
    explicit = filters.Filter(type="search", dimension="dim", value='val',
                              caseSensitive='true')
    actual = filters.Filter.build_filter(explicit)
    expected = contains_query('true')
    assert actual == expected
def test_nested_filtered_aggregator(self):
    """A filtered aggregator wrapped in another keeps the name on the innermost one."""
    outer_filter = filters.Filter(dimension='dim1', value='val')
    inner_filter = filters.Filter(dimension='dim2', value='val')
    inner_agg = aggregators.filtered(inner_filter,
                                     aggregators.count('metric1'))
    agg = aggregators.filtered(outer_filter, inner_agg)

    actual = aggregators.build_aggregators({'agg_name': agg})

    # the innermost aggregation must have 'agg_name'
    innermost = {'fieldName': 'metric1', 'type': 'count', 'name': 'agg_name'}
    middle = {
        'type': 'filtered',
        'aggregator': innermost,
        'filter': {'dimension': 'dim2', 'value': 'val', 'type': 'selector'},
    }
    outermost = {
        'type': 'filtered',
        'aggregator': middle,
        'filter': {'dimension': 'dim1', 'value': 'val', 'type': 'selector'},
    }
    expected = [outermost]
    assert expected == actual
def test_nested_filtered_aggregator(self):
    """Nesting two filtered aggregators puts the output name on the innermost aggregator."""
    outer_filter = filters.Filter(dimension="dim1", value="val")
    inner_filter = filters.Filter(dimension="dim2", value="val")
    inner_agg = aggregators.filtered(inner_filter,
                                     aggregators.count("metric1"))
    agg = aggregators.filtered(outer_filter, inner_agg)

    actual = aggregators.build_aggregators({"agg_name": agg})

    # the innermost aggregation must have 'agg_name'
    innermost = {"fieldName": "metric1", "type": "count", "name": "agg_name"}
    middle = {
        "type": "filtered",
        "aggregator": innermost,
        "filter": {"dimension": "dim2", "value": "val", "type": "selector"},
    }
    outermost = {
        "type": "filtered",
        "aggregator": middle,
        "filter": {"dimension": "dim1", "value": "val", "type": "selector"},
    }
    expected = [outermost]
    assert expected == actual
def test_search_filter(self):
    """Search filters serialize to a ``contains`` query carrying ``caseSensitive``."""

    def contains_query(case_sensitive):
        # Expected serialized search filter for dimension "dim" / value "val".
        return {
            "type": "search",
            "dimension": "dim",
            "query": {
                "type": "contains",
                "caseSensitive": case_sensitive,
                "value": "val",
            },
        }

    # caseSensitive omitted: the built query is expected to carry "false"
    implicit = filters.Filter(type="search", dimension="dim", value="val")
    actual = filters.Filter.build_filter(implicit)
    expected = contains_query("false")
    assert actual == expected

    # caseSensitive supplied explicitly
    explicit = filters.Filter(type="search", dimension="dim", value="val",
                              caseSensitive="true")
    actual = filters.Filter.build_filter(explicit)
    expected = contains_query("true")
    assert actual == expected
def test_nested_not_or_filter(self):
    """Negating an or-combination wraps the ``or`` filter in a ``not`` filter."""
    left = filters.Filter(dimension='dim1', value='val1')
    right = filters.Filter(dimension='dim2', value='val2')
    negated = ~(left | right)

    actual = filters.Filter.build_filter(negated)

    either = {
        'type': 'or',
        'fields': [
            {'type': 'selector', 'dimension': 'dim1', 'value': 'val1'},
            {'type': 'selector', 'dimension': 'dim2', 'value': 'val2'},
        ],
    }
    expected = {'type': 'not', 'field': either}
    assert actual == expected
def test_and_filter(self):
    """``f1 & f2`` builds an ``and`` filter listing both selectors."""
    left = filters.Filter(dimension='dim1', value='val1')
    right = filters.Filter(dimension='dim2', value='val2')
    both = left & right

    actual = filters.Filter.build_filter(both)

    expected = dict(
        type='and',
        fields=[
            dict(type='selector', dimension='dim1', value='val1'),
            dict(type='selector', dimension='dim2', value='val2'),
        ],
    )
    assert actual == expected
def test_nested_mix_filter(self):
    """A mixed ``&`` / ``|`` / ``~`` expression serializes to the matching nested structure."""

    def selector(dimension, value):
        # Expected serialized form of a plain selector filter.
        return {'dimension': dimension, 'type': 'selector', 'value': value}

    def negated(field):
        # Expected serialized form of a ``not`` wrapper.
        return {'field': field, 'type': 'not'}

    f1 = filters.Filter(dimension='dim1', value='val1')
    f2 = filters.Filter(dimension='dim2', value='val2')
    f3 = filters.Filter(dimension='dim3', value='val3')
    f4 = filters.Filter(dimension='dim4', value='val4')
    f5 = filters.Filter(dimension='dim5', value='val5')
    f6 = filters.Filter(dimension='dim6', value='val6')
    f7 = filters.Filter(dimension='dim7', value='val7')
    f8 = filters.Filter(dimension='dim8', value='val8')

    # Same grouping as: f1 & ~f2 & f3 & (f4 | ~f5 | f6 | (f7 & ~f8))
    innermost_and = f7 & ~f8
    alternatives = f4 | ~f5 | f6 | innermost_and
    combined = f1 & ~f2 & f3 & alternatives

    actual = filters.Filter.build_filter(combined)

    expected_innermost_and = {
        'fields': [
            selector('dim7', 'val7'),
            negated(selector('dim8', 'val8')),
        ],
        'type': 'and',
    }
    expected_alternatives = {
        'fields': [
            selector('dim4', 'val4'),
            negated(selector('dim5', 'val5')),
            selector('dim6', 'val6'),
            expected_innermost_and,
        ],
        'type': 'or',
    }
    expected = {
        'fields': [
            selector('dim1', 'val1'),
            negated(selector('dim2', 'val2')),
            selector('dim3', 'val3'),
            expected_alternatives,
        ],
        'type': 'and',
    }
    assert actual == expected
def test_and_filter_multiple(self):
    """An explicit ``and`` filter over three selectors serializes each field in order."""

    def selector(dimension, value):
        # Expected serialized form of a plain selector filter.
        return {'type': 'selector', 'dimension': dimension, 'value': value}

    members = [
        filters.Filter(dimension='dim1', value='val1'),
        filters.Filter(dimension='dim2', value='val2'),
        filters.Filter(dimension='dim3', value='val3'),
    ]
    and_filter = filters.Filter(type='and', fields=members)

    actual = filters.Filter.build_filter(and_filter)

    expected = {
        'type': 'and',
        'fields': [
            selector('dim1', 'val1'),
            selector('dim2', 'val2'),
            selector('dim3', 'val3'),
        ],
    }
    assert actual == expected
def test_in_filter(self):
    """An ``in`` filter serializes its dimension and list of values unchanged."""
    in_filter = filters.Filter(type='in', dimension='dim',
                               values=['val1', 'val2', 'val3'])

    actual = filters.Filter.build_filter(in_filter)

    expected = dict(type='in', dimension='dim',
                    values=['val1', 'val2', 'val3'])
    assert actual == expected
def test_filtered_aggregator(self):
    """``aggregators.filtered`` wraps each aggregator with the serialized selector filter."""
    selector = filters.Filter(dimension='dim', value='val')
    # Expected serialized form of ``selector``; only compared, never mutated.
    selector_spec = {'type': 'selector', 'dimension': 'dim', 'value': 'val'}

    wrapped_candidates = [
        aggregators.count('metric1'),
        aggregators.longsum('metric2'),
        aggregators.doublesum('metric3'),
        aggregators.doublemin('metric4'),
        aggregators.doublemax('metric5'),
        aggregators.hyperunique('metric6'),
        aggregators.cardinality('dim1'),
        aggregators.cardinality(['dim1', 'dim2'], by_row=True),
        aggregators.thetasketch('dim1'),
        aggregators.thetasketch('metric7'),
        aggregators.thetasketch('metric8', isinputthetasketch=True,
                                size=8192),
    ]

    for inner in wrapped_candidates:
        expected = {
            'type': 'filtered',
            'filter': selector_spec,
            'aggregator': inner,
        }
        actual = aggregators.filtered(selector, inner)
        assert actual == expected
def test_filtered_aggregator(self):
    """``aggregators.filtered`` wraps each aggregator with the serialized selector filter."""
    selector = filters.Filter(dimension="dim", value="val")
    # Expected serialized form of ``selector``; only compared, never mutated.
    selector_spec = {"type": "selector", "dimension": "dim", "value": "val"}

    wrapped_candidates = [
        aggregators.count("metric1"),
        aggregators.longsum("metric2"),
        aggregators.doublesum("metric3"),
        aggregators.doublemin("metric4"),
        aggregators.doublemax("metric5"),
        aggregators.hyperunique("metric6"),
        aggregators.cardinality("dim1"),
        aggregators.cardinality(["dim1", "dim2"], by_row=True),
        aggregators.thetasketch("dim1"),
        aggregators.thetasketch("metric7"),
        aggregators.thetasketch("metric8", isinputthetasketch=True,
                                size=8192),
    ]

    for inner in wrapped_candidates:
        expected = {
            "type": "filtered",
            "filter": selector_spec,
            "aggregator": inner,
        }
        actual = aggregators.filtered(selector, inner)
        assert actual == expected
def test_selector_filter_extraction_fn(self):
    """A selector filter with a regex extraction function emits an ``extractionFn`` entry."""
    regex_fn = dimensions.RegexExtraction('([a-b])')
    selector = filters.Filter(dimension='dim', value='v',
                              extraction_function=regex_fn)

    actual = filters.Filter.build_filter(selector)

    expected = {
        'type': 'selector',
        'dimension': 'dim',
        'value': 'v',
        'extractionFn': {'type': 'regex', 'expr': '([a-b])'},
    }
    assert actual == expected
def test_extraction_filter(self):
    """An ``extraction`` filter with a partial extraction function emits ``extractionFn``."""
    partial_fn = dimensions.PartialExtraction('([a-b])')
    extraction_filter = filters.Filter(type='extraction', dimension='dim',
                                       value='v',
                                       extraction_function=partial_fn)

    actual = filters.Filter.build_filter(extraction_filter)

    expected = {
        'type': 'extraction',
        'dimension': 'dim',
        'value': 'v',
        'extractionFn': {'type': 'partial', 'expr': '([a-b])'},
    }
    assert actual == expected
def test_not_filter(self):
    """``~f`` builds a ``not`` filter wrapping the selector."""
    selector = filters.Filter(dimension='dim', value='val')

    actual = filters.Filter.build_filter(~selector)

    inner = dict(type='selector', dimension='dim', value='val')
    expected = dict(type='not', field=inner)
    assert actual == expected
def test_javascript_filter(self):
    """A ``javascript`` filter serializes its dimension and function source unchanged."""
    js_filter = filters.Filter(type="javascript", dimension="dim",
                               function="function(x){return true}")

    actual = filters.Filter.build_filter(js_filter)

    expected = dict(
        type="javascript",
        dimension="dim",
        function="function(x){return true}",
    )
    assert actual == expected
def test_in_filter(self):
    """An ``in`` filter serializes its dimension and list of values unchanged."""
    in_filter = filters.Filter(type="in", dimension="dim",
                               values=["val1", "val2", "val3"])

    actual = filters.Filter.build_filter(in_filter)

    expected = dict(type="in", dimension="dim",
                    values=["val1", "val2", "val3"])
    assert actual == expected
def test_columnComparison_filter(self):
    """A ``columnComparison`` filter serializes plain names and DimensionSpec entries."""
    compared = ['dim1', dimensions.DimensionSpec('dim2', 'dim2')]
    comparison = filters.Filter(type='columnComparison', dimensions=compared)

    actual = filters.Filter.build_filter(comparison)

    dim2_spec = {'type': 'default', 'dimension': 'dim2', 'outputName': 'dim2'}
    expected = {
        'type': 'columnComparison',
        'dimensions': ['dim1', dim2_spec],
    }
    assert actual == expected
def test_not_filter(self):
    """Building a ``not`` filter twice from the same object gives the expected dict."""
    negated = ~filters.Filter(dimension='dim', value='val')

    # Call `build_filter` twice to make sure it does not
    # change the passed filter object argument `negated`.
    filters.Filter.build_filter(negated)
    actual = filters.Filter.build_filter(negated)

    inner = dict(type='selector', dimension='dim', value='val')
    expected = dict(type='not', field=inner)
    assert actual == expected
def test_javascript_filter(self):
    """A ``javascript`` filter serializes its dimension and function source unchanged."""
    js_filter = filters.Filter(type='javascript', dimension='dim',
                               function='function(x){return true}')

    actual = filters.Filter.build_filter(js_filter)

    expected = dict(
        type='javascript',
        dimension='dim',
        function='function(x){return true}',
    )
    assert actual == expected
def test_or_filter(self):
    """``f1 | f2`` builds an ``or`` filter listing both selectors."""
    left = filters.Filter(dimension="dim1", value="val1")
    right = filters.Filter(dimension="dim2", value="val2")
    either = left | right

    actual = filters.Filter.build_filter(either)

    expected = dict(
        type="or",
        fields=[
            dict(type="selector", dimension="dim1", value="val1"),
            dict(type="selector", dimension="dim2", value="val2"),
        ],
    )
    assert actual == expected
def test_not_filter(self):
    """Building a ``not`` filter twice from the same object gives the expected dict."""
    negated = ~filters.Filter(dimension="dim", value="val")

    # Call `build_filter` twice to make sure it does not
    # change the passed filter object argument `negated`.
    filters.Filter.build_filter(negated)
    actual = filters.Filter.build_filter(negated)

    inner = dict(type="selector", dimension="dim", value="val")
    expected = dict(type="not", field=inner)
    assert actual == expected
def test_selector_filter_extraction_fn(self):
    """A selector filter with a regex extraction function emits an ``extractionFn`` entry."""
    regex_fn = dimensions.RegexExtraction("([a-b])")
    selector = filters.Filter(dimension="dim", value="v",
                              extraction_function=regex_fn)

    actual = filters.Filter.build_filter(selector)

    expected = {
        "type": "selector",
        "dimension": "dim",
        "value": "v",
        "extractionFn": {"type": "regex", "expr": "([a-b])"},
    }
    assert actual == expected
def test_extraction_filter(self):
    """An ``extraction`` filter with a partial extraction function emits ``extractionFn``."""
    partial_fn = dimensions.PartialExtraction("([a-b])")
    extraction_filter = filters.Filter(type="extraction", dimension="dim",
                                       value="v",
                                       extraction_function=partial_fn)

    actual = filters.Filter.build_filter(extraction_filter)

    expected = {
        "type": "extraction",
        "dimension": "dim",
        "value": "v",
        "extractionFn": {"type": "partial", "expr": "([a-b])"},
    }
    assert actual == expected
def test_columnComparison_filter(self):
    """A ``columnComparison`` filter serializes plain names and DimensionSpec entries."""
    compared = ["dim1", dimensions.DimensionSpec("dim2", "dim2")]
    comparison = filters.Filter(type="columnComparison", dimensions=compared)

    actual = filters.Filter.build_filter(comparison)

    dim2_spec = {"type": "default", "dimension": "dim2", "outputName": "dim2"}
    expected = {
        "type": "columnComparison",
        "dimensions": ["dim1", dim2_spec],
    }
    assert actual == expected
def druid_simple_groupby(dimensions, filter_list=None, filter_type="and",
                         datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Runs a plain groupby query (no aggregations) against Druid

    Inputs:
        dimensions => list of dimensions to group by
        filter_list => list of list of things to filter with e.g.
                       filter_list=[['target_area_id', operator.ne, 1],
                                    ['sprayable', operator.eq, "true"],
                                    ['dimension', operator, "value"]])
                       None (the default) or an empty list applies no filter
        filter_type => type of Druid filter to perform
        datasource => Druid datasource to query

    Returns:
        the ``result`` of the groupby request, or an empty list when the
        request raises OSError
    """
    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        limit_spec={
            "type": "default",
            "limit": 50000,
        },
        dimensions=dimensions,
    )

    if filter_list:
        # Each entry is [dimension name, comparison operator, value]; the
        # operator (e.g. operator.eq) is applied to a Dimension and the value.
        fields = [
            entry[1](filters.Dimension(entry[0]), entry[2])
            for entry in filter_list
        ]
        params['filter'] = filters.Filter(type=filter_type, fields=fields)

    try:
        request = query.groupby(**params)
    except OSError:
        # Deliberate best-effort: a failed request yields "no rows" rather
        # than propagating the error to the caller.
        return []
    return request.result
def get_druid_data(dimensions=None, filter_list=None, filter_type="and",
                   order_by=None,
                   datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Runs a query against Druid, returns data with metrics

    Inputs:
        dimensions => list of dimensions to group by; when None, groups by
                      target_area_id, target_area_name and
                      target_area_structures
        filter_list => list of list of things to filter with e.g.
                       filter_list=[['target_area_id', operator.ne, 1],
                                    ['sprayable', operator.eq, "true"],
                                    ['dimension', operator, "value"]])
                       None (the default) or an empty list applies no filter
        filter_type => type of Druid filter to perform,
        order_by => field(s) to order the data by; when None, orders by
                    target_area_name
        datasource => Druid datasource to query

    Returns:
        the ``result`` of the groupby request, or an empty list when the
        request raises OSError
    """
    if order_by is None:
        # Build a fresh list per call: this object is handed to the query as
        # limit_spec["columns"], so it must not be a shared default.
        order_by = ["target_area_name"]

    was_sprayed = settings.MSPRAY_WAS_SPRAYED_VALUE
    not_sprayed = settings.MSPRAY_WAS_NOT_SPRAYED_VALUE

    def dim_is(name, value):
        # Equality condition on a single dimension.
        return filters.Dimension(name) == value

    def count_if(condition):
        # Sum of 'count' over rows matching ``condition``.
        return aggregators.filtered(condition, aggregators.longsum('count'))

    def count_if_all(*conditions):
        # Sum of 'count' over rows matching an 'and' filter of ``conditions``.
        return count_if(filters.Filter(type='and', fields=list(conditions)))

    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        aggregations={
            'num_not_sprayable': count_if_all(
                dim_is('sprayable', 'false')),
            'num_not_sprayed': count_if_all(
                dim_is('sprayable', 'true'),
                dim_is('sprayed', not_sprayed)),
            'num_sprayed': count_if(
                dim_is('sprayed', was_sprayed)),
            'num_new': count_if(
                dim_is('is_new', 'true')),
            'num_new_no_duplicates': count_if_all(
                dim_is('is_duplicate', 'false'),
                dim_is('is_new', 'true')),
            'num_duplicate': count_if(
                dim_is('is_duplicate', 'true')),
            'num_sprayed_no_duplicates': count_if_all(
                dim_is('is_duplicate', 'false'),
                dim_is('sprayed', was_sprayed)),
            'num_not_sprayed_no_duplicates': count_if_all(
                dim_is('is_duplicate', 'false'),
                dim_is('sprayable', 'true'),
                dim_is('sprayed', not_sprayed)),
            'num_sprayed_duplicates': count_if_all(
                dim_is('is_duplicate', 'true'),
                dim_is('sprayable', 'true'),
                dim_is('sprayed', was_sprayed)),
            'num_not_sprayable_no_duplicates': count_if_all(
                dim_is('is_duplicate', 'false'),
                dim_is('sprayable', 'false')),
            'num_refused': count_if_all(
                dim_is('is_duplicate', 'false'),
                dim_is('is_refused', 'true'),
                dim_is('sprayed', not_sprayed)),
        },
        post_aggregations={
            'num_found':
            Field('num_sprayed_no_duplicates') +
            Field('num_sprayed_duplicates') +
            Field('num_not_sprayed_no_duplicates')
        },
        limit_spec={
            "type": "default",
            "limit": 50000,
            "columns": order_by
        }
    )

    if filter_list:
        # Each entry is [dimension name, comparison operator, value]; the
        # operator (e.g. operator.eq) is applied to a Dimension and the value.
        fields = [
            entry[1](filters.Dimension(entry[0]), entry[2])
            for entry in filter_list
        ]
        params['filter'] = filters.Filter(type=filter_type, fields=fields)

    if dimensions is None:
        params['dimensions'] = ['target_area_id', 'target_area_name',
                                'target_area_structures']
    else:
        params['dimensions'] = dimensions

    try:
        request = query.groupby(**params)
    except OSError:
        # Deliberate best-effort: a failed request yields "no rows" rather
        # than propagating the error to the caller.
        return []
    return request.result
def test_invalid_filter(self):
    """Constructing a filter with an unknown ``type`` raises NotImplementedError."""
    bad_kwargs = dict(type='invalid', dimension='dim', value='val')
    with pytest.raises(NotImplementedError):
        filters.Filter(**bad_kwargs)
def test_selector_filter(self):
    """A filter built without an explicit type serializes as a ``selector`` filter."""
    selector = filters.Filter(dimension='dim', value='val')

    actual = filters.Filter.build_filter(selector)

    expected = dict(type='selector', dimension='dim', value='val')
    assert actual == expected
def test_like_filter(self):
    """A ``like`` filter serializes its dimension and pattern unchanged."""
    like_filter = filters.Filter(type="like", dimension="dim",
                                 pattern="%val%")

    actual = filters.Filter.build_filter(like_filter)

    expected = dict(type='like', dimension='dim', pattern='%val%')
    assert actual == expected