def test_with_scrubber_cleaning(self):
    """Scrub rules only rewrite the fields they are registered for."""
    # NOTE(review): the '[email protected]' fragments look like an e-mail
    # obfuscation placeholder rather than real addresses - confirm this
    # fixture against version control.
    allowed_fields = {'hits': ('foo', 'bar', 'baz')}
    scrub_rules = (
        ('bar', scrubber.EMAIL),
        ('bar', scrubber.URL),
        ('baz', scrubber.URL),
    )
    payload = {
        'hits': [
            {
                'foo': "Bla bla",
                'bar': "contact me on [email protected]",
                'baz': "when I visited http://www.p0rn.com",
            },
            {
                'foo': "Ble ble [email protected]",
                'bar': "other things on https://google.com here",
                'baz': "talk to [email protected]",
            },
        ]
    }

    Cleaner(allowed_fields, clean_scrub=scrub_rules).start(payload)

    first_hit = {
        'foo': "Bla bla",
        'bar': "contact me on ",
        'baz': "when I visited ",
    }
    second_hit = {
        'foo': "Ble ble [email protected]",
        'bar': "other things on here",
        # 'baz' has no EMAIL scrubber, so its address is left untouched
        'baz': "talk to [email protected]",
    }
    eq_(payload, {'hits': [first_hit, second_hit]})
def test_dict_data_with_lists(self):
    """Lists nested under arbitrary keys of 'hits' are pruned per item."""
    allowed_fields = {'hits': {Cleaner.ANY: ('foo', 'bar')}}
    payload = {
        'hits': {
            'WaterWolf': [
                {'foo': 1, 'bar': 2, 'baz': 3},
                {'foo': 4, 'bar': 5, 'baz': 6},
            ],
            'NightTrain': [
                {'foo': 7, 'bar': 8, 'baz': 9},
                {'foo': 10, 'bar': 11, 'baz': 12},
            ],
        }
    }

    Cleaner(allowed_fields).start(payload)

    expected_hits = {
        'WaterWolf': [
            {'foo': 1, 'bar': 2},
            {'foo': 4, 'bar': 5},
        ],
        'NightTrain': [
            {'foo': 7, 'bar': 8},
            {'foo': 10, 'bar': 11},
        ],
    }
    eq_(payload, {'hits': expected_hits})
def test_simple_list(self):
    """A bare list of dicts is cleaned item by item, in place."""
    records = [
        {'foo': 1, 'bar': 2, 'baz': 3},
        {'foo': 7, 'bar': 8, 'baz': 9},
    ]

    Cleaner(('foo', 'bar')).start(records)

    assert records == [
        {'foo': 1, 'bar': 2},
        {'foo': 7, 'bar': 8},
    ]
def test_all_dict_data(self):
    """Cleaner.ANY applies the same key filter under every top-level key."""
    allowed_fields = {Cleaner.ANY: ('foo', 'bar')}
    payload = {
        'WaterWolf': {'foo': 1, 'bar': 2, 'baz': 3},
        'NightTrain': {'foo': 7, 'bar': 8, 'baz': 9},
    }

    Cleaner(allowed_fields).start(payload)

    expected = {
        'WaterWolf': {'foo': 1, 'bar': 2},
        'NightTrain': {'foo': 7, 'bar': 8},
    }
    eq_(payload, expected)
def test_all_dict_data(self):
    """Cleaner.ANY applies the same key filter under every top-level key."""
    payload = {
        'WaterWolf': {'foo': 1, 'bar': 2, 'baz': 3},
        'NightTrain': {'foo': 7, 'bar': 8, 'baz': 9},
    }

    Cleaner({Cleaner.ANY: ('foo', 'bar')}).start(payload)

    assert payload == {
        'WaterWolf': {'foo': 1, 'bar': 2},
        'NightTrain': {'foo': 7, 'bar': 8},
    }
def test_simple_list(self):
    """A bare list of dicts is cleaned item by item, in place."""
    permitted_keys = ('foo', 'bar')
    records = [
        {'foo': 1, 'bar': 2, 'baz': 3},
        {'foo': 7, 'bar': 8, 'baz': 9},
    ]

    Cleaner(permitted_keys).start(records)

    expected = [
        {'foo': 1, 'bar': 2},
        {'foo': 7, 'bar': 8},
    ]
    eq_(records, expected)
def test_all_dict_data(self):
    """Cleaner.ANY applies the same key filter under every top-level key."""
    allowed_fields = {Cleaner.ANY: ('foo', 'bar')}
    payload = {
        'Firefox': {'foo': 1, 'bar': 2, 'baz': 3},
        'Thunderbird': {'foo': 7, 'bar': 8, 'baz': 9},
    }

    Cleaner(allowed_fields).start(payload)

    expected = {
        'Firefox': {'foo': 1, 'bar': 2},
        'Thunderbird': {'foo': 7, 'bar': 8},
    }
    eq_(payload, expected)
def test_dict_data_with_lists(self):
    """Lists nested under arbitrary keys of 'hits' are pruned per item."""
    allowed_fields = {'hits': {Cleaner.ANY: ('foo', 'bar')}}
    payload = {
        'hits': {
            'Firefox': [
                {'foo': 1, 'bar': 2, 'baz': 3},
                {'foo': 4, 'bar': 5, 'baz': 6},
            ],
            'Thunderbird': [
                {'foo': 7, 'bar': 8, 'baz': 9},
                {'foo': 10, 'bar': 11, 'baz': 12},
            ],
        }
    }

    Cleaner(allowed_fields).start(payload)

    expected_hits = {
        'Firefox': [
            {'foo': 1, 'bar': 2},
            {'foo': 4, 'bar': 5},
        ],
        'Thunderbird': [
            {'foo': 7, 'bar': 8},
            {'foo': 10, 'bar': 11},
        ],
    }
    eq_(payload, {'hits': expected_hits})
def test_all_dict_data_deeper(self):
    """Two stacked Cleaner.ANY levels filter dicts nested two keys deep."""
    allowed_fields = {Cleaner.ANY: {Cleaner.ANY: ('foo', 'bar')}}
    payload = {
        'WaterWolf': {
            '2012': {'foo': 1, 'bar': 2, 'baz': 3},
            '2013': {'foo': 4, 'bar': 5, 'baz': 6},
        },
        'NightTrain': {
            '2012': {'foo': 7, 'bar': 8, 'baz': 9},
            '2013': {'foo': 10, 'bar': 11, 'baz': 12},
        },
    }

    Cleaner(allowed_fields).start(payload)

    expected = {
        'WaterWolf': {
            '2012': {'foo': 1, 'bar': 2},
            '2013': {'foo': 4, 'bar': 5},
        },
        'NightTrain': {
            '2012': {'foo': 7, 'bar': 8},
            '2013': {'foo': 10, 'bar': 11},
        },
    }
    eq_(payload, expected)
def test_all_dict_data_deeper(self):
    """Two stacked Cleaner.ANY levels filter dicts nested two keys deep."""
    allowed_fields = {Cleaner.ANY: {Cleaner.ANY: ('foo', 'bar')}}
    payload = {
        'Firefox': {
            '2012': {'foo': 1, 'bar': 2, 'baz': 3},
            '2013': {'foo': 4, 'bar': 5, 'baz': 6},
        },
        'Thunderbird': {
            '2012': {'foo': 7, 'bar': 8, 'baz': 9},
            '2013': {'foo': 10, 'bar': 11, 'baz': 12},
        },
    }

    Cleaner(allowed_fields).start(payload)

    expected = {
        'Firefox': {
            '2012': {'foo': 1, 'bar': 2},
            '2013': {'foo': 4, 'bar': 5},
        },
        'Thunderbird': {
            '2012': {'foo': 7, 'bar': 8},
            '2013': {'foo': 10, 'bar': 11},
        },
    }
    eq_(payload, expected)
def test_plain_dict(self):
    """A flat dict is pruned in place down to the permitted keys."""
    record = {'foo': 1, 'bar': 2, 'baz': 3}

    Cleaner(('foo', 'bar')).start(record)

    assert record == {'foo': 1, 'bar': 2}
def test_plain_dict(self):
    """A flat dict is pruned in place down to the permitted keys."""
    permitted_keys = ('foo', 'bar')
    record = {'foo': 1, 'bar': 2, 'baz': 3}

    Cleaner(permitted_keys).start(record)

    expected = {'foo': 1, 'bar': 2}
    assert record == expected
def test_plain_dict(self):
    """A flat dict is pruned in place down to the permitted keys."""
    record = {'foo': 1, 'bar': 2, 'baz': 3}

    Cleaner(('foo', 'bar')).start(record)

    eq_(record, {'foo': 1, 'bar': 2})
def test_simplest_case_with_warning(self, p_warn):
    """With debug=True, dropping 'baz' is reported via the injected mock."""
    allowed_fields = {'hits': ('foo', 'bar')}
    payload = {
        'hits': [
            {'foo': 1, 'bar': 2, 'baz': 3},
            {'foo': 4, 'bar': 5, 'baz': 6},
        ]
    }

    Cleaner(allowed_fields, debug=True).start(payload)

    # p_warn is supplied by the test's patch decorator (outside this block)
    p_warn.assert_called_with("Skipping 'baz'")
def test_simplest_case_with_warning(self, p_warn):
    """With debug=True, dropping 'baz' is reported via the injected mock."""
    payload = {
        "hits": [
            {"foo": 1, "bar": 2, "baz": 3},
            {"foo": 4, "bar": 5, "baz": 6},
        ]
    }
    debug_cleaner = Cleaner({"hits": ("foo", "bar")}, debug=True)

    debug_cleaner.start(payload)

    # p_warn is supplied by the test's patch decorator (outside this block)
    p_warn.assert_called_with("Skipping 'baz'")
def test_simplest_case(self):
    """Dicts in the 'hits' list keep only the permitted keys."""
    payload = {
        "hits": [
            {"foo": 1, "bar": 2, "baz": 3},
            {"foo": 4, "bar": 5, "baz": 6},
        ]
    }

    Cleaner({"hits": ("foo", "bar")}).start(payload)

    assert payload == {
        "hits": [
            {"foo": 1, "bar": 2},
            {"foo": 4, "bar": 5},
        ]
    }
def test_dict_data_with_lists(self):
    """Lists nested under arbitrary keys of 'hits' are pruned per item."""
    permitted = {'hits': {Cleaner.ANY: ('foo', 'bar')}}
    water_wolf = [
        {'foo': 1, 'bar': 2, 'baz': 3},
        {'foo': 4, 'bar': 5, 'baz': 6},
    ]
    night_train = [
        {'foo': 7, 'bar': 8, 'baz': 9},
        {'foo': 10, 'bar': 11, 'baz': 12},
    ]
    payload = {'hits': {'WaterWolf': water_wolf, 'NightTrain': night_train}}

    Cleaner(permitted).start(payload)

    assert payload == {
        'hits': {
            'WaterWolf': [
                {'foo': 1, 'bar': 2},
                {'foo': 4, 'bar': 5},
            ],
            'NightTrain': [
                {'foo': 7, 'bar': 8},
                {'foo': 10, 'bar': 11},
            ],
        }
    }
def test_with_scrubber_cleaning(self):
    """Scrub rules only rewrite the fields they are registered for."""
    # NOTE(review): the '[email protected]' fragments look like an e-mail
    # obfuscation placeholder rather than real addresses - confirm this
    # fixture against version control.
    permitted = {'hits': ('foo', 'bar', 'baz')}
    payload = {
        'hits': [
            {
                'foo': "Bla bla",
                'bar': "contact me on [email protected]",
                'baz': "when I visited http://www.p0rn.com",
            },
            {
                'foo': "Ble ble [email protected]",
                'bar': "other things on https://google.com here",
                'baz': "talk to [email protected]",
            },
        ]
    }
    scrubbing_cleaner = Cleaner(
        permitted,
        clean_scrub=(
            ('bar', scrubber.EMAIL),
            ('bar', scrubber.URL),
            ('baz', scrubber.URL),
        ),
    )

    scrubbing_cleaner.start(payload)

    assert payload == {
        'hits': [
            {
                'foo': "Bla bla",
                'bar': "contact me on ",
                'baz': "when I visited ",
            },
            {
                'foo': "Ble ble [email protected]",
                'bar': "other things on here",
                # 'baz' has no EMAIL scrubber, so its address is untouched
                'baz': "talk to [email protected]",
            },
        ]
    }
def test_simplest_case(self):
    """Dicts in the 'hits' list keep only the permitted keys."""
    permitted = {'hits': ('foo', 'bar')}
    payload = {
        'hits': [
            {'foo': 1, 'bar': 2, 'baz': 3},
            {'foo': 4, 'bar': 5, 'baz': 6},
        ]
    }

    Cleaner(permitted).start(payload)

    expected_hits = [
        {'foo': 1, 'bar': 2},
        {'foo': 4, 'bar': 5},
    ]
    assert payload == {'hits': expected_hits}
def test_simplest_case(self):
    """Dicts in the 'hits' list keep only the permitted keys."""
    payload = {
        'hits': [
            {'foo': 1, 'bar': 2, 'baz': 3},
            {'foo': 4, 'bar': 5, 'baz': 6},
        ]
    }

    Cleaner({'hits': ('foo', 'bar')}).start(payload)

    assert payload == {
        'hits': [
            {'foo': 1, 'bar': 2},
            {'foo': 4, 'bar': 5},
        ]
    }
def test_simplest_case(self):
    """Dicts in the 'hits' list keep only the permitted keys."""
    permitted = {'hits': ('foo', 'bar')}
    payload = {
        'hits': [
            {'foo': 1, 'bar': 2, 'baz': 3},
            {'foo': 4, 'bar': 5, 'baz': 6},
        ]
    }

    Cleaner(permitted).start(payload)

    expected_hits = [
        {'foo': 1, 'bar': 2},
        {'foo': 4, 'bar': 5},
    ]
    eq_(payload, {'hits': expected_hits})
def _permission_display_names(required_permissions):
    """Return display names for dotted permission strings.

    The part after the first dot is treated as the permission codename.
    When no ``Permission`` row matches, the codename itself is used.
    """
    names = []
    for permission in required_permissions:
        # [-1] rather than [1]: a string without a dot is used as-is
        # instead of raising IndexError.
        codename = permission.split('.', 1)[-1]
        try:
            names.append(Permission.objects.get(codename=codename).name)
        except Permission.DoesNotExist:
            names.append(codename)
    return names


def model_wrapper(request, model_name):
    """Run the API model called ``model_name`` for this request.

    Looks the model class up in ``MODELS_MODULES`` (also trying the
    ``<model_name>Middleware`` spelling), enforces its permissions,
    validates the request parameters with ``FormWrapper`` and calls the
    model's ``post`` (for POST) or ``get`` (any other method).

    Returns:
        ``(result, headers)`` on success, ``({'errors': ...}, 400)`` when
        the form is invalid, or an ``HttpResponse`` for permission
        failures, not-found/bad-request errors and binary downloads.

    Raises:
        http.Http404: the model is blacklisted, unknown or not valid.
        APIWhitelistError: the model does not define ``API_WHITELIST``.
    """
    if model_name in BLACKLIST:
        raise http.Http404("Don't know what you're talking about!")

    model = None
    for source in MODELS_MODULES:
        try:
            model = getattr(source, model_name)
            break
        except AttributeError:
            pass
        try:
            model = getattr(source, model_name + 'Middleware')
        except AttributeError:
            pass

    if model is None or not is_valid_model_class(model):
        raise http.Http404('no service called `%s`' % model_name)

    required_permissions = getattr(model(), 'API_REQUIRED_PERMISSIONS', None)
    if isinstance(required_permissions, six.string_types):
        required_permissions = [required_permissions]
    if required_permissions and (
        not request.user.is_active or
        not has_permissions(request.user, required_permissions)
    ):
        # you're not allowed to use this model
        permission_names = _permission_display_names(required_permissions)
        return http.JsonResponse({
            'error': "Use of this endpoint requires the '%s' permission" % (
                ', '.join(permission_names),
            )
        }, status=403)

    # it being set to None means it's been deliberately disabled
    if getattr(model, 'API_WHITELIST', False) is False:
        raise APIWhitelistError('No API_WHITELIST defined for %r' % model)

    instance = model()

    # Any additional headers we intend to set on the response
    headers = {}

    # Certain models need to know who the user is to be able to
    # internally use that to determine its output.
    instance.api_user = request.user

    function = instance.post if request.method == 'POST' else instance.get
    if not function:
        # HttpResponseNotAllowed takes the methods that ARE permitted (it
        # fills the Allow header), not the method that was rejected.
        permitted_methods = [
            method
            for method, attribute in (('GET', 'get'), ('POST', 'post'))
            if getattr(instance, attribute, None)
        ]
        return http.HttpResponseNotAllowed(permitted_methods)

    request_data = request.GET if request.method == 'GET' else request.POST
    form = FormWrapper(model, request_data)
    if not form.is_valid():
        # custom override of the status code
        return {'errors': dict(form.errors)}, 400

    try:
        result = function(**form.cleaned_data)
    except ValueError as e:
        # Compare against the message text; an exception object does not
        # support ``in`` on Python 3.
        if 'No JSON object could be decoded' in str(e):
            return http.HttpResponseBadRequest(
                json.dumps({'error': 'Not a valid JSON response'}),
                content_type='application/json; charset=UTF-8'
            )
        raise
    except NOT_FOUND_EXCEPTIONS as exception:
        return http.HttpResponseNotFound(
            json.dumps({
                'error': '%s: %s' % (type(exception).__name__, exception)
            }),
            content_type='application/json; charset=UTF-8'
        )
    except BAD_REQUEST_EXCEPTIONS as exception:
        return http.HttpResponseBadRequest(
            json.dumps({
                'error': '%s: %s' % (type(exception).__name__, exception)
            }),
            content_type='application/json; charset=UTF-8'
        )

    # Some models allow a binary response. That is decided by the model's
    # `API_BINARY_RESPONSE` dict: every key/value pair in it must be
    # present in the validated query. For example, if the query is
    # `?foo=bar&other=thing&bar=baz` and the dict is exactly
    # {'foo': 'bar', 'bar': 'baz'} the response is binary, with content
    # type `application/octet-stream`. An empty dict never matches.
    binary_spec = model.API_BINARY_RESPONSE
    binary_response = bool(binary_spec) and all(
        form.cleaned_data.get(key) == value
        for key, value in binary_spec.items()
    )

    if binary_response:
        # if you don't have all required permissions, you'll get a 403
        required_permissions = model.API_BINARY_PERMISSIONS
        if isinstance(required_permissions, six.string_types):
            required_permissions = [required_permissions]
        if required_permissions and not has_permissions(
            request.user, required_permissions
        ):
            # you're not allowed to get the binary response
            permission_names = _permission_display_names(
                required_permissions)
            return http.HttpResponseForbidden(
                "Binary response requires the '%s' permission\n" %
                (', '.join(permission_names))
            )

        assert model.API_BINARY_FILENAME, 'No API_BINARY_FILENAME set on model'
        response = http.HttpResponse(
            result, content_type='application/octet-stream')
        filename = model.API_BINARY_FILENAME % form.cleaned_data
        response['Content-Disposition'] = (
            'attachment; filename="%s"' % filename)
        return response

    if not request.user.has_perm('crashstats.view_pii'):
        if callable(model.API_WHITELIST):
            whitelist = model.API_WHITELIST()
        else:
            whitelist = model.API_WHITELIST

        if result and whitelist:
            cleaner = Cleaner(
                whitelist,
                # if True, uses warnings.warn() to show fields
                # not whitelisted
                debug=settings.DEBUG,
            )
            cleaner.start(result)

    if getattr(model, 'deprecation_warning', False):
        if isinstance(result, dict):
            result['DEPRECATION_WARNING'] = model.deprecation_warning
        # The second element of the returned tuple becomes the extra
        # response headers.
        headers['DEPRECATION-WARNING'] = (
            model.deprecation_warning.replace('\n', ' '))

    if model.cache_seconds:
        # We can set a Cache-Control header.
        # We say 'private' because the content can depend on the user
        # and we don't want the response to be collected in HTTP proxies
        # by mistake.
        headers['Cache-Control'] = 'private, max-age={}'.format(
            model.cache_seconds)

    return result, headers
def _permission_display_names(required_permissions):
    """Return display names for dotted permission strings.

    The part after the first dot is treated as the permission codename.
    When no ``Permission`` row matches, the codename itself is used.
    """
    names = []
    for permission in required_permissions:
        # [-1] rather than [1]: a string without a dot is used as-is
        # instead of raising IndexError.
        codename = permission.split(".", 1)[-1]
        try:
            names.append(Permission.objects.get(codename=codename).name)
        except Permission.DoesNotExist:
            names.append(codename)
    return names


def model_wrapper(request, model_name):
    """Run the API model called ``model_name`` for this request.

    Looks the model class up in ``MODELS_MODULES`` (also trying the
    ``<model_name>Middleware`` spelling), enforces its permissions,
    validates the request parameters with ``MiddlewareModelForm`` and
    calls the model's ``post`` (for POST) or ``get`` (any other method).

    Returns:
        ``(result, headers)`` on success, ``({"errors": ...}, 400)`` when
        the form is invalid, or an ``HttpResponse`` for permission
        failures, not-found/bad-request errors and binary downloads.

    Raises:
        http.Http404: the model is in ``API_DONT_SERVE_LIST``, unknown or
            not a valid model class.
    """
    if model_name in API_DONT_SERVE_LIST:
        raise http.Http404("Don't know what you're talking about!")

    model = None
    for source in MODELS_MODULES:
        try:
            model = getattr(source, model_name)
            break
        except AttributeError:
            pass
        try:
            model = getattr(source, model_name + "Middleware")
        except AttributeError:
            pass

    if model is None or not is_valid_model_class(model):
        raise http.Http404("no service called `%s`" % model_name)

    required_permissions = getattr(model(), "API_REQUIRED_PERMISSIONS", None)
    if required_permissions and (
        not request.user.is_active
        or not request.user.has_perms(required_permissions)
    ):
        # you're not allowed to use this model
        permission_names = _permission_display_names(required_permissions)
        return http.JsonResponse(
            {
                "error": "Use of this endpoint requires the '%s' permission"
                % (", ".join(permission_names),)
            },
            status=403,
        )

    instance = model()

    # Any additional headers we intend to set on the response
    headers = {}

    # Certain models need to know who the user is to be able to
    # internally use that to determine its output.
    instance.api_user = request.user

    function = instance.post if request.method == "POST" else instance.get
    if not function:
        # HttpResponseNotAllowed takes the methods that ARE permitted (it
        # fills the Allow header), not the method that was rejected.
        permitted_methods = [
            method
            for method, attribute in (("GET", "get"), ("POST", "post"))
            if getattr(instance, attribute, None)
        ]
        return http.HttpResponseNotAllowed(permitted_methods)

    request_data = request.GET if request.method == "GET" else request.POST
    form = MiddlewareModelForm(model, request_data)
    if not form.is_valid():
        # custom override of the status code
        return {"errors": dict(form.errors)}, 400

    try:
        result = function(**form.cleaned_data)
    except NOT_FOUND_EXCEPTIONS as exception:
        return http.HttpResponseNotFound(
            json.dumps({
                "error": "%s: %s" % (type(exception).__name__, exception)
            }),
            content_type="application/json; charset=UTF-8",
        )
    except BAD_REQUEST_EXCEPTIONS as exception:
        return http.HttpResponseBadRequest(
            json.dumps({
                "error": "%s: %s" % (type(exception).__name__, exception)
            }),
            content_type="application/json; charset=UTF-8",
        )

    # Some models allow a binary response. That is decided by the model's
    # `API_BINARY_RESPONSE` dict: every key/value pair in it must be
    # present in the validated query. For example, if the query is
    # `?foo=bar&other=thing&bar=baz` and the dict is exactly
    # {'foo': 'bar', 'bar': 'baz'} the response is binary, with content
    # type `application/octet-stream`. An empty dict never matches.
    binary_spec = model.API_BINARY_RESPONSE
    binary_response = bool(binary_spec) and all(
        form.cleaned_data.get(key) == value
        for key, value in binary_spec.items()
    )

    if binary_response:
        # if you don't have all required permissions, you'll get a 403
        required_permissions = model.API_BINARY_PERMISSIONS
        if required_permissions and not request.user.has_perms(
            required_permissions
        ):
            # you're not allowed to get the binary response
            permission_names = _permission_display_names(
                required_permissions)
            return http.HttpResponseForbidden(
                "Binary response requires the '%s' permission\n"
                % (", ".join(permission_names))
            )

        assert model.API_BINARY_FILENAME, "No API_BINARY_FILENAME set on model"
        response = http.HttpResponse(
            result, content_type="application/octet-stream")
        filename = model.API_BINARY_FILENAME % form.cleaned_data
        response["Content-Disposition"] = (
            'attachment; filename="%s"' % filename)
        return response

    if not request.user.has_perm("crashstats.view_pii"):
        if callable(model.API_ALLOWLIST):
            allowlist = model.API_ALLOWLIST()
        else:
            allowlist = model.API_ALLOWLIST

        if result and allowlist:
            cleaner = Cleaner(
                allowlist,
                # if True, uses warnings.warn() to show fields
                # not allowlisted
                debug=settings.DEBUG,
            )
            cleaner.start(result)

    if getattr(model, "deprecation_warning", False):
        if isinstance(result, dict):
            result["DEPRECATION_WARNING"] = model.deprecation_warning
        headers["DEPRECATION-WARNING"] = (
            model.deprecation_warning.replace("\n", " "))

    if model.cache_seconds:
        # We can set a Cache-Control header.
        # We say 'private' because the content can depend on the user
        # and we don't want the response to be collected in HTTP proxies
        # by mistake.
        headers["Cache-Control"] = f"private, max-age={model.cache_seconds}"

    return result, headers
def _permission_display_names(required_permissions):
    """Return display names for dotted permission strings.

    The part after the first dot is treated as the permission codename.
    When no ``Permission`` row matches, the codename itself is used.
    """
    names = []
    for permission in required_permissions:
        # [-1] rather than [1]: a string without a dot is used as-is
        # instead of raising IndexError.
        codename = permission.split('.', 1)[-1]
        try:
            names.append(Permission.objects.get(codename=codename).name)
        except Permission.DoesNotExist:
            names.append(codename)
    return names


def model_wrapper(request, model_name):
    """Run the API model called ``model_name`` for this request.

    Looks the model class up in ``MODELS_MODULES`` (also trying the
    ``<model_name>Middleware`` spelling), enforces its permissions,
    validates the request parameters with ``FormWrapper`` and calls the
    model's ``post`` (for POST) or ``get`` (any other method).

    Returns:
        ``(result, headers)`` on success, ``({'errors': ...}, 400)`` when
        the form is invalid, or an ``HttpResponse`` for permission
        failures, not-found/bad-request errors and binary downloads.

    Raises:
        http.Http404: the model is blacklisted, unknown or not valid.
        APIWhitelistError: the model does not define ``API_WHITELIST``.
    """
    if model_name in BLACKLIST:
        raise http.Http404("Don't know what you're talking about!")

    model = None
    for source in MODELS_MODULES:
        try:
            model = getattr(source, model_name)
            break
        except AttributeError:
            pass
        try:
            model = getattr(source, model_name + 'Middleware')
        except AttributeError:
            pass

    if model is None or not is_valid_model_class(model):
        raise http.Http404('no service called `%s`' % model_name)

    required_permissions = getattr(model(), 'API_REQUIRED_PERMISSIONS', None)
    if isinstance(required_permissions, six.string_types):
        required_permissions = [required_permissions]
    user_is_denied = bool(required_permissions) and (
        not request.user.is_active or
        not has_permissions(request.user, required_permissions)
    )
    if user_is_denied:
        # you're not allowed to use this model
        permission_names = _permission_display_names(required_permissions)
        return http.JsonResponse({
            'error': "Use of this endpoint requires the '%s' permission" % (
                ', '.join(permission_names),
            )
        }, status=403)

    # it being set to None means it's been deliberately disabled
    if getattr(model, 'API_WHITELIST', False) is False:
        raise APIWhitelistError('No API_WHITELIST defined for %r' % model)

    instance = model()

    # Any additional headers we intend to set on the response
    headers = {}

    # Certain models need to know who the user is to be able to
    # internally use that to determine its output.
    instance.api_user = request.user

    function = instance.post if request.method == 'POST' else instance.get
    if not function:
        # HttpResponseNotAllowed takes the methods that ARE permitted (it
        # fills the Allow header), not the method that was rejected.
        permitted_methods = [
            method
            for method, attribute in (('GET', 'get'), ('POST', 'post'))
            if getattr(instance, attribute, None)
        ]
        return http.HttpResponseNotAllowed(permitted_methods)

    request_data = request.GET if request.method == 'GET' else request.POST
    form = FormWrapper(model, request_data)
    if not form.is_valid():
        # custom override of the status code
        return {'errors': dict(form.errors)}, 400

    try:
        result = function(**form.cleaned_data)
    except ValueError as e:
        # Compare against the message text; an exception object does not
        # support ``in`` on Python 3.
        if 'No JSON object could be decoded' in str(e):
            return http.HttpResponseBadRequest(
                json.dumps({'error': 'Not a valid JSON response'}),
                content_type='application/json; charset=UTF-8'
            )
        raise
    except NOT_FOUND_EXCEPTIONS as exception:
        return http.HttpResponseNotFound(
            json.dumps({
                'error': '%s: %s' % (type(exception).__name__, exception)
            }),
            content_type='application/json; charset=UTF-8'
        )
    except BAD_REQUEST_EXCEPTIONS as exception:
        return http.HttpResponseBadRequest(
            json.dumps({
                'error': '%s: %s' % (type(exception).__name__, exception)
            }),
            content_type='application/json; charset=UTF-8'
        )

    # Some models allow a binary response. That is decided by the model's
    # `API_BINARY_RESPONSE` dict: every key/value pair in it must be
    # present in the validated query. For example, if the query is
    # `?foo=bar&other=thing&bar=baz` and the dict is exactly
    # {'foo': 'bar', 'bar': 'baz'} the response is binary, with content
    # type `application/octet-stream`. An empty dict never matches.
    binary_spec = model.API_BINARY_RESPONSE
    binary_response = bool(binary_spec) and all(
        form.cleaned_data.get(key) == value
        for key, value in binary_spec.items()
    )

    if binary_response:
        # if you don't have all required permissions, you'll get a 403
        required_permissions = model.API_BINARY_PERMISSIONS
        if isinstance(required_permissions, six.string_types):
            required_permissions = [required_permissions]
        if required_permissions and not has_permissions(
            request.user, required_permissions
        ):
            # you're not allowed to get the binary response
            permission_names = _permission_display_names(
                required_permissions)
            return http.HttpResponseForbidden(
                "Binary response requires the '%s' permission\n" %
                (', '.join(permission_names))
            )

        assert model.API_BINARY_FILENAME, 'No API_BINARY_FILENAME set on model'
        response = http.HttpResponse(
            result, content_type='application/octet-stream')
        filename = model.API_BINARY_FILENAME % form.cleaned_data
        response['Content-Disposition'] = (
            'attachment; filename="%s"' % filename)
        return response

    if not request.user.has_perm('crashstats.view_pii'):
        if callable(model.API_WHITELIST):
            whitelist = model.API_WHITELIST()
        else:
            whitelist = model.API_WHITELIST

        if result and whitelist:
            cleaner = Cleaner(
                whitelist,
                # if True, uses warnings.warn() to show fields
                # not whitelisted
                debug=settings.DEBUG,
            )
            cleaner.start(result)

    if getattr(model, 'deprecation_warning', False):
        if isinstance(result, dict):
            result['DEPRECATION_WARNING'] = model.deprecation_warning
        # The second element of the returned tuple becomes the extra
        # response headers.
        headers['DEPRECATION-WARNING'] = (
            model.deprecation_warning.replace('\n', ' '))

    if model.cache_seconds:
        # We can set a Cache-Control header.
        # We say 'private' because the content can depend on the user
        # and we don't want the response to be collected in HTTP proxies
        # by mistake.
        headers['Cache-Control'] = 'private, max-age={}'.format(
            model.cache_seconds)

    return result, headers