def test_batch(self):
    """Check that multi-item POSTs enforce the record and byte limits.

    Items beyond the configured "max_post_records" count, or beyond the
    configured "max_post_bytes" total payload size, are expected to be
    reported under "failed" while the rest are reported under "success".
    """
    max_count = get_limit_config(self.config, "max_post_records")
    max_bytes = get_limit_config(self.config, "max_post_bytes")
    # The byte-limit assertions at the end of this test are only
    # meaningful if the configured limit is exactly 1MB.
    self.assertEqual(max_bytes, 1024 * 1024)

    # Test that batch uploads are correctly processed.
    # Uploading max_count-5 small objects should succeed.
    wbos = [{'id': str(i), 'payload': 'X'} for i in range(max_count - 5)]
    res = self.app.post_json(self.root + '/storage/col2', wbos)
    res = res.json
    self.assertEqual(len(res['success']), max_count - 5)
    self.assertEqual(len(res['failed']), 0)

    # Uploading max_count+5 items should produce five failures.
    wbos = [{'id': str(i), 'payload': 'X'} for i in range(max_count + 5)]
    res = self.app.post_json(self.root + '/storage/col2', wbos)
    res = res.json
    self.assertEqual(len(res['success']), max_count)
    self.assertEqual(len(res['failed']), 5)

    # The test config has max_bytes=1M.
    # Uploading 5 210KB items (1050KB in total) should produce one failure.
    wbos = [{'id': str(i), 'payload': "X" * (210 * 1024)} for i in range(5)]
    res = self.app.post_json(self.root + '/storage/col2', wbos)
    res = res.json
    self.assertEqual(len(res['success']), 4)
    self.assertEqual(len(res['failed']), 1)
def extract_batch_state(request):
    """Validator to extract the batch state of a request for slightly
    tidier code in the views.

    If the "batch" parameter has no value or has a value of "true" then
    a new batch will be created; otherwise it is parsed as a batch ID,
    either base64-encoded or as a plain integer.

    If the "commit" parameter has a value of "true", this batch
    is to be committed and deleted.

    The results are stored in request.validated["batch"] (False, True or
    the integer batch ID) and request.validated["commit"] (a boolean).
    Malformed parameters or headers are reported through request.errors.

    Finally, the X-Weave-* size headers are compared against the
    configured limits, raising a 400 "size-limit-exceeded" error if the
    client declares more than the server will accept.
    """
    # Don't extract or validate any of these params
    # if the batch-upload feature is disabled.
    settings = request.registry.settings
    if not settings.get("storage.batch_upload_enabled", False):
        return

    request.validated["batch"] = False
    batch_id = request.GET.get("batch")
    if batch_id is not None:
        if TRUE_REGEX.match(batch_id):
            batch_id = True
        else:
            try:
                batch_id = int(b64decode(batch_id))
            except (TypeError, ValueError):
                # TypeError: malformed base64 on python2.
                # ValueError: malformed base64 on python3, or input that
                # decodes cleanly but isn't an integer (which includes
                # plain integer IDs such as "1234").  Either way, fall
                # back to treating the raw value as a plain integer
                # rather than letting the exception escape.
                try:
                    batch_id = int(batch_id)
                except ValueError:
                    msg = "Invalid batch ID: \"%s\"" % (batch_id,)
                    request.errors.add("batch", "id", msg)
        request.validated["batch"] = batch_id
    elif "batch" in request.GET:
        # The parameter is present but without a value.
        request.validated["batch"] = True

    request.validated["commit"] = False
    commit = request.GET.get("commit")
    if commit is not None:
        if TRUE_REGEX.match(commit):
            request.validated["commit"] = True
        else:
            msg = "commit parameter must be \"true\" to apply batches"
            request.errors.add("batch", "commit", msg)

    # Each of these optional headers declares a size that the client
    # intends to upload, paired with the config setting that bounds it.
    LIMITS = (
        ("X-Weave-Records", "max_post_records"),
        ("X-Weave-Bytes", "max_post_bytes"),
        ("X-Weave-Total-Records", "max_total_records"),
        ("X-Weave-Total-Bytes", "max_total_bytes"),
    )
    for (header, setting) in LIMITS:
        try:
            value = request.headers[header]
        except KeyError:
            # Header not supplied, nothing to check.
            continue
        try:
            count = int(value)
        except ValueError:
            msg = "Invalid integer value: %s" % (value,)
            request.errors.add("header", header, msg)
            continue
        if count > get_limit_config(request, setting):
            raise json_error(400, "size-limit-exceeded")
def get_info_configuration(request):
    """Build the mapping of limit names to values for this request.

    The batch-related limits are reported only when the
    "storage.batch_upload_enabled" setting is on; otherwise just
    "max_request_bytes" is reported.  "max_record_payload_bytes" is
    always included.
    """
    settings = request.registry.settings
    # Don't return batch-related limits if the feature isn't enabled.
    if settings.get("storage.batch_upload_enabled", False):
        names = (
            "max_post_records",
            "max_post_bytes",
            "max_total_records",
            "max_total_bytes",
        )
    else:
        names = ("max_request_bytes",)
    limits = dict((name, get_limit_config(request, name)) for name in names)
    # This limit is hard-coded for now.
    limits["max_record_payload_bytes"] = MAX_PAYLOAD_SIZE
    return limits
def get_info_configuration(request):
    """Return a dict of the limits that apply to the given request.

    With "storage.batch_upload_enabled" switched on, the per-post and
    per-batch record/byte limits are looked up; with it off, only
    "max_request_bytes" is.  The record payload size limit is added
    in both cases.
    """
    batch_enabled = request.registry.settings.get(
        "storage.batch_upload_enabled", False)
    # Batch-related limits are only worth reporting if the feature is on.
    limit_names = ("max_request_bytes",)
    if batch_enabled:
        limit_names = ("max_post_records", "max_post_bytes",
                       "max_total_records", "max_total_bytes")
    config = {
        limit_name: get_limit_config(request, limit_name)
        for limit_name in limit_names
    }
    # This limit is hard-coded for now.
    config["max_record_payload_bytes"] = MAX_PAYLOAD_SIZE
    return config
def parse_multiple_bsos(request):
    """Validator that decodes a list of BSOs from the request body.

    The body may be a single JSON list (application/json, text/plain or
    no content-type at all) or one JSON document per line
    (application/newlines).  Any other content-type is reported as an
    error with status 415.

    On success, accepted BSOs are stored under request.validated["bsos"]
    and a mapping of rejected BSO id to reason is stored under
    request.validated["invalid_bsos"].  Structural problems (bad JSON,
    non-list body, non-BSO items, missing or duplicate ids) are reported
    through request.errors and abort the validation.
    """
    content_type = request.content_type
    is_json = content_type in ("application/json", "text/plain", None)
    if not is_json and content_type != "application/newlines":
        msg = "Unsupported Media Type: %s" % (content_type,)
        request.errors.add("header", "Content-Type", msg)
        request.errors.status = 415
        return

    try:
        if is_json:
            bso_datas = json_loads(request.body)
        else:
            bso_datas = [json_loads(ln) for ln in request.body.split("\n")]
    except ValueError:
        request.errors.add("body", "bsos", "Invalid JSON in request body")
        return

    if not isinstance(bso_datas, (tuple, list)):
        request.errors.add("body", "bsos", "Input data was not a list")
        return

    max_records = get_limit_config(request, "max_post_records")
    max_bytes = get_limit_config(request, "max_post_bytes")

    accepted = {}
    rejected = {}
    payload_bytes = 0

    for index, raw_bso in enumerate(bso_datas):
        try:
            bso = BSO(raw_bso)
        except ValueError:
            request.errors.add("body", "bsos",
                               "Input data was not a list of BSOs")
            return

        try:
            bso_id = bso["id"]
        except KeyError:
            request.errors.add("body", "bsos", "Input BSO has no ID")
            return

        if bso_id in accepted:
            request.errors.add("body", "bsos", "Input BSO has duplicate ID")
            return

        is_valid, reason = bso.validate()
        if not is_valid:
            rejected[bso_id] = reason
            # Log status on how many invalid BSOs we get, and why.
            logger.info(
                "Invalid BSO %s/%s/%s (%s): %s",
                request.matchdict["userid"],
                request.matchdict.get("collection"),
                bso_id,
                reason,
                bso,
            )
            continue

        # The position in the incoming list is what counts against the
        # record limit, so earlier rejected items still use up a slot.
        if index >= max_records:
            rejected[bso_id] = "retry bso"
            continue

        # The running total keeps growing even for items rejected here.
        payload_bytes += len(bso.get("payload", ""))
        if payload_bytes >= max_bytes:
            rejected[bso_id] = "retry bytes"
            continue

        accepted[bso_id] = bso

    request.validated["bsos"] = accepted.values()
    request.validated["invalid_bsos"] = rejected
def extract_batch_state(request):
    """Validator to extract the batch state of a request for slightly
    tidier code in the views.

    If the "batch" parameter has no value or has a value of "true" then
    a new batch will be created; otherwise it is parsed as a batch ID,
    either base64-encoded or as a plain integer.

    If the "commit" parameter has a value of "true", this batch
    is to be committed and deleted.

    The results are stored in request.validated["batch"] (False, True or
    the integer batch ID) and request.validated["commit"] (a boolean).
    Malformed parameters or headers are reported through request.errors.

    Finally, the X-Weave-* size headers are compared against the
    configured limits, raising a 400 "size-limit-exceeded" error if the
    client declares more than the server will accept.
    """
    request.validated["batch"] = False
    batch_id = request.GET.get("batch")
    if batch_id is not None:
        if TRUE_REGEX.match(batch_id):
            batch_id = True
        else:
            try:
                batch_id = int(b64decode(batch_id))
            except (TypeError, ValueError):
                # TypeError: malformed base64 on python2.
                # ValueError: malformed base64 on python3, or input that
                # decodes cleanly but isn't an integer (which includes
                # plain integer IDs such as "1234").  Either way, fall
                # back to treating the raw value as a plain integer
                # rather than letting the exception escape.
                try:
                    batch_id = int(batch_id)
                except ValueError:
                    msg = "Invalid batch ID: \"%s\"" % (batch_id, )
                    request.errors.add("batch", "id", msg)
        request.validated["batch"] = batch_id
    elif "batch" in request.GET:
        # The parameter is present but without a value.
        request.validated["batch"] = True

    request.validated["commit"] = False
    commit = request.GET.get("commit")
    if commit is not None:
        if TRUE_REGEX.match(commit):
            request.validated["commit"] = True
        else:
            msg = "commit parameter must be \"true\" to apply batches"
            request.errors.add("batch", "commit", msg)

    # If batch uploads are not enabled in the config then
    # we want to:
    #  * silently ignore attempts to start a new batch, which
    #    will cause clients to fall back to non-batch mode.
    #  * error out on attempts to continue an existing batch,
    #    since we can't possibly do what the client expects.
    # NOTE(review): this validator leaves validated["batch"] as True in
    # the first case; presumably the view is what ignores it - confirm.
    settings = request.registry.settings
    if not settings.get("storage.batch_upload_enabled", False):
        batch = request.validated["batch"]
        if batch and batch is not True:
            request.errors.add("batch", "id", "Batch uploads disabled")

    # Each of these optional headers declares a size that the client
    # intends to upload, paired with the config setting that bounds it.
    LIMITS = (
        ("X-Weave-Records", "max_post_records"),
        ("X-Weave-Bytes", "max_post_bytes"),
        ("X-Weave-Total-Records", "max_total_records"),
        ("X-Weave-Total-Bytes", "max_total_bytes"),
    )
    for (header, setting) in LIMITS:
        try:
            value = request.headers[header]
        except KeyError:
            # Header not supplied, nothing to check.
            continue
        try:
            count = int(value)
        except ValueError:
            msg = "Invalid integer value: %s" % (value, )
            request.errors.add("header", header, msg)
            continue
        if count > get_limit_config(request, setting):
            raise json_error(400, "size-limit-exceeded")
def parse_multiple_bsos(request):
    """Validator to read a collection of BSOs out of the request body.

    Two body formats are understood: a JSON list (sent as
    application/json, text/plain, or with no content-type) and
    newline-separated JSON documents (application/newlines).  Anything
    else is flagged as an error with status 415.

    BSOs that pass validation and fit within the configured
    "max_post_records" / "max_post_bytes" limits end up under the key
    "bsos"; the rest are recorded, by id, under the key "invalid_bsos"
    together with the reason.  A malformed body, a malformed item, or a
    missing/duplicate id adds an error and stops processing.
    """
    errors = request.errors
    content_type = request.content_type
    try:
        if content_type == "application/newlines":
            lines = request.body.split("\n")
            bso_datas = [json_loads(line) for line in lines]
        elif content_type in ("application/json", "text/plain", None):
            bso_datas = json_loads(request.body)
        else:
            errors.add("header", "Content-Type",
                       "Unsupported Media Type: %s" % (content_type,))
            errors.status = 415
            return
    except ValueError:
        errors.add("body", "bsos", "Invalid JSON in request body")
        return

    if not isinstance(bso_datas, (tuple, list)):
        errors.add("body", "bsos", "Input data was not a list")
        return

    record_limit = get_limit_config(request, "max_post_records")
    byte_limit = get_limit_config(request, "max_post_bytes")

    good = {}
    bad = {}
    running_bytes = 0

    for position, item in enumerate(bso_datas):
        try:
            bso = BSO(item)
        except ValueError:
            errors.add("body", "bsos", "Input data was not a list of BSOs")
            return

        try:
            key = bso["id"]
        except KeyError:
            errors.add("body", "bsos", "Input BSO has no ID")
            return

        if key in good:
            errors.add("body", "bsos", "Input BSO has duplicate ID")
            return

        ok, why = bso.validate()
        if not ok:
            bad[key] = why
            # Log status on how many invalid BSOs we get, and why.
            template = "Invalid BSO %s/%s/%s (%s): %s"
            userid = request.matchdict["userid"]
            collection = request.matchdict.get("collection")
            logger.info(template, userid, collection, key, why, bso)
        elif position >= record_limit:
            # Past the per-request record limit; client should retry it.
            bad[key] = "retry bso"
        else:
            running_bytes += len(bso.get("payload", ""))
            if running_bytes >= byte_limit:
                # Past the per-request byte limit; client should retry it.
                bad[key] = "retry bytes"
            else:
                good[key] = bso

    request.validated["bsos"] = good.values()
    request.validated["invalid_bsos"] = bad
def extract_batch_state(request):
    """Validator to extract the batch state of a request for slightly
    tidier code in the views.

    If the "batch" parameter has no value or has a value of "true" then
    a new batch will be created; otherwise it is parsed as a batch ID,
    either base64-encoded or as a plain integer.

    If the "commit" parameter has a value of "true", this batch
    is to be committed and deleted.

    The results are stored in request.validated["batch"] (False, True or
    the integer batch ID) and request.validated["commit"] (a boolean).
    Malformed parameters or headers are reported through request.errors.

    Finally, the X-Weave-* size headers are compared against the
    configured limits, raising a 400 "size-limit-exceeded" error if the
    client declares more than the server will accept.
    """
    request.validated["batch"] = False
    batch_id = request.GET.get("batch")
    if batch_id is not None:
        if TRUE_REGEX.match(batch_id):
            batch_id = True
        else:
            try:
                batch_id = int(b64decode(batch_id))
            except (TypeError, ValueError):
                # TypeError: malformed base64 on python2.
                # ValueError: malformed base64 on python3, or input that
                # decodes cleanly but isn't an integer (which includes
                # plain integer IDs such as "1234").  Either way, fall
                # back to treating the raw value as a plain integer
                # rather than letting the exception escape.
                try:
                    batch_id = int(batch_id)
                except ValueError:
                    msg = "Invalid batch ID: \"%s\"" % (batch_id,)
                    request.errors.add("batch", "id", msg)
        request.validated["batch"] = batch_id
    elif "batch" in request.GET:
        # The parameter is present but without a value.
        request.validated["batch"] = True

    request.validated["commit"] = False
    commit = request.GET.get("commit")
    if commit is not None:
        if TRUE_REGEX.match(commit):
            request.validated["commit"] = True
        else:
            msg = "commit parameter must be \"true\" to apply batches"
            request.errors.add("batch", "commit", msg)

    # If batch uploads are not enabled in the config then
    # we want to:
    #  * silently ignore attempts to start a new batch, which
    #    will cause clients to fall back to non-batch mode.
    #  * error out on attempts to continue an existing batch,
    #    since we can't possibly do what the client expects.
    # NOTE(review): this validator leaves validated["batch"] as True in
    # the first case; presumably the view is what ignores it - confirm.
    settings = request.registry.settings
    if not settings.get("storage.batch_upload_enabled", False):
        batch = request.validated["batch"]
        if batch and batch is not True:
            request.errors.add("batch", "id", "Batch uploads disabled")

    # Each of these optional headers declares a size that the client
    # intends to upload, paired with the config setting that bounds it.
    LIMITS = (
        ("X-Weave-Records", "max_post_records"),
        ("X-Weave-Bytes", "max_post_bytes"),
        ("X-Weave-Total-Records", "max_total_records"),
        ("X-Weave-Total-Bytes", "max_total_bytes"),
    )
    for (header, setting) in LIMITS:
        try:
            value = request.headers[header]
        except KeyError:
            # Header not supplied, nothing to check.
            continue
        try:
            count = int(value)
        except ValueError:
            msg = "Invalid integer value: %s" % (value,)
            request.errors.add("header", header, msg)
            continue
        if count > get_limit_config(request, setting):
            raise json_error(400, "size-limit-exceeded")