import asyncio
import json
import time

import pytest


async def test_slow_server(host):
    if not pytest.enable_microbatch:
        pytest.skip("microbatch is disabled")
    A, B = 0.2, 1
    data = '{"a": %s, "b": %s}' % (A, B)

    time_start = time.time()
    req_count = 10
    tasks = tuple(
        pytest.assert_request(
            "POST",
            f"http://{host}/echo_with_delay",
            headers=(("Content-Type", "application/json"),),
            data=data,
            timeout=30,
            assert_status=200,
            assert_data=data.encode(),
        )
        for _ in range(req_count)
    )
    await asyncio.gather(*tasks)
    # With microbatching, the 10 concurrent delayed requests should
    # complete well within 12 seconds overall.
    assert time.time() - time_start < 12

    # Under heavy load the server may shed requests with 429.
    req_count = 100
    tasks = tuple(
        pytest.assert_request(
            "POST",
            f"http://{host}/echo_with_delay",
            headers=(("Content-Type", "application/json"),),
            data=data,
            assert_status=lambda i: i in (200, 429),
        )
        for _ in range(req_count)
    )
    await asyncio.gather(*tasks)
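
# NOTE: `pytest.assert_request` and `pytest.enable_microbatch` are assumed to
# be attached to the `pytest` namespace by this suite's conftest.py. A minimal
# sketch of what such a helper could look like, using aiohttp (an assumption;
# the project's actual implementation may differ):
async def _assert_request_sketch(
    method,
    url,
    headers=None,
    data=None,
    timeout=None,
    assert_status=None,
    assert_data=None,
):
    import aiohttp  # assumed HTTP client; imported locally to keep the sketch self-contained

    client_timeout = aiohttp.ClientTimeout(total=timeout) if timeout else None
    async with aiohttp.ClientSession(timeout=client_timeout) as session:
        async with session.request(method, url, headers=headers, data=data) as resp:
            body = await resp.read()
            status = resp.status
    # Both assertion hooks accept either a literal value or a predicate.
    if callable(assert_status):
        assert assert_status(status), f"unexpected status: {status}"
    elif assert_status is not None:
        assert status == assert_status, f"unexpected status: {status}"
    if callable(assert_data):
        assert assert_data(body), f"unexpected body: {body!r}"
    elif assert_data is not None:
        assert body == assert_data, f"unexpected body: {body!r}"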

async def test_batch_size_limit(host):
    A, B = 0.0002, 0.01
    data = '{"a": %s, "b": %s}' % (A, B)

    # test for max_batch_size=None
    tasks = tuple(
        pytest.assert_request(
            "POST",
            f"http://{host}/echo_batch_size",
            headers=(("Content-Type", "application/json"),),
            data=data,
            assert_status=lambda i: i in (200, 429),
        )
        for _ in range(100)
    )
    await asyncio.gather(*tasks)
    await asyncio.sleep(1)

    batch_bucket = []
    # The assert_data callback records each reported batch size; list.append
    # returns None, so the trailing `or True` keeps the predicate truthy.
    tasks = tuple(
        pytest.assert_request(
            "POST",
            f"http://{host}/echo_batch_size",
            headers=(("Content-Type", "application/json"),),
            data=data,
            assert_status=200,
            assert_data=lambda d: (
                d == b'429: Too Many Requests'
                or batch_bucket.append(int(d.decode()))
                or True
            ),
        )
        for _ in range(30)
    )
    await asyncio.gather(*tasks)

    # batch size would be 1 only because of
    # bentoml_config.yml: microbatch.max_batch_size=1
    assert all(b == 1 for b in batch_bucket), batch_bucket
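
# The batch size of 1 asserted above comes from the server-side configuration.
# The relevant bentoml_config.yml snippet presumably looks like the following
# (keys inferred from the comment in the test, not verified against the file):
#
#   microbatch:
#     max_batch_size: 1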

async def test_tensorflow_2_artifact_with_docker(host):
    await pytest.assert_request(
        "POST",
        f"http://{host}/predict1",
        headers=(("Content-Type", "application/json"),),
        data=json.dumps({"instances": test_data}),
        assert_status=200,
        assert_data=b'[[15.0]]',
    )
    await pytest.assert_request(
        "POST",
        f"http://{host}/predict2",
        headers=(("Content-Type", "application/json"),),
        data=json.dumps({"instances": test_data}),
        assert_status=200,
        assert_data=b'[[15.0]]',
    )
    tasks = tuple(
        pytest.assert_request(
            "POST",
            f"http://{host}/predict3",
            headers=(("Content-Type", "application/json"),),
            data=json.dumps(i),
            assert_status=200,
            assert_data=b'[15.0]',
        )
        for i in ragged_data
    )
    await asyncio.gather(*tasks)
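
# `test_data` and `ragged_data` are assumed to be module-level fixtures defined
# elsewhere in this suite; hypothetical placeholders with plausible shapes
# (values are illustrative only, not the real test inputs):
#
#   test_data = [[1.0, 2.0, 3.0, 4.0, 5.0]]
#   ragged_data = [[[1.0], [2.0, 3.0]], [[4.0, 5.0]]]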

async def test_api_server_inference_result(host):
    req_count = 2 if pytest.enable_microbatch else 1
    tasks = tuple(
        pytest.assert_request(
            "POST",
            f"http://{host}/predict_direct_json",
            headers=(("Content-Type", "application/json"),),
            data=json.dumps({"in": i}),
            assert_status=200,
            assert_data=bytes('{"in": %s}' % i, 'ascii'),
        )
        for i in range(req_count)
    )
    # Without a JSON Content-Type header the same endpoint should reject
    # the request with a 400.
    tasks += tuple(
        pytest.assert_request(
            "POST",
            f"http://{host}/predict_direct_json",
            data=json.dumps({"in": i}),
            assert_status=400,
        )
        for i in range(req_count)
    )
    await asyncio.gather(*tasks)
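
# The 400 assertions above rely on the server rejecting bodies that arrive
# without a JSON Content-Type header. A hypothetical guard with that behavior
# (illustrative only; not the handler under test):
def _requires_json_content_type(headers) -> bool:
    # Accept the request only when it explicitly declares a JSON payload.
    return dict(headers or ()).get("Content-Type", "").startswith("application/json")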

async def test_api_server_json(host):
    req_count = 3
    tasks = tuple(
        pytest.assert_request(
            "POST",
            f"http://{host}/predict_json",
            headers=(("Content-Type", "application/json"),),
            data=json.dumps({"in": i}),
            assert_data=bytes('{"in": %s}' % i, 'ascii'),
        )
        for i in range(req_count)
    )
    await asyncio.gather(*tasks)
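
# These coroutines are assumed to be collected by pytest-asyncio (or an
# equivalent event-loop plugin), with `host` supplied by a fixture that starts
# the API server; a typical invocation would then be (path hypothetical):
#
#   pytest -x tests/integration/test_api_server.py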