Example No. 1
def test_update_dynamic_profiling_result():
    model = ModelService.get_models('ResNet50')[0]
    dummy_info_tuple = InfoTuple(avg=1, p50=1, p95=1, p99=1)
    updated_info_tuple = InfoTuple(avg=1, p50=2, p95=1, p99=1)
    dpr = DynamicProfileResultBO(device_id='gpu:01',
                                 device_name='Tesla K40c',
                                 batch=1,
                                 memory=ProfileMemory(1000, 2000, 0.5),
                                 latency=ProfileLatency(
                                     init_latency=dummy_info_tuple,
                                     preprocess_latency=dummy_info_tuple,
                                     inference_latency=updated_info_tuple,
                                     postprocess_latency=dummy_info_tuple,
                                 ),
                                 throughput=ProfileThroughput(
                                     batch_formation_throughput=1,
                                     preprocess_throughput=1,
                                     inference_throughput=1,
                                     postprocess_throughput=1,
                                 ))
    # check update
    assert ModelService.update_dynamic_profiling_result(model.id, dpr)
    # check result
    model = ModelService.get_models('ResNet50')[0]
    assert model.profile_result.dynamic_results[0].memory.memory_usage == 2000
    assert model.profile_result.dynamic_results[0].latency.inference_latency.p50 == 2
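The snippets on this page assume that the service layer and business objects (ModelService, DynamicProfileResultBO, ProfileMemory, ProfileLatency, ProfileThroughput, InfoTuple) are importable from the surrounding project. If you want to try a snippet in isolation, a minimal stand-in for InfoTuple, inferred purely from the fields used above (avg, p50, p95, p99) and not taken from the real class, might look like this:

from collections import namedtuple

# Hypothetical stand-in for the project's InfoTuple; the field names are
# inferred from the examples on this page, not from the real class.
InfoTuple = namedtuple('InfoTuple', ['avg', 'p50', 'p95', 'p99'])

dummy_info_tuple = InfoTuple(avg=1, p50=1, p95=1, p99=1)
assert dummy_info_tuple.p50 == 1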
Example No. 2
    def diagnose(self,
                 batch_size: int = None,
                 device='cuda',
                 timeout=30) -> DynamicProfileResultBO:
        """Start diagnosing and profiling model.

        Args:
            batch_size (int): Batch size.
            device (str): Device name.
            timeout (float): Waiting for docker container timeout in second. Default timeout period is 30s.
        """
        model_status = False
        retry_time = 0  # retry counter for binary exponential backoff
        tick = time.time()
        while time.time() - tick < timeout:
            if self.inspector.check_model_status():
                model_status = True
                break
            retry_time += 1
            # binary exponential backoff: sleep a uniformly random number
            # of 1 ms slots in [0, 2**retry_time - 1], converted to seconds
            backoff_time = random.randint(0, 2**retry_time - 1) * 1e-3
            time.sleep(backoff_time)

        if not model_status:  # the model is still not served after the timeout
            raise ServiceException('Model not served!')

        if batch_size is not None:
            self.inspector.set_batch_size(batch_size)

        result = self.inspector.run_model(server_name=self.server_name,
                                          device=device)

        dpr_bo = DynamicProfileResultBO(
            ip=get_ip(),
            device_id=result['device_id'],
            device_name=result['device_name'],
            batch=result['batch_size'],
            memory=ProfileMemory(
                total_memory=result['total_gpu_memory'],
                memory_usage=result['gpu_memory_used'],
                utilization=result['gpu_utilization'],
            ),
            latency=ProfileLatency(inference_latency=result['latency']),
            throughput=ProfileThroughput(
                inference_throughput=result['total_throughput']),
            create_time=result['completed_time'],
        )

        return dpr_bo
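The waiting loop in diagnose polls the model server with binary exponential backoff until the model is served or the timeout expires. A self-contained sketch of that pattern is shown below; probe stands in for self.inspector.check_model_status and is an assumption for illustration only:

import random
import time

def wait_until_served(probe, timeout=30.0):
    # Poll `probe` until it returns True or `timeout` seconds elapse.
    retry_time = 0
    tick = time.time()
    while time.time() - tick < timeout:
        if probe():
            return True
        retry_time += 1
        # binary exponential backoff: sleep a uniform number of 1 ms slots
        time.sleep(random.randint(0, 2 ** retry_time - 1) * 1e-3)
    return False

# e.g. wait_until_served(lambda: check_model_status(), timeout=30)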
Example No. 3
def test_register_dynamic_profiling_result():
    model = ModelService.get_models('ResNet50')[0]
    dummy_info_tuple = InfoTuple(avg=1, p50=1, p95=1, p99=1)
    dpr = DynamicProfileResultBO(device_id='gpu:01',
                                 device_name='Tesla K40c',
                                 batch=1,
                                 memory=ProfileMemory(1000, 1000, 0.5),
                                 latency=ProfileLatency(
                                     init_latency=dummy_info_tuple,
                                     preprocess_latency=dummy_info_tuple,
                                     inference_latency=dummy_info_tuple,
                                     postprocess_latency=dummy_info_tuple,
                                 ),
                                 throughput=ProfileThroughput(
                                     batch_formation_throughput=1,
                                     preprocess_throughput=1,
                                     inference_throughput=1,
                                     postprocess_throughput=1,
                                 ))
    # check that the dynamic profiling result is appended successfully
    assert ModelService.append_dynamic_profiling_result(model.id, dpr)
Example No. 4
def test_delete_dynamic_profiling_result():
    model = ModelService.get_models('ResNet50')[0]
    dummy_info_tuple1 = InfoTuple(avg=1, p50=1, p95=1, p99=2)
    dummy_info_tuple2 = InfoTuple(avg=1, p50=1, p95=1, p99=1)

    dpr = DynamicProfileResultBO(device_id='gpu:02',
                                 device_name='Tesla K40c',
                                 batch=1,
                                 memory=ProfileMemory(1000, 1000, 0.5),
                                 latency=ProfileLatency(
                                     init_latency=dummy_info_tuple1,
                                     preprocess_latency=dummy_info_tuple2,
                                     inference_latency=dummy_info_tuple2,
                                     postprocess_latency=dummy_info_tuple2,
                                 ),
                                 throughput=ProfileThroughput(
                                     batch_formation_throughput=1,
                                     preprocess_throughput=1,
                                     inference_throughput=1,
                                     postprocess_throughput=1,
                                 ))
    ModelService.append_dynamic_profiling_result(model.id, dpr)

    # reload
    model = ModelService.get_models('ResNet50')[0]
    dpr_bo = model.profile_result.dynamic_results[0]
    dpr_bo2 = model.profile_result.dynamic_results[1]

    # check delete
    assert ModelService.delete_dynamic_profiling_result(
        model.id, dpr_bo.ip, dpr_bo.device_id)

    # check result
    model = ModelService.get_models('ResNet50')[0]
    assert len(model.profile_result.dynamic_results) == 1

    dpr_left = model.profile_result.dynamic_results[0]
    assert dpr_bo2.latency.init_latency.avg == dpr_left.latency.init_latency.avg