Example #1
0
File: server.py  Project: kevinaer/sparseml
    def predict():
        """Handle one inference request: decode the posted images, run the
        engine, post-process (including NMS), and return serialized results.

        Each stage is timed individually and reported to stdout.
        """
        # Decode the raw request body into a list of numpy arrays.
        payload = flask.request.get_data()
        inputs = bytes_to_arrays(payload)
        print(f"Received {len(inputs)} images from client")

        # Pre-processing: scale uint8 pixels into [0, 1] float32 unless the
        # engine consumes quantized inputs directly.
        stage_start = time.time()
        if not args.quantized_inputs:
            inputs = [inputs[0].astype(numpy.float32) / 255.0]
        preprocess_time = time.time() - stage_start
        print(f"Pre-processing time: {preprocess_time * 1000.0:.4f}ms")

        # Inference via the engine's built-in timer.
        print("Executing model")
        outputs, elapsed_time = engine.timed_run(inputs)
        print(f"Inference time: {elapsed_time * 1000.0:.4f}ms")

        # Post-processing (everything before NMS).
        stage_start = time.time()
        outputs = postprocessor.pre_nms_postprocess(outputs)
        postprocess_time = time.time() - stage_start
        print(f"Post-processing, pre-nms time: {postprocess_time * 1000.0:.4f}ms")

        # Non-maximum suppression.
        stage_start = time.time()
        outputs = postprocess_nms(outputs)
        nms_time = time.time() - stage_start
        print(f"nms time: {nms_time * 1000.0:.4f}ms")

        # Serialize the final detections for the HTTP response.
        return arrays_to_bytes(outputs)
Example #2
0
def annotate(args):
    """Run YOLO inference over a source (image dir / video / webcam),
    draw annotated detections on each frame, and display and/or save them.
    """
    save_dir = _get_save_dir(args)
    model = _load_model(args)
    loader, saver, is_video = get_yolo_loader_and_saver(
        args.source, save_dir, args.image_shape, args)
    # Numeric sources (e.g. "0") are treated as webcam device indices.
    is_webcam = args.source.isnumeric()

    # ONNX-based engines emit raw model outputs that need the pre-NMS
    # postprocessing pass; torch models do not.
    if args.engine in [DEEPSPARSE_ENGINE, ORT_ENGINE]:
        postprocessor = YoloPostprocessor(args.image_shape)
    else:
        postprocessor = None

    for iteration, (inp, source_img) in enumerate(loader):
        # Synchronize before timing so GPU work is fully accounted for.
        if args.device not in ["cpu", None]:
            torch.cuda.synchronize()
        iter_start = time.time()

        # pre-processing
        batch = _preprocess_batch(args, inp)

        # inference
        outputs = _run_model(args, model, batch)

        # post-processing (engine-dependent, see above)
        if postprocessor:
            outputs = postprocessor.pre_nms_postprocess(outputs)

        # NMS -- single image, so take the first (only) result
        outputs = postprocess_nms(outputs)[0]

        if args.device not in ["cpu", None]:
            torch.cuda.synchronize()

        # FPS overlay: user-specified target, or measured throughput;
        # only meaningful for video sources.
        if is_video:
            measured_fps = args.target_fps or (1.0 / (time.time() - iter_start))
        else:
            measured_fps = None
        annotated_img = annotate_image(
            source_img,
            outputs,
            model_input_size=args.image_shape,
            images_per_sec=measured_fps,
        )

        # Live preview for webcam sources.
        if is_webcam:
            cv2.imshow("annotations", annotated_img)
            cv2.waitKey(1)

        # Persist the annotated frame when a saver is configured.
        if saver:
            saver.save_frame(annotated_img)

        iter_end = time.time()
        elapsed_time = 1000 * (iter_end - iter_start)
        _LOGGER.info(f"Inference {iteration} processed in {elapsed_time} ms")

    if saver:
        saver.close()
    _LOGGER.info(f"Results saved to {save_dir}")
Example #3
0
def benchmark_yolo(args):
    """Benchmark end-to-end YOLO throughput (pre-process + inference +
    post-process + NMS), skipping warmup iterations, and print the results.
    """
    model = _load_model(args)
    print("Loading dataset")
    dataset, _ = load_images(args.data_path, tuple(args.image_shape))
    # Warmup iterations run through the same loop but are excluded from
    # the recorded results below.
    total_iterations = args.num_iterations + args.num_warmup_iterations
    data_loader = _iter_batches(dataset, args.batch_size, total_iterations)

    print(
        (f"Running for {args.num_warmup_iterations} warmup iterations "
         f"and {args.num_iterations} benchmarking iterations"),
        flush=True,
    )

    # ONNX-based engines need the pre-NMS postprocessing pass; torch does not.
    if args.engine in [DEEPSPARSE_ENGINE, ORT_ENGINE]:
        postprocessor = YoloPostprocessor(args.image_shape)
    else:
        postprocessor = None

    results = BenchmarkResults()
    # Progress counts only the measured (non-warmup) iterations.
    progress_bar = tqdm(total=args.num_iterations)

    for iteration, batch in enumerate(data_loader):
        # Synchronize before/after timing so GPU work is fully accounted for.
        if args.device not in ["cpu", None]:
            torch.cuda.synchronize()
        iter_start = time.time()

        # pre-processing
        batch = _preprocess_batch(args, batch)

        # inference
        outputs = _run_model(args, model, batch)

        # post-processing (engine-dependent, see above)
        if postprocessor:
            outputs = postprocessor.pre_nms_postprocess(outputs)

        # NMS
        outputs = postprocess_nms(outputs)

        if args.device not in ["cpu", None]:
            torch.cuda.synchronize()
        iter_end = time.time()

        # Record only post-warmup iterations.
        if iteration >= args.num_warmup_iterations:
            results.append_batch(
                time_start=iter_start,
                time_end=iter_end,
                batch_size=args.batch_size,
            )
            progress_bar.update(1)

    progress_bar.close()

    print(f"Benchmarking complete. End-to-end results:\n{results}")

    print(
        f"End-to-end per image time: {results.ms_per_batch / args.batch_size}ms"
    )