Code example #1
import cv2 as cv
import time

from openvino.inference_engine import IECore

face_xml = "./intel/face-detection-0205/FP16-INT8/face-detection-0205.xml"
face_bin = "./intel/face-detection-0205/FP16-INT8/face-detection-0205.bin"

ie = IECore()
for device in ie.available_devices:
    print(device)

# Read IR
net = ie.read_network(model=face_xml, weights=face_bin)

input_blob = next(iter(net.input_info))
out_blob = next(iter(net.outputs))

# Input setup
n, c, h, w = net.input_info[input_blob].input_data.shape

# Create the executable network on the target device
exec_net = ie.load_network(network=net, device_name="CPU")

# cap = cv.VideoCapture("./people-detection.mp4")
cap = cv.VideoCapture(0)
while True:

    inf_start = time.time()
    ret, src = cap.read()
    if not ret:
        break
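    # --- The original snippet ends here; the rest of the loop below is a
    # --- minimal sketch, assuming the usual resize/transpose/infer pattern
    # --- used in the other examples (output parsing for face-detection-0205
    # --- is model-specific and left as a placeholder).
    frame = cv.resize(src, (w, h))
    frame = frame.transpose((2, 0, 1)).reshape((n, c, h, w))  # HWC -> NCHW
    res = exec_net.infer(inputs={input_blob: frame})
    detections = res[out_blob]  # parse according to the model's output layout

    inf_time = time.time() - inf_start
    cv.putText(src, "FPS: {:.1f}".format(1.0 / inf_time), (10, 30),
               cv.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv.imshow("face detection", src)
    if cv.waitKey(1) == 27:  # Esc quits
        break

cap.release()
cv.destroyAllWindows()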
Code example #2
def test_batch_size_getter():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    assert net.batch_size == 1
Code example #3
def test_name():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    assert net.name == "test_model"
Code example #4
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for specified device and load extensions library if specified
    #plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir)
    ie = IECore()
    #print(dir(IEPlugin))
    if args.cpu_extension and 'CPU' in args.device:
        #plugin.add_cpu_extension(args.cpu_extension)
        ie.add_extension(args.cpu_extension, args.device)
    else:
        ie.set_config({"PERF_COUNT": "YES"}, "GPU")
    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if args.device == "CPU":
        supported_layers = ie.query_network(net, args.device)
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)

    assert len(
        net.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(
        net.outputs) == 1, "Sample supports only single output topologies"

    log.info("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))

    net.batch_size = len(args.input)

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape
    print("size is", n)
    images = np.ndarray(shape=(n, c, h, w))
    for i in range(n):
        image = cv2.imread(args.input[i])
        if image.shape[:-1] != (h, w):
            log.warning("Image {} is resized from {} to {}".format(
                args.input[i], image.shape[:-1], (h, w)))
            image = cv2.resize(image, (w, h))
        image = image.transpose(
            (2, 0, 1))  # Change data layout from HWC to CHW
        images[i] = image
    log.info("Batch size is {}".format(n))

    # Loading model to the plugin
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net,
                               num_requests=2,
                               device_name=args.device)

    del net
    # Start sync inference
    log.info("Starting inference ({} iterations)".format(args.number_iter))
    infer_time = []
    for i in range(args.number_iter):
        t0 = time()
        res = exec_net.infer(inputs={input_blob: images})
        infer_time.append((time() - t0) * 1000)
    # Processing output blob
    log.info("Processing output blob")
    res = res[out_blob]
    log.info("Top {} results: ".format(args.number_top))
    args.labels = "models/squeezenet/FP32/squeezenet1.1.labels"
    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.split(sep=' ', maxsplit=1)[-1].strip() for x in f]
    else:
        labels_map = None
    for i, probs in enumerate(res):
        probs = np.squeeze(probs)
        top_ind = np.argsort(probs)[-args.number_top:][::-1]
        print("Image {}\n".format(args.input[i]))
        for class_id in top_ind:
            det_label = labels_map[class_id] if labels_map else "#{}".format(class_id)
            print("{:<5}{:.7f} label {}".format(class_id, probs[class_id], det_label))
        print("\n")
    total_inference = np.sum(np.asarray(infer_time))
    log.info("Average running time of one iteration: {} ms".format(
        np.average(np.asarray(infer_time))))
    log.info("total running time of inference: {} ms".format(total_inference))
    log.info("Throughput: {} FPS".format(
        (1000 * args.number_iter * n) / total_inference))
    print("\n")

    #printing performance counts
    exec_net.requests[0].infer({input_blob: images[0]})
    if args.perf_counts:

        perf_counts = exec_net.requests[0].get_perf_counts()
        print("performance counts:\n")
        total = 0
        for layer, stats in perf_counts.items():
            total += stats['real_time']
            print(
                "{:<40} {:<15} {:<10} {:<15} {:<8} {:<5} {:<5} {:<5} {:<10} {:<15}"
                .format(layer, stats['status'], 'layerType:',
                        stats['layer_type'], 'realTime:', stats['real_time'],
                        'cpu:', stats['cpu_time'], 'execType:',
                        stats['exec_type']))
            print("{:<20} {:<7} {:<20}".format('TotalTime:', total,
                                               'microseconds'))
    log.info("Execution successful")

    del exec_net
    del ie
Code example #5
class Model_FacialLandmarkDetection:
    '''
    Class for the Facial Landmark Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
	    DONE
        '''
        self.plugin = None
        self.network = None
        self.device = device
        self.extensions = extensions
        self.output_path = "../outputs/"

        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'

        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception as e:
            raise ValueError(
                "Could not Initialise the network. Have you enterred the correct model path?"
            )

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
	    DONE
        '''
        self.plugin = IECore()

        if not self.check_model():
            logging.info(
                "Checking whether extensions are available to add to IECore..."
            )

            if self.extensions is not None and "CPU" in self.device:
                self.plugin.add_extension(self.extensions, self.device)
                logging.info("Extension added.")
            else:
                logging.error("No extensions available. Exiting with error.")
                exit(1)

        t_0 = time.time()
        self.network = self.plugin.load_network(network=self.model,
                                                device_name=self.device)
        t_1 = time.time()

        with open(os.path.join(self.output_path, 'facial_landmarks.txt'),
                  'w') as f:
            f.write("model_load_time: ")
            f.write(str(t_1 - t_0) + '\n')

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
	    DONE
        '''
        # preprocess and prepare input
        p_image = self.preprocess_input(image)
        input_dict = {self.input_name: p_image}

        # run inference
        t_0 = time.time()
        result = self.network.infer(input_dict)
        t_1 = time.time()

        with open(os.path.join(self.output_path, 'facial_landmarks.txt'),
                  'a') as f:
            #f.write("inference_time: ")
            f.write(str(t_1 - t_0) + '\n')

        # extract the useful tensor
        outputs = result[self.output_name]

        # get the eye-boxes
        eyes, eye_coords = self.preprocess_output(outputs, image)

        return eyes, eye_coords

    def check_model(self):
        # check for unsupported layers
        supported_layers = self.plugin.query_network(network=self.model,
                                                     device_name=self.device)
        unsupported_layers = [
            l for l in self.model.layers.keys() if l not in supported_layers
        ]

        if len(unsupported_layers) != 0:
            logging.info(
                "Model_FacialLandmarkDetection - Unsupported layers found: {}".
                format(unsupported_layers))
            return False

        return True

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        input_model_width = self.input_shape[3]
        input_model_height = self.input_shape[2]

        p_image = cv2.resize(image, (input_model_width, input_model_height))
        p_image = p_image.transpose((2, 0, 1))
        p_image = p_image.reshape(1, *p_image.shape)

        return p_image

    def preprocess_output(self, outputs, image):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        # get width and height of the original image
        original_width = image.shape[1]
        original_height = image.shape[0]

        # shape of outputs is (1, 10, 1, 1); reshape it
        outputs = outputs.reshape(-1)

        eye_boxes = []
        eye_coords = []
        pix = 15

        # get the left eye
        x_l = int(outputs[0] * original_width)
        y_l = int(outputs[1] * original_height)

        # get the right eye
        x_r = int(outputs[2] * original_width)
        y_r = int(outputs[3] * original_height)

        # left eye box
        left_eye = image[(y_l - pix):(y_l + pix),
                         (x_l - pix):(x_l +
                                      pix)]  # to crop image[y_range, x_range]
        eye_boxes.append(left_eye)
        eye_coords.append([x_l, y_l])
        #cv2.rectangle(image, (x_l - pix, y_l - pix), (x_l + pix, y_l + pix), (0, 55, 255), 1)

        # right eye box
        right_eye = image[(y_r - pix):(y_r + pix),
                          (x_r - pix):(x_r +
                                       pix)]  # to crop image[y_range, x_range]
        eye_boxes.append(right_eye)
        eye_coords.append([x_r, y_r])
        #cv2.rectangle(image, (x_r - pix, y_r - pix), (x_r + pix, y_r + pix), (0, 55, 255), 1)

        return eye_boxes, eye_coords
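A minimal usage sketch for the class above; the model path, input image, and the cv2 import are assumptions, not part of the original:

import cv2

# the constructor appends .xml/.bin, so pass the IR path without an extension
# (hypothetical path)
fld = Model_FacialLandmarkDetection("models/landmarks-regression-retail-0009")
fld.load_model()

face = cv2.imread("face.jpg")         # hypothetical face crop
eyes, eye_coords = fld.predict(face)  # eye crops and [x, y] eye centers
print("eye centers:", eye_coords)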
Code example #6
    def __init__(self, backend_name: str) -> None:
        self.backend_name = backend_name
        log.debug("Creating Inference Engine for %s", backend_name)
        self.backend = IECore()
        assert backend_name in self.backend.available_devices, (
            'The requested device "' + backend_name + '" is not supported!')
Code example #7
def main():
    args = build_argparser().parse_args()

    logging.basicConfig(format="[ %(levelname)s ] %(message)s",
                        level=logging.INFO,
                        stream=sys.stdout)
    log = logging.getLogger()

    log.info("Creating Inference Engine")
    ie = IECore()

    if args.device == "CPU" and args.cpu_extension:
        ie.add_extension(args.cpu_extension, 'CPU')

    log.info("Loading model {}".format(args.model))
    net = ie.read_network(args.model, args.model[:-4] + ".bin")

    if len(net.input_info) != 1:
        log.error("Demo supports only models with 1 input layer")
        sys.exit(1)
    input_blob = next(iter(net.input_info))
    input_shape = net.input_info[input_blob].input_data.shape
    if len(net.outputs) != 1:
        log.error("Demo supports only models with 1 output layer")
        sys.exit(1)
    output_blob = next(iter(net.outputs))

    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    log.info("Preparing input")

    labels = []
    if args.labels:
        with open(args.labels, "r") as file:
            labels = [l.rstrip() for l in file.readlines()]

    batch_size, channels, one, length = input_shape
    if one != 1:
        raise RuntimeError(
            "Wrong third dimension size of model input shape - {} (expected 1)"
            .format(one))

    audio = AudioSource(args.input,
                        channels=channels,
                        samplerate=args.sample_rate)

    hop = length - args.overlap if isinstance(args.overlap, int) else int(
        length * (1.0 - args.overlap))
    if hop < 0:
        log.error(
            "Wrong value for '-ol/--overlap' argument - overlapping more than clip length"
        )
        sys.exit(1)

    log.info("Starting inference")
    outputs = []
    clips = 0
    infer_time = 0
    for idx, chunk in enumerate(
            audio.chunks(length, hop, num_chunks=batch_size)):
        chunk.shape = input_shape
        infer_start_time = time.perf_counter()
        output = exec_net.infer(inputs={input_blob: chunk})
        infer_time += time.perf_counter() - infer_start_time
        clips += batch_size
        output = output[output_blob]
        for batch, data in enumerate(output):
            start_time = (idx * batch_size + batch) * hop / audio.samplerate
            end_time = (
                (idx * batch_size + batch) * hop + length) / audio.samplerate
            outputs.append(data)
            label = np.argmax(data)
            if start_time < audio.duration():
                log.info("[{:.2f}-{:.2f}] - {:6.2%} {:s}".format(
                    start_time, end_time, data[label],
                    labels[label] if labels else "Class {}".format(label)))

    logging.info("Average infer time - {:.1f} ms per clip".format(
        infer_time / clips * 1000))
Code example #8
class FaceDetection:
    """
    Class for the Face Detection Model.
    """
    def __init__(self, model_name, device='CPU', extensions=None):
        self.net = None
        self.plugin = None
        self.input_blob = None
        self.out_blob = None
        self.exec_net = None
        self.model_name = model_name
        self.extensions = extensions
        self.device = device

    def load_model(self):
        """
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        """

        # Fetch XML model
        model_xml = self.model_name
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        self.plugin = IECore()
        self.net = IENetwork(model=model_xml, weights=model_bin)

        # Add CPU extension to self.plugin and check not supported layers
        if "CPU" in self.device:
            supported_layers = self.plugin.query_network(self.net, self.device)
            not_supported_layers = [
                layer for layer in self.net.layers.keys()
                if layer not in supported_layers
            ]

            if len(not_supported_layers) != 0 and self.device == 'CPU':
                logging.error(f"Unsupported layers: {not_supported_layers}")
                print(f"Not supported layers: {not_supported_layers}")

        # Load model in network
        start_time = time.time()
        self.exec_net = self.plugin.load_network(network=self.net,
                                                 device_name=self.device,
                                                 num_requests=1)
        end_time = time.time()

        # Obtain blob info from network
        self.input_blob = next(iter(self.net.inputs))
        self.out_blob = next(iter(self.net.outputs))

        print(f"Face Detection Model Loading Time: {end_time - start_time}")
        logging.info(
            f"Face Detection Model Loading Time: {end_time - start_time}")

    def predict(self, image, visualize):
        """
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        """
        # org_img = image.copy
        preprocessed_image = self.preprocess_input(image)
        # infer image
        outputs = self.exec_net.infer({self.input_blob: preprocessed_image})

        coords = self.preprocess_output(outputs)

        if len(coords) == 0:
            logging.warning("No face found in video or image")
            return 0, 0

        coords = coords[0]  # take the first detected face
        height = image.shape[0]
        width = image.shape[1]
        coords = coords * np.array([width, height, width, height])
        coords = coords.astype(np.int32)

        cropped_face = image[coords[1]:coords[3], coords[0]:coords[2]]
        cv2.rectangle(image, (coords[0], coords[1]), (coords[2], coords[3]),
                      (255, 12, 12), 2)

        if visualize:

            # Save Image
            # cv2.imwrite('../output/face_detection1.jpg', cropped_face)
            # cv2.rectangle(image, (coords[0], coords[1]), (coords[2], coords[3]), (255, 12, 12), 2)
            cv2.imshow("Face detected", image)
            cv2.waitKey(0)
        else:
            logging.info("Visualization is off so image is not visible")
        return cropped_face, coords

    def preprocess_input(self, image):
        """
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        """
        preprocessed_image = cv2.resize(image, (672, 384))
        preprocessed_image = preprocessed_image.transpose((2, 0, 1))
        preprocessed_image = preprocessed_image.reshape(1, 3, 384, 672)

        return preprocessed_image

    def preprocess_output(self, outputs):
        """
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        """
        coords = []
        outs = outputs[self.out_blob][0][0]
        logging.info(f"Total {len(outs)} face found")
        for out in outs:
            confidence = out[2]
            if confidence > 0.5:  # args.threshold:
                x_min = out[3]
                y_min = out[4]
                x_max = out[5]
                y_max = out[6]
                coords.append([x_min, y_min, x_max, y_max])
        logging.info(f"Face coordinates: {coords}")

        return coords
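A hedged usage sketch for the FaceDetection class; the model path and input image are placeholders:

import cv2

fd = FaceDetection("models/face-detection-adas-0001.xml")  # hypothetical IR path
fd.load_model()

image = cv2.imread("input.jpg")                            # hypothetical input
cropped_face, coords = fd.predict(image, visualize=False)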
Code example #9
    def load_models(self, pd_xml, pd_device, lm_xml, lm_device):

        print("Loading Inference Engine")
        self.ie = IECore()
        print("Device info:")
        versions = self.ie.get_versions(pd_device)
        print("{}{}".format(" " * 8, pd_device))
        print("{}MKLDNNPlugin version ......... {}.{}".format(
            " " * 8, versions[pd_device].major, versions[pd_device].minor))
        print("{}Build ........... {}".format(
            " " * 8, versions[pd_device].build_number))

        # Pose detection model
        pd_name = os.path.splitext(pd_xml)[0]
        pd_bin = pd_name + '.bin'
        print(
            "Pose Detection model - Reading network files:\n\t{}\n\t{}".format(
                pd_xml, pd_bin))
        self.pd_net = self.ie.read_network(model=pd_xml, weights=pd_bin)
        # Input blob: input - shape: [1, 3, 128, 128]
        # Output blob: classificators - shape: [1, 896, 1] : scores
        # Output blob: regressors - shape: [1, 896, 12] : bboxes
        self.pd_input_blob = next(iter(self.pd_net.input_info))
        print(
            f"Input blob: {self.pd_input_blob} - shape: {self.pd_net.input_info[self.pd_input_blob].input_data.shape}"
        )
        _, _, self.pd_h, self.pd_w = self.pd_net.input_info[
            self.pd_input_blob].input_data.shape
        for o in self.pd_net.outputs.keys():
            print(f"Output blob: {o} - shape: {self.pd_net.outputs[o].shape}")
        self.pd_scores = "classificators"
        self.pd_bboxes = "regressors"
        print("Loading pose detection model into the plugin")
        self.pd_exec_net = self.ie.load_network(network=self.pd_net,
                                                num_requests=1,
                                                device_name=pd_device)
        self.pd_infer_time_cumul = 0
        self.pd_infer_nb = 0

        self.infer_nb = 0
        self.infer_time_cumul = 0

        # Landmarks model
        if lm_device != pd_device:
            print("Device info:")
            versions = self.ie.get_versions(lm_device)
            print("{}{}".format(" " * 8, lm_device))
            print("{}MKLDNNPlugin version ......... {}.{}".format(
                " " * 8, versions[lm_device].major, versions[lm_device].minor))
            print("{}Build ........... {}".format(
                " " * 8, versions[lm_device].build_number))

        lm_name = os.path.splitext(lm_xml)[0]
        lm_bin = lm_name + '.bin'
        print("Landmark model - Reading network files:\n\t{}\n\t{}".format(
            lm_xml, lm_bin))
        self.lm_net = self.ie.read_network(model=lm_xml, weights=lm_bin)
        # Input blob: input_1 - shape: [1, 3, 256, 256]
        # Output blob: ld_3d - shape: [1, 195]  for full body or [1, 155] for upper body
        # Output blob: output_poseflag - shape: [1, 1]
        # Output blob: output_segmentation - shape: [1, 1, 128, 128]
        self.lm_input_blob = next(iter(self.lm_net.input_info))
        print(
            f"Input blob: {self.lm_input_blob} - shape: {self.lm_net.input_info[self.lm_input_blob].input_data.shape}"
        )
        _, _, self.lm_h, self.lm_w = self.lm_net.input_info[
            self.lm_input_blob].input_data.shape
        for o in self.lm_net.outputs.keys():
            print(f"Output blob: {o} - shape: {self.lm_net.outputs[o].shape}")
        self.lm_score = "output_poseflag"
        self.lm_segmentation = "output_segmentation"
        self.lm_landmarks = "ld_3d"
        print("Loading landmark model to the plugin")
        self.lm_exec_net = self.ie.load_network(network=self.lm_net,
                                                num_requests=1,
                                                device_name=lm_device)
        self.lm_infer_time_cumul = 0
        self.lm_infer_nb = 0
Code example #10
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for specified device and load extensions library if specified
    log.info("Creating Inference Engine")
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                      format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    assert len(net.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(net.outputs) == 1, "Sample supports only single output topologies"

    log.info("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    net.batch_size = len(args.input)

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape
    images = np.ndarray(shape=(n, c, h, w))
    for i in range(n):
        image = cv2.imread(args.input[i])
        if image.shape[:-1] != (h, w):
            log.warning("Image {} is resized from {} to {}".format(args.input[i], image.shape[:-1], (h, w)))
            image = cv2.resize(image, (w, h))
        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        images[i] = image
    log.info("Batch size is {}".format(n))

    # Loading model to the plugin
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    # Start sync inference
    log.info("Starting inference in synchronous mode")
    res = exec_net.infer(inputs={input_blob: images})

    # Processing output blob
    log.info("Processing output blob")
    res = res[out_blob]
    # res = out_blob

    for batch, data in enumerate(res):
        data = np.swapaxes(data, 0, 2)
        data = np.swapaxes(data, 0, 1)
        data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
        data[data < 0] = 0
        data[data > 255] = 255
        # data = data[::] - (args.mean_val_r, args.mean_val_g, args.mean_val_b)
        # out_img = os.path.join(os.path.dirname(__file__), "out_{}.bmp".format(batch))
        res_name = os.path.basename(args.input[batch]).split(".")[0] + "_output.jpg"
        output_dir = "/home/zhu/PycharmProjects/denoise_cnn/openvion/model_IR/20200924"
        out_img = os.path.join(output_dir, res_name)
        cv2.imwrite(out_img, data)
        log.info("Result image was saved to {}".format(out_img))
Code example #11
class Network:
    '''
    Load and store information for working with the Inference Engine,
    and any loaded models.
    '''

    def __init__(self):
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_net = None
        self.infer_request = None


    def load_model(self, model, device="CPU", cpu_extension=None):
        '''
        Load the model given IR files.
        Defaults to CPU as device for use in the workspace.
        Synchronous requests made within.
        '''
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        # Initialize the plugin
        self.plugin = IECore()

        # Add a CPU extension, if applicable
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, device)

        # Read the IR as an IENetwork
        self.network = IENetwork(model=model_xml, weights=model_bin)

        # Load the IENetwork into the plugin
        self.exec_net = self.plugin.load_network(self.network, device)

        # Get the input layer
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))

        return


    def get_input_shape(self):
        '''
        Gets the input shape of the network
        '''
        return self.network.inputs[self.input_blob].shape


    def async_inference(self, image):
        '''
        Makes an asynchronous inference request, given an input image.
        '''
        self.exec_net.start_async(request_id=0, 
            inputs={self.input_blob: image})
        return


    def wait(self):
        '''
        Checks the status of the inference request.
        '''
        status = self.exec_net.requests[0].wait(-1)
        return status


    def extract_output(self):
        '''
        Returns a list of the results for the output layer of the network.
        '''
        return self.exec_net.requests[0].outputs[self.output_blob]
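A minimal sketch of the asynchronous flow this Network class implements; the IR path is a placeholder and the input is a dummy tensor:

import numpy as np

net = Network()
net.load_model("model.xml", device="CPU")         # hypothetical IR path
n, c, h, w = net.get_input_shape()

dummy = np.zeros((n, c, h, w), dtype=np.float32)  # stand-in preprocessed input
net.async_inference(dummy)
if net.wait() == 0:                               # 0 means the request completed
    output = net.extract_output()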
Code example #12
class AgeGenderRecognitionModel:
    def __init__(self,
                 model_name,
                 device='CPU',
                 extensions=None,
                 num_requests=1):

        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name
        self.model_weights = self.model_name.split('.')[0] + '.bin'
        self.num_requests = num_requests
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.is_sync = None

    def load_model(self):

        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure,
                                                weights=self.model_weights)
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        unsupported_layers = [
            l for l in self.network.layers.keys() if l not in supported_layers
        ]

        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("unsupported layers found:{}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding cpu_extension")
                self.plugin.add_extension(self.extensions, self.device)
                supported_layers = self.plugin.query_network(
                    network=self.network, device_name=self.device)
                unsupported_layers = [
                    l for l in self.network.layers.keys()
                    if l not in supported_layers
                ]
                if len(unsupported_layers) != 0:
                    print(
                        "After adding the extension still unsupported layers found"
                    )
                    exit(1)
                print("After adding the extension the issue is resolved")
            else:
                print("Give the path of cpu extension")
                exit(1)

        self.exec_net = self.plugin.load_network(
            network=self.network,
            device_name=self.device,
            num_requests=self.num_requests)
        if self.num_requests == 1:
            self.is_sync = True
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = [i for i in self.network.outputs]

    def predict(self, image, cur_req_id=None, next_req_id=None):
        img_processed = self.preprocess_input(image.copy())
        if self.is_sync:
            outputs = self.exec_net.infer({self.input_name: img_processed})
            age, gender = self.preprocess_output(outputs)

            return age, gender, True
        self.exec_net.start_async(request_id=next_req_id,
                                  inputs={self.input_name: img_processed})
        if self.exec_net.requests[cur_req_id].wait() == 0:
            outputs = self.exec_net.requests[cur_req_id].outputs
            age, gender = self.preprocess_output(outputs)

            return age, gender, True
        return None, None, False

    def check_model(self):
        pass

    def preprocess_input(self, image):

        image_resized = cv2.resize(image,
                                   (self.input_shape[3], self.input_shape[2]))
        img_processed = np.transpose(np.expand_dims(image_resized, axis=0),
                                     (0, 3, 1, 2))
        return img_processed

    def preprocess_output(self, outputs):

        age = outputs[self.output_names[0]][0][0][0][0] * 100
        gender = np.argmax(outputs[self.output_names[1]])  #0:female 1:male
        return age, gender
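A hedged usage sketch for AgeGenderRecognitionModel in its synchronous mode (num_requests=1); the model path and face crop are placeholders:

import cv2

model = AgeGenderRecognitionModel("age-gender-recognition-retail-0013.xml")  # hypothetical path
model.load_model()

face = cv2.imread("face.jpg")          # hypothetical face crop
age, gender, ok = model.predict(face)
if ok:
    # per the comment in preprocess_output above: 0 = female, 1 = male
    print("age: {:.0f}, gender: {}".format(age, "male" if gender == 1 else "female"))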
Code example #13
class Network:
    """
    Load and configure inference plugins for the specified target devices 
    and performs synchronous and asynchronous modes for the specified infer requests.
    """

    def __init__(self):
        ### TODO: Initialize any class variables desired ###
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None
    def load_model(self, model, num_req, device="CPU", cpu_extension=None):

        ### TODO: Load the model ###
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        ### TODO: Check for supported layers ###
        ### TODO: Add any necessary extensions ###
        ### TODO: Return the loaded inference plugin ###
        self.plugin = IECore()

        # Add a CPU extension, if applicable
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, device)

        # Read the IR as an IENetwork
        self.network = IENetwork(model=model_xml, weights=model_bin)

        # Load the IENetwork into the plugin
        if num_req == 0:
            self.exec_network = self.plugin.load_network(self.network, device)
        else:
            self.exec_network = self.plugin.load_network(self.network, device,
                                                         num_requests=num_req)

        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))

        return self.plugin
        ### Note: You may need to update the function parameters. ###

    def get_input_shape(self):
        ### TODO: Return the shape of the input layer ###
        return self.network.inputs[self.input_blob].shape
    
    def exec_net(self, image):
        ### TODO: Start an asynchronous request ###
        self.exec_network.start_async(request_id=0, inputs={self.input_blob: image})
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
        return self.plugin

    def wait(self, request_id):
        ### TODO: Wait for the request to be complete. ###
        status = self.exec_network.requests[request_id].wait(-1)
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
        return status

    def get_output(self, request_id, output=None):
        ### TODO: Extract and return the output results
        ### Note: You may need to update the function parameters. ###
        if output:
            res = self.exec_network.requests[request_id].outputs[output]
        else:
            res = self.exec_network.requests[request_id].outputs[self.output_blob]
        return res
Code example #14
class GazeEstimationModel:
    '''
    Class for the Gaze Estimation Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name
        self.model_weights = self.model_name.split(".")[0]+'.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.output_shape = None

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure, weights=self.model_weights)
        supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
        unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]

        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("unsupported layers found:{}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding cpu_extension")
                self.plugin.add_extension(self.extensions, self.device)
                supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
                unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
                if len(unsupported_layers) != 0:
                    print("After adding the extension still unsupported layers found")
                    exit(1)
                print("After adding the extension the issue is resolved")
            else:
                print("Give the path of cpu extension")
                exit(1)
                
        self.exec_net = self.plugin.load_network(network=self.network, device_name=self.device, num_requests=1)
        
        self.input_name = [i for i in self.network.inputs.keys()]
        self.input_shape = self.network.inputs[self.input_name[1]].shape
        self.output_names = [i for i in self.network.outputs.keys()]

        
    def predict(self, left_eye_image, right_eye_image, hpa):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        le_img_processed, re_img_processed = self.preprocess_input(left_eye_image.copy(), right_eye_image.copy())
        outputs = self.exec_net.infer({'head_pose_angles': hpa, 'left_eye_image': le_img_processed, 'right_eye_image': re_img_processed})
        new_mouse_coord, gaze_vector = self.preprocess_output(outputs, hpa)

        return new_mouse_coord, gaze_vector

    def check_model(self):
        pass

    def preprocess_input(self, left_eye, right_eye):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        le_image_resized = cv2.resize(left_eye, (self.input_shape[3], self.input_shape[2]))
        re_image_resized = cv2.resize(right_eye, (self.input_shape[3], self.input_shape[2]))
        le_img_processed = np.transpose(np.expand_dims(le_image_resized, axis=0), (0, 3, 1, 2))
        re_img_processed = np.transpose(np.expand_dims(re_image_resized, axis=0), (0, 3, 1, 2))
        return le_img_processed, re_img_processed
            

    def preprocess_output(self, outputs, hpa):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        
        gaze_vector = outputs[self.output_names[0]].tolist()[0]
        #gaze_vector = gaze_vector / cv2.norm(gaze_vector)
        rollValue = hpa[2]  # angle_r_fc output from HeadPoseEstimation model
        cosValue = math.cos(rollValue * math.pi / 180.0)
        sinValue = math.sin(rollValue * math.pi / 180.0)

        newx = gaze_vector[0] * cosValue + gaze_vector[1] * sinValue
        newy = -gaze_vector[0] * sinValue + gaze_vector[1] * cosValue
        return (newx, newy), gaze_vector
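A minimal usage sketch for GazeEstimationModel; the model path, eye crops, and head-pose angles are stand-ins:

import numpy as np

gaze = GazeEstimationModel("gaze-estimation-adas-0002.xml")  # hypothetical IR path
gaze.load_model()

left_eye = np.zeros((60, 60, 3), dtype=np.uint8)   # stand-in eye crops from an
right_eye = np.zeros((60, 60, 3), dtype=np.uint8)  # upstream landmark model
hpa = [0.0, 0.0, 0.0]                              # yaw, pitch, roll from a head-pose model
mouse_coord, gaze_vector = gaze.predict(left_eye, right_eye, hpa)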
Code example #15
class OpenVINO_Core:

    def __init__(self):
        self.ie = IECore()
        self.name = ""

        self.asyncInference = True
        self.plugin = None
        self.ieNet = None

        self.result_processor = None

        self.exec_net = None

        devices = []
        for device in self.ie.available_devices:
            if 'MYRIAD' in device:
                if not 'MYRIAD' in devices:
                    devices.append('MYRIAD')
            else:
                if not device in devices:
                    devices.append(device)

        self.devices = devices
        self.outputFormat = Output_Format.Unknown
        self.inputFormat = Input_Format.Unknown
        # self.outputName = None
        self._debug = True
        self.ver_major = 0
        self.ver_minor = 0
        self.ver_build = 0

        self.current_hw = None
        self.current_precision = None
        self.current_model = None
        self.request_slot_curr = 1
        self.request_slot_next = 0

    def reset_engine(self):
        self.name = ""
        self.plugin = None
        self.ieNet = None
        self.result_processor = None

        self.exec_net = None
        self.outputFormat = Output_Format.Unknown
        self.inputFormat = Input_Format.Unknown
        # self.outputName = None
        self.ver_major = 0
        self.ver_minor = 0
        self.ver_build = 0
        self.classLabels = {}

        self.current_hw = None
        self.current_precision = None
        self.current_model = None

    def dump(self, obj):
        print('=================================================')
        for attr in dir(obj):
            print('obj.%s = %r' % (attr, getattr(obj,attr)))
        print('=================================================')

    def get_signature(self):
        if len(self.ie.available_devices) > 0:
            device = self.ie.available_devices[0]
            version = self.ie.get_versions(device)

            if os.getenv('OPENVINO_OBJECT_DETECTION_PYTHON'):
                signature = 'OpenVINO {}.{}.{} in Container'.format(version[device].major,version[device].minor, version[device].build_number)
            else:
                signature = 'OpenVINO {}.{}.{}'.format(version[device].major,version[device].minor, version[device].build_number)
        else:
            signature = 'OpenVINO No Hardware Found'

        return signature


    def load_model(self, xml_file, bin_file, device = "MYRIAD", cpu_extension = None, precision = 'FP16'):
        # N : # of images in batch
        # C : Channel
        # H : Height
        # W : Width
        # Input => HWC
        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        try:
            self.reset_engine()

            p_model = Path(xml_file).resolve()
            self.name = str(Path(p_model.name).stem)

            logging.info('==================================================================')
            logging.info('Loading Model')
            logging.info('    Name      : {}'.format(self.name))
            logging.info('    Target    : {}'.format(device))
            logging.info('    Model     : {}'.format(xml_file))
            logging.info('    Precision : {}'.format(precision))

            version_data = self.ie.get_versions(device)

            self.ver_major = int(version_data[device].major)
            self.ver_minor = int(version_data[device].minor)
            self.ver_build = int(version_data[device].build_number)

            # self.plugin = IEPlugin(device=device)

            # if 'MYRIAD' in device:
            #     #https://docs.openvinotoolkit.org/latest/_docs_IE_DG_supported_plugins_MYRIAD.html
            #     self.plugin.set_config({"VPU_FORCE_RESET": "NO"})

            if self.ie:
                del self.ie
            
            self.ie = IECore()

            if self.ver_major >= 2 and self.ver_minor >= 1 and self.ver_build >= 42025:
                self.ieNet = self.ie.read_network(model = xml_file, weights = bin_file)
            else:
                self.ieNet = IENetwork(model = xml_file, weights = bin_file)

            # process input

            # image_tensor : TensorFlow
            # data         : Caffe

            if len(self.ieNet.inputs) > 2:
                logging.warning('!! Too many inputs.  Not supported')
                return  Model_Flag.LoadError

            # don't touch layers.  Somehow touching layer will cause load failure with Myriad
            # logging.info(' -Layers')
            # logging.info('       Type : {}'.format(self.ieNet.layers[key].type))
            # self.dump(self.ieNet.layers[key])

            logging.info('==================================================================')
            logging.info('Output Blobs')

            for key, blob in self.ieNet.outputs.items():

                logging.info('Output Key    : {}'.format(key))
                logging.info('     Layout   : {}'.format(blob.layout))
                logging.info('      Shape   : {}'.format(blob.shape))
                logging.info('  Precision   : {}'.format(blob.precision))
                # logging.info(' -Layers')
                # logging.info('       Type : {}'.format(self.ieNet.layers[key].type))
                # self.dump(self.ieNet.layers[key])
                # blob.precision = precision

            logging.info('==================================================================')
            logging.info('Input Blobs')

            for key, blob in self.ieNet.inputs.items():

                logging.info('Input Key     : {}'.format(key))
                logging.info('     Layout   : {}'.format(blob.layout))
                logging.info('      Shape   : {}'.format(blob.shape))
                logging.info('  Precision   : {}'.format(blob.precision))
                # blob.precision = precision
            logging.info('>> Loading model to {}'.format(device))

            self.exec_net = self.ie.load_network(network = self.ieNet, device_name = device, num_requests = 2)

            logging.info('<< Model loaded to  {}'.format(device))

            # # touch layers only after we load
            # self.output_blob_key = next(iter(self.ieNet.outputs))

            for key, blob in self.ieNet.outputs.items():

                layer = self.ieNet.layers[key]

                if layer.type == 'DetectionOutput':
                    outputFormat = Output_Format.DetectionOutput
                elif layer.type == 'RegionYolo':
                    outputFormat = Output_Format.RegionYolo
                elif layer.type == 'Convolution':
                    if layer.name == 'Mconv7_stage2_L1' or layer.name == 'Mconv7_stage2_L2':
                        outputFormat = Output_Format.HumanPose
                else:
                    return Model_Flag.Unsupported

            if outputFormat == Output_Format.DetectionOutput:
                if len(self.ieNet.inputs) == 1 and len(self.ieNet.outputs) == 1:
                    # 1 input, 1 output

                    input_key  = next(iter(self.ieNet.inputs))
                    output_key = next(iter(self.ieNet.outputs))

                    layer = self.ieNet.layers[output_key]

                    if layer.type == 'DetectionOutput':
                        outputFormat = Output_Format.DetectionOutput
                    else:
                        return Model_Flag.Unsupported

                    if input_key == 'image_tensor':
                        self.inputFormat = Input_Format.Tensorflow
                    elif input_key == 'image':
                        self.inputFormat = Input_Format.IntelIR
                    elif input_key == 'data':
                        self.inputFormat = Input_Format.Caffe
                    else:
                        self.inputFormat = Input_Format.Other

                    params = self.ieNet.layers[output_key].params
                    input_blob = self.ieNet.inputs[input_key]

                    self.result_processor = Object_Detection_Processor(
                                                model_name = self.name,
                                                input_format = self.inputFormat,
                                                input_key = input_key,
                                                input_shape = input_blob.shape,
                                                input_layout = input_blob.layout,
                                                output_format = outputFormat,
                                                output_key = output_key,
                                                output_params = params)

                elif len(self.ieNet.inputs) == 2 and len(self.ieNet.outputs) == 1:
                    # 2 inputs and 1 output.  Faster RCNN

                    output_key = next(iter(self.ieNet.outputs))

                    layer = self.ieNet.layers[output_key]

                    if layer.type != 'DetectionOutput':
                        return Model_Flag.Unsupported

                    info_key = ""
                    data_key = ""

                    for key, blob in self.ieNet.inputs.items():

                        if key == 'image_info':
                            info_key = key
                        elif key == 'image_tensor':
                            data_key = key

                    if len(info_key) > 0 and len(data_key) > 0:

                        self.inputFormat = Input_Format.Faster_RCNN
                        input_blob = self.ieNet.inputs[data_key]
                        params = self.ieNet.layers[output_key].params

                        self.result_processor = Object_Detection_RCNN_Processor(
                            model_name = self.name,
                            input_format = self.inputFormat,
                            info_key = info_key,
                            data_key = data_key,
                            data_shape = input_blob.shape,
                            data_layout = input_blob.layout,
                            output_format = Output_Format.DetectionOutput,
                            output_key = output_key,
                            output_params = params)
                    else:
                        return Model_Flag.Unsupported

            elif outputFormat == Output_Format.RegionYolo:
                input_key  = next(iter(self.ieNet.inputs))
                input_blob = self.ieNet.inputs[input_key]

                self.inputFormat = Input_Format.Yolo
                self.result_processor = Object_Detection_Yolo_Processor(
                                            model_name = self.name,
                                            input_format = self.inputFormat,
                                            input_key = input_key,
                                            input_shape = input_blob.shape,
                                            input_layout = input_blob.layout,
                                            output_format = Output_Format.RegionYolo)

                for key, blob in self.ieNet.outputs.items():
                    self.result_processor.reshape_data[key] = self.ieNet.layers[self.ieNet.layers[key].parents[0]].shape
                    self.result_processor.set_class_label(self.ieNet.layers[key].params)

                # for key, blob in self.result_processor.reshape_data.items():
                #     print('{} {}'.format(key, blob))


            elif outputFormat == Output_Format.HumanPose:
                input_key  = next(iter(self.ieNet.inputs))
                input_blob = self.ieNet.inputs[input_key]
                self.inputFormat = Input_Format.HumanPose

                self.result_processor = Human_Pose_Processor(
                                            model_name = self.name,
                                            input_format = Input_Format.HumanPose,
                                            input_shape = input_blob.shape,
                                            input_layout = input_blob.layout
                                            )
            return Model_Flag.Loaded

        except Exception as ex:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            traceback.print_exception(exc_type, exc_obj, exc_tb)
            logging.error('!! {0}:{1}() : Exception {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, ex))
            return Model_Flag.LoadError

    def run_inference(self, frame, confidence):
        # if self._debug:
        #     logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        return_frame = frame

        if self.result_processor is None:
            return return_frame

        if self.inputFormat == Input_Format.Faster_RCNN:

            inference_data = self.result_processor.process_for_inference(frame = frame)

            if inference_data:
                if self.asyncInference:
                    self.result_processor.prev_frame = frame
                    self.exec_net.start_async(request_id=self.request_slot_next, inputs={inference_data.data_key : inference_data.image_data, inference_data.info_key : inference_data.image_info})
                    if self.exec_net.requests[self.request_slot_curr].wait(-1) == 0:
                        return_frame = self.result_processor.process_result(self.exec_net.requests[self.request_slot_curr].outputs, self.result_processor.prev_frame, confidence)
                else:
                    self.request_slot_curr = 0
                    self.exec_net.infer(inputs={inference_data.data_key : inference_data.image_data, inference_data.info_key : inference_data.image_info})
                    return_frame = self.result_processor.process_result(self.exec_net.requests[self.request_slot_curr].outputs, frame, confidence)

        elif self.inputFormat == Input_Format.Yolo:

            frame_data, input_key = self.result_processor.process_for_inference(frame = frame)

            if frame_data.size > 0:

                if self.asyncInference:
                    self.result_processor.prev_frame = frame
                    self.result_processor.prev_frame_data = frame_data
                    self.exec_net.start_async(request_id=self.request_slot_next, inputs={input_key : frame_data})
                    if self.exec_net.requests[self.request_slot_curr].wait(-1) == 0:
                        return_frame = self.result_processor.process_result(layers = self.ieNet.layers,
                                                                            results = self.exec_net.requests[self.request_slot_curr].outputs, 
                                                                            frame_data = self.result_processor.prev_frame_data, 
                                                                            frame = self.result_processor.prev_frame, 
                                                                            confidence = confidence)
                else:
                    self.request_slot_curr = 0
                    self.exec_net.infer(inputs={input_key : frame_data})
                    return_frame = self.result_processor.process_result(layers = self.ieNet.layers,
                                                                        results = self.exec_net.requests[self.request_slot_curr].outputs,
                                                                        frame_data = frame_data,
                                                                        frame = frame,
                                                                        confidence = confidence)

        elif self.inputFormat == Input_Format.HumanPose:

            frame_data, input_key = self.result_processor.process_for_inference(frame = frame)

            if frame_data.size > 0:

                if self.asyncInference:
                    self.result_processor.prev_frame = frame
                    self.exec_net.start_async(request_id=self.request_slot_next, inputs={input_key : frame_data})
                    if self.exec_net.requests[self.request_slot_curr].wait(-1) == 0:
                        return_frame = self.result_processor.process_result(self.exec_net.requests[self.request_slot_curr].outputs, self.result_processor.prev_frame, confidence)
                        assert return_frame.size > 0, "Frame Empty"
                else:
                    self.request_slot_curr = 0
                    self.exec_net.infer(inputs={input_key : frame_data})
                    return_frame = self.result_processor.process_result(self.exec_net.requests[self.request_slot_curr].outputs, frame, confidence)

        elif self.inputFormat == Input_Format.Unknown:
            pass

        else:
            # elif self.inputFormat == Input_Format.Tensorflow or self.inputFormat == Input_Format.Caffe or self.inputFormat == Input_Format.IntelIR:
            # SSD/MobileNet Tensorflow models

            frame_data, input_key = self.result_processor.process_for_inference(frame = frame)

            if frame_data.size > 0:

                if self.asyncInference:
                    self.result_processor.prev_frame = frame
                    self.exec_net.start_async(request_id=self.request_slot_next, inputs={input_key : frame_data})
                    if self.exec_net.requests[self.request_slot_curr].wait(-1) == 0:
                        return_frame = self.result_processor.process_result(self.exec_net.requests[self.request_slot_curr].outputs, self.result_processor.prev_frame, confidence)
                        assert return_frame.size > 0, "Frame Empty"
                else:
                    self.request_slot_curr = 0
                    self.exec_net.infer(inputs={input_key : frame_data})
                    return_frame = self.result_processor.process_result(self.exec_net.requests[self.request_slot_curr].outputs, frame, confidence)

        if self.asyncInference:
            self.request_slot_next, self.request_slot_curr = self.request_slot_curr, self.request_slot_next

        return return_frame
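The slot swap at the end of the method above implements a classic two-request double-buffering scheme: each iteration starts an async request in one slot while draining the result of the other. Below is a minimal standalone sketch of the same pattern; exec_net is an ExecutableNetwork created with num_requests=2, and grab_frame, preprocess, postprocess, and input_blob are hypothetical stand-ins, not names from the example above.

# Minimal sketch of the two-slot async pattern used above.
curr_slot, next_slot = 0, 1
prev_frame = None
while True:
    frame = grab_frame()                       # hypothetical frame source
    exec_net.start_async(request_id=next_slot,
                         inputs={input_blob: preprocess(frame)})
    if prev_frame is not None and exec_net.requests[curr_slot].wait(-1) == 0:
        postprocess(exec_net.requests[curr_slot].outputs, prev_frame)
    prev_frame = frame
    # The request just started becomes the one we wait on next iteration
    curr_slot, next_slot = next_slot, curr_slot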
Code Example #16
class BlazeposeOpenvino:
    def __init__(self,
                 input_src=None,
                 pd_xml=POSE_DETECTION_MODEL,
                 pd_device="CPU",
                 pd_score_thresh=0.5,
                 pd_nms_thresh=0.3,
                 lm_xml=FULL_BODY_LANDMARK_MODEL,
                 lm_device="CPU",
                 lm_score_threshold=0.7,
                 full_body=True,
                 use_gesture=False,
                 smoothing=True,
                 filter_window_size=5,
                 filter_velocity_scale=10,
                 show_3d=False,
                 crop=False,
                 multi_detection=False,
                 force_detection=False,
                 output=None):

        self.pd_score_thresh = pd_score_thresh
        self.pd_nms_thresh = pd_nms_thresh
        self.lm_score_threshold = lm_score_threshold
        self.full_body = full_body
        self.use_gesture = use_gesture
        self.smoothing = smoothing
        self.show_3d = show_3d
        self.crop = crop
        self.multi_detection = multi_detection
        self.force_detection = force_detection
        if self.multi_detection:
            print(
                "Warning: with multi-detection, smoothing filter is disabled and pose detection is forced on every frame."
            )
            self.smoothing = False
            self.force_detection = True

        if input_src.endswith('.jpg') or input_src.endswith('.png'):
            self.input_type = "image"
            self.img = cv2.imread(input_src)
            self.video_fps = 25
            video_height, video_width = self.img.shape[:2]
        else:
            self.input_type = "video"
            if input_src.isdigit():
                input_type = "webcam"
                input_src = int(input_src)
            self.cap = cv2.VideoCapture(input_src)
            self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS))
            video_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            video_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print("Video FPS:", self.video_fps)

        # The full body landmark model predicts 39 landmarks.
        # We are interested in the first 35 landmarks:
        # landmarks 1 to 33 correspond to the well documented body parts,
        # the 34th (mid hips) and 35th (a point above the head) are used to predict the ROI of the next frame.
        # Same for the upper body model but with 8 fewer landmarks.
        self.nb_lms = 35 if self.full_body else 27

        if self.smoothing:
            self.filter = mpu.LandmarksSmoothingFilter(filter_window_size,
                                                       filter_velocity_scale,
                                                       (self.nb_lms - 2, 3))

        # Create SSD anchors
        # https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt
        anchor_options = mpu.SSDAnchorOptions(
            num_layers=4,
            min_scale=0.1484375,
            max_scale=0.75,
            input_size_height=128,
            input_size_width=128,
            anchor_offset_x=0.5,
            anchor_offset_y=0.5,
            strides=[8, 16, 16, 16],
            aspect_ratios=[1.0],
            reduce_boxes_in_lowest_layer=False,
            interpolated_scale_aspect_ratio=1.0,
            fixed_anchor_size=True)
        self.anchors = mpu.generate_anchors(anchor_options)
        self.nb_anchors = self.anchors.shape[0]
        print(f"{self.nb_anchors} anchors have been created")

        # Load Openvino models
        self.load_models(pd_xml, pd_device, lm_xml, lm_device)

        # Rendering flags
        self.show_pd_box = False
        self.show_pd_kps = False
        self.show_rot_rect = False
        self.show_landmarks = True
        self.show_scores = False
        self.show_gesture = self.use_gesture
        self.show_fps = True
        self.show_segmentation = False

        if self.show_3d:
            self.vis3d = o3d.visualization.Visualizer()
            self.vis3d.create_window()
            opt = self.vis3d.get_render_option()
            opt.background_color = np.asarray([0, 0, 0])
            z = min(video_height, video_width) / 3
            self.grid_floor = create_grid([0, video_height, -z],
                                          [video_width, video_height, -z],
                                          [video_width, video_height, z],
                                          [0, video_height, z],
                                          5,
                                          2,
                                          color=(1, 1, 1))
            self.grid_wall = create_grid([0, 0, z], [video_width, 0, z],
                                         [video_width, video_height, z],
                                         [0, video_height, z],
                                         5,
                                         2,
                                         color=(1, 1, 1))
            self.vis3d.add_geometry(self.grid_floor)
            self.vis3d.add_geometry(self.grid_wall)
            view_control = self.vis3d.get_view_control()
            view_control.set_up(np.array([0, -1, 0]))
            view_control.set_front(np.array([0, 0, -1]))

        if output is None:
            self.output = None
        else:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            self.output = cv2.VideoWriter(output, fourcc, self.video_fps,
                                          (video_width, video_height))

    def load_models(self, pd_xml, pd_device, lm_xml, lm_device):

        print("Loading Inference Engine")
        self.ie = IECore()
        print("Device info:")
        versions = self.ie.get_versions(pd_device)
        print("{}{}".format(" " * 8, pd_device))
        print("{}MKLDNNPlugin version ......... {}.{}".format(
            " " * 8, versions[pd_device].major, versions[pd_device].minor))
        print("{}Build ........... {}".format(
            " " * 8, versions[pd_device].build_number))

        # Pose detection model
        pd_name = os.path.splitext(pd_xml)[0]
        pd_bin = pd_name + '.bin'
        print(
            "Pose Detection model - Reading network files:\n\t{}\n\t{}".format(
                pd_xml, pd_bin))
        self.pd_net = self.ie.read_network(model=pd_xml, weights=pd_bin)
        # Input blob: input - shape: [1, 3, 128, 128]
        # Output blob: classificators - shape: [1, 896, 1] : scores
        # Output blob: regressors - shape: [1, 896, 12] : bboxes
        self.pd_input_blob = next(iter(self.pd_net.input_info))
        print(
            f"Input blob: {self.pd_input_blob} - shape: {self.pd_net.input_info[self.pd_input_blob].input_data.shape}"
        )
        _, _, self.pd_h, self.pd_w = self.pd_net.input_info[
            self.pd_input_blob].input_data.shape
        for o in self.pd_net.outputs.keys():
            print(f"Output blob: {o} - shape: {self.pd_net.outputs[o].shape}")
        self.pd_scores = "classificators"
        self.pd_bboxes = "regressors"
        print("Loading pose detection model into the plugin")
        self.pd_exec_net = self.ie.load_network(network=self.pd_net,
                                                num_requests=1,
                                                device_name=pd_device)
        self.pd_infer_time_cumul = 0
        self.pd_infer_nb = 0

        self.infer_nb = 0
        self.infer_time_cumul = 0

        # Landmarks model
        if lm_device != pd_device:
            print("Device info:")
            versions = self.ie.get_versions(lm_device)
            print("{}{}".format(" " * 8, lm_device))
            print("{}MKLDNNPlugin version ......... {}.{}".format(
                " " * 8, versions[lm_device].major, versions[lm_device].minor))
            print("{}Build ........... {}".format(
                " " * 8, versions[lm_device].build_number))

        lm_name = os.path.splitext(lm_xml)[0]
        lm_bin = lm_name + '.bin'
        print("Landmark model - Reading network files:\n\t{}\n\t{}".format(
            lm_xml, lm_bin))
        self.lm_net = self.ie.read_network(model=lm_xml, weights=lm_bin)
        # Input blob: input_1 - shape: [1, 3, 256, 256]
        # Output blob: ld_3d - shape: [1, 195]  for full body or [1, 155] for upper body
        # Output blob: output_poseflag - shape: [1, 1]
        # Output blob: output_segmentation - shape: [1, 1, 128, 128]
        self.lm_input_blob = next(iter(self.lm_net.input_info))
        print(
            f"Input blob: {self.lm_input_blob} - shape: {self.lm_net.input_info[self.lm_input_blob].input_data.shape}"
        )
        _, _, self.lm_h, self.lm_w = self.lm_net.input_info[
            self.lm_input_blob].input_data.shape
        for o in self.lm_net.outputs.keys():
            print(f"Output blob: {o} - shape: {self.lm_net.outputs[o].shape}")
        self.lm_score = "output_poseflag"
        self.lm_segmentation = "output_segmentation"
        self.lm_landmarks = "ld_3d"
        print("Loading landmark model to the plugin")
        self.lm_exec_net = self.ie.load_network(network=self.lm_net,
                                                num_requests=1,
                                                device_name=lm_device)
        self.lm_infer_time_cumul = 0
        self.lm_infer_nb = 0

    def pd_postprocess(self, inference):
        scores = np.squeeze(inference[self.pd_scores])  # 896
        bboxes = inference[self.pd_bboxes][0]  # 896x12
        # Decode bboxes
        self.regions = mpu.decode_bboxes(self.pd_score_thresh,
                                         scores,
                                         bboxes,
                                         self.anchors,
                                         best_only=not self.multi_detection)
        # Non maximum suppression (not needed if best_only is True)
        if self.multi_detection:
            self.regions = mpu.non_max_suppression(self.regions,
                                                   self.pd_nms_thresh)

        mpu.detections_to_rect(self.regions,
                               kp_pair=[0, 1] if self.full_body else [2, 3])
        mpu.rect_transformation(self.regions, self.frame_size, self.frame_size)

    def pd_render(self, frame):
        for r in self.regions:
            if self.show_pd_box:
                box = (np.array(r.pd_box) * self.frame_size).astype(int)
                cv2.rectangle(frame, (box[0], box[1]),
                              (box[0] + box[2], box[1] + box[3]), (0, 255, 0),
                              2)
            if self.show_pd_kps:
                # Key point 0 - mid hip center
                # Key point 1 - point that encodes size & rotation (for full body)
                # Key point 2 - mid shoulder center
                # Key point 3 - point that encodes size & rotation (for upper body)
                if self.full_body:
                    # Only kp 0 and 1 used
                    list_kps = [0, 1]
                else:
                    # Only kp 2 and 3 used for upper body
                    list_kps = [2, 3]
                for kp in list_kps:
                    x = int(r.pd_kps[kp][0] * self.frame_size)
                    y = int(r.pd_kps[kp][1] * self.frame_size)
                    cv2.circle(frame, (x, y), 3, (0, 0, 255), -1)
                    cv2.putText(frame, str(kp), (x, y + 12),
                                cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 2)
            if self.show_scores and r.pd_score is not None:
                cv2.putText(frame, f"Pose score: {r.pd_score:.2f}",
                            (50, self.frame_size // 2), cv2.FONT_HERSHEY_PLAIN,
                            2, (255, 255, 0), 2)

    def lm_postprocess(self, region, inference):
        region.lm_score = np.squeeze(inference[self.lm_score])
        if region.lm_score > self.lm_score_threshold:
            self.nb_active_regions += 1

            lm_raw = inference[self.lm_landmarks].reshape(-1, 5)
            # Each keypoint carries 5 values:
            # - X and Y coordinates are local to the region of
            # interest and range from [0.0, 255.0].
            # - The Z coordinate is measured in "image pixels" like
            # the X and Y coordinates and represents the
            # distance relative to the plane of the subject's
            # hips, which is the origin of the Z axis. Negative
            # values are between the hips and the camera;
            # positive values are behind the hips. The Z scale is
            # similar to the X and Y scales, but it is obtained by
            # fitting synthetic data (the GHUM model) to the 2D
            # annotations rather than via human annotation.
            # - Visibility, after a user-applied sigmoid, denotes the
            # probability that a keypoint is located within the
            # frame and not occluded by another, bigger body
            # part or another object.
            # - Presence, after a user-applied sigmoid, denotes the
            # probability that a keypoint is located within the
            # frame.

            # Normalize x,y,z. Here self.lm_w = self.lm_h and scaling in z = scaling in x = 1/self.lm_w
            lm_raw[:, :3] /= self.lm_w
            # Apply sigmoid on visibility and presence (if used later)
            # lm_raw[:,3:5] = 1 / (1 + np.exp(-lm_raw[:,3:5]))

            # region.landmarks contains the landmarks normalized 3D coordinates in the relative oriented body bounding box
            region.landmarks = lm_raw[:, :3]
            # Calculate the landmark coordinate in square padded image (region.landmarks_padded)
            src = np.array([(0, 0), (1, 0), (1, 1)], dtype=np.float32)
            dst = np.array(
                [(x, y) for x, y in region.rect_points[1:]], dtype=np.float32
            )  # region.rect_points[0] is left bottom point and points going clockwise!
            mat = cv2.getAffineTransform(src, dst)
            lm_xy = np.expand_dims(region.landmarks[:self.nb_lms, :2], axis=0)
            lm_xy = np.squeeze(cv2.transform(lm_xy, mat))
            # A segment of length 1 in the coordinates system of body bounding box takes region.rect_w_a pixels in the
            # original image. Then I arbitrarily divide by 4 for a more realistic appearance.
            lm_z = region.landmarks[:self.nb_lms, 2:3] * region.rect_w_a / 4
            lm_xyz = np.hstack((lm_xy, lm_z))
            if self.smoothing:
                lm_xyz = self.filter.apply(lm_xyz)
            region.landmarks_padded = lm_xyz.astype(np.int32)
            # If we added padding to make the image square, we need to remove this padding from landmark coordinates
            # region.landmarks_abs contains absolute landmark coordinates in the original image (padding removed))
            region.landmarks_abs = region.landmarks_padded.copy()
            if self.pad_h > 0:
                region.landmarks_abs[:, 1] -= self.pad_h
            if self.pad_w > 0:
                region.landmarks_abs[:, 0] -= self.pad_w

            if self.use_gesture: self.recognize_gesture(region)

            if self.show_segmentation:
                self.seg = np.squeeze(inference[self.lm_segmentation])
                self.seg = 1 / (1 + np.exp(-self.seg))

    def lm_render(self, frame, region):
        if region.lm_score > self.lm_score_threshold:
            if self.show_segmentation:
                ret, mask = cv2.threshold(self.seg, 0.5, 1, cv2.THRESH_BINARY)
                mask = (mask * 255).astype(np.uint8)
                cv2.imshow("seg", self.seg)
                # cv2.imshow("mask", mask)
                src = np.array(
                    [[0, 0], [128, 0], [128, 128]],
                    dtype=np.float32)  # rect_points[0] is left bottom point !
                dst = np.array(region.rect_points[1:], dtype=np.float32)
                mat = cv2.getAffineTransform(src, dst)
                mask = cv2.warpAffine(mask, mat,
                                      (self.frame_size, self.frame_size))
                # cv2.imshow("mask2", mask)
                # mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                l = frame.shape[0]
                frame2 = cv2.bitwise_and(frame, frame, mask=mask)
                if not self.crop:
                    frame2 = frame2[self.pad_h:l - self.pad_h,
                                    self.pad_w:l - self.pad_w]
                cv2.imshow("Segmentation", frame2)
            if self.show_rot_rect:
                cv2.polylines(frame, [np.array(region.rect_points)], True,
                              (0, 255, 255), 2, cv2.LINE_AA)
            if self.show_landmarks:

                list_connections = LINES_FULL_BODY if self.full_body else LINES_UPPER_BODY
                lines = [
                    np.array(
                        [region.landmarks_padded[point, :2] for point in line])
                    for line in list_connections
                ]
                cv2.polylines(frame, lines, False, (255, 180, 90), 2,
                              cv2.LINE_AA)

                for i, x_y in enumerate(region.landmarks_padded[:self.nb_lms -
                                                                2, :2]):
                    if i > 10:
                        color = (0, 255, 0) if i % 2 == 0 else (0, 0, 255)
                    elif i == 0:
                        color = (0, 255, 255)
                    elif i in [4, 5, 6, 8, 10]:
                        color = (0, 255, 0)
                    else:
                        color = (0, 0, 255)
                    cv2.circle(frame, (x_y[0], x_y[1]), 4, color, -1)

                if self.show_3d:
                    points = region.landmarks_abs
                    lines = LINE_MESH_FULL_BODY if self.full_body else LINE_MESH_UPPER_BODY
                    colors = COLORS_FULL_BODY
                    for i, a_b in enumerate(lines):
                        a, b = a_b
                        line = create_segment(points[a],
                                              points[b],
                                              radius=5,
                                              color=colors[i])
                        if line:
                            self.vis3d.add_geometry(line,
                                                    reset_bounding_box=False)

            if self.show_scores:
                cv2.putText(frame, f"Landmark score: {region.lm_score:.2f}",
                            (region.landmarks_padded[24, 0] - 10,
                             region.landmarks_padded[24, 1] + 90),
                            cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)
            if self.use_gesture and self.show_gesture:
                cv2.putText(frame, region.gesture,
                            (region.landmarks_padded[6, 0] - 10,
                             region.landmarks_padded[6, 1] - 50),
                            cv2.FONT_HERSHEY_PLAIN, 5, (0, 190, 255), 3)

    def recognize_gesture(self, r):
        def angle_with_y(v):
            # v: 2d vector (x,y)
            # Returns the angle in degrees of v with the y-axis of the image plane
            if v[1] == 0:
                return 90
            angle = atan2(v[0], v[1])
            return np.degrees(angle)

        # For the demo, we want to recognize the flag semaphore alphabet
        # For this task, we just need to measure the angles of both arms with vertical
        right_arm_angle = angle_with_y(r.landmarks_abs[14, :2] -
                                       r.landmarks_abs[12, :2])
        left_arm_angle = angle_with_y(r.landmarks_abs[13, :2] -
                                      r.landmarks_abs[11, :2])
        right_pose = int((right_arm_angle + 202.5) / 45)
        left_pose = int((left_arm_angle + 202.5) / 45)
        r.gesture = semaphore_flag.get((right_pose, left_pose), None)
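        # Worked example of the quantization above: an arm hanging straight
        # down has angle ~0 deg, so int((0 + 202.5) / 45) = 4; an arm pointing
        # straight out to the side (~90 deg) gives int((90 + 202.5) / 45) = 6.
        # Each arm angle thus maps to one of the 45-degree semaphore sectors,
        # and the (right_pose, left_pose) pair indexes the semaphore_flag table.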

    def run(self):

        self.fps = FPS(mean_nb_frames=20)

        nb_frames = 0
        nb_pd_inferences = 0
        nb_pd_inferences_direct = 0
        nb_lm_inferences = 0
        nb_lm_inferences_after_landmarks_ROI = 0
        glob_pd_rtrip_time = 0
        glob_lm_rtrip_time = 0

        get_new_frame = True
        use_previous_landmarks = False

        global_time = time.perf_counter()
        while True:
            if get_new_frame:
                nb_frames += 1
                if self.input_type == "image":
                    vid_frame = self.img
                else:
                    ok, vid_frame = self.cap.read()
                    if not ok:
                        break
                h, w = vid_frame.shape[:2]
                if self.crop:
                    # Cropping the long side to get a square shape
                    self.frame_size = min(h, w)
                    dx = (w - self.frame_size) // 2
                    dy = (h - self.frame_size) // 2
                    video_frame = vid_frame[dy:dy + self.frame_size,
                                            dx:dx + self.frame_size]
                else:
                    # Padding on the small side to get a square shape
                    self.frame_size = max(h, w)
                    self.pad_h = int((self.frame_size - h) / 2)
                    self.pad_w = int((self.frame_size - w) / 2)
                    video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h,
                                                     self.pad_h, self.pad_w,
                                                     self.pad_w,
                                                     cv2.BORDER_CONSTANT)

                annotated_frame = video_frame.copy()

            if not self.force_detection and use_previous_landmarks:
                self.regions = regions_from_landmarks
                mpu.detections_to_rect(
                    self.regions, kp_pair=[0, 1]
                )  # self.regions.pd_kps are initialized from landmarks on previous frame
                mpu.rect_transformation(self.regions, self.frame_size,
                                        self.frame_size)
            else:
                # Infer pose detection
                # Resize image to NN square input shape
                frame_nn = cv2.resize(video_frame, (self.pd_w, self.pd_h),
                                      interpolation=cv2.INTER_AREA)
                # Transpose hxwx3 -> 1x3xhxw
                frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]

                pd_rtrip_time = now()
                inference = self.pd_exec_net.infer(
                    inputs={self.pd_input_blob: frame_nn})
                glob_pd_rtrip_time += now() - pd_rtrip_time
                self.pd_postprocess(inference)
                self.pd_render(annotated_frame)
                nb_pd_inferences += 1
                if get_new_frame: nb_pd_inferences_direct += 1

            # Landmarks
            self.nb_active_regions = 0
            if self.show_3d:
                self.vis3d.clear_geometries()
                self.vis3d.add_geometry(self.grid_floor,
                                        reset_bounding_box=False)
                self.vis3d.add_geometry(self.grid_wall,
                                        reset_bounding_box=False)
            if self.force_detection:
                for r in self.regions:
                    frame_nn = mpu.warp_rect_img(r.rect_points, video_frame,
                                                 self.lm_w, self.lm_h)
                    # Transpose hxwx3 -> 1x3xhxw
                    frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]
                    # Get landmarks
                    lm_rtrip_time = now()
                    inference = self.lm_exec_net.infer(
                        inputs={self.lm_input_blob: frame_nn})
                    glob_lm_rtrip_time += now() - lm_rtrip_time
                    nb_lm_inferences += 1
                    self.lm_postprocess(r, inference)
                    self.lm_render(annotated_frame, r)
            elif len(self.regions) == 1:
                r = self.regions[0]
                frame_nn = mpu.warp_rect_img(r.rect_points, video_frame,
                                             self.lm_w, self.lm_h)
                # Transpose hxwx3 -> 1x3xhxw
                frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]
                # Get landmarks
                lm_rtrip_time = now()
                inference = self.lm_exec_net.infer(
                    inputs={self.lm_input_blob: frame_nn})
                glob_lm_rtrip_time += now() - lm_rtrip_time
                nb_lm_inferences += 1
                if use_previous_landmarks:
                    nb_lm_inferences_after_landmarks_ROI += 1

                self.lm_postprocess(r, inference)
                if not self.force_detection:
                    if get_new_frame:
                        if not use_previous_landmarks:
                            # With a new frame, we have run the landmark NN on a ROI found by the detection NN...
                            if r.lm_score > self.lm_score_threshold:
                                # ...and successfully found a body and its landmarks
                                # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                                regions_from_landmarks = [
                                    mpu.Region(pd_kps=r.landmarks_padded[
                                        self.nb_lms - 2:self.nb_lms, :2] /
                                               self.frame_size)
                                ]
                                use_previous_landmarks = True
                        else:
                            # With a new frame, we have run the landmark NN on a ROI calculated from the landmarks of the previous frame...
                            if r.lm_score > self.lm_score_threshold:
                                # ...and successfully found a body and its landmarks
                                # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                                regions_from_landmarks = [
                                    mpu.Region(pd_kps=r.landmarks_padded[
                                        self.nb_lms - 2:self.nb_lms, :2] /
                                               self.frame_size)
                                ]
                                use_previous_landmarks = True
                            else:
                                # ...and could not find a body
                                # We don't know if it is because the ROI calculated from the previous frame is not reliable (the body moved)
                                # or because there is really no body in the frame. To decide, we have to run the detection NN on this frame
                                get_new_frame = False
                                use_previous_landmarks = False
                                continue
                    else:
                        # On a frame on which we already ran the landmark NN without finding a body,
                        # we have run the detection NN...
                        if r.lm_score > self.lm_score_threshold:
                            # ...and successfully found a body and its landmarks
                            use_previous_landmarks = True
                            # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                            regions_from_landmarks = [
                                mpu.Region(pd_kps=r.landmarks_padded[
                                    self.nb_lms - 2:self.nb_lms, :2] /
                                           self.frame_size)
                            ]
                        # else:
                        # ...and could not find a body
                        # We are sure there is no body in that frame

                        get_new_frame = True
                self.lm_render(annotated_frame, r)
            else:
                # Detection NN hasn't found any body
                get_new_frame = True

            self.fps.update()

            if self.show_3d:
                self.vis3d.poll_events()
                self.vis3d.update_renderer()
            if self.smoothing and self.nb_active_regions == 0:
                self.filter.reset()

            if not self.crop:
                annotated_frame = annotated_frame[self.pad_h:self.pad_h + h,
                                                  self.pad_w:self.pad_w + w]

            if self.show_fps:
                self.fps.display(annotated_frame,
                                 orig=(50, 50),
                                 size=1,
                                 color=(240, 180, 100))
            cv2.imshow("Blazepose", annotated_frame)

            if self.output:
                self.output.write(annotated_frame)

            key = cv2.waitKey(1)
            if key == ord('q') or key == 27:
                break
            elif key == 32:
                # Pause on space bar
                cv2.waitKey(0)
            elif key == ord('1'):
                self.show_pd_box = not self.show_pd_box
            elif key == ord('2'):
                self.show_pd_kps = not self.show_pd_kps
            elif key == ord('3'):
                self.show_rot_rect = not self.show_rot_rect
            elif key == ord('4'):
                self.show_landmarks = not self.show_landmarks
            elif key == ord('5'):
                self.show_scores = not self.show_scores
            elif key == ord('6'):
                self.show_gesture = not self.show_gesture
            elif key == ord('f'):
                self.show_fps = not self.show_fps
            elif key == ord('s'):
                self.show_segmentation = not self.show_segmentation

        # Print some stats
        print(
            f"FPS : {nb_frames/(time.perf_counter() - global_time):.1f} f/s (# frames = {nb_frames})"
        )
        print(
            f"# pose detection inferences : {nb_pd_inferences} - # direct: {nb_pd_inferences_direct} - # after landmarks ROI failures: {nb_pd_inferences-nb_pd_inferences_direct}"
        )
        print(
            f"# landmark inferences       : {nb_lm_inferences} - # after pose detection: {nb_lm_inferences - nb_lm_inferences_after_landmarks_ROI} - # after landmarks ROI prediction: {nb_lm_inferences_after_landmarks_ROI}"
        )
        print(
            f"Pose detection round trip   : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms"
        )
        if nb_lm_inferences:
            print(
                f"Landmark round trip         : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms"
            )

        if self.output:
            self.output.release()
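A minimal entry point for the class above, assuming the POSE_DETECTION_MODEL and FULL_BODY_LANDMARK_MODEL constants are defined earlier in the script (as the constructor defaults suggest); the input string "0" is illustrative.

if __name__ == "__main__":
    # "0" selects the first webcam; a .jpg/.png path or a video file also works
    blazepose = BlazeposeOpenvino(input_src="0", full_body=True, smoothing=True)
    blazepose.run()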
Code Example #17
    def load_model(self, xml_file, bin_file, device = "MYRIAD", cpu_extension = None, precision = 'FP16'):
        # N : # of images in batch
        # C : Channel
        # H : Height
        # W : Width
        # Input => HWC
        if self._debug:
            logging.info('>> {0}:{1}()'.format(self.__class__.__name__, sys._getframe().f_code.co_name))

        try:
            self.reset_engine()

            p_model = Path(xml_file).resolve()
            self.name = str(Path(p_model.name).stem)

            logging.info('==================================================================')
            logging.info('Loading Model')
            logging.info('    Name      : {}'.format(self.name))
            logging.info('    Target    : {}'.format(device))
            logging.info('    Model     : {}'.format(xml_file))
            logging.info('    Precision : {}'.format(precision))

            version_data = self.ie.get_versions(device)

            self.ver_major = int(version_data[device].major)
            self.ver_minor = int(version_data[device].minor)
            self.ver_build = int(version_data[device].build_number)

            # self.plugin = IEPlugin(device=device)

            # if 'MYRIAD' in device:
            #     #https://docs.openvinotoolkit.org/latest/_docs_IE_DG_supported_plugins_MYRIAD.html
            #     self.plugin.set_config({"VPU_FORCE_RESET": "NO"})

            if self.ie:
                del self.ie
            
            self.ie = IECore()

            if self.ver_major >= 2 and self.ver_minor >= 1 and self.ver_build >= 42025:
                self.ieNet = self.ie.read_network(model = xml_file, weights = bin_file)
            else:
                self.ieNet = IENetwork(model = xml_file, weights = bin_file)

            # process input

            # image_tensor : TensorFlow
            # data         : Caffe

            if len(self.ieNet.inputs) > 2:
                logging.warning('!! Too many inputs.  Not supported')
                return Model_Flag.LoadError

            # Don't touch layers here. Somehow, accessing a layer at this point causes a load failure with Myriad.
            # logging.info(' -Layers')
            # logging.info('       Type : {}'.format(self.ieNet.layers[key].type))
            # self.dump(self.ieNet.layers[key])

            logging.info('==================================================================')
            logging.info('Output Blobs')

            for key, blob in self.ieNet.outputs.items():

                logging.info('Output Key    : {}'.format(key))
                logging.info('     Layout   : {}'.format(blob.layout))
                logging.info('      Shape   : {}'.format(blob.shape))
                logging.info('  Precision   : {}'.format(blob.precision))
                # logging.info(' -Layers')
                # logging.info('       Type : {}'.format(self.ieNet.layers[key].type))
                # self.dump(self.ieNet.layers[key])
                # blob.precision = precision

            logging.info('==================================================================')
            logging.info('Input Blobs')

            for key, blob in self.ieNet.inputs.items():

                logging.info('Input Key     : {}'.format(key))
                logging.info('     Layout   : {}'.format(blob.layout))
                logging.info('      Shape   : {}'.format(blob.shape))
                logging.info('  Precision   : {}'.format(blob.precision))
                # blob.precision = precision
            logging.info('>> Loading model to {}'.format(device))

            self.exec_net = self.ie.load_network(network = self.ieNet, device_name = device, num_requests = 2)

            logging.info('<< Model loaded to  {}'.format(device))

            # # touch layers only after we load
            # self.output_blob_key = next(iter(self.ieNet.outputs))

            for key, blob in self.ieNet.outputs.items():

                layer = self.ieNet.layers[key]

                if layer.type == 'DetectionOutput':
                    outputFormat = Output_Format.DetectionOutput
                elif layer.type == 'RegionYolo':
                    outputFormat = Output_Format.RegionYolo
                elif layer.type == 'Convolution':
                    if layer.name == 'Mconv7_stage2_L1' or layer.name == 'Mconv7_stage2_L2':
                        outputFormat = Output_Format.HumanPose
                    else:
                        return Model_Flag.Unsupported
                else:
                    return Model_Flag.Unsupported

            if outputFormat == Output_Format.DetectionOutput:
                if len(self.ieNet.inputs) == 1 and len(self.ieNet.outputs) == 1:
                    # 1 input, 1 output

                    input_key  = next(iter(self.ieNet.inputs))
                    output_key = next(iter(self.ieNet.outputs))

                    layer = self.ieNet.layers[output_key]

                    if layer.type == 'DetectionOutput':
                        outputFormat = Output_Format.DetectionOutput
                    else:
                        return Model_Flag.Unsupported

                    if input_key == 'image_tensor':
                        self.inputFormat = Input_Format.Tensorflow
                    elif input_key == 'image':
                        self.inputFormat = Input_Format.IntelIR
                    elif input_key == 'data':
                        self.inputFormat = Input_Format.Caffe
                    else:
                        self.inputFormat = Input_Format.Other

                    params = self.ieNet.layers[output_key].params
                    input_blob = self.ieNet.inputs[input_key]

                    self.result_processor = Object_Detection_Processor(
                                                model_name = self.name,
                                                input_format = self.inputFormat,
                                                input_key = input_key,
                                                input_shape = input_blob.shape,
                                                input_layout = input_blob.layout,
                                                output_format = outputFormat,
                                                output_key = output_key,
                                                output_params = params)

                elif len(self.ieNet.inputs) == 2 and len(self.ieNet.outputs) == 1:
                    # 2 inputs and 1 output.  Faster RCNN

                    output_key = next(iter(self.ieNet.outputs))

                    layer = self.ieNet.layers[output_key]

                    if layer.type != 'DetectionOutput':
                        return Model_Flag.Unsupported

                    info_key = ""
                    data_key = ""

                    for key, blob in self.ieNet.inputs.items():

                        if key == 'image_info':
                            info_key = key
                        elif key == 'image_tensor':
                            data_key = key

                    if len(info_key) > 0 and len(data_key) > 0:

                        self.inputFormat = Input_Format.Faster_RCNN
                        input_blob = self.ieNet.inputs[data_key]
                        params = self.ieNet.layers[output_key].params

                        self.result_processor = Object_Detection_RCNN_Processor(
                            model_name = self.name,
                            input_format = self.inputFormat,
                            info_key = info_key,
                            data_key = data_key,
                            data_shape = input_blob.shape,
                            data_layout = input_blob.layout,
                            output_format = Output_Format.DetectionOutput,
                            output_key = output_key,
                            output_params = params)
                    else:
                        return Model_Flag.Unsupported

            elif outputFormat == Output_Format.RegionYolo:
                input_key  = next(iter(self.ieNet.inputs))
                input_blob = self.ieNet.inputs[input_key]

                self.inputFormat = Input_Format.Yolo
                self.result_processor = Object_Detection_Yolo_Processor(
                                            model_name = self.name,
                                            input_format = self.inputFormat,
                                            input_key = input_key,
                                            input_shape = input_blob.shape,
                                            input_layout = input_blob.layout,
                                            output_format = Output_Format.RegionYolo)

                for key, blob in self.ieNet.outputs.items():
                    self.result_processor.reshape_data[key] = self.ieNet.layers[self.ieNet.layers[key].parents[0]].shape
                    self.result_processor.set_class_label(self.ieNet.layers[key].params)

                # for key, blob in self.result_processor.reshape_data.items():
                #     print('{} {}'.format(key, blob))


            elif outputFormat == Output_Format.HumanPose:
                input_key  = next(iter(self.ieNet.inputs))
                input_blob = self.ieNet.inputs[input_key]
                self.inputFormat = Input_Format.HumanPose

                self.result_processor = Human_Pose_Processor(
                                            model_name = self.name,
                                            input_format = Input_Format.HumanPose,
                                            input_shape = input_blob.shape,
                                            input_layout = input_blob.layout
                                            )
            return Model_Flag.Loaded

        except Exception as ex:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            traceback.print_exception(exc_type, exc_obj, exc_tb)
            logging.error('!! {0}:{1}() : Exception {2}'.format(self.__class__.__name__, sys._getframe().f_code.co_name, ex))
            return Model_Flag.LoadError
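The example above routes all post-processing by sniffing the types of the output layers. As a condensed restatement of that idea (a sketch mirroring, not replacing, the logic in load_model(), assuming net is an IENetwork read with the pre-2021 layers API):

def detect_output_format(net):
    # Route by output layer type, as load_model() does above
    for key in net.outputs:
        layer_type = net.layers[key].type
        if layer_type == 'DetectionOutput':
            return Output_Format.DetectionOutput
        if layer_type == 'RegionYolo':
            return Output_Format.RegionYolo
    return None  # unsupported topology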
Code Example #18
class Network:
    """
    Load and configure inference plugins for the specified target devices 
    and performs synchronous and asynchronous modes for the specified infer requests.
    """

    def __init__(self):
        ### TODO: Initialize any class variables desired ###
        self.net = None
        self.plugin = None
        self.input_blob = None
        self.out_blob = None
        self.net_plugin = None
        self.infer_request_handle = None

    def load_model(self, model, device, input_size, output_size, num_requests, cpu_extension=None, plugin=None):
        ### TODO: Load the model ###
        model_xml = model
        # get IR binary file weight
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        
        # Plugin initialization for specified device
        # and load extensions library if specified
        if not plugin:
            log.info("Initializing plugin for {} device...".format(device))
            # load inference engine API named it as plugin
            self.plugin = IECore()
        else:
            self.plugin = plugin
        
        # with IENetwork load the model with architecture XML and weight with binary file
        # Read the IR, load IR files
        log.info("Reading IR...")
        self.net = IENetwork(model=model_xml, weights=model_bin)
        log.info("Loading IR to the plugin...")
        
        ### TODO: Add any necessary extensions ###
        # Add a CPU extension if applicable
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, device)
    
        
        ### Get the supported layers of the network
        supported_layers = self.plugin.query_network(network=self.net, device_name=device)
        
        ### TODO: Check for supported layers ###
        ### Check for any unsupported layers, and let the user
        ### know if anything is missing. Exit the program, if so.
        unsupported_layers = [l for l in self.net.layers.keys() if l not in supported_layers]
        if len(unsupported_layers) != 0:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Check whether extensions are available to add to IECore.")
            exit(1)
        
        ### TODO: Return the loaded inference plugin ###
        # Load network read from IR into plugin(Inference Engine)
        if num_requests == 0:
            self.net_plugin = self.plugin.load_network(self.net, device)
        else:
            self.net_plugin = self.plugin.load_network(self.net, device, num_requests=num_requests)
        
        # Get the input layer
        self.input_blob = next(iter(self.net.inputs))
        self.out_blob = next(iter(self.net.outputs))

        assert len(self.net.inputs.keys()) == input_size, \
            "Supports only {} input topologies".format(input_size)
        assert len(self.net.outputs) == output_size, \
            "Supports only {} output topologies".format(output_size)
        
        ### Note: You may need to update the function parameters. ###
        
        return self.plugin, self.get_input_shape()

    def get_input_shape(self):
        ### TODO: Return the shape of the input layer ###
        return self.net.inputs[self.input_blob].shape

    def exec_net(self, request_id, frame):
        ### TODO: Start an asynchronous request ###
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
        self.infer_request_handle = self.net_plugin.start_async(request_id=request_id, inputs={self.input_blob: frame})
        return self.net_plugin

    def wait(self, request_id):
        ### TODO: Wait for the request to be complete. ###
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
#         status = self.exec_network.requests[0].wait(-1)
        status = self.net_plugin.requests[request_id].wait(-1)
        return status

    def get_output(self, request_id, output=None):
        ### TODO: Extract and return the output results
        ### Note: You may need to update the function parameters. ###
#         out = self.infer_request_handle.outputs[self.output_blob]
        if output:
            res = self.infer_request_handle.outputs[output]
        else:
            res = self.net_plugin.requests[request_id].outputs[self.out_blob]
        return res

    def clean(self):
        """
        Deletes all the instances
        :return: None
        """
        del self.net_plugin
        del self.plugin
        del self.net
        
    
    def performance_counter(self, request_id):
        """
        Queries performance measures per layer to get feedback of what is the
        most time consuming layer.
        :param request_id: Index of Infer request value. Limited to device capabilities
        :return: Performance of the layer  
        """
        perf_count = self.net_plugin.requests[request_id].get_perf_counts()
        return perf_count
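A hedged usage sketch for the wrapper above; the model path is illustrative, and the zero-filled array stands in for a real preprocessed frame.

import numpy as np

net = Network()
plugin, input_shape = net.load_model("model.xml", "CPU", 1, 1, num_requests=2)
dummy = np.zeros(input_shape)              # stand-in for a preprocessed NCHW frame
net.exec_net(0, dummy)                     # start async request 0
if net.wait(0) == 0:
    detections = net.get_output(0)
    print(net.performance_counter(0))      # per-layer timing dict
net.clean()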
Code Example #19
import streamlit as st
from openvino.inference_engine import IECore

st.title("hello!")
ie = IECore()
model_path = "model/model.xml"
ie_net = ie.read_network(model=model_path,
                         weights=model_path.replace(".xml", ".bin"))
exec_net = ie.load_network(network=ie_net, num_requests=1, device_name="CPU")

st.markdown(exec_net)
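Streamlit apps are launched from the command line rather than with plain python, so this snippet would typically be saved as, say, app.py (the filename is illustrative) and started with "streamlit run app.py"; the page then renders the greeting and a text dump of the loaded ExecutableNetwork object.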
Code Example #20
from openvino.inference_engine import IENetwork, IECore

import numpy as np
import time

# Getting model bin and xml file
model_path='pool_cnn/pool_cnn'
model_weights=model_path+'.bin'
model_structure=model_path+'.xml'

# Load the model using the IECore API
core = IECore()
model = IENetwork(model_structure, model_weights)
net = core.load_network(network=model, device_name='CPU')

input_name=next(iter(net.inputs))

# Reading and Preprocessing Image
input_img=np.load('image.npy')
input_img=input_img.reshape(1, 28, 28)

input_dict={input_name:input_img}

# Using the input image, run inference on the model for 10 iterations
start=time.time()

for _ in range(10):
    net.infer(input_dict)

end=time.time()
print("Time taken to run 10 iterations: {:.3f} s".format(end-start))
Code Example #21
def main(args):
    # Search available NCS2 devices on the system
    MYRIADs = []
    ie = IECore()
    for device in ie.available_devices:
        if 'MYRIAD' in device:
            MYRIADs.append(device)
    num_devices = len(MYRIADs)
    print('{} MYRIAD devices found. {}'.format(len(MYRIADs), MYRIADs))
    if num_devices == 0:
        return

    model = 'public/googlenet-v1/FP16/googlenet-v1'
    net = ie.read_network(model + '.xml', model + '.bin')

    # Build up the device descriptor
    if num_devices == 1:
        device = 'MYRIAD'
    else:
        device = 'MULTI'
        for i, MYRIAD in enumerate(MYRIADs):
            device += ',' if i != 0 else ':'
            device += MYRIAD
    print('Device name : {}'.format(device))

    inblob = list(net.input_info.keys())[0]
    inshape = net.input_info[inblob].tensor_desc.dims
    outblob = list(net.outputs.keys())[0]
    outshape = net.outputs[outblob].shape

    config = {'VPU_HW_STAGES_OPTIMIZATION': 'YES'}  # default = 'YES'
    num_requests = 4 * num_devices
    execnet = ie.load_network(net,
                              device,
                              config=config,
                              num_requests=num_requests)

    dummy = np.random.rand(1, 3, 224, 224)

    niter = 100
    print('Start inferencing ({} times, {})'.format(
        niter, 'SYNC' if args.sync else 'ASYNC'))
    start = time.monotonic()
    for i in range(niter):
        if args.sync:
            execnet.infer(inputs={inblob: dummy})  # Synchronous inference
        else:
            reqId = -1
            while reqId == -1:
                reqId = execnet.get_idle_request_id()
            execnet.requests[reqId].async_infer(
                inputs={inblob: dummy})  # Asynchronous inference

    if not args.sync:
        # Wait for all requests to complete
        for i in range(num_requests):
            execnet.requests[i].wait()

    end = time.monotonic()

    print('Performance = {} FPS'.format(niter / (end - start)))
Code Example #22
class PersonDetect:
    '''
    Class for the Person Detection Model.
    '''
    def __init__(self, model_name, device, threshold=0.60):
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold

        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception as e:
            raise ValueError(
                "Could not initialise the network. Have you entered the correct model path?"
            )

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

    def load_model(self):
        '''
        Loading the network in core
        '''
        self.plugin = IECore()
        self.exec_network = self.plugin.load_network(network=self.model,
                                                     device_name=self.device)

    def predict(self, image):
        '''
        Detecting people in image
        '''
        preprocessed_input = self.preprocess_input(image)

        self.exec_network.infer({self.input_name: preprocessed_input})

        result = self.exec_network.requests[0]

        coords = self.preprocess_outputs(result.outputs['detection_out'])
        height, width = image.shape[:2]
        for coord in coords:
            coord[0] = coord[0] * width
            coord[1] = coord[1] * height
            coord[2] = coord[2] * width
            coord[3] = coord[3] * height

        preprocessed_image = self.draw_outputs(coords, image)
        return coords, preprocessed_image

    def draw_outputs(self, coords, image):
        '''
        Drawing rectangles around detected people
        '''
        for coord in coords:
            (startX, startY, endX, endY) = [int(c) for c in coord]
            cv2.rectangle(image, (startX, startY), (endX, endY), (255, 0, 0),
                          2)
            cv2.rectangle(image, (620, 1), (915, 562), (0, 0, 0), 5)

        return image

    def preprocess_outputs(self, outputs):
        '''
        Processing the output to get the bounding box with required threshold
        '''
        coords = []
        for i in np.arange(0, outputs.shape[2]):
            confidence = outputs[0, 0, i, 2]
            if confidence > self.threshold:
                box = outputs[0, 0, i, 3:7]
                coords.append(box)
        return coords

    def preprocess_input(self, image):
        '''
        Preprocessing the input to fit the the inference engine
        '''
        b, c, h, w = self.input_shape
        prepo = np.copy(image)
        prepo = cv2.resize(prepo, (w, h))
        prepo = prepo.transpose((2, 0, 1))
        prepo = prepo.reshape(1, c, h, w)
        return prepo
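A hedged usage sketch for the class above; person-detection-retail-0013 is one Open Model Zoo model with the expected 'detection_out' output, and the image path is illustrative.

pd = PersonDetect("intel/person-detection-retail-0013/FP16/person-detection-retail-0013",
                  device="CPU", threshold=0.6)
pd.load_model()
image = cv2.imread("people.jpg")           # any BGR test image
coords, annotated = pd.predict(image)
print("{} people detected".format(len(coords)))
cv2.imwrite("annotated.jpg", annotated)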
Code Example #23
class FaceDetectionModel:
    '''
    Class for the Face Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions

        self.model_structure = self.model_name
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'

        self.core = None
        self.network = None
        self.exec_net = None

        self.input = None
        self.output = None
        self.mode = 'async'
        self.request_id = 0
        self.num_requests = 1

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.core = IECore()

        self.network = self.core.read_network(model=self.model_structure, weights=self.model_weights)
        self.exec_net = self.core.load_network(network=self.network, device_name=self.device,num_requests=self.num_requests)
        
        self.input = next(iter(self.network.inputs))
        self.output = next(iter(self.network.outputs))

        return self.exec_net

    def predict(self, image, prob_threshold):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        processed_frame = self.preprocess_input(image)
        self.exec_net.start_async(request_id=self.request_id,
                                      inputs={self.input: processed_frame})
        self.exec_net.requests[0].get_perf_counts()

        if self.mode == 'async':
            self.exec_net.requests[self.request_id].wait()
            result = self.exec_net.requests[self.request_id].outputs[self.output]
            croppedFace, box = self.preprocess_output(result[0][0], image,prob_threshold)
            return croppedFace, box
        else:
            if self.exec_net.requests[self.request_id].wait(-1) == 0:
                result = self.exec_net.requests[self.request_id].outputs[self.output]
                croppedFace, box = self.preprocess_output(result[0][0], image,prob_threshold)
                return croppedFace, box

    def check_model(self):
        supported_layers = self.core.query_network(network=self.network, device_name=self.device)
        unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]

        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("Unsupported layers found: {}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding cpu_extension")
                self.core.add_extension(self.extensions, self.device)
                supported_layers = self.core.query_network(network=self.network, device_name=self.device)
                unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
                if len(unsupported_layers) != 0:
                    print("Unsupported layers remain even after adding the extension")
                    exit(1)
                print("Adding the extension resolved the issue")
            else:
                print("Provide the path to a CPU extension")
                exit(1)


    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        model_input_shape = self.network.inputs[self.input].shape

        image_resized = cv2.resize(image, (model_input_shape[3], model_input_shape[2]))
        p_frame = np.transpose(np.expand_dims(image_resized, axis=0), (0, 3, 1, 2))  # HWC -> NCHW
        return p_frame

    def preprocess_output(self, outputs, image, prob_threshold):
        '''
        Scale the detections above the confidence threshold back to the
        original image size and return the cropped face with its box.
        '''
        height, width = image.shape[:2]
        coords = []
        for res in outputs:
            conf = res[2]
            if conf > prob_threshold:
                x_min = int(res[3] * width)
                y_min = int(res[4] * height)
                x_max = int(res[5] * width)
                y_max = int(res[6] * height)
                coords.append([x_min, y_min, x_max, y_max])
        if not coords:
            return None, None
        box = coords[0]
        cropped_face = image[box[1]:box[3], box[0]:box[2]]
        return cropped_face, box
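
# --- Usage sketch (not part of the original example) ---
# A minimal, hedged driver for FaceDetectionModel; the model path, image
# path and threshold below are placeholder assumptions, and cv2 is assumed
# to be imported as in the surrounding examples.
if __name__ == '__main__':
    fd = FaceDetectionModel('face-detection-adas-0001.xml', device='CPU')
    fd.load_model()
    fd.check_model()

    frame = cv2.imread('input.jpg')                    # any BGR image
    face, box = fd.predict(frame, prob_threshold=0.6)
    if face is not None:
        cv2.imwrite('face.jpg', face)                  # save the cropped face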
Code Example #24
class Network:
    '''
    Load and store information for working with the Inference Engine,
    and any loaded models.
    '''
    def __init__(self):
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None

    def load_model(self, model, device="CPU", cpu_extension=None):
        '''
        Load the model given IR files.
        Defaults to CPU as device for use in the workspace.
        Synchronous requests made within.
        '''
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        # Initialize the plugin
        self.plugin = IECore()

        # Add a CPU extension, if applicable
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, device)

        # Read the IR as a IENetwork
        self.network = IENetwork(model=model_xml, weights=model_bin)

        # Load the IENetwork into the plugin
        self.exec_network = self.plugin.load_network(network=self.network,
                                                     num_requests=2,
                                                     device_name=device)

        # Get the input layer
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))
        return

    def check_device_extension(self, log, device="CPU"):
        if "CPU" in device:
            supported_layers = self.plugin.query_network(self.network, "CPU")
            not_supported_layers = [
                l for l in self.network.layers.keys()
                if l not in supported_layers
            ]
            if len(not_supported_layers) != 0:
                log.error(
                    "Following layers are not supported by the plugin for specified device {}:\n {}"
                    .format(device, ', '.join(not_supported_layers)))
                log.error(
                    "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                    "or --cpu_extension command line argument")
                sys.exit(1)

        assert len(self.network.inputs.keys()) == 1, \
            "Sample supports only YOLO V3 based single input topologies"
        return

    def get_input_shape(self):
        '''
        Gets the input shape of the network
        '''
        return self.network.inputs[self.input_blob].shape

    def async_inference(self, image, request_id=0):
        '''
        Makes an asynchronous inference request, given an input image.
        '''
        ### TODO: Start asynchronous inference

        self.infer_request = self.exec_network.start_async(
            request_id=request_id, inputs={self.input_blob: image})

        return

    def wait(self):
        '''
        Checks the status of the inference request.
        '''
        ### TODO: Wait for the async request to be complete

        status = self.exec_network.requests[0].wait(-1)

        return status

    def extract_output(self, request_id=0):
        '''
        Returns a list of the results for the output layer of the network.
        '''
        ### TODO: Return the outputs of the network from the output_blob
        return self.exec_network.requests[request_id].outputs
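
# --- Usage sketch (not part of the original example) ---
# One plausible async request lifecycle with the Network wrapper above;
# the model and image paths are placeholder assumptions, and cv2 is assumed
# to be imported alongside os/sys as in the class itself.
if __name__ == '__main__':
    net = Network()
    net.load_model('model.xml', device='CPU')

    n, c, h, w = net.get_input_shape()
    image = cv2.resize(cv2.imread('input.jpg'), (w, h))
    image = image.transpose((2, 0, 1)).reshape(n, c, h, w)  # HWC -> NCHW

    net.async_inference(image, request_id=0)
    if net.wait() == 0:                    # 0 means the request finished OK
        outputs = net.extract_output(request_id=0)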
Code Example #25
def test_add_outputs_with_and_without_port():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.add_outputs('28/Reshape')
    net.add_outputs([('29/WithoutBiases', 0)])
    assert sorted(net.outputs) == ['28/Reshape', '29/WithoutBiases', 'fc_out']
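# A hedged follow-up sketch: once outputs are added, they also appear in the
# result dict of a synchronous infer call. Assumes the same test_net_* files,
# a CPU device, and numpy imported as np.
def test_added_outputs_are_returned():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.add_outputs('28/Reshape')
    exec_net = ie.load_network(network=net, device_name="CPU")
    res = exec_net.infer({'data': np.zeros((1, 3, 32, 32), dtype=np.float32)})
    assert '28/Reshape' in res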
Code Example #26
(W, H) = (None, None)
(newW, newH) = (args["width"], args["height"])
(rW, rH) = (None, None)

mean = np.array([123.68, 116.779, 103.939][::-1], dtype="float16")

# define the two output layer names for the EAST detector model that
# we are interested in -- the first gives the text probabilities and the
# second can be used to derive the bounding box coordinates of the text

# load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
model_xml = args["east"]
model_bin = os.path.splitext(model_xml)[0] + ".bin"

ie = IECore()

net = ie.read_network(model_xml, model_bin)

input_info = net.input_info
input_blob = next(iter(input_info))

exec_net = ie.load_network(network=net, device_name=args["device"])

print("INFO: loading OCR model....")
model_rec_xml = args["rec"]
model_rec_bin = os.path.splitext(model_rec_xml)[0] + ".bin"
# read the recognition network from its .xml and .bin files

net_rec = ie.read_network(model_rec_xml, model_rec_bin)
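
# --- Hypothetical continuation (not part of the original example) ---
# Resize a frame to the detector input, subtract the mean defined above,
# and run one synchronous EAST inference. The args["image"] key and the
# cv2 import are assumptions on my part.
frame = cv2.imread(args["image"])                  # assumed input source
(H, W) = frame.shape[:2]
(rW, rH) = (W / float(newW), H / float(newH))      # ratios to map boxes back

blob = cv2.resize(frame, (newW, newH)).astype("float32") - mean
blob = blob.transpose((2, 0, 1))[np.newaxis, ...]  # HWC -> NCHW

results = exec_net.infer({input_blob: blob})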
Code Example #27
def test_batch_size_setter():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.batch_size = 4
    assert net.batch_size == 4
    assert net.input_info['data'].input_data.shape == [4, 3, 32, 32]
Code Example #28
def run_demo(args):
    cap = open_images_capture(args.input, args.loop)

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    ie = IECore()

    log.info('Reading Object Detection model {}'.format(args.model_od))
    detector_person = Detector(ie,
                               args.model_od,
                               device=args.device,
                               label_class=args.person_label)
    log.info('The Object Detection model {} is loaded to {}'.format(
        args.model_od, args.device))

    log.info('Reading Human Pose Estimation model {}'.format(args.model_hpe))
    single_human_pose_estimator = HumanPoseEstimator(ie,
                                                     args.model_hpe,
                                                     device=args.device)
    log.info('The Human Pose Estimation model {} is loaded to {}'.format(
        args.model_hpe, args.device))

    delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))
    video_writer = cv2.VideoWriter()

    frames_processed = 0
    presenter = monitors.Presenter(args.utilization_monitors, 25)
    metrics = PerformanceMetrics()

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    if args.output and not video_writer.open(
            args.output, cv2.VideoWriter_fourcc(*'MJPG'), cap.fps(),
        (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        bboxes = detector_person.detect(frame)
        human_poses = [
            single_human_pose_estimator.estimate(frame, bbox)
            for bbox in bboxes
        ]

        presenter.drawGraphs(frame)

        colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0)]

        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0),
                          2)
            for id_kpt, kpt in enumerate(pose):
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 3,
                           colors[id_kpt], -1)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or
                                        frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Human Pose Estimation Demo', frame)
            key = cv2.waitKey(delay)
            if key == 27:
                break
            presenter.handleKey(key)

        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
Code Example #29
def test_reshape():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.reshape({"data": (2, 3, 32, 32)})
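# A hedged follow-up sketch: reshape only edits the network description, so
# the new shape is visible on the input and the network can then be loaded
# with it (assumes the same test_net_* files and a CPU device).
def test_reshape_then_load():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.reshape({"data": (2, 3, 32, 32)})
    assert net.input_info["data"].input_data.shape == [2, 3, 32, 32]
    exec_net = ie.load_network(network=net, device_name="CPU")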
Code Example #30

# Workaround for reshaping bug
c1 = net.layers['79/Cast_11815_const']
c1.blobs['custom'][4] = inp_h
c1.blobs['custom'][5] = inp_w

c2 = net.layers['86/Cast_11811_const']
c2.blobs['custom'][2] = out_h
c2.blobs['custom'][3] = out_w

# Reshape network to specific size
net.reshape({'0': [1, 3, inp_h, inp_w], '1': [1, 3, out_h, out_w]})

# Load network to device
ie = IECore()
exec_net = ie.load_network(net, 'CPU')

# Prepare input
inp = img.transpose(2, 0, 1)  # interleaved to planar (HWC -> CHW)
inp = inp.reshape(1, 3, inp_h, inp_w)
inp = inp.astype(np.float32)

# Prepare second input - bicubic resize of first input
resized_img = cv.resize(img, (out_w, out_h), interpolation=cv.INTER_CUBIC)
resized = resized_img.transpose(2, 0, 1)
resized = resized.reshape(1, 3, out_h, out_w)
resized = resized.astype(np.float32)

outs = exec_net.infer({'0': inp, '1': resized})
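
# --- Hypothetical postprocessing (not part of the original example) ---
# Turn the planar float output back into a BGR image; the value range
# handling is an assumption and depends on the actual model.
out = next(iter(outs.values()))          # the model's (assumed single) output
out = out.reshape(out.shape[-3:])        # drop the batch dimension -> CHW
out = out.transpose(1, 2, 0)             # planar to interleaved (CHW -> HWC)
out = np.clip(out * 255, 0, 255).astype(np.uint8)   # assumes output in [0, 1]
cv.imwrite('result.png', out)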