Example #1
class FacialLandmarks:
    '''
    Class for the Facial Landmarks Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_bin = model_name + '.bin'
        self.model_xml = model_name + '.xml'
        self.device = device
        self.cpu_extension = extensions

        try:
            self.ie_plugin = IECore()
            self.model = self.ie_plugin.read_network(model=self.model_xml,
                                                     weights=self.model_bin)
        except Exception:
            raise ValueError(
                "Could not Initialise the network. Have you enterred the correct model path?"
            )

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        # Add CPU extension to IECore
        if self.cpu_extension and 'CPU' in self.device:
            log.info('Adding CPU extension:\n\t{}'.format(self.cpu_extension))
            self.ie_plugin.add_extension(self.cpu_extension, self.device)

        # Check layers
        log.info('Current device specified: {}'.format(self.device))
        log.info("Checking for unsupported layers...")
        supported_layers = self.ie_plugin.query_network(
            network=self.model, device_name=self.device)
        unsupported_layers = [
            l for l in self.model.layers.keys() if l not in supported_layers
        ]
        if len(unsupported_layers) != 0:
            log.error('These layers are unsupported:\n{}'.format(
                ', '.join(unsupported_layers)))
            log.error(
                'Specify an available extension to add to IECore from the command line using "-l"'
            )
            exit(1)
        else:
            log.info('All layers are supported!')

        # Load the model network into IECore
        self.exec_network = self.ie_plugin.load_network(
            self.model, self.device)
        log.info("IR Model has been successfully loaded into IECore")

        return self.exec_network

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        p_image = self.preprocess_input(image)

        self.exec_network.start_async(request_id=0, inputs={self.input_name: p_image})

        # Wait for the asynchronous request to finish before reading results
        if self.wait() != 0:
            raise RuntimeError("Inference request failed")
        outputs = self.get_outputs()
        return self.preprocess_output(outputs, image)

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        required_height = self.input_shape[2]
        required_width = self.input_shape[3]
        dimension = (required_width, required_height)  # cv2.resize expects (width, height)

        image = cv2.resize(image, dimension)
        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        image = image.reshape(1, *image.shape)
        return image

    def wait(self):
        # Block until the first infer request completes; 0 indicates success
        status = self.exec_network.requests[0].wait(-1)
        return status

    def get_outputs(self):
        return self.exec_network.requests[0].outputs[self.output_name]

    def preprocess_output(self, outputs, image):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        # denormalize detections
        xl = int(outputs[0][0][0] * image.shape[1])
        yl = int(outputs[0][1][0] * image.shape[0])
        xr = int(outputs[0][2][0] * image.shape[1])
        yr = int(outputs[0][3][0] * image.shape[0])

        # include offset for left eye
        xlmin = xl - 15
        ylmin = yl - 15
        xlmax = xl + 15
        ylmax = yl + 15

        # include offset for right eye
        xrmin = xr - 15
        yrmin = yr - 15
        xrmax = xr + 15
        yrmax = yr + 15

        coords = []
        coords.append((xlmin, ylmin))
        coords.append((xlmax, ylmax))
        coords.append((xrmin, yrmin))
        coords.append((xrmax, yrmax))

        # crop eyes
        eye_l = image[ylmin:ylmax, xlmin:xlmax]
        eye_r = image[yrmin:yrmax, xrmin:xrmax]

        return eye_l, eye_r, coords
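
A minimal usage sketch for the class above (a sketch only, assuming the same cv2/IECore imports as the snippet; the IR path 'landmarks-regression-retail-0009' and 'face.jpg' are placeholders, not names from the original code):

import cv2

fl = FacialLandmarks('landmarks-regression-retail-0009', device='CPU')
fl.load_model()
frame = cv2.imread('face.jpg')  # placeholder input image
eye_l, eye_r, coords = fl.predict(frame)
print('left eye crop:', eye_l.shape, 'right eye crop:', eye_r.shape)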
Example #2
class HeadPoseEstimation:
    '''
    Class for the Head Pose Estimation Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        instantiating the necessary variables.
        '''
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extensions = extensions
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_name = None
        self.output_shape = None

    def load_model(self):
        '''
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.core = IECore()
        self.model = self.core.read_network(self.model_structure,
                                            self.model_weights)

        supported_layers = self.core.query_network(network=self.model,
                                                   device_name=self.device)
        unsupported_layers = [
            l for l in self.model.layers.keys() if l not in supported_layers
        ]

        if len(unsupported_layers) != 0 and self.device == 'CPU':
            log.error(
                "Unsupported layers found: {}".format(unsupported_layers))
            log.error("Check whether extensions are available to add")
            self.core.add_extension(self.extensions, self.device)
            supported_layers = self.core.query_network(network=self.model,
                                                       device_name=self.device)
            unsupported_layers = [
                l for l in self.model.layers.keys()
                if l not in supported_layers
            ]
            if len(unsupported_layers) != 0:
                log.error("ERROR: Unsupported layer issue not yet resolved")
                exit(1)
        self.exec_net = self.core.load_network(network=self.model,
                                               device_name=self.device,
                                               num_requests=1)

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

    def predict(self, image):
        '''
        This method is meant for running predictions on the input image.
        '''
        input_img = self.preprocess_input(image.copy())
        result = self.exec_net.infer({self.input_name: input_img})
        outputList = self.preprocess_output(result)

        return outputList

    def check_model(self):
        pass

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        p_frame = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        return p_frame

    def preprocess_output(self, outputs):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        output = []
        output.append(outputs['angle_y_fc'].tolist()[0][0])
        output.append(outputs['angle_p_fc'].tolist()[0][0])
        output.append(outputs['angle_r_fc'].tolist()[0][0])

        return output
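
A hedged usage sketch for HeadPoseEstimation (assumes cv2 and IECore are imported as in the snippet; the model path and image file are placeholders):

import cv2

hp = HeadPoseEstimation('head-pose-estimation-adas-0001', device='CPU')
hp.load_model()
face = cv2.imread('face.jpg')  # placeholder cropped-face image
yaw, pitch, roll = hp.predict(face)  # [yaw, pitch, roll] in degrees
print('yaw={:.1f} pitch={:.1f} roll={:.1f}'.format(yaw, pitch, roll))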
def get_net(model: str, core: IECore):
    model_xml = model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    net = core.read_network(model=model_xml, weights=model_bin)
    return net
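
A short usage sketch for get_net (assumes the IECore import used throughout these examples; 'model.xml' is a placeholder path):

core = IECore()
net = get_net('model.xml', core)
exec_net = core.load_network(network=net, device_name='CPU')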
Example #4
def test_precision_setter():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    net.layers['19/Fused_Add_'].out_data[0].precision = "I8"
    assert net.layers['19/Fused_Add_'].out_data[0].precision == "I8", \
        "Incorrect precision for layer '19/Fused_Add_'"
def main():

    usage()

    boundary_box_flag = True

    # Prep for face detection
    ie = IECore()

    net_det = ie.read_network(model=model_det + '.xml',
                              weights=model_det + '.bin')
    input_name_det = next(iter(net_det.input_info))  # Input blob name "data"
    input_shape_det = net_det.input_info[
        input_name_det].tensor_desc.dims  # [1,3,384,672]
    out_name_det = next(iter(
        net_det.outputs))  # Output blob name "detection_out"
    exec_net_det = ie.load_network(network=net_det,
                                   device_name='CPU',
                                   num_requests=1)
    del net_det

    # Preparation for landmark detection
    net_lm = ie.read_network(model=model_lm + '.xml',
                             weights=model_lm + '.bin')
    input_name_lm = next(iter(net_lm.input_info))  # Input blob name
    input_shape_lm = net_lm.input_info[
        input_name_lm].tensor_desc.dims  # [1,3,60,60]
    out_name_lm = next(iter(
        net_lm.outputs))  # Output blob name "embd/dim_red/conv"
    out_shape_lm = net_lm.outputs[out_name_lm].shape  # 3x [1,1]
    exec_net_lm = ie.load_network(network=net_lm,
                                  device_name='CPU',
                                  num_requests=1)
    del net_lm

    # Preparation for headpose detection
    net_hp = ie.read_network(model=model_hp + '.xml',
                             weights=model_hp + '.bin')
    input_name_hp = next(iter(net_hp.input_info))  # Input blob name
    input_shape_hp = net_hp.input_info[
        input_name_hp].tensor_desc.dims  # [1,3,60,60]
    out_name_hp = next(iter(net_hp.outputs))  # Output blob name
    out_shape_hp = net_hp.outputs[out_name_hp].shape  # [1,70]
    exec_net_hp = ie.load_network(network=net_hp,
                                  device_name='CPU',
                                  num_requests=1)
    del net_hp

    # Preparation for gaze estimation
    net_gaze = ie.read_network(model=model_gaze + '.xml',
                               weights=model_gaze + '.bin')
    input_shape_gaze = [1, 3, 60, 60]
    exec_net_gaze = ie.load_network(network=net_gaze, device_name='CPU')
    del net_gaze

    # Open USB webcams
    cam = cv2.VideoCapture(0)
    camx, camy = [(1920, 1080), (1280, 720), (800, 600),
                  (480, 480)][1]  # Set camera resolution [1]=1280,720
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, camx)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, camy)

    laser_flag = True
    flip_flag = True
    spark_flag = True

    while True:
        ret, img = cam.read()  # img won't be modified
        if not ret:
            break

        if flip_flag:
            img = cv2.flip(img, 1)  # flip image horizontally
        out_img = img.copy()  # out_img will be drawn on to make a display image

        img1 = cv2.resize(img, (input_shape_det[_W], input_shape_det[_H]))
        img1 = img1.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        img1 = img1.reshape(input_shape_det)
        res_det = exec_net_det.infer(inputs={input_name_det:
                                             img1})  # Detect faces

        gaze_lines = []
        for obj in res_det[out_name_det][0][
                0]:  # obj = [ image_id, label, conf, xmin, ymin, xmax, ymax ]
            if obj[2] > 0.75:  # Confidence > 75%
                xmin = abs(int(obj[3] * img.shape[1]))
                ymin = abs(int(obj[4] * img.shape[0]))
                xmax = abs(int(obj[5] * img.shape[1]))
                ymax = abs(int(obj[6] * img.shape[0]))
                class_id = int(obj[1])
                face = img[ymin:ymax, xmin:xmax]  # Crop the face image
                if boundary_box_flag:
                    cv2.rectangle(out_img, (xmin, ymin), (xmax, ymax),
                                  (255, 255, 0), 2)

                # Find facial landmarks (to find eyes)
                face1 = cv2.resize(face,
                                   (input_shape_lm[_W], input_shape_lm[_H]))
                face1 = face1.transpose((2, 0, 1))
                face1 = face1.reshape(input_shape_lm)
                res_lm = exec_net_lm.infer(inputs={input_name_lm: face1
                                                   })  # Run landmark detection
                lm = res_lm[out_name_lm][0][:8].reshape(
                    4, 2
                )  #  [[left0x, left0y], [left1x, left1y], [right0x, right0y], [right1x, right1y] ]

                # Estimate head orientation (yaw=Y, pitch=X, roll=Z)
                res_hp = exec_net_hp.infer(
                    inputs={input_name_hp: face1})  # Run head pose estimation
                yaw = res_hp['angle_y_fc'][0][0]
                pitch = res_hp['angle_p_fc'][0][0]
                roll = res_hp['angle_r_fc'][0][0]

                _X = 0
                _Y = 1
                # Landmark position memo...   lm[1] (eye) lm[0] (nose)  lm[2] (eye) lm[3]
                eye_sizes = [
                    abs(int((lm[0][_X] - lm[1][_X]) * face.shape[1])),
                    abs(int((lm[3][_X] - lm[2][_X]) * face.shape[1]))
                ]  # eye size in the cropped face image
                eye_centers = [
                    [
                        int(((lm[0][_X] + lm[1][_X]) / 2 * face.shape[1])),
                        int(((lm[0][_Y] + lm[1][_Y]) / 2 * face.shape[0]))
                    ],
                    [
                        int(((lm[3][_X] + lm[2][_X]) / 2 * face.shape[1])),
                        int(((lm[3][_Y] + lm[2][_Y]) / 2 * face.shape[0]))
                    ]
                ]  # eye center coordinate in the cropped face image
                if eye_sizes[0] < 4 or eye_sizes[1] < 4:
                    continue

                ratio = 0.7
                eyes = []
                for i in range(2):
                    # Crop eye images
                    x1 = int(eye_centers[i][_X] - eye_sizes[i] * ratio)
                    x2 = int(eye_centers[i][_X] + eye_sizes[i] * ratio)
                    y1 = int(eye_centers[i][_Y] - eye_sizes[i] * ratio)
                    y2 = int(eye_centers[i][_Y] + eye_sizes[i] * ratio)
                    eyes.append(
                        cv2.resize(face[y1:y2, x1:x2].copy(),
                                   (input_shape_gaze[_W],
                                    input_shape_gaze[_H])))  # crop and resize

                    # Draw eye boundary boxes
                    if boundary_box_flag:
                        cv2.rectangle(out_img, (x1 + xmin, y1 + ymin),
                                      (x2 + xmin, y2 + ymin), (0, 255, 0), 2)

                    # rotate eyes around Z axis to keep them level
                    if roll != 0.:
                        rotMat = cv2.getRotationMatrix2D(
                            (int(input_shape_gaze[_W] / 2),
                             int(input_shape_gaze[_H] / 2)), roll, 1.0)
                        eyes[i] = cv2.warpAffine(
                            eyes[i],
                            rotMat,
                            (input_shape_gaze[_W], input_shape_gaze[_H]),
                            flags=cv2.INTER_LINEAR)
                    eyes[i] = eyes[i].transpose(
                        (2, 0, 1))  # Change data layout from HWC to CHW
                    eyes[i] = eyes[i].reshape((1, 3, 60, 60))

                hp_angle = [yaw, pitch, 0]  # head pose angle in degree
                res_gaze = exec_net_gaze.infer(
                    inputs={
                        'left_eye_image': eyes[0],
                        'right_eye_image': eyes[1],
                        'head_pose_angles': hp_angle
                    })  # gaze estimation
                gaze_vec = res_gaze['gaze_vector'][
                    0]  # result is in an orthogonal coordinate system (x,y,z, not yaw,pitch,roll) and not normalized
                gaze_vec_norm = gaze_vec / np.linalg.norm(
                    gaze_vec)  # normalize the gaze vector

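                # Rotate the (x, y) components of the gaze vector by the roll angle to compensate for head tilt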
                vcos = math.cos(math.radians(roll))
                vsin = math.sin(math.radians(roll))
                tmpx = gaze_vec_norm[0] * vcos + gaze_vec_norm[1] * vsin
                tmpy = -gaze_vec_norm[0] * vsin + gaze_vec_norm[1] * vcos
                gaze_vec_norm = [tmpx, tmpy]

                # Store gaze line coordinations
                for i in range(2):
                    coord1 = (eye_centers[i][_X] + xmin,
                              eye_centers[i][_Y] + ymin)
                    coord2 = (eye_centers[i][_X] + xmin + int(
                        (gaze_vec_norm[0] + 0.) * 3000),
                              eye_centers[i][_Y] + ymin - int(
                                  (gaze_vec_norm[1] + 0.) * 3000))
                    gaze_lines.append(
                        [coord1, coord2,
                         False])  # line(coord1, coord2); False=spark flag

        # Gaze lines intersection check (for sparking)
        if spark_flag:
            for g1 in range(len(gaze_lines)):
                for g2 in range(g1 + 1, len(gaze_lines)):
                    if gaze_lines[g1][2] or gaze_lines[g2][2]:
                        continue  # Skip if either line has already been marked as crossed
                    x1 = gaze_lines[g1][0]
                    y1 = gaze_lines[g1][1]
                    x2 = gaze_lines[g2][0]
                    y2 = gaze_lines[g2][1]
                    if intersection_check(x1, y1, x2, y2):
                        l1 = line(x1, y1)
                        l2 = line(x2, y2)
                        x, y = intersection(
                            l1, l2)  # calculate crossing coordinate
                        gaze_lines[g1][1] = [int(x), int(y)]
                        gaze_lines[g1][2] = True
                        gaze_lines[g2][1] = [int(x), int(y)]
                        gaze_lines[g2][2] = True

        # Drawing gaze lines and sparks
        for gaze_line in gaze_lines:
            draw_gaze_line(out_img, (gaze_line[0][0], gaze_line[0][1]),
                           (gaze_line[1][0], gaze_line[1][1]), laser_flag)
            if gaze_line[2]:
                draw_spark(out_img, (gaze_line[1][0], gaze_line[1][1]))

        cv2.imshow("gaze", out_img)

        key = cv2.waitKey(1)
        if key == 27:
            break
        if key == ord('l'):
            laser_flag = not laser_flag  # toggle laser rendering
        if key == ord('f'):
            flip_flag = not flip_flag  # toggle image flip
        if key == ord('b'):
            boundary_box_flag = not boundary_box_flag  # toggle boundary boxes
        if key == ord('s'):
            spark_flag = not spark_flag  # toggle sparks

    cv2.destroyAllWindows()
def main():
    args = build_argparser().parse_args()

    cap = open_images_capture(args.input, args.loop)

    # Plugin initialization for specified device and load extensions library if specified
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    ie = IECore()

    # Read IR
    log.info('Reading Proposal model {}'.format(args.model_pnet))
    p_net = ie.read_network(args.model_pnet)
    assert len(p_net.input_info.keys()) == 1, "Pnet supports only single input topologies"
    assert len(p_net.outputs) == 2, "Pnet supports two output topologies"

    log.info('Reading Refine model {}'.format(args.model_rnet))
    r_net = ie.read_network(args.model_rnet)
    assert len(r_net.input_info.keys()) == 1, "Rnet supports only single input topologies"
    assert len(r_net.outputs) == 2, "Rnet supports two output topologies"

    log.info('Reading Output model {}'.format(args.model_onet))
    o_net = ie.read_network(args.model_onet)
    assert len(o_net.input_info.keys()) == 1, "Onet supports only single input topologies"
    assert len(o_net.outputs) == 3, "Onet supports three output topologies"

    pnet_input_blob = next(iter(p_net.input_info))
    rnet_input_blob = next(iter(r_net.input_info))
    onet_input_blob = next(iter(o_net.input_info))

    for name, blob in p_net.outputs.items():
        if blob.shape[1] == 2:
            pnet_cls_name = name
        elif blob.shape[1] == 4:
            pnet_roi_name = name
        else:
            raise RuntimeError("Unsupported output layer for Pnet")

    for name, blob in r_net.outputs.items():
        if blob.shape[1] == 2:
            rnet_cls_name = name
        elif blob.shape[1] == 4:
            rnet_roi_name = name
        else:
            raise RuntimeError("Unsupported output layer for Rnet")

    for name, blob in o_net.outputs.items():
        if blob.shape[1] == 2:
            onet_cls_name = name
        elif blob.shape[1] == 4:
            onet_roi_name = name
        elif blob.shape[1] == 10:
            onet_pts_name = name
        else:
            raise RuntimeError("Unsupported output layer for Onet")

    next_frame_id = 0

    metrics = PerformanceMetrics()
    presenter = None
    video_writer = cv2.VideoWriter()
    is_loaded_before = False

    while True:
        start_time = perf_counter()
        origin_image = cap.read()
        if origin_image is None:
            if next_frame_id == 0:
                raise ValueError("Can't read an image from the input")
            break
        if next_frame_id == 0:
            presenter = monitors.Presenter(args.utilization_monitors, 55,
                                           (round(origin_image.shape[1] / 4), round(origin_image.shape[0] / 8)))
            if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                     cap.fps(), (origin_image.shape[1], origin_image.shape[0])):
                raise RuntimeError("Can't open video writer")
        next_frame_id += 1

        rgb_image = cv2.cvtColor(origin_image, cv2.COLOR_BGR2RGB)
        oh, ow, _ = rgb_image.shape

        scales = utils.calculate_scales(rgb_image)

        # *************************************
        # Pnet stage
        # *************************************

        pnet_res = []
        for i, scale in enumerate(scales):
            hs = int(oh*scale)
            ws = int(ow*scale)
            image = preprocess_image(rgb_image, ws, hs)

            p_net.reshape({pnet_input_blob: [1, 3, ws, hs]})  # Change width and height of input blob
            exec_pnet = ie.load_network(network=p_net, device_name=args.device)
            if i == 0 and not is_loaded_before:
                log.info("The Proposal model {} is loaded to {}".format(args.model_pnet, args.device))

            p_res = exec_pnet.infer(inputs={pnet_input_blob: image})
            pnet_res.append(p_res)

        image_num = len(scales)
        rectangles = []
        for i in range(image_num):
            roi = pnet_res[i][pnet_roi_name]
            cls = pnet_res[i][pnet_cls_name]
            _, _, out_h, out_w = cls.shape
            out_side = max(out_h, out_w)
            rectangle = utils.detect_face_12net(cls[0][1], roi[0], out_side, 1/scales[i], ow, oh,
                                                score_threshold[0], iou_threshold[0])
            rectangles.extend(rectangle)
        rectangles = utils.NMS(rectangles, iou_threshold[1], 'iou')

        # Rnet stage
        if len(rectangles) > 0:

            r_net.reshape({rnet_input_blob: [len(rectangles), 3, 24, 24]})  # Change batch size of input blob
            exec_rnet = ie.load_network(network=r_net, device_name=args.device)
            if not is_loaded_before:
                log.info("The Refine model {} is loaded to {}".format(args.model_rnet, args.device))

            rnet_input = []
            for rectangle in rectangles:
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 24, 24)
                rnet_input.extend(crop_img)

            rnet_res = exec_rnet.infer(inputs={rnet_input_blob: rnet_input})

            roi = rnet_res[rnet_roi_name]
            cls = rnet_res[rnet_cls_name]
            rectangles = utils.filter_face_24net(cls, roi, rectangles, ow, oh, score_threshold[1], iou_threshold[2])

        # Onet stage
        if len(rectangles) > 0:

            o_net.reshape({onet_input_blob: [len(rectangles), 3, 48, 48]})  # Change batch size of input blob
            exec_onet = ie.load_network(network=o_net, device_name=args.device)
            if not is_loaded_before:
                log.info("The Output model {} is loaded to {}".format(args.model_onet, args.device))
                is_loaded_before = True

            onet_input = []
            for rectangle in rectangles:
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 48, 48)
                onet_input.extend(crop_img)

            onet_res = exec_onet.infer(inputs={onet_input_blob: onet_input})

            roi = onet_res[onet_roi_name]
            cls = onet_res[onet_cls_name]
            pts = onet_res[onet_pts_name]
            rectangles = utils.filter_face_48net(cls, roi, pts, rectangles, ow, oh,
                                                 score_threshold[2], iou_threshold[3])

        # display results
        for rectangle in rectangles:
            # Draw detected boxes
            cv2.putText(origin_image, 'confidence: {:.2f}'.format(rectangle[4]),
                        (int(rectangle[0]), int(rectangle[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0))
            cv2.rectangle(origin_image, (int(rectangle[0]), int(rectangle[1])), (int(rectangle[2]), int(rectangle[3])),
                          (255, 0, 0), 1)
            # Draw landmarks
            for i in range(5, 15, 2):
                cv2.circle(origin_image, (int(rectangle[i+0]), int(rectangle[i+1])), 2, (0, 255, 0))

        metrics.update(start_time, origin_image)

        if video_writer.isOpened() and (args.output_limit <= 0 or next_frame_id <= args.output_limit):
            video_writer.write(origin_image)

        if not args.no_show:
            cv2.imshow('MTCNN Results', origin_image)
            key = cv2.waitKey(1)
            if key in {ord('q'), ord('Q'), 27}:
                break
            presenter.handleKey(key)

    metrics.log_total()
class TestModels(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.ie = IECore()
        cls.test_img = cv.imread(os.path.join(os.environ['MODELS_PATH'],
                                              'validation_set',
                                              '512x512',
                                              'dog.bmp'))
        if cls.test_img is None:
            tc = unittest.TestCase()
            tc.fail('No image data found')


    def get_iou(self, box1, box2):
        # box is xmin, ymin, xmax, ymax
        x_min, x_max = max(box1[0], box2[0]), min(box1[2], box2[2])
        y_min, y_max = max(box1[1], box2[1]), min(box1[3], box2[3])
        inter = max(0, x_max - x_min) * max(0, y_max - y_min)  # clamp so non-overlapping boxes give zero intersection
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        return inter / (area1 + area2 - inter)


    # source: https://github.com/opencv/opencv/blob/master/modules/dnn/misc/python/test/test_dnn.py
    def normAssertDetections(self, ref_class_ids, ref_scores, ref_boxes,
                             test_class_ids, test_scores, test_boxes,
                             conf_threshold=1e-5, scores_diff=1e-5, boxes_iou_diff=1e-4):
        matched_ref_boxes = [False] * len(ref_boxes)
        errMsg = ''
        for i in range(len(test_boxes)):
            test_score = test_scores[i]
            if test_score < conf_threshold:
                continue

            test_class_id, test_box = test_class_ids[i], test_boxes[i]
            matched = False
            for j in range(len(ref_boxes)):
                if (not matched_ref_boxes[j]) and test_class_id == ref_class_ids[j] and \
                   abs(test_score - ref_scores[j]) < scores_diff:
                    iou = self.get_iou(test_box, ref_boxes[j])
                    if abs(iou - 1.0) < boxes_iou_diff:
                        matched = True
                        matched_ref_boxes[j] = True
            if not matched:
                errMsg += '\nUnmatched prediction: class %d score %f box %s' % (test_class_id, test_score, test_box)

        for i in range(len(ref_boxes)):
            if (not matched_ref_boxes[i]) and ref_scores[i] > conf_threshold:
                errMsg += '\nUnmatched reference: class %d score %f box %s' % (ref_class_ids[i], ref_scores[i], ref_boxes[i])
        if errMsg:
            raise Exception(errMsg)


    def check_torchvision_model(self, model_func, size, threshold=1e-5):
        inp_size = [1, 3, size[0], size[1]]

        inp = cv.resize(self.test_img, (size[1], size[0]))
        inp = np.expand_dims(inp.astype(np.float32).transpose(2, 0, 1), axis=0)
        inp /= 255
        inp = torch.tensor(inp)

        # Create model
        model = model_func(pretrained=True, progress=False)
        model.eval()
        ref = model(inp)

        # Convert to OpenVINO IR
        mo_pytorch.convert(model, input_shape=inp_size, model_name='model')

        # Run model with OpenVINO and compare outputs
        net = self.ie.read_network('model.xml', 'model.bin')
        exec_net = self.ie.load_network(net, 'CPU')
        out = exec_net.infer({'input': inp.detach().numpy()})

        if isinstance(ref, torch.Tensor):
            ref = {'': ref}
        for out0, ref0 in zip(out.values(), ref.values()):
            diff = np.max(np.abs(out0 - ref0.detach().numpy()))
            self.assertLessEqual(diff, threshold)

    def test_inception_v3(self):
        self.check_torchvision_model(models.inception_v3, (299, 299), 4e-5)

    def test_squeezenet1_1(self):
        self.check_torchvision_model(models.squeezenet1_1, (227, 227))

    def test_alexnet(self):
        self.check_torchvision_model(models.alexnet, (227, 227))

    def test_resnet18(self):
        self.check_torchvision_model(models.resnet18, (227, 227), 2e-5)

    def test_deeplabv3_resnet50(self):
        self.check_torchvision_model(models.segmentation.deeplabv3_resnet50, (240, 320), 2e-4)

    def test_detectron2_retinanet(self):
        width = 320
        height = 320

        # Load model
        model = model_zoo.get("COCO-Detection/retinanet_R_50_FPN_1x.yaml", trained=True)
        model.eval()

        # Prepare input tensor
        img = cv.resize(self.test_img, (width, height))
        inp = img.transpose(2, 0, 1).astype(np.float32)

        # Get reference prediction
        ref = model([{'image': torch.tensor(inp)}])
        ref = ref[0]['instances'].get_fields()
        ref_boxes = []
        for box, score, class_idx in zip(ref['pred_boxes'], ref['scores'], ref['pred_classes']):
            xmin, ymin, xmax, ymax = box
            ref_boxes.append([xmin, ymin, xmax, ymax])
            if score > 0.45:
                cv.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color=(0, 180, 255), thickness=3)

        # Convert model to OpenVINO IR
        mo_pytorch.convert(model, input_shape=[1, 3, height, width], model_name='retinanet_R_50_FPN_1x')

        # Get OpenVINO prediction
        net = self.ie.read_network('retinanet_R_50_FPN_1x.xml', 'retinanet_R_50_FPN_1x.bin')
        exec_net = self.ie.load_network(net, 'CPU')
        outs = exec_net.infer({'input': inp.reshape(1, 3, height, width)})
        ie_detections = next(iter(outs.values()))
        ie_detections = ie_detections.reshape(-1, 7)

        for det in ie_detections:
            conf = det[2]
            if conf > 0.45:
                xmin, ymin, xmax, ymax = [int(v) for v in det[3:]]
                cv.rectangle(img, (xmin, ymin), (xmax, ymax), color=(210, 9, 179))

        # Uncomment to visualize detections
        # cv.imshow('RetinaNet (Detectron2)', img)
        # cv.waitKey()

        self.normAssertDetections(ref['pred_classes'], ref['scores'], ref_boxes,
                                  ie_detections[:, 1], ie_detections[:, 2], ie_detections[:, 3:])

    def test_strided_slice(self):
        import torch.nn as nn
        class SSlice(nn.Module):
            def forward(self, x):
                return x[:, :1, 2:, 3]

        self.check_torchvision_model(lambda **args: SSlice(), (299, 299), 4e-5)


    def test_resunet(self):
        import BrainMaGe
        from BrainMaGe.models.networks import fetch_model

        weights = Path(BrainMaGe.__file__).parent / 'weights' / 'resunet_ma.pt'
        pt_model = fetch_model(modelname="resunet", num_channels=1, num_classes=2, num_filters=16)
        checkpoint = torch.load(weights, map_location=torch.device('cpu'))
        pt_model.load_state_dict(checkpoint["model_state_dict"])
        pt_model.eval()

        # Get reference output
        inp = torch.randn([1, 1, 128, 128, 128])
        ref = pt_model(inp).detach().numpy()

        # Perform multiple runs with other inputs to make sure that the InstanceNorm layer does not get stuck
        for _ in range(2):
            dummy_inp = torch.randn(inp.shape)
            pt_model(dummy_inp)

        # Generate OpenVINO IR
        mo_pytorch.convert(pt_model, input_shape=list(inp.shape), model_name='model')

        # Run model with OpenVINO and compare outputs
        net = self.ie.read_network('model.xml', 'model.bin')
        exec_net = self.ie.load_network(net, 'CPU')
        out = exec_net.infer({'input': inp.detach().numpy()})
        out = next(iter(out.values()))

        diff = np.max(np.abs(out - ref))
        self.assertLessEqual(diff, 5e-4)
class PersonDetect:
    '''
    Class for the Person Detection Model.
    '''

    def __init__(self, model_name, device, threshold=0.60):
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold
        self.exec_network = None

        try:
            self.core = IECore()
            self.model = self.core.read_network(model=self.model_structure, weights=self.model_weights)
        except Exception:
            raise ValueError("Could not initialise the network. Have you entered the correct model path?")

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape
    
    # I took this code from the exercises given to us by Michael Virgo in Course 1.
    def load_model(self):
        self.exec_network = self.core.load_network(network=self.model, device_name=self.device)
        return self.exec_network
    
    # I took this code from the exercises given to us by Roumaissaa Madoui.
    def predict(self, image):
        p_frame = self.preprocess_input(image)
        self.exec_network.start_async(request_id=0, inputs={self.input_name: p_frame})
        if self.exec_network.requests[0].wait(-1) == 0:
            outputs = self.exec_network.requests[0].outputs[self.output_name]
            coords = self.preprocess_outputs(outputs)
            scaled_coords, image = self.draw_outputs(coords, image)

        return scaled_coords, image

    # I took this code from the exercises given to us by Michael Virgo in Course 1.
    def draw_outputs(self, coords, image):
        scaled_coords = []
        for coord in coords: # Output shape is 1x1x100x7
            xmin = int(coord[3] * image.shape[1])
            ymin = int(coord[4] * image.shape[0])
            xmax = int(coord[5] * image.shape[1])
            ymax = int(coord[6] * image.shape[0])
            
            scaled_coords.append([xmin, ymin, xmax, ymax])
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 3)
        
        return scaled_coords, image
    
    # I took this code from the exercises given to us by Michael Virgo in Course 1.
    def preprocess_outputs(self, outputs):
        coords = []
        for box in outputs[0][0]:
            conf = box[2]
            class_id = int(box[1])
            if conf >= self.threshold and class_id == 1:
                coords.append(box)
                
        return coords
    
    # I took this code from the exercises given to us by Michael Virgo in Course 1.
    def preprocess_input(self, image):
        p_frame = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        p_frame = p_frame.transpose((2,0,1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        
        return p_frame
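
A minimal usage sketch for PersonDetect (a sketch only; assumes cv2 and IECore are imported as in the snippet, and 'person-detection-retail-0013' and 'street.jpg' are placeholders):

pd_model = PersonDetect('person-detection-retail-0013', device='CPU', threshold=0.60)
pd_model.load_model()
frame = cv2.imread('street.jpg')  # placeholder input image
boxes, annotated = pd_model.predict(frame)
print('detected {} person(s)'.format(len(boxes)))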
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    log.info("Loading Inference Engine")
    ie = IECore()

    # --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
    model_xml = args.model
    model_name = splitext(basename(model_xml))[0]
    model_bin = splitext(model_xml)[0] + '.bin'
    log.info("Loading network files:\n\t{}\n".format(model_xml))
    net = ie.read_network(model=model_xml, weights=model_bin)
    # -----------------------------------------------------------------------------------------------------
    # ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
    log.info("Device info:")
    device = 'CPU'
    versions = ie.get_versions(device)
    print("{}{}".format(" " * 8, device))
    print("{}MKLDNNPlugin version ......... {}.{}".format(
        " " * 8, versions[device].major, versions[device].minor))
    print("{}Build ........... {}".format(" " * 8,
                                          versions[device].build_number))

    supported_layers = ie.query_network(net, "CPU")
    not_supported_layers = [
        l for l in net.layers.keys() if l not in supported_layers
    ]
    if len(not_supported_layers) != 0:
        log.error(
            "Following layers are not supported by the plugin for specified device {}:\n {}"
            .format(device, ', '.join(not_supported_layers)))
        log.error(
            "Please try to specify cpu extensions library path in sample's command line parameters using -l "
            "or --cpu_extension command line argument")
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------

    infos = [*net.input_info]
    print("inputs number: " + str(len(infos)))
    print("input shape: " + str(net.input_info[infos[0]].input_data.shape))
    print("input key: " + infos[0])
    n, c, h, w = net.input_info[infos[0]].input_data.shape

    log.info("Preparing input blobs")

    out_blob = next(iter(net.outputs))
    input_name = infos[0]
    log.info("Batch size is {}".format(net.batch_size))
    net.input_info[infos[0]].precision = 'U8'

    log.info('Preparing output blobs')

    output_name, output_info = "", net.outputs[next(iter(net.outputs.keys()))]
    for output_key in net.outputs:
        if net.layers[output_key].type == "DetectionOutput":
            output_name, output_info = output_key, net.outputs[output_key]

    if output_name == "":
        log.error("Can't find a DetectionOutput layer in the topology")

    output_dims = output_info.shape
    if len(output_dims) != 4:
        log.error("Incorrect output dimensions for SSD model")
    max_proposal_count, object_size = output_dims[2], output_dims[3]

    if object_size != 7:
        log.error("Output item should have 7 as a last dimension")

    output_info.precision = "FP32"

    log.info("Loading model to the device")
    exec_net = ie.load_network(network=net, device_name=device)
    log.info("Creating infer request and starting inference")

    # -----------------------------------------------------------------------------------------------------

    if args.video is not None and args.input is not None:
        raise RuntimeError('Either use video or images input')
    if args.video is None and args.input is None:
        raise RuntimeError('Need an input: video or images')
    has_video = args.video is not None

    log.info('Processing and ' + ('sav' if args.save else 'show') + 'ing ' +
             ('video' if has_video else 'images'))
    if args.save:
        if has_video:
            outdir = dirname(args.video) + sep + 'results' + sep
        else:
            outdir = args.input + sep + 'results' + sep
        makedirs(outdir, exist_ok=True)

    if has_video:
        cap = cv2.VideoCapture(args.video)
        fps = cap.get(cv2.CAP_PROP_FPS)
        video_length = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        ret, image = cap.read()  # first frame just to read size (...)
        size = (image.shape[1], image.shape[0])
        if args.save:
            writer = cv2.VideoWriter(outdir + basename(args.video),
                                     cv2.VideoWriter_fourcc(*'mp4v'), fps,
                                     size)
    else:
        filenames = sorted(glob.glob(args.input + '/*.jpg'), key=getmtime)

    network_ratio = w / h
    count = -1
    x1, y1, ws, hs, paths, objects, probas = [], [], [], [], [], [], []

    while True:
        count += 1
        if has_video:
            ret, image = cap.read()
            name = f'frame{count}'
            if count % 100 == 0:
                print('Progress: %.2f%%' % (100.0 * count / video_length),
                      end='\r',
                      flush=True)
                if count % 1000 == 0:
                    log.info('Progress: %.2f%%' %
                             (100.0 * count / video_length))
            if not ret:
                break
        else:
            if count == len(filenames):
                break
            name = filenames[count]
            image = cv2.imread(name)

        output = []
        ih, iw = image.shape[:-1]
        input_ratio = iw / ih
        if input_ratio < network_ratio:
            new_h = int(floor(w / input_ratio))
            new_w = w
            scale_ratio = iw / w
            off_h = int(floor((new_h - h) / 2))
            off_w = 0
        else:
            new_h = h
            new_w = int(floor(h * input_ratio))
            scale_ratio = ih / h
            off_h = 0
            off_w = int(floor((new_w - w) / 2))

        crop = cv2.resize(image, (new_w, new_h))
        crop = crop[off_h:off_h + h, off_w:off_w + w, :]
        images_hw = crop.shape[:-1]

        data = {
            input_name: crop.transpose((2, 0, 1))
        }  # Change data layout from HWC to CHW
        res = exec_net.infer(inputs=data)
        res = res[out_blob][0][0]
        for number, proposal in enumerate(res):
            if proposal[2] > 0:
                ih, iw = images_hw
                label = int(proposal[1])  # np.int was removed in recent NumPy; use the built-in int
                confidence = proposal[2]
                xmin = int(scale_ratio * (off_w + iw * proposal[3]))
                ymin = int(scale_ratio * (off_h + ih * proposal[4]))
                xmax = int(scale_ratio * (off_w + iw * proposal[5]))
                ymax = int(scale_ratio * (off_h + ih * proposal[6]))
                if confidence > args.confidence:
                    output.append((xmin, ymin, xmax, ymax, label,
                                   basename(name), confidence))
                    """if not args.save:
                        print("[{},{}] element, prob = {:.6}    ({},{})-({},{})" \
                              .format(number, label, confidence, xmin, ymin, xmax, ymax))"""

        for box in output:
            # box = (xmin, ymin, xmax, ymax, label, filename, confidence); the label is at index 4
            if box[4] == 1:
                cl = (255, 0, 0)
            else:
                cl = (0, 0, 255)

            x, y, width, height = box[0], box[1], box[2] - box[0], box[3] - box[1]
            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), cl, 2)

            multiple_append([x1, y1, ws, hs, objects, paths, probas],
                            [x, y, width, height, box[4], box[5], box[6]])

        if args.save:
            if has_video:
                writer.write(image)
            else:
                base = basename(name)
                log.info(f'Write to {outdir + base}')
                cv2.imwrite(outdir + 'detection_' + model_name + '_' + base,
                            image)
        else:
            if not args.hide:
                cv2.imshow('result', image)
                cv2.waitKey(0)

    d = {
        'file': paths,
        'object': objects,
        'x1': x1,
        'y1': y1,
        'w': ws,
        'h': hs,
        'p': probas
    }
    tab = pd.DataFrame(data=d)
    tab.to_csv(args.tab_base, index_label='#')

    # -----------------------------------------------------------------------------------------------------
    if has_video:
        cap.release()
        if args.save:
            writer.release()
    log.info("Execution successful\n")
class Model_Head_Pose_Estimation:
    '''
    Class for the Head Pose Estimation Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = model_name
        self.model_weights = self.model_name.split('.')[0] + '.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_name = None
        self.output_shape = None

    def load_model(self):
        '''
        TODO: You will need to complete this method
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''

        # Initialize the plugin
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure,
                                                weights=self.model_weights)

        # Check for supported layers
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        unsupported_layers = [
            l for l in self.network.layers.keys() if l not in supported_layers
        ]
        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("Unsupported layers found: {}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding cpu_extension")
                self.plugin.add_extension(self.extensions, self.device)
                supported_layers = self.plugin.query_network(
                    network=self.network, device_name=self.device)
                unsupported_layers = [
                    l for l in self.network.layers.keys()
                    if l not in supported_layers
                ]
                if len(unsupported_layers) != 0:
                    print("Issue remains even after adding extensions")
                    exit(1)
                print("Issue resolved after adding extensions")
            else:
                print("Provide the path to a cpu extension")
                exit(1)

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_name = next(iter(self.network.outputs))
        self.output_shape = self.network.outputs[self.output_name].shape

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        img_processed = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: img_processed})
        final = self.preprocess_output(outputs)
        return final

    def check_model(self):
        pass

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        img_resized = cv2.resize(image,
                                 (self.input_shape[3], self.input_shape[2]))
        img_processed = np.transpose(np.expand_dims(img_resized, axis=0),
                                     (0, 3, 1, 2))
        return img_processed

    def preprocess_output(self, outputs):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        outs = []
        outs.append(outputs['angle_y_fc'].tolist()[0][0])
        outs.append(outputs['angle_p_fc'].tolist()[0][0])
        outs.append(outputs['angle_r_fc'].tolist()[0][0])
        return outs
def main():
    # arguments
    parser = ArgumentParser()

    parser.add_argument(
        "-m", "--model", help="Required. Path to an .xml file with a trained model", required=True, type=str)
    parser.add_argument(
        "-i", "--input", help="Required. Path to an input image file", required=True, type=str)
    parser.add_argument("-l", "--cpu_extension",
        help="Optional. Required for CPU custom layers. Absolute path to a shared library with the "
        "kernel implementations for MKLDNN (CPU)-targeted custom layers", type=str, default=None)
    parser.add_argument("-d", "--device",
        help="Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. "
        "Sample will look for a suitable plugin for device specified. Default value is CPU", default="CPU", type=str)

    args = parser.parse_args()

    # logging
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)

    log.info("creating inference engine")
    ie = IECore()
    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    log.info("Loading network")
    net = ie.read_network(args.model, os.path.splitext(args.model)[0] + ".bin")

    assert len(net.input_info) == 1, "Sample supports only single input topologies"
    assert len(net.outputs) == 1, "Sample supports only single output topologies"

    log.info("preparing input blobs")
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))
    net.batch_size = 1

    # read and pre-process input image
    _, _, height, width = net.input_info[input_blob].input_data.shape

    image = cv2.imread(args.input, cv2.IMREAD_COLOR)
    (input_height, input_width) = image.shape[:-1]

    # resize
    if (input_height, input_width) != (height, width):
        log.info("Image is resized from {} to {}".format(
            image.shape[:-1], (height, width)))
        image = cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC)

    # prepare input
    image = image.astype(np.float32)
    image = image.transpose((2, 0, 1))
    image_input = np.expand_dims(image, 0)

    # loading model to the plugin
    log.info("loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    # start sync inference
    log.info("starting inference")
    res = exec_net.infer(inputs={input_blob: image_input})

    # processing output blob
    log.info("processing output blob")
    disp = np.squeeze(res[out_blob][0])

    # resize disp to input resolution
    disp = cv2.resize(disp, (input_width, input_height), interpolation=cv2.INTER_CUBIC)

    # rescale disp
    disp_min = disp.min()
    disp_max = disp.max()

    if disp_max - disp_min > 1e-6:
        disp = (disp - disp_min) / (disp_max - disp_min)
    else:
        disp.fill(0.5)

    # pfm
    out = 'disp.pfm'
    cv2.imwrite(out, disp)

    log.info("Disparity map was saved to {}".format(out))

    # png
    out = 'disp.png'
    plt.imsave(out, disp, vmin=0, vmax=1, cmap='inferno')

    log.info("Color-coded disparity image was saved to {}".format(out))

    log.info("This demo is an API example, for any performance measurements please use "
             "the dedicated benchmark_app tool from the openVINO toolkit\n")
Example #12
class FaceDetection:
    '''
    Class for the Face Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name  # model xml file
        self.model_weights = self.model_name.split('.')[0] + '.bin'  # derive the binary file path from the xml file path
        ## load the IE Engine API plugin (Inference Engine entity)
        self.plugin = IECore()
        ## check that the model can be read without problems
        self.check_model(self.model_structure, self.model_weights)
        self.exec_net = None
        ## Get the input layer, iterate through the inputs here
        self.input_name = next(iter(self.network.inputs))
        ## Return the shape of the input layer
        self.input_shape = self.network.inputs[self.input_name].shape
        ## Get the output layer
        self.output_names = next(iter(self.network.outputs))
        ## Return the shape of the output layer
        self.output_shape = self.network.outputs[self.output_names].shape
        

## check supported layers and performance counts; reference:
# https://gist.github.com/justinshenk/9917891c0433f33967f6e8cd8fcaa49a
    def load_model(self):
        '''        
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        ## Queries the plugin with specified device name what network layers are supported in the current configuration.
        ## get the supported layers of the network
        supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
        ## check unsupported layer
        layers_unsupported = [ul for ul in self.network.layers.keys() if ul not in supported_layers]

        ## condition: unsupported layers found and device is CPU
        if len(layers_unsupported) != 0 and self.device == 'CPU':
            print('unsupported layers found: {}'.format(layers_unsupported))
            ## extension path was provided
            if self.extensions is not None:
                print("Adding cpu_extension now")
                ## Loads extension library to the plugin with a specified device name.
                self.plugin.add_extension(self.extensions, self.device)
                ## update the supported and unsupported layers
                supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
                layers_unsupported = [ul for ul in self.network.layers.keys() if ul not in supported_layers]
                ## if unsupported layers still exist, exit
                if len(layers_unsupported) != 0:
                    print("Unsupported layers remain after adding the extension on device {}:\n{}".format(self.device, ', '.join(layers_unsupported)))
                    print("Please try to specify the cpu extensions library path in the sample's command line parameters using -l "
                      "or --cpu_extension")
                    exit(1)
                print("Problem is resolved after adding the extension!")
            ## no extension provided: exit
            else:
                print("Please give the right path of the cpu extension!")
                exit(1)
        ## Loads a network that was read from the Intermediate Representation (IR) to the plugin with specified device
        ## load the network into the inference engine
        self.exec_net = self.plugin.load_network(network=self.network, device_name=self.device, num_requests=1)
        

    def predict(self, image, prob_threshold, perf_flag):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        ## 1. preprocess the image
        processed_input = self.preprocess_input(image.copy())
        ## 2. run synchronous inference on the executable network;
        ## infer() returns a dict mapping output layer names to result arrays
        outputs = self.exec_net.infer({self.input_name: processed_input})
        # print(outputs)
        
        if perf_flag:
            self.performance()

        ## 3. process the outputs
        coords = self.preprocess_output(outputs, prob_threshold)
        ## if no face was detected, return 0, 0
        if len(coords) == 0:
            return 0, 0
        ## keep only the first detected face
        coords = coords[0]
        h, w = image.shape[0], image.shape[1]
        ## print(coords, image.shape)        

        coords = coords * np.array([w, h, w, h])
        ## cast to int32 pixel coordinates
        coords = coords.astype(np.int32)
        ## (x_min, y_min) - coordinates of the top left bounding box corner
        ## (x_max, y_max) - coordinates of the bottom right bounding box corner.
        # print('top left, bottom right', coords)
        ## ymin:ymax, xmin:xmax --> height, width
        cropped_face = image[coords[1]:coords[3], coords[0]:coords[2]]
        # print(cropped_face.shape)

        # cv2.rectangle(image, (coords[0], coords[1]), (coords[2], coords[3]), (255,0,0), 2) 
        # cv2.imshow('detected face', cv2.resize(image, (600, 500)))

        return cropped_face, coords


    def check_model(self, model_structure, model_weights):
        try:
            ## Read the network from the IR files into an IENetwork:
            ## architecture from the XML file, weights from the binary file
            self.network = self.plugin.read_network(model=model_structure, weights=model_weights)
        except Exception:
            raise ValueError("Error occurred during face_detection network initialization.")


## performance-counts reference:
# https://gist.github.com/justinshenk/9917891c0433f33967f6e8cd8fcaa49a
    def performance(self):
        perf_counts = self.exec_net.requests[0].get_perf_counts()
        # print('\n', perf_counts)
        print("## Face detection model performance:")
        print("{:<70} {:<15} {:<15} {:<15} {:<10}".format('name', 'layer_type', 'exet_type', 'status', 'real_time, us'))

        for layer, stats in perf_counts.items():            
            print("{:<70} {:<15} {:<15} {:<15} {:<10}".format(layer, stats['layer_type'], stats['exec_type'], 
                                                              stats['status'], stats['real_time']))

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        Given an input image, height and width:
        '''
        ## - Resize to the model's (H, W); note cv2.resize expects (W, H), the opposite order
        # print(image.shape)
        # print(self.input_shape) # [1, 3, 384, 672]
        H, W = self.input_shape[2], self.input_shape[3]
        # print(H, W) # (384, 672)

        image_resized = cv2.resize(image, (W, H))
        # print(image_resized.shape) # (384, 672, 3)
        ## - Transpose so the channel dimension comes first (HWC -> CHW)
        ## - Reshape the image to add a "batch" of 1 at the start
        ## (equivalent one-liner)
        # image_processed = np.transpose(np.expand_dims(image_resized, axis=0), (0,3,1,2))
        ## BxCxHxW
        image = image_resized.transpose((2,0,1))
        # print(image.shape) # (3, 384, 672)
        ## add a batch dimension of 1 at the front: BxCxHxW
        image_processed = image.reshape(1, 3, H, W)
        # print(image_processed.shape) # (1, 3, 384, 672)

        return image_processed


    def preprocess_output(self, outputs, prob_threshold):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''

        coords = []
        # print(self.input_name)
        # print(self.output_names)
        # print(outputs[self.output_names].shape) # (1, 1, 200, 7)
        # print(outputs[self.output_names][0][0])
        outs = outputs[self.output_names][0][0]  # detections: [image_id, label, conf, x_min, y_min, x_max, y_max]
        for out in outs:
            # print(out)
            confidence = out[2]
            if confidence > prob_threshold:
                x_min = out[3]
                y_min = out[4]
                x_max = out[5]
                y_max = out[6]
                coords.append([x_min, y_min, x_max, y_max])
        return coords
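A hedged usage sketch for the FaceDetection class above; the model path, image file, and threshold below are placeholders, not part of the original example:

import cv2

fd = FaceDetection('models/face-detection-adas-0001.xml', device='CPU')
fd.load_model()

frame = cv2.imread('frame.jpg')  # hypothetical test image
face, box = fd.predict(frame, prob_threshold=0.6, perf_flag=False)
if isinstance(face, int):  # predict() returns 0, 0 when nothing is detected
    print('No face detected')
else:
    print('Face box (x_min, y_min, x_max, y_max):', box)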
Example #13
class Base(ABC):
    """Model Base Class"""
    def __init__(
        self,
        model_name,
        source_width=None,
        source_height=None,
        device="CPU",
        threshold=0.60,
        extensions=None,
    ):
        self.model_weights = f"{model_name}.bin"
        self.model_structure = f"{model_name}.xml"
        assert (Path(self.model_weights).absolute().exists()
                and Path(self.model_structure).absolute().exists())

        self.device = device
        self.threshold = threshold
        self._model_size = os.stat(self.model_weights).st_size / 1024.0**2

        self._ie_core = IECore()
        self.model = self._get_model()

        # Get the input layer
        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape
        self._init_image_w = source_width
        self._init_image_h = source_height
        self.exec_network = None
        self.perf_stats = {}
        self.load_model()

    def _get_model(self):
        """Helper function for reading the network."""
        try:
            try:
                model = self._ie_core.read_network(model=self.model_structure,
                                                   weights=self.model_weights)
            except AttributeError:
                logger.warning(
                    "Using an old version of OpenVINO, consider updating it!")
                model = IENetwork(model=self.model_structure,
                                  weights=self.model_weights)
        except Exception:
            raise ValueError("Could not Initialise the network. "
                             "Have you entered the correct model path?")
        else:
            return model

    def load_model(self):
        """Load the model into the plugin"""
        if self.exec_network is None:
            start_time = time.time()
            self.exec_network = self._ie_core.load_network(
                network=self.model, device_name=self.device)
            self._model_load_time = (time.time() - start_time) * 1000
            logger.info(
                f"Model: {self.model_structure} took {self._model_load_time:.3f} ms to load."
            )

    def predict(self, image, request_id=0, show_bbox=False, **kwargs):
        if not isinstance(image, np.ndarray):
            raise IOError("Image not parsed correctly.")

        p_image = self.preprocess_input(image, **kwargs)
        predict_start_time = time.time()
        self.exec_network.start_async(request_id=request_id,
                                      inputs={self.input_name: p_image})
        status = self.exec_network.requests[request_id].wait(-1)
        if status == 0:
            pred_result = []
            for output_name, data_ptr in self.model.outputs.items():
                pred_result.append(self.exec_network.requests[request_id].
                                   outputs[output_name])
            self.perf_stats[output_name] = self.exec_network.requests[
                request_id].get_perf_counts()  # keyed by the last output layer name
            predict_end_time = float(time.time() - predict_start_time) * 1000
            bbox, _ = self.preprocess_output(pred_result,
                                             image,
                                             show_bbox=show_bbox)
            return (predict_end_time, bbox)

    @abstractmethod
    def preprocess_output(self,
                          inference_results,
                          image,
                          show_bbox=False,
                          **kwargs):
        """Draw bounding boxes onto the frame."""
        raise NotImplementedError("Please Implement this method")

    @staticmethod
    @abstractmethod
    def draw_output(image):
        raise NotImplementedError("Please Implement this method")

    @staticmethod
    def plot_frame(image):
        """Helper function for finding image coordinates/px"""
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.show()

    def add_text(self,
                 text,
                 image,
                 position,
                 font_size=0.75,
                 color=(255, 255, 255)):
        cv2.putText(
            image,
            text,
            position,
            cv2.FONT_HERSHEY_COMPLEX,
            font_size,
            color,
            1,
        )

    def preprocess_input(self, image, height=None, width=None):
        """Helper function for processing frame"""
        if height is None or width is None:
            height, width = self.input_shape[2:]
        p_frame = cv2.resize(image, (width, height))
        # Change data layout from HWC to CHW
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        return p_frame
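Base cannot be instantiated directly because of its abstract methods. A minimal hypothetical subclass sketch, assuming the common 1x1xNx7 SSD DetectionOutput layout (which the base class itself does not guarantee):

class SSDFaceDetector(Base):
    """Hypothetical concrete detector assuming a 1x1xNx7 output blob."""

    def preprocess_output(self, inference_results, image, show_bbox=False, **kwargs):
        boxes = []
        h, w = image.shape[:2]
        # each row: [image_id, label, conf, x_min, y_min, x_max, y_max]
        for det in inference_results[0][0][0]:
            if det[2] > self.threshold:
                x_min, y_min, x_max, y_max = (det[3:7] * [w, h, w, h]).astype(int)
                boxes.append((x_min, y_min, x_max, y_max))
        if show_bbox and boxes:
            self.draw_output(image)
        return boxes, image

    @staticmethod
    def draw_output(image):
        pass  # drawing is omitted from this sketch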
Example #14
class Model_Pose:
    '''
    Class for the Head Pose Estimation Model.
    '''
    def __init__(self,
                 model_name,
                 device='CPU',
                 extensions=None,
                 threshold=0.6):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.device = device
        self.threshold = threshold
        self.core = IECore()
        self.network = self.core.read_network(
            model=str(model_name),
            weights=str(os.path.splitext(model_name)[0] + ".bin"))

        self.input = next(iter(self.network.inputs))
        self.output = next(iter(self.network.outputs))

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.exec_network = self.core.load_network(self.network, self.device)
        return self.exec_network

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        self.preprocess_image = self.preprocess_input(image)
        self.results = self.exec_network.infer(
            inputs={self.input: self.preprocess_image})
        self.output_list = self.preprocess_output(self.results)
        return self.output_list

    def check_model(self):
        supported_layers = self.core.query_network(network=self.network,
                                                   device_name=self.device)
        unsupported_layers = [
            layer for layer in self.network.layers.keys()
            if layer not in supported_layers
        ]
        if len(unsupported_layers) > 0:
            print("Check extention of these unsupported layers =>" +
                  str(unsupported_layers))
            exit(1)
        print("All layers are supported")

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        image = image.astype(np.float32)
        net_input_shape = self.network.inputs[self.input].shape
        p_frame = cv2.resize(image, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose(2, 0, 1)
        p_frame = p_frame.reshape(1, *p_frame.shape)
        return p_frame

    def preprocess_output(self, outputs):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        yaw = outputs["angle_y_fc"][0, 0]
        pitch = outputs["angle_p_fc"][0, 0]
        roll = outputs["angle_r_fc"][0, 0]
        return [yaw, pitch, roll]
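A short usage sketch for Model_Pose; the model path and image file are placeholders. The three returned values are the head-pose angles (yaw, pitch, roll) in degrees:

import cv2

pose = Model_Pose('models/head-pose-estimation-adas-0001.xml')
pose.load_model()
pose.check_model()

face_crop = cv2.imread('face.jpg')  # assumed to be a pre-cropped face image
yaw, pitch, roll = pose.predict(face_crop)
print('yaw={:.1f} pitch={:.1f} roll={:.1f}'.format(yaw, pitch, roll))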
def test_query_network(device):
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    query_res = ie.query_network(net, device)
    assert net.layers.keys() == query_res.keys(), "Not all network layers present in query_network results"
    assert next(iter(set(query_res.values()))) == device, "Wrong device for some layers"
class Model_Facial_Landmarks_Detection:
    '''
    Class for the Facial Landmark Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = model_name
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_name = None
        self.output_shape = None

    def load_model(self):
        '''
        TODO: You will need to complete this method
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''

        # Initialize the plugin
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure,
                                                weights=self.model_weights)

        # Check for supported layers
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        unsupported_layers = [
            l for l in self.network.layers.keys() if l not in supported_layers
        ]
        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("Unsupported layers found: {}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding cpu_extension")
                self.plugin.add_extension(self.extensions, self.device)
                supported_layers = self.plugin.query_network(
                    network=self.network, device_name=self.device)
                unsupported_layers = [
                    l for l in self.network.layers.keys()
                    if l not in supported_layers
                ]
                if len(unsupported_layers) != 0:
                    print("Issue still exists after adding the extension")
                    exit(1)
                print("Issue resolved after adding the extension")
            else:
                print("Provide the path to the CPU extension")
                exit(1)

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_name = next(iter(self.network.outputs))
        self.output_shape = self.network.outputs[self.output_name].shape

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        img_processed = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: img_processed})
        coords = self.preprocess_output(outputs)

        h = image.shape[0]
        w = image.shape[1]

        coords = coords * np.array([w, h, w, h])
        coords = coords.astype(np.int32)

        # crop a fixed 20x20 px window (±10 px) around each eye center
        le_xmin = coords[0] - 10
        le_ymin = coords[1] - 10
        le_xmax = coords[0] + 10
        le_ymax = coords[1] + 10

        re_xmin = coords[2] - 10
        re_ymin = coords[3] - 10
        re_xmax = coords[2] + 10
        re_ymax = coords[3] + 10

        left_eye = image[le_ymin:le_ymax, le_xmin:le_xmax]
        right_eye = image[re_ymin:re_ymax, re_xmin:re_xmax]

        eye_coords = [[le_xmin, le_ymin, le_xmax, le_ymax],
                      [re_xmin, re_ymin, re_xmax, re_ymax]]
        return left_eye, right_eye, eye_coords

    def check_model(self):
        pass

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        img_cvt = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        img_resized = cv2.resize(img_cvt,
                                 (self.input_shape[3], self.input_shape[2]))
        img_processed = np.transpose(np.expand_dims(img_resized, axis=0),
                                     (0, 3, 1, 2))
        return img_processed

    def preprocess_output(self, outputs):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        outs = outputs[self.output_name][0]
        lefteye_x = outs[0].tolist()[0][0]
        lefteye_y = outs[1].tolist()[0][0]
        righteye_x = outs[2].tolist()[0][0]
        righteye_y = outs[3].tolist()[0][0]

        return (lefteye_x, lefteye_y, righteye_x, righteye_y)
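A usage sketch chaining a face crop into the landmark model; the model path and image file are placeholders:

import cv2

lm = Model_Facial_Landmarks_Detection('models/landmarks-regression-retail-0009.xml')
lm.load_model()

face_crop = cv2.imread('face.jpg')  # assumed face crop from a detector
left_eye, right_eye, eye_coords = lm.predict(face_crop)
print('left eye box:', eye_coords[0], 'right eye box:', eye_coords[1])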
def test_register_plugin():
    ie = IECore()
    ie.register_plugin("MKLDNNPlugin", "BLA")
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    exec_net = ie.load_network(net, "BLA")
    assert isinstance(exec_net, ExecutableNetwork), "Cannot load the network to the registered plugin with name 'BLA'"
def test_read_network_from_xml():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    assert isinstance(net, IENetwork)
Example #19
def main():
    args = build_argparser().parse_args()

    logging.basicConfig(format="[ %(levelname)s ] %(message)s",
                        level=logging.INFO,
                        stream=sys.stdout)
    log = logging.getLogger()

    log.info("Creating Inference Engine")
    ie = IECore()

    if args.device == "CPU" and args.cpu_extension:
        ie.add_extension(args.cpu_extension, 'CPU')

    log.info("Loading model {}".format(args.model))
    net = ie.read_network(args.model, args.model[:-4] + ".bin")

    if len(net.input_info) != 1:
        log.error("Demo supports only models with 1 input layer")
        sys.exit(1)
    input_blob = next(iter(net.input_info))
    input_shape = net.input_info[input_blob].input_data.shape
    if len(net.outputs) != 1:
        log.error("Demo supports only models with 1 output layer")
        sys.exit(1)
    output_blob = next(iter(net.outputs))

    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    log.info("Preparing input")

    labels = []
    if args.labels:
        with open(args.labels, "r") as file:
            labels = [line.rstrip() for line in file.readlines()]

    batch_size, channels, one, length = input_shape
    if one != 1:
        raise RuntimeError(
            "Wrong third dimension size of model input shape - {} (expected 1)"
            .format(one))

    audio = AudioSource(args.input,
                        channels=channels,
                        samplerate=args.sample_rate)

    hop = length - args.overlap if isinstance(args.overlap, int) else int(
        length * (1.0 - args.overlap))
    if hop < 0:
        log.error(
            "Wrong value for '-ol/--overlap' argument - overlapping more than clip length"
        )
        sys.exit(1)

    log.info("Starting inference")
    outputs = []
    clips = 0
    infer_time = 0
    for idx, chunk in enumerate(
            audio.chunks(length, hop, num_chunks=batch_size)):
        chunk.shape = input_shape
        infer_start_time = time.perf_counter()
        output = exec_net.infer(inputs={input_blob: chunk})
        infer_time += time.perf_counter() - infer_start_time
        clips += batch_size
        output = output[output_blob]
        for batch, data in enumerate(output):
            start_time = (idx * batch_size + batch) * hop / audio.samplerate
            end_time = (
                (idx * batch_size + batch) * hop + length) / audio.samplerate
            outputs.append(data)
            label = np.argmax(data)
            if start_time < audio.duration():
                log.info("[{:.2f}-{:.2f}] - {:6.2%} {:s}".format(
                    start_time, end_time, data[label],
                    labels[label] if labels else "Class {}".format(label)))

    logging.info("Average infer time - {:.1f} ms per clip".format(
        infer_time / clips * 1000))
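The overlap handling above accepts either an absolute sample count (int) or a fraction of the clip length (float). A small sketch of the same hop computation, with hypothetical numbers:

def clip_hop(length, overlap):
    """Hop between successive audio clips: 'overlap' is either an
    int (samples) or a float fraction of the clip length."""
    if isinstance(overlap, int):
        return length - overlap
    return int(length * (1.0 - overlap))

assert clip_hop(16000, 4000) == 12000  # 4000-sample overlap
assert clip_hop(16000, 0.25) == 12000  # 25% overlap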
def test_read_network_as_path():
    ie = IECore()
    net = ie.read_network(model=Path(model_path()[0]), weights=Path(test_net_bin))
    assert isinstance(net, IENetwork)
Example #21
class Face_Detect_Model:
    '''
    Class for the Face Detection Model.
    '''
    def __init__(self, model_name, device='CPU', threshold=0.5):
        '''
        Initialize instance variables and load the model with the supplied command-line arguments.
        '''

        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold

        try:
            self.core = IECore()
            self.model = self.core.read_network(model=self.model_structure,
                                                weights=self.model_weights)
        except Exception as e:
            raise ValueError("Could not Initialise the network.")

        self.input_name = next(iter(self.model.input_info))
        self.input_shape = self.model.input_info[
            self.input_name].input_data.shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

        # Load the model
        self.load_model()

    def load_model(self):
        '''
        Load the already read model with the specified device type.
        '''

        self.net = self.core.load_network(network=self.model,
                                          device_name=self.device,
                                          num_requests=1)

    def predict(self, image):
        '''
        Modularize the whole process input / make inference / process output cycle.
        '''

        # Preprocess the input, run the net, and return the face coordinates
        proc_img = self.preprocess_input(image)
        input_dict = {self.input_name: proc_img}
        out = self.net.infer(input_dict)[self.output_name]
        x_min, y_min, x_max, y_max = self.preprocess_output(out, image.shape)

        return x_min, y_min, x_max, y_max

    def preprocess_input(self, image):
        '''
        Preprocess network input so that we can run the network correctly.
        '''

        # Remember that the resize function takes the width first
        proc_frame = cv2.resize(image,
                                (self.input_shape[3], self.input_shape[2]))
        proc_frame = np.transpose(proc_frame, (2, 0, 1))
        proc_frame = proc_frame[np.newaxis, :]

        return proc_frame

    def preprocess_output(self, outputs, orig_input_shape):
        '''
        Get the detection with the highest confidence and, if its confidence exceeds
        the user-specified CLI threshold, output its bounding box in the original image coordinate space.
        '''

        # Get width and height of original image
        orig_height = orig_input_shape[0]
        orig_width = orig_input_shape[1]

        # Get the detection with the highest confidence
        best_detection_id = np.argmax(outputs[0, 0, :, 2])
        best_detection = outputs[0, 0, best_detection_id]

        # If detection is over our confidence threshold
        if best_detection[2] > self.threshold:
            x_min, y_min, x_max, y_max = best_detection[3:]

            # Transform detection coordinates to the original image input space
            x_min = int(x_min * orig_width)
            x_max = int(x_max * orig_width)
            y_min = int(y_min * orig_height)
            y_max = int(y_max * orig_height)

            return x_min, y_min, x_max, y_max

        else:
            return None, None, None, None
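A usage sketch for Face_Detect_Model; the model path and image are placeholders. Note that this class loads the network in its constructor, so no separate load_model() call is needed:

import cv2

detector = Face_Detect_Model('models/face-detection-adas-0001', threshold=0.5)
frame = cv2.imread('frame.jpg')  # hypothetical test image
x_min, y_min, x_max, y_max = detector.predict(frame)
if x_min is None:
    print('No face above the confidence threshold')
else:
    face = frame[y_min:y_max, x_min:x_max]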
def test_incorrect_xml():
    ie = IECore()
    with pytest.raises(Exception) as e:
        ie.read_network(model="./model.xml", weights=Path(test_net_bin))
    assert "Path to the model ./model.xml doesn't exist or it's a directory" in str(e.value)
Example #23
def layer_out_data():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    return net.layers['19/Fused_Add_'].out_data[0]
def test_incorrect_bin():
    ie = IECore()
    with pytest.raises(Exception) as e:
        ie.read_network(model=test_net_xml, weights="./model.bin")
    assert "Path to the weights ./model.bin doesn't exist or it's a directory" in str(e.value)
Example #25
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    log.info("Loading Inference Engine")
    ie = IECore()
    # --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = ie.read_network(model=model_xml, weights=model_bin)
    # -----------------------------------------------------------------------------------------------------

    # ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
    log.info("Device info:")
    versions = ie.get_versions(args.device)
    print("{}{}".format(" " * 8, args.device))
    print("{}MKLDNNPlugin version ......... {}.{}".format(
        " " * 8, versions[args.device].major, versions[args.device].minor))
    print("{}Build ........... {}".format(" " * 8,
                                          versions[args.device].build_number))

    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
        log.info("CPU extension loaded: {}".format(args.cpu_extension))

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- 3. Read and preprocess input --------------------------------------------

    print("inputs number: " + str(len(net.input_info.keys())))

    for input_key in net.input_info:
        print("input shape: " +
              str(net.input_info[input_key].input_data.shape))
        print("input key: " + input_key)
        if len(net.input_info[input_key].input_data.layout) == 4:
            n, c, h, w = net.input_info[input_key].input_data.shape

    images = np.ndarray(shape=(n, c, h, w))
    images_hw = []
    for i in range(n):
        image = cv2.imread(args.input[i])
        ih, iw = image.shape[:-1]
        images_hw.append((ih, iw))
        log.info("File was added: ")
        log.info("        {}".format(args.input[i]))
        if (ih, iw) != (h, w):
            log.warning("Image {} is resized from {} to {}".format(
                args.input[i], image.shape[:-1], (h, w)))
            image = cv2.resize(image, (w, h))
        image = image.transpose(
            (2, 0, 1))  # Change data layout from HWC to CHW
        images[i] = image

    # -----------------------------------------------------------------------------------------------------

    # --------------------------- 4. Configure input & output ---------------------------------------------
    # --------------------------- Prepare input blobs -----------------------------------------------------
    log.info("Preparing input blobs")
    assert (len(net.input_info.keys()) == 1 or len(net.input_info.keys())
            == 2), "Sample supports topologies only with 1 or 2 inputs"
    out_blob = next(iter(net.outputs))
    input_name, input_info_name = "", ""

    for input_key in net.input_info:
        if len(net.input_info[input_key].layout) == 4:
            input_name = input_key
            log.info("Batch size is {}".format(net.batch_size))
            net.input_info[input_key].precision = 'U8'
        elif len(net.input_info[input_key].layout) == 2:
            input_info_name = input_key
            net.input_info[input_key].precision = 'FP32'
            shape = net.input_info[input_key].input_data.shape
            if (shape[1] != 3 and shape[1] != 6) or shape[0] != 1:
                log.error('Invalid input info: expected shape [1, 3] or [1, 6]')

    data = {}
    data[input_name] = images

    if input_info_name != "":
        infos = np.ndarray(shape=(n, c), dtype=float)
        for i in range(n):
            infos[i, 0] = h
            infos[i, 1] = w
            infos[i, 2] = 1.0
        data[input_info_name] = infos

    # --------------------------- Prepare output blobs ----------------------------------------------------
    log.info('Preparing output blobs')

    output_name, output_info = "", net.outputs[next(iter(net.outputs.keys()))]
    for output_key in net.outputs:
        if net.layers[output_key].type == "DetectionOutput":
            output_name, output_info = output_key, net.outputs[output_key]

    if output_name == "":
        log.error("Can't find a DetectionOutput layer in the topology")

    output_dims = output_info.shape
    if len(output_dims) != 4:
        log.error("Incorrect output dimensions for SSD model")
    max_proposal_count, object_size = output_dims[2], output_dims[3]

    if object_size != 7:
        log.error("Output item should have 7 as a last dimension")

    output_info.precision = "FP32"
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- Performing inference ----------------------------------------------------
    log.info("Loading model to the device")
    exec_net = ie.load_network(network=net, device_name=args.device)
    log.info("Creating infer request and starting inference")
    res = exec_net.infer(inputs=data)
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- Read and postprocess output ---------------------------------------------
    log.info("Processing output blobs")
    res = res[out_blob]
    boxes, classes = {}, {}
    data = res[0][0]
    for number, proposal in enumerate(data):
        if proposal[2] > 0:
            imid = int(proposal[0])  # np.int is deprecated; use the builtin int
            ih, iw = images_hw[imid]
            label = int(proposal[1])
            confidence = proposal[2]
            xmin = int(iw * proposal[3])
            ymin = int(ih * proposal[4])
            xmax = int(iw * proposal[5])
            ymax = int(ih * proposal[6])
            print("[{},{}] element, prob = {:.6}    ({},{})-({},{}) batch id : {}" \
                  .format(number, label, confidence, xmin, ymin, xmax, ymax, imid), end="")
            if proposal[2] > 0.5:
                print(" WILL BE PRINTED!")
                if imid not in boxes:
                    boxes[imid] = []
                boxes[imid].append([xmin, ymin, xmax, ymax])
                if imid not in classes:
                    classes[imid] = []
                classes[imid].append(label)
            else:
                print()

    for imid in classes:
        tmp_image = cv2.imread(args.input[imid])
        for box in boxes[imid]:
            cv2.rectangle(tmp_image, (box[0], box[1]), (box[2], box[3]),
                          (232, 35, 244), 2)
        cv2.imwrite("out.bmp", tmp_image)
        log.info("Image out.bmp created!")
    # -----------------------------------------------------------------------------------------------------

    log.info("Execution successful\n")
    log.info(
        "This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool"
    )
def test_load_network(device):
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    exec_net = ie.load_network(net, device)
    assert isinstance(exec_net, ExecutableNetwork)
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    # Plugin initialization for specified device and load extensions library if specified.
    log.info('Creating Inference Engine...')
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, 'CPU')
    # Read IR
    log.info('Loading network')
    net = ie.read_network(args.model, os.path.splitext(args.model)[0] + '.bin')

    required_input_keys = {'im_data', 'im_info'}
    assert required_input_keys == set(net.input_info), \
        'Demo supports only topologies with the following input keys: {}'.format(', '.join(required_input_keys))
    required_output_keys = {'boxes', 'scores', 'classes', 'raw_masks'}
    assert required_output_keys.issubset(net.outputs.keys()), \
        'Demo supports only topologies with the following output keys: {}'.format(', '.join(required_output_keys))

    n, c, h, w = net.input_info['im_data'].input_data.shape
    assert n == 1, 'Only batch 1 is supported by the demo application'

    log.info('Loading IR to the plugin...')
    exec_net = ie.load_network(network=net, device_name=args.device, num_requests=2)

    try:
        input_source = int(args.input_source)
    except ValueError:
        input_source = args.input_source
    cap = cv2.VideoCapture(input_source)
    if not cap.isOpened():
        log.error('Failed to open "{}"'.format(args.input_source))
    cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)

    if args.no_track:
        tracker = None
    else:
        tracker = StaticIOUTracker()

    with open(args.labels, 'rt') as labels_file:
        class_labels = labels_file.read().splitlines()

    presenter = monitors.Presenter(args.utilization_monitors, 45,
        (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH) / 4), round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) / 8)))
    visualizer = Visualizer(class_labels, show_boxes=args.show_boxes, show_scores=args.show_scores)

    render_time = 0

    log.info('Starting inference...')
    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if args.no_keep_aspect_ratio:
            # Resize the image to a target size.
            scale_x = w / frame.shape[1]
            scale_y = h / frame.shape[0]
            input_image = cv2.resize(frame, (w, h))
        else:
            # Resize the image to keep the same aspect ratio and to fit it to a window of a target size.
            scale_x = scale_y = min(h / frame.shape[0], w / frame.shape[1])
            input_image = cv2.resize(frame, None, fx=scale_x, fy=scale_y)

        input_image_size = input_image.shape[:2]
        input_image = np.pad(input_image, ((0, h - input_image_size[0]),
                                           (0, w - input_image_size[1]),
                                           (0, 0)),
                             mode='constant', constant_values=0)
        # Change data layout from HWC to CHW.
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w)).astype(np.float32)
        input_image_info = np.asarray([[input_image_size[0], input_image_size[1], 1]], dtype=np.float32)

        # Run the net.
        inf_start = time.time()
        outputs = exec_net.infer({'im_data': input_image, 'im_info': input_image_info})
        inf_end = time.time()
        det_time = inf_end - inf_start

        # Parse detection results of the current request
        boxes = outputs['boxes']
        boxes[:, 0::2] /= scale_x
        boxes[:, 1::2] /= scale_y
        scores = outputs['scores']
        classes = outputs['classes'].astype(np.uint32)
        masks = []
        for box, cls, raw_mask in zip(boxes, classes, outputs['raw_masks']):
            raw_cls_mask = raw_mask[cls, ...]
            mask = segm_postprocess(box, raw_cls_mask, frame.shape[0], frame.shape[1])
            masks.append(mask)

        # Filter out detections with low confidence.
        detections_filter = scores > args.prob_threshold
        scores = scores[detections_filter]
        classes = classes[detections_filter]
        boxes = boxes[detections_filter]
        masks = list(segm for segm, is_valid in zip(masks, detections_filter) if is_valid)

        render_start = time.time()

        if len(boxes) and args.raw_output_message:
            log.info('Detected boxes:')
            log.info('  Class ID | Confidence |     XMIN |     YMIN |     XMAX |     YMAX ')
            for box, cls, score, mask in zip(boxes, classes, scores, masks):
                log.info('{:>10} | {:>10f} | {:>8.2f} | {:>8.2f} | {:>8.2f} | {:>8.2f} '.format(cls, score, *box))

        # Get instance track IDs.
        masks_tracks_ids = None
        if tracker is not None:
            masks_tracks_ids = tracker(masks, classes)

        # Visualize masks.
        frame = visualizer(frame, boxes, classes, scores, presenter, masks, masks_tracks_ids)

        # Draw performance stats.
        inf_time_message = 'Inference time: {:.3f} ms'.format(det_time * 1000)
        render_time_message = 'OpenCV rendering time: {:.3f} ms'.format(render_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
        cv2.putText(frame, render_time_message, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

        # Print performance counters.
        if args.perf_counts:
            perf_counts = exec_net.requests[0].get_perf_counts()
            log.info('Performance counters:')
            print('{:<70} {:<15} {:<15} {:<15} {:<10}'.format('name', 'layer_type', 'exec_type', 'status',
                                                              'real_time, us'))
            for layer, stats in perf_counts.items():
                print('{:<70} {:<15} {:<15} {:<15} {:<10}'.format(layer, stats['layer_type'], stats['exec_type'],
                                                                  stats['status'], stats['real_time']))

        if not args.no_show:
            # Show resulting image.
            cv2.imshow('Results', frame)
        render_end = time.time()
        render_time = render_end - render_start

        if not args.no_show:
            key = cv2.waitKey(args.delay)
            esc_code = 27
            if key == esc_code:
                break
            presenter.handleKey(key)

    print(presenter.reportMeans())
    cv2.destroyAllWindows()
    cap.release()
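The keep-aspect-ratio branch above resizes by a single scale factor and zero-pads the bottom/right edges up to the network input size. A minimal standalone sketch of that preprocessing (NumPy and OpenCV only; the function name is hypothetical):

import cv2
import numpy as np

def resize_keep_aspect(frame, net_h, net_w):
    """Resize with one scale factor, then zero-pad to (net_h, net_w)."""
    scale = min(net_h / frame.shape[0], net_w / frame.shape[1])
    resized = cv2.resize(frame, None, fx=scale, fy=scale)
    h, w = resized.shape[:2]
    padded = np.pad(resized, ((0, net_h - h), (0, net_w - w), (0, 0)),
                    mode='constant', constant_values=0)
    return padded, scale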
def test_load_network_wrong_device():
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    with pytest.raises(RuntimeError) as e:
        ie.load_network(net, "BLA")
    assert 'Device with "BLA" name is not registered in the InferenceEngine' in str(e.value)
class PersonDetect:
    '''
    Class for the Person Detection Model.
    '''
    def __init__(self, model_name, device, threshold=0.60):
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold

        try:
            self.core = IECore()
            self.model = self.core.read_network(model=self.model_structure,
                                                weights=self.model_weights)
        except Exception as e:
            raise ValueError(
                "Could not initialise the network. Have you entered the correct model path?"
            )

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

    def load_model(self):
        try:
            self.net = self.core.load_network(network=self.model,
                                              device_name=self.device,
                                              num_requests=1)

        except Exception:
            raise RuntimeError(
                "Could not load the network onto device {}".format(self.device))

    def predict(self, image):
        p_frame = self.preprocess_input(image)
        outputs = self.net.infer({self.input_name: p_frame})
        coords = self.preprocess_outputs(outputs[self.output_name])
        self.draw_outputs(coords, image)
        return coords, image

    def draw_outputs(self, coords, image):
        for coord in coords:
            cv2.rectangle(image, (coord[0], coord[1]), (coord[2], coord[3]),
                          (0, 255, 0), 1)

    def preprocess_outputs(self, outputs):
        coords = []
        for box in outputs[0][0]:  # output.shape: 1x1xNx7
            conf = box[2]
            if conf >= self.threshold:
                xmin = int(box[3] * self.w)
                ymin = int(box[4] * self.h)
                xmax = int(box[5] * self.w)
                ymax = int(box[6] * self.h)
                coords.append((xmin, ymin, xmax, ymax))
        return coords

    def preprocess_input(self, image):
        p_frame = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        return p_frame

    def set_out_size(self, w, h):
        self.w = w
        self.h = h
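A usage sketch for PersonDetect; set_out_size() must be called before predict(), since preprocess_outputs relies on self.w and self.h. The paths and image are placeholders:

import cv2

pd = PersonDetect('models/person-detection-retail-0013', device='CPU', threshold=0.6)
pd.load_model()

frame = cv2.imread('frame.jpg')  # hypothetical test frame
pd.set_out_size(frame.shape[1], frame.shape[0])  # width, height of the frame
coords, annotated = pd.predict(frame)
print('{} person(s) detected'.format(len(coords)))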
Example #30
    print("\nDetect initing...")
    print('=' * 30)

    # load network
    if args.tiny:
        print('model: v4tiny')
        model_xml = './IR_FP16/yolov4-tiny.xml'
        model_bin = './IR_FP16/yolov4-tiny.bin'
    else:
        print('model: v4')
        model_xml = './IR_FP16/yolov4.xml'
        model_bin = './IR_FP16/yolov4.bin'

    ie = IECore()
    net = ie.read_network(model=model_xml, weights=model_bin)

    print("inputs number: " + str(len(net.input_info.keys())))
    for input_key in net.input_info:
        print("input shape: " +
              str(net.input_info[input_key].input_data.shape))
        if len(net.input_info[input_key].input_data.layout) == 4:
            n, c, h, w = net.input_info[input_key].input_data.shape
    print('=' * 30)

    # build net
    print("Loading model to the device...")
    exec_net = ie.load_network(
        network=net, device_name='MYRIAD' if args.device == 'VPU' else 'CPU')
    print("Creating infer request and starting inference...")