def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s',
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()

    # Plugin initialization for specified device and load extensions library if specified.
    log.info('Creating Inference Engine...')
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, 'CPU')
    # Read IR
    log.info('Loading Mask-RCNN network')
    mask_rcnn_net = ie.read_network(
        args.mask_rcnn_model,
        os.path.splitext(args.mask_rcnn_model)[0] + '.bin')

    log.info('Loading encoder part of text recognition network')
    text_enc_net = ie.read_network(
        args.text_enc_model,
        os.path.splitext(args.text_enc_model)[0] + '.bin')

    log.info('Loading decoder part of text recognition network')
    text_dec_net = ie.read_network(
        args.text_dec_model,
        os.path.splitext(args.text_dec_model)[0] + '.bin')

    if 'CPU' in args.device:
        supported_layers = ie.query_network(mask_rcnn_net, 'CPU')
        not_supported_layers = [
            l for l in mask_rcnn_net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                'Following layers are not supported by the plugin for specified device {}:\n {}'
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)

    required_input_keys = {'im_data', 'im_info'}
    assert required_input_keys == set(mask_rcnn_net.input_info), \
        'Demo supports only topologies with the following input keys: {}'.format(', '.join(required_input_keys))
    required_output_keys = {
        'boxes', 'scores', 'classes', 'raw_masks', 'text_features'
    }
    assert required_output_keys.issubset(mask_rcnn_net.outputs.keys()), \
        'Demo supports only topologies with the following output keys: {}'.format(', '.join(required_output_keys))

    n, c, h, w = mask_rcnn_net.input_info['im_data'].input_data.shape
    assert n == 1, 'Only batch 1 is supported by the demo application'

    log.info('Loading IR to the plugin...')
    mask_rcnn_exec_net = ie.load_network(network=mask_rcnn_net,
                                         device_name=args.device,
                                         num_requests=2)
    text_enc_exec_net = ie.load_network(network=text_enc_net,
                                        device_name=args.device)
    text_dec_exec_net = ie.load_network(network=text_dec_net,
                                        device_name=args.device)

    hidden_shape = text_dec_net.input_info[
        args.trd_input_prev_hidden].input_data.shape

    del mask_rcnn_net
    del text_enc_net
    del text_dec_net

    try:
        input_source = int(args.input_source)
        cap = cv2.VideoCapture(input_source)
    except ValueError:
        input_source = args.input_source
        if os.path.isdir(input_source):
            cap = FolderCapture(input_source)
        else:
            cap = cv2.VideoCapture(input_source)

    if not cap.isOpened():
        log.error('Failed to open "{}"'.format(args.input_source))
        return
    if isinstance(cap, cv2.VideoCapture):
        cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)

    if args.no_track:
        tracker = None
    else:
        tracker = StaticIOUTracker()

    visualizer = Visualizer(['__background__', 'text'],
                            show_boxes=args.show_boxes,
                            show_scores=args.show_scores)

    render_time = 0

    presenter = monitors.Presenter(
        args.utilization_monitors, 45,
        (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH) / 4),
         round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) / 8)))
    log.info('Starting inference...')
    print(
        "To close the application, press 'CTRL+C' here or switch to the output window and press ESC key"
    )
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if not args.keep_aspect_ratio:
            # Resize the image to a target size.
            scale_x = w / frame.shape[1]
            scale_y = h / frame.shape[0]
            input_image = cv2.resize(frame, (w, h))
        else:
            # Resize the image to keep the same aspect ratio and to fit it to a window of a target size.
            scale_x = scale_y = min(h / frame.shape[0], w / frame.shape[1])
            input_image = cv2.resize(frame, None, fx=scale_x, fy=scale_y)

        input_image_size = input_image.shape[:2]
        input_image = np.pad(input_image,
                             ((0, h - input_image_size[0]),
                              (0, w - input_image_size[1]), (0, 0)),
                             mode='constant',
                             constant_values=0)
        # Change data layout from HWC to CHW.
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w)).astype(np.float32)
        input_image_info = np.asarray(
            [[input_image_size[0], input_image_size[1], 1]], dtype=np.float32)

        # Run the net.
        inf_start = time.time()
        outputs = mask_rcnn_exec_net.infer({
            'im_data': input_image,
            'im_info': input_image_info
        })

        # Parse detection results of the current request
        boxes = outputs['boxes']
        scores = outputs['scores']
        classes = outputs['classes'].astype(np.uint32)
        raw_masks = outputs['raw_masks']
        text_features = outputs['text_features']

        # Filter out detections with low confidence.
        detections_filter = scores > args.prob_threshold
        scores = scores[detections_filter]
        classes = classes[detections_filter]
        boxes = boxes[detections_filter]
        raw_masks = raw_masks[detections_filter]
        text_features = text_features[detections_filter]

        boxes[:, 0::2] /= scale_x
        boxes[:, 1::2] /= scale_y
        masks = []
        for box, cls, raw_mask in zip(boxes, classes, raw_masks):
            raw_cls_mask = raw_mask[cls, ...]
            mask = segm_postprocess(box, raw_cls_mask, frame.shape[0],
                                    frame.shape[1])
            masks.append(mask)

        texts = []
        for feature in text_features:
            feature = text_enc_exec_net.infer({'input': feature})['output']
            feature = np.reshape(feature,
                                 (feature.shape[0], feature.shape[1], -1))
            feature = np.transpose(feature, (0, 2, 1))

            hidden = np.zeros(hidden_shape)
            prev_symbol_index = np.ones((1, )) * SOS_INDEX

            text = ''
            for i in range(MAX_SEQ_LEN):
                decoder_output = text_dec_exec_net.infer({
                    args.trd_input_prev_symbol: prev_symbol_index,
                    args.trd_input_prev_hidden: hidden,
                    args.trd_input_encoder_outputs: feature
                })
                symbols_distr = decoder_output[args.trd_output_symbols_distr]
                prev_symbol_index = int(np.argmax(symbols_distr, axis=1))
                if prev_symbol_index == EOS_INDEX:
                    break
                text += args.alphabet[prev_symbol_index]
                hidden = decoder_output[args.trd_output_cur_hidden]

            texts.append(text)

        inf_end = time.time()
        inf_time = inf_end - inf_start

        render_start = time.time()

        if len(boxes) and args.raw_output_message:
            log.info('Detected boxes:')
            log.info(
                '  Class ID | Confidence |     XMIN |     YMIN |     XMAX |     YMAX '
            )
            for box, cls, score, mask in zip(boxes, classes, scores, masks):
                log.info(
                    '{:>10} | {:>10f} | {:>8.2f} | {:>8.2f} | {:>8.2f} | {:>8.2f} '
                    .format(cls, score, *box))

        # Get instance track IDs.
        masks_tracks_ids = None
        if tracker is not None:
            masks_tracks_ids = tracker(masks, classes)

        presenter.drawGraphs(frame)

        # Visualize masks.
        frame = visualizer(frame, boxes, classes, scores, masks, texts,
                           masks_tracks_ids)

        # Draw performance stats.
        inf_time_message = 'Inference and post-processing time: {:.3f} ms'.format(
            inf_time * 1000)
        render_time_message = 'OpenCV rendering time: {:.3f} ms'.format(
            render_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 15),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
        cv2.putText(frame, render_time_message, (15, 30),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

        # Print performance counters.
        if args.perf_counts:
            perf_counts = mask_rcnn_exec_net.requests[0].get_perf_counts()
            log.info('Performance counters:')
            print('{:<70} {:<15} {:<15} {:<15} {:<10}'.format(
                'name', 'layer_type', 'exec_type', 'status', 'real_time, us'))
            for layer, stats in perf_counts.items():
                print('{:<70} {:<15} {:<15} {:<15} {:<10}'.format(
                    layer, stats['layer_type'], stats['exec_type'],
                    stats['status'], stats['real_time']))

        if not args.no_show:
            # Show resulting image.
            cv2.imshow('Results', frame)
        render_end = time.time()
        render_time = render_end - render_start

        if not args.no_show:
            key = cv2.waitKey(args.delay)
            esc_code = 27
            if key == esc_code:
                break
            presenter.handleKey(key)

    print(presenter.reportMeans())
    cv2.destroyAllWindows()
    cap.release()
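
# FolderCapture is referenced above but not defined in this snippet. A minimal
# sketch that mimics the subset of the cv2.VideoCapture interface main() uses
# (isOpened, read, get, release); the demo's real helper may differ:
class FolderCapture:
    def __init__(self, path):
        self.image_paths = sorted(
            os.path.join(path, name) for name in os.listdir(path))
        self.position = 0

    def isOpened(self):
        return len(self.image_paths) > 0

    def read(self):
        if self.position >= len(self.image_paths):
            return False, None
        frame = cv2.imread(self.image_paths[self.position])
        self.position += 1
        return frame is not None, frame

    def get(self, prop):
        # Presenter only queries frame width/height.
        frame = cv2.imread(self.image_paths[0]) if self.image_paths else None
        if frame is None:
            return 0
        if prop == cv2.CAP_PROP_FRAME_WIDTH:
            return frame.shape[1]
        if prop == cv2.CAP_PROP_FRAME_HEIGHT:
            return frame.shape[0]
        return 0

    def release(self):
        pass


# Typical entry point (assumed; the snippet above does not show it):
if __name__ == '__main__':
    sys.exit(main() or 0)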
Example #2
def main():
    args = build_argparser().parse_args()

    # ------------- 1. Plugin initialization for specified device and load extensions library if specified -------------
    log.info("Creating Inference Engine...")
    ie = IECore()

    config_user_specified = {}
    config_min_latency = {}

    devices_nstreams = {}
    if args.num_streams:
        devices_nstreams = {device: args.num_streams for device in ['CPU', 'GPU'] if device in args.device} \
                           if args.num_streams.isdigit() \
                           else dict([device.split(':') for device in args.num_streams.split(',')])

    if 'CPU' in args.device:
        if args.cpu_extension:
            ie.add_extension(args.cpu_extension, 'CPU')
        if args.number_threads is not None:
            config_user_specified['CPU_THREADS_NUM'] = str(args.number_threads)
        if 'CPU' in devices_nstreams:
            config_user_specified['CPU_THROUGHPUT_STREAMS'] = devices_nstreams['CPU'] \
                                                              if int(devices_nstreams['CPU']) > 0 \
                                                              else 'CPU_THROUGHPUT_AUTO'

        config_min_latency['CPU_THROUGHPUT_STREAMS'] = '1'

    if 'GPU' in args.device:
        if 'GPU' in devices_nstreams:
            config_user_specified['GPU_THROUGHPUT_STREAMS'] = devices_nstreams['GPU'] \
                                                              if int(devices_nstreams['GPU']) > 0 \
                                                              else 'GPU_THROUGHPUT_AUTO'

        config_min_latency['GPU_THROUGHPUT_STREAMS'] = '1'

    # -------------------- 2. Reading the IR generated by the Model Optimizer (.xml and .bin files) --------------------
    log.info("Loading network")
    net = ie.read_network(args.model, os.path.splitext(args.model)[0] + ".bin")

    assert len(net.input_info) == 1, \
        "Sample supports only YOLO V3 based single input topologies"

    # ---------------------------------------------- 3. Preparing inputs -----------------------------------------------
    log.info("Preparing inputs")
    input_blob = next(iter(net.input_info))

    # Read and pre-process input images
    if net.input_info[input_blob].input_data.shape[1] == 3:
        input_height, input_width = net.input_info[input_blob].input_data.shape[2:]
        nchw_shape = True
    else:
        input_height, input_width = net.input_info[input_blob].input_data.shape[1:3]
        nchw_shape = False

    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    input_stream = 0 if args.input == "cam" else args.input

    mode = Mode(Modes.USER_SPECIFIED)
    cap = cv2.VideoCapture(input_stream)
    wait_key_time = 1

    # ----------------------------------------- 4. Loading model to the plugin -----------------------------------------
    log.info("Loading model to the plugin")
    exec_nets = {}

    exec_nets[Modes.USER_SPECIFIED] = ie.load_network(
        network=net,
        device_name=args.device,
        config=config_user_specified,
        num_requests=args.num_infer_requests)
    exec_nets[Modes.MIN_LATENCY] = ie.load_network(
        network=net,
        device_name=args.device.split(":")[-1].split(",")[0],
        config=config_min_latency,
        num_requests=1)

    empty_requests = deque(exec_nets[mode.current].requests)
    completed_request_results = {}
    next_frame_id = 0
    next_frame_id_to_show = 0
    mode_metrics = {mode.current: PerformanceMetrics()}
    prev_mode_active_request_count = 0
    event = threading.Event()
    callback_exceptions = []

    # ----------------------------------------------- 5. Doing inference -----------------------------------------------
    log.info("Starting inference...")
    print(
        "To close the application, press 'CTRL+C' here or switch to the output window and press ESC key"
    )
    print(
        "To switch between min_latency/user_specified modes, press TAB key in the output window"
    )

    presenter = monitors.Presenter(
        args.utilization_monitors, 55,
        (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH) / 4),
         round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) / 8)))

    while (cap.isOpened() \
           or completed_request_results \
           or len(empty_requests) < len(exec_nets[mode.current].requests)) \
          and not callback_exceptions:
        if next_frame_id_to_show in completed_request_results:
            frame, output, start_time, is_same_mode = completed_request_results.pop(
                next_frame_id_to_show)

            next_frame_id_to_show += 1

            objects = get_objects(output, net, (input_height, input_width),
                                  frame.shape[:-1], args.prob_threshold,
                                  args.keep_aspect_ratio)
            objects = filter_objects(objects, args.iou_threshold,
                                     args.prob_threshold)

            if len(objects) and args.raw_output_message:
                log.info(
                    " Class ID | Confidence | XMIN | YMIN | XMAX | YMAX | COLOR "
                )

            origin_im_size = frame.shape[:-1]
            presenter.drawGraphs(frame)
            for obj in objects:
                # Validation bbox of detected object
                obj['xmax'] = min(obj['xmax'], origin_im_size[1])
                obj['ymax'] = min(obj['ymax'], origin_im_size[0])
                obj['xmin'] = max(obj['xmin'], 0)
                obj['ymin'] = max(obj['ymin'], 0)
                color = (min(obj['class_id'] * 12.5, 255),
                         min(obj['class_id'] * 7, 255),
                         min(obj['class_id'] * 5, 255))
                det_label = labels_map[obj['class_id']] if labels_map and len(labels_map) > obj['class_id'] else \
                    str(obj['class_id'])

                if args.raw_output_message:
                    log.info(
                        "{:^9} | {:10f} | {:4} | {:4} | {:4} | {:4} | {} ".
                        format(det_label, obj['confidence'], obj['xmin'],
                               obj['ymin'], obj['xmax'], obj['ymax'], color))

                cv2.rectangle(frame, (obj['xmin'], obj['ymin']),
                              (obj['xmax'], obj['ymax']), color, 2)
                cv2.putText(
                    frame, "#" + det_label + ' ' +
                    str(round(obj['confidence'] * 100, 1)) + ' %',
                    (obj['xmin'], obj['ymin'] - 7), cv2.FONT_HERSHEY_COMPLEX,
                    0.6, color, 1)

            helpers.put_highlighted_text(frame,
                                         "{} mode".format(mode.current.name),
                                         (10, int(origin_im_size[0] - 20)),
                                         cv2.FONT_HERSHEY_COMPLEX, 0.75,
                                         (10, 10, 200), 2)

            if is_same_mode and prev_mode_active_request_count == 0:
                mode_metrics[mode.current].update(start_time, frame)
            else:
                mode_metrics[mode.get_other()].update(start_time, frame)
                prev_mode_active_request_count -= 1
                helpers.put_highlighted_text(
                    frame, "Switching modes, please wait...",
                    (10, int(origin_im_size[0] - 50)),
                    cv2.FONT_HERSHEY_COMPLEX, 0.75, (10, 200, 10), 2)

            if not args.no_show:
                cv2.imshow("Detection Results", frame)
                key = cv2.waitKey(wait_key_time)

                if key in {ord("q"), ord("Q"), 27}:  # ESC key
                    break
                if key == 9:  # Tab key
                    if prev_mode_active_request_count == 0:
                        prev_mode = mode.current
                        mode.switch()

                        prev_mode_active_request_count = \
                            len(exec_nets[prev_mode].requests) - len(empty_requests)
                        empty_requests.clear()
                        empty_requests.extend(exec_nets[mode.current].requests)

                        mode_metrics[mode.current] = PerformanceMetrics()
                else:
                    presenter.handleKey(key)

        elif empty_requests and prev_mode_active_request_count == 0 and cap.isOpened():
            start_time = perf_counter()
            ret, frame = cap.read()
            if not ret:
                if args.loop:
                    cap.open(input_stream)
                else:
                    cap.release()
                continue

            request = empty_requests.popleft()

            # resize input_frame to network size
            in_frame = preprocess_frame(frame, input_height, input_width,
                                        nchw_shape, args.keep_aspect_ratio)

            # Start inference
            request.set_completion_callback(
                py_callback=async_callback,
                py_data=(request, next_frame_id, mode.current, frame,
                         start_time, completed_request_results, empty_requests,
                         mode, event, callback_exceptions))
            request.async_infer(inputs={input_blob: in_frame})
            next_frame_id += 1

        else:
            event.wait()
            event.clear()

    if callback_exceptions:
        raise callback_exceptions[0]

    for mode, metrics in mode_metrics.items():
        print("\nMode: {}".format(mode.name))
        metrics.print_total()
    print(presenter.reportMeans())

    for exec_net in exec_nets.values():
        await_requests_completion(exec_net.requests)
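
# Neither Modes/Mode nor async_callback is defined in this snippet. A minimal
# sketch consistent with how they are used above; the names, layout, and the
# payload stored for get_objects() are assumptions (from enum import Enum and
# the other imports are taken as given):
class Modes(Enum):
    USER_SPECIFIED = 0
    MIN_LATENCY = 1


class Mode:
    def __init__(self, initial_mode):
        self.current = initial_mode

    def get_other(self):
        return Modes.MIN_LATENCY if self.current == Modes.USER_SPECIFIED \
            else Modes.USER_SPECIFIED

    def switch(self):
        self.current = self.get_other()


def async_callback(status, py_data):
    (request, frame_id, frame_mode, frame, start_time,
     completed_request_results, empty_requests, mode, event,
     callback_exceptions) = py_data
    try:
        if status != 0:
            raise RuntimeError('Infer request returned status code {}'.format(status))
        # Hand the finished frame to the display loop, flagging whether it was
        # submitted in the currently active mode.
        completed_request_results[frame_id] = (frame, request.output_blobs,
                                               start_time,
                                               frame_mode == mode.current)
        if mode.current == frame_mode:
            empty_requests.append(request)
    except Exception as e:
        callback_exceptions.append(e)
    event.set()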
Example #3
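# Fragment of a volumetric (3D) inference script: ie, net, input_blob, out_blob
# and num_imgs are defined earlier in the original source and are assumed here.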
net.reshape({input_blob: (batch_size, n_channels, height, width, depth)})
batch_size, n_channels, height, width, depth = net.inputs[input_blob].shape
batch_size, n_out_channels, height_out, width_out, depth_out = net.outputs[out_blob].shape

print("The network inputs are:")
for idx, input_layer in enumerate(net.inputs.keys()):
    print("{}: {}, shape = {} [N,C,H,W,D]".format(idx,input_layer,net.inputs[input_layer].shape))

print("The network outputs are:")
for idx, output_layer in enumerate(net.outputs.keys()):
    print("{}: {}, shape = {} [N,C,H,W,D]".format(idx,output_layer,net.outputs[output_layer].shape))

# Loading model to the plugin
print("Loading model to the plugin")
exec_net = ie.load_network(network=net, device_name="CPU")
del net

"""
OpenVINO inference code
input_blob is the name (string) of the input tensor in the graph
out_blob is the name (string) of the output tensor in the graph
Essentially, this looks exactly like a feed_dict for TensorFlow inference
"""
# Go through the sample validation dataset to plot predictions
predictions_ov = np.zeros((num_imgs, n_out_channels,
                           depth_out, height_out, width_out))

print("Starting OpenVINO inference")
results = {}
ov_times = []
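
# The inference loop that fills predictions_ov is cut off in this snippet. A
# minimal sketch, assuming a hypothetical imgs array of shape
# (num_imgs, n_channels, height, width, depth) holding the validation volumes
# and that time has been imported:
for idx in range(num_imgs):
    start = time.time()
    res = exec_net.infer(inputs={input_blob: imgs[idx:idx + 1]})
    ov_times.append(time.time() - start)
    predictions_ov[idx] = res[out_blob][0]  # layouts must agree with the shapes above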
Example #4
class Model_FaceDetection:
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.output_shape = None
        self.plugin = None
        self.network = None
        self.exec_net = None

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_name, weights=self.model_weights)
        self.supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
        self.unsupported_layers = [layer for layer in self.network.layers.keys() if layer not in self.supported_layers]
        
        if not self.check_model():
            exit(1)

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = next(iter(self.network.outputs))
        self.output_shape = self.network.outputs[self.output_names].shape
        
    def predict(self, image, prob_threshold):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        
        img_processed = self.preprocess_input(image.copy())
        faces = self.exec_net.infer({self.input_name:img_processed})
        coords = self.preprocess_output(faces, prob_threshold)
        if len(coords) == 0:
            return 0, 0
        # take the first detected face
        coords = coords[0]
        h = image.shape[0]
        w = image.shape[1]
        coords = coords * np.array([w, h, w, h])
        coords = coords.astype(np.int32)
        
        face = image[coords[1]:coords[3], coords[0]:coords[2]]
        return face, coords

    def check_model(self):
        # check for unsupported layers
        if len(self.unsupported_layers) != 0 and self.device == 'CPU':
            print("unsupported layers: {}".format(self.unsupported_layers))
            if self.extensions is not None:
                self.plugin.add_extension(self.extensions, self.device)
                self.supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
                self.unsupported_layers = [layer for layer in self.network.layers.keys() if layer not in self.supported_layers]
                if len(self.unsupported_layers) != 0:
                    print("unsupported layers found")
                    return False
            else:
                print("cpu extension path not found")
                return False
        return True

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        resizedImage = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        return np.transpose(np.expand_dims(resizedImage,axis=0), (0,3,1,2))

    def preprocess_output(self, outputs, prob_threshold):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        modelOutputs = outputs[self.output_names][0][0]
        coordList = []
        for output in modelOutputs:
            conf = output[2]
            if conf > prob_threshold:
                xMin = output[3]
                yMin = output[4]
                xMax = output[5]
                yMax = output[6]
                coordList.append([xMin, yMin, xMax, yMax])
        return coordList
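
# A hedged usage sketch for the class above; the model path and test image are
# placeholders, and cv2/np are assumed to be imported as in the other examples:
detector = Model_FaceDetection('face-detection-adas-0001.xml')
detector.load_model()
face, coords = detector.predict(cv2.imread('input.jpg'), prob_threshold=0.6)
if isinstance(face, np.ndarray):  # predict() returns (0, 0) when no face is found
    cv2.imwrite('face.jpg', face)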
        
Example #5
class face_detection:
    '''
    Class for the Face Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None

    def load_model(self):
        '''
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.plugin = IECore()
        model_xml = self.model_name
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        self.network = IENetwork(model=model_xml, weights=model_bin)

        self.check_model()
        self.exec_network = self.plugin.load_network(self.network, self.device)

        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))
        return

    def predict(self, image, request_id):
        '''
        This method is meant for running predictions on the input image.
        '''
        self.exec_network.start_async(request_id=request_id,
                                      inputs={self.input_blob: image})
        return

    def check_model(self):
        if self.extensions and "CPU" in self.device:
            self.plugin.add_extension(self.extensions, self.device)

        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name="CPU")
        unsupported_layers = [
            l for l in self.network.layers.keys() if l not in supported_layers
        ]
        if len(unsupported_layers) != 0:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Check whether extensions are available to add to IECore.")
            exit(1)
        return

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        net_input_shape = self.network.inputs[self.input_blob].shape
        h = net_input_shape[2]
        w = net_input_shape[3]
        p_frame = cv2.resize(image, (w, h))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, 3, h, w)
        return p_frame

    def wait(self):
        status = self.exec_network.requests[0].wait(-1)
        return status

    def get_output(self):
        return self.exec_network.requests[0].outputs[self.output_blob]

    def preprocess_output(self, faces, image, args, width, height):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''

        face_crop = None
        for box in faces[0][0]:  # Output shape is 1x1x100x7
            conf = box[2]
            if conf >= args.prob_threshold:
                xmin = int(box[3] * width)
                ymin = int(box[4] * height)
                xmax = int(box[5] * width)
                ymax = int(box[6] * height)
                if args.log_level == "DEBUG":
                    cv2.rectangle(image, (xmin, ymin), (xmax, ymax),
                                  (0, 0, 255), 2)
                face_crop = image[ymin:ymax, xmin:xmax]  # keeps the last face above threshold

        return face_crop
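
# The class above splits async inference across predict()/wait()/get_output().
# A minimal driver sketch; the model path and input image are placeholders:
fd = face_detection('face-detection-adas-0001.xml')
fd.load_model()
frame = cv2.imread('input.jpg')
fd.predict(fd.preprocess_input(frame), request_id=0)
if fd.wait() == 0:
    detections = fd.get_output()  # raw 1x1x100x7 detection blob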
Example #6
class Inference:
    '''
    Class with all relevant tools to do object detection
    '''

    # Load all relevant variables into the class
    def __init__(self, model_name, device, threshold=0.60):
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold

        # Initialise the network and save it in the self.model variables
        self.model = IENetwork(self.model_structure,
                               self.model_weights)  # old openvino version
        # self.model = core.read_network(self.model_structure, self.model_weights) # new openvino version

        # Get the input layer
        self.input_name = next(iter(self.model.inputs))
        self.input_name_all = [i for i in self.model.inputs.keys()]  # gets all input_names
        self.input_name_all_02 = self.model.inputs.keys()  # gets all input_names (dict view)
        self.input_name_first_entry = self.input_name_all[0]

        self.input_shape = self.model.inputs[self.input_name].shape

        self.output_name = next(iter(self.model.outputs))
        self.output_name_type = self.model.outputs[self.output_name]
        self.output_names = [i for i in self.model.outputs.keys()]  # gets all output_names
        self.output_names_total_entries = len(self.output_names)

        self.output_shape = self.model.outputs[self.output_name].shape
        self.output_shape_second_entry = self.model.outputs[
            self.output_name].shape[1]
        self.output_name_first_entry = self.output_names[0]

        print("--------")
        print("input_name: " + str(self.input_name))
        print("input_name_all: " + str(self.input_name_all))
        print("input_name_all_total: " + str(self.input_name_all_02))
        print("input_name_first_entry: " + str(self.input_name_first_entry))
        print("--------")

        print("input_shape: " + str(self.input_shape))
        print("--------")

        print("output_name: " + str(self.output_name))
        print("output_name type: " + str(self.output_name_type))
        print("output_names: " + str(self.output_names))
        print("output_names_total_entries: " +
              str(self.output_names_total_entries))
        print("output_name_first_entry: " + str(self.output_name_first_entry))
        print("--------")

        print("output_shape: " + str(self.output_shape))
        print("output_shape_second_entry: " +
              str(self.output_shape_second_entry))
        print("--------")

    # Loads the model
    def load_model(self):
        # Adds Extension
        CPU_EXTENSION = "/opt/intel/openvino/deployment_tools/inference_engine/lib/intel64/libcpu_extension_sse4.so"
        self.core = IECore()
        self.core.add_extension(CPU_EXTENSION, self.device)
        # Load the network into an executable network
        self.exec_network = self.core.load_network(network=self.model,
                                                   device_name=self.device,
                                                   num_requests=1)
        print("Model is loaded")

    # Start inference and prediction
    def predict(self, image):

        # save original image
        input_img = image
        # Pre-process the image
        image = self.preprocess_input(image)
        print("Starting synchronous inference")
        result = self.exec_network.infer({self.input_name: image})  # synchronous inference
        #infer_request_handle = self.async_inference(image)
        #res = self.get_output(infer_request_handle, 0, output=None)

        # Vehicle output
        color, car_type = self.vehicle_attributes(result)

        return input_img, color, car_type

    # Preprocess the image
    def preprocess_input(self, frame):
        # Get the input shape
        n, c, h, w = self.input_shape
        print("n-c-h-w " + str(n) + "-" + str(c) + "-" + str(h) + "-" + str(w))
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        print("End of preprocess input")

        return image

    # Get the inference output
    def get_output(self, infer_request_handle, request_id, output):
        if output:
            res = infer_request_handle.outputs[output]
        else:
            res = self.exec_network.requests[request_id].outputs[
                self.output_name]
        return res

    def vehicle_attributes(self, result):
        #Gets the output of the vehicle model

        CAR_COLORS = [
            "white", "gray", "yellow", "red", "green", "blue", "black"
        ]

        color = result['color']
        color_flatten = result['color'].flatten()
        color_total = len(color_flatten)
        color_class = np.argmax(color)
        color_class_text = CAR_COLORS[color_class]
        print("--------")
        print("color: " + str(color))
        print("color_flatten: " + str(color_flatten))
        print("total number of colors: " + str(color_total))
        print("color number with the higest propability (argmax): " +
              str(color_class))
        print("color text with the higest propability (argmax): " +
              str(color_class_text))
        print("--------")

        CAR_TYPES = ["car", "bus", "truck", "van"]

        car_type = result['type']
        car_type_flatten = result['type'].flatten()
        car_type_total = len(car_type_flatten)
        car_type_class = np.argmax(car_type)
        car_type_class_text = CAR_TYPES[car_type_class]
        print("car_type: " + str(car_type))
        print("car_type_flatten: " + str(car_type_flatten))
        print("total number of car types: " + str(car_type_total))
        print("car type with the higest propability (argmax): " +
              str(car_type_class))
        print("car text with the higest propability (argmax): " +
              str(car_type_class_text))
        print("--------")

        return color_class, car_type_class
Example #7
class Model_Face:
    '''
    Class for the Face Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extensions = extensions
#         self.check_model()
#         try:
#             self.input_name = next(iter(self.net.inputs))
#             self.input_shape = self.net.inputs[self.input_name].shape
#             self.output_name = next(iter(self.net.outputs))
#             self.output_shape = self.net.outputs[self.output_name].shape
#             print('Initialise.. completed.')
#         except Exception as e:
#             raise ValueError('Something is wrong with input and output values..')

    def load_model(self):
        '''
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        try:
            print('Model is loading...')
            self.core = IECore()
            self.net = self.core.read_network(model=self.model_structure,
                                              weights=self.model_weights)
            supported = self.core.query_network(self.net, self.device)
            not_supported = [
                layer for layer in self.net.layers.keys()
                if layer not in supported
            ]
            if len(not_supported) != 0 and self.device == 'CPU':
                print('Unsupported', not_supported)
                if self.extensions is not None:
                    print('***Quick fix.\n ~CPU Extension added')
                    self.core.add_extension(self.extensions, self.device)
                    supported = self.core.query_network(self.net, self.device)
                    not_supported = [
                        layer for layer in self.net.layers.keys()
                        if layer not in supported
                    ]
                    if len(not_supported) != 0:
                        print('***Quick fix, Failed.')
                else:
                    print('Check the extension path.')
            self.net_exec = self.core.load_network(network=self.net,
                                                   device_name=self.device)
        except Exception as e:
            raise RuntimeError('Something is wrong.. ~debug load model~') from e

        try:
            self.input_name = next(iter(self.net.inputs))
            self.input_shape = self.net.inputs[self.input_name].shape
            self.output_name = next(iter(self.net.outputs))
            self.output_shape = self.net.outputs[self.output_name].shape
            print('Initialise.. completed.')
        except Exception as e:
            raise ValueError(
                'Something is wrong with input and output values..')

    def predict(self, image, thres):
        '''
        This method is meant for running predictions on the input image.
        '''
        self.image = image
        print('Face-detection predict..')
        pre_image = self.preprocess_input(self.image)
        input_name = self.input_name
        input_dict = {input_name: pre_image}
        #         infer = self.net_exec.start_async(request_id=0, inputs=input_dict)
        #         status = infer.wait()
        face = []
        #         if status == 0:
        # #             print(infer.outputs)
        # #             print(self.output_name)
        #             results = infer.outputs[self.output_name]
        #             outputs = self.preprocess_output(results, thres)
        #             outputs = outputs[0]
        #             height = self.image.shape[0]
        #             width = self.image.shape[1]
        #             outputs = outputs* np.array([width, height, width, height])
        #             outputs = outputs.astype(np.int32)
        #             face = self.image[outputs[1]:outputs[3], outputs[0]:outputs[2]]

        results = self.net_exec.infer(input_dict)
        outputs = self.preprocess_output(results, thres)
        outputs = outputs[0]
        height = self.image.shape[0]
        width = self.image.shape[1]
        outputs = outputs * np.array([width, height, width, height])
        outputs = outputs.astype(np.int32)
        face = self.image[outputs[1]:outputs[3], outputs[0]:outputs[2]]
        return face, outputs

    def check_model(self):
        '''
        Check - initialise the model
        '''
        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception as e:
            raise ValueError(
                "Could not Initialise the network. Have you entered the correct model path?"
            )

    def preprocess_input(self, image):
        '''
        An input image in the format [BxCxHxW], where:

        B - batch size
        C - number of channels
        H - image height
        W - image width
        '''
        image = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        image = image.transpose((2, 0, 1))
        image = image.reshape(1, *image.shape)
        return image

    def preprocess_output(self, outputs, thres):
        '''
        thres = threshold of confidence
        
        The net outputs blob with shape: [1, 1, N, 7], where N is the number of detected bounding boxes. 
        Each detection has the format [image_id, label, conf, x_min, y_min, x_max, y_max], where:

        image_id - ID of the image in the batch
        label - predicted class ID
        conf - confidence for the predicted class
        (x_min, y_min) - coordinates of the top left bounding box corner
        (x_max, y_max) - coordinates of the bottom right bounding box corner.
        '''
        object_list = []
        print('PreOutput-face_detection..')
        tmp_out = outputs[self.output_name][0][0]
        for i in tmp_out:
            conf = i[2]  # confidence of the detected face
            if conf > thres:
                x_min = i[3]
                y_min = i[4]
                x_max = i[5]
                y_max = i[6]
                object_list.append([x_min, y_min, x_max, y_max])
        return object_list
Example #8
class Model:
    class Status(Enum):
        READY = 0
        ENCODER_INFER = 1
        DECODER_INFER = 2

    def __init__(self, args, interactive_mode):
        self.args = args
        log.info("Creating Inference Engine")
        self.ie = IECore()
        self.ie.set_config(
            {"PERF_COUNT": "YES" if self.args.perf_counts else "NO"},
            args.device)
        self.encoder = read_net(self.args.m_encoder, self.ie)
        self.dec_step = read_net(self.args.m_decoder, self.ie)
        self.exec_net_encoder = self.ie.load_network(
            network=self.encoder, device_name=self.args.device)
        self.exec_net_decoder = self.ie.load_network(
            network=self.dec_step, device_name=self.args.device)
        self.images_list = []
        self.vocab = Vocab(self.args.vocab_path)
        self.model_status = Model.Status.READY
        self.is_async = interactive_mode
        self.num_infers_decoder = 0
        self.check_model_dimensions()
        if not interactive_mode:
            self.preprocess_inputs()

    def preprocess_inputs(self):
        height, width = self.encoder.input_info['imgs'].input_data.shape[-2:]
        target_shape = (height, width)
        if os.path.isdir(self.args.input):
            inputs = sorted(
                os.path.join(self.args.input, inp)
                for inp in os.listdir(self.args.input))
        else:
            inputs = [self.args.input]
        log.info("Loading and preprocessing images")
        for filenm in tqdm(inputs):
            image_raw = cv.imread(filenm)
            assert image_raw is not None, "Error reading image {}".format(
                filenm)
            image = preprocess_image(
                PREPROCESSING[self.args.preprocessing_type], image_raw,
                target_shape)
            record = namespace(img_name=filenm, img=image)
            self.images_list.append(record)

    def check_model_dimensions(self):
        batch_dim, channels, height, width = self.encoder.input_info[
            'imgs'].input_data.shape
        assert batch_dim == 1, "Demo only works with batch size 1."
        assert channels in (1, 3), "Demo supports only 1- or 3-channel input images."

    def _async_infer_encoder(self, image, req_id):
        return self.exec_net_encoder.start_async(
            request_id=req_id, inputs={self.args.imgs_layer: image})

    def _async_infer_decoder(self, row_enc_out, dec_st_c, dec_st_h, output,
                             tgt, req_id):
        self.num_infers_decoder += 1
        return self.exec_net_decoder.start_async(
            request_id=req_id,
            inputs={
                self.args.row_enc_out_layer: row_enc_out,
                self.args.dec_st_c_layer: dec_st_c,
                self.args.dec_st_h_layer: dec_st_h,
                self.args.output_prev_layer: output,
                self.args.tgt_layer: tgt
            })

    def infer_async(self, model_input):
        model_input = change_layout(model_input)
        assert self.is_async
        if self.model_status == Model.Status.READY:
            self._start_encoder(model_input)
            return None

        if self.model_status == Model.Status.ENCODER_INFER:
            infer_status_encoder = self._infer_request_handle_encoder.wait(
                timeout=0)
            if infer_status_encoder == 0:
                self._start_decoder()
            return None

        return self._process_decoding_results()

    def infer_sync(self, model_input):
        assert not self.is_async
        model_input = change_layout(model_input)
        self._start_encoder(model_input)
        infer_status_encoder = self._infer_request_handle_encoder.wait(
            timeout=-1)
        assert infer_status_encoder == 0
        self._start_decoder()
        res = None
        while res is None:
            res = self._process_decoding_results()
        return res

    def _process_decoding_results(self):
        timeout = 0 if self.is_async else -1
        infer_status_decoder = self._infer_request_handle_decoder.wait(timeout)
        if infer_status_decoder != 0 and self.is_async:
            return None
        dec_res = self._infer_request_handle_decoder.output_blobs
        self._unpack_dec_results(dec_res)

        if (self.tgt[0][0][0] == END_TOKEN
                or self.num_infers_decoder >= self.args.max_formula_len):
            self.num_infers_decoder = 0
            self.logits = np.array(self.logits)
            logits = self.logits.squeeze(axis=1)
            targets = np.argmax(logits, axis=1)
            self.model_status = Model.Status.READY
            return logits, targets
        self._infer_request_handle_decoder = self._async_infer_decoder(
            self.row_enc_out,
            self.dec_states_c,
            self.dec_states_h,
            self.output,
            self.tgt,
            req_id=0)

        return None

    def _start_encoder(self, model_input):
        self._infer_request_handle_encoder = self._async_infer_encoder(
            model_input, req_id=0)
        self.model_status = Model.Status.ENCODER_INFER

    def _start_decoder(self):
        enc_res = self._infer_request_handle_encoder.output_blobs
        self._unpack_enc_results(enc_res)
        self._infer_request_handle_decoder = self._async_infer_decoder(
            self.row_enc_out,
            self.dec_states_c,
            self.dec_states_h,
            self.output,
            self.tgt,
            req_id=0)
        self.model_status = Model.Status.DECODER_INFER

    def _unpack_dec_results(self, dec_res):
        self.dec_states_h = dec_res[self.args.dec_st_h_t_layer].buffer
        self.dec_states_c = dec_res[self.args.dec_st_c_t_layer].buffer
        self.output = dec_res[self.args.output_layer].buffer
        logit = dec_res[self.args.logit_layer].buffer
        self.logits.append(logit)
        self.tgt = np.array([[np.argmax(logit, axis=1)]])

    def _unpack_enc_results(self, enc_res):
        self.row_enc_out = enc_res[self.args.row_enc_out_layer].buffer
        self.dec_states_h = enc_res[self.args.hidden_layer].buffer
        self.dec_states_c = enc_res[self.args.context_layer].buffer
        self.output = enc_res[self.args.init_0_layer].buffer
        self.tgt = np.array([[START_TOKEN]])
        self.logits = []
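
# read_net() is referenced in __init__ but not defined in this snippet. A
# plausible helper, assuming the usual OpenVINO .xml/.bin IR pair naming:
def read_net(model_xml, ie):
    model_bin = os.path.splitext(model_xml)[0] + '.bin'
    return ie.read_network(model=model_xml, weights=model_bin)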
Example #9
class FaceDetectionModel:
    '''
    Class for the Face Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.output_shape = None

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure,
                                                weights=self.model_weights)
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        unsupported_layers = [
            l for l in self.network.layers.keys() if l not in supported_layers
        ]

        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("unsupported layers found:{}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding cpu_extension")
                self.plugin.add_extension(self.extensions, self.device)
                supported_layers = self.plugin.query_network(
                    network=self.network, device_name=self.device)
                unsupported_layers = [
                    l for l in self.network.layers.keys()
                    if l not in supported_layers
                ]
                if len(unsupported_layers) != 0:
                    print(
                        "After adding the extension still unsupported layers found"
                    )
                    exit(1)
                print("After adding the extension the issue is resolved")
            else:
                print("Give the path of cpu extension")
                exit(1)

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)

        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = next(iter(self.network.outputs))
        self.output_shape = self.network.outputs[self.output_names].shape

    def predict(self, image, prob_threshold):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''

        img_processed = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: img_processed})
        coords = self.preprocess_output(outputs, prob_threshold)
        if len(coords) == 0:
            return 0, 0
        coords = coords[0]  # take the first detected face
        h = image.shape[0]
        w = image.shape[1]
        coords = coords * np.array([w, h, w, h])
        coords = coords.astype(np.int32)

        cropped_face = image[coords[1]:coords[3], coords[0]:coords[2]]
        return cropped_face, coords

    def check_model(self):
        '''Layer support is already verified in load_model, so there is nothing to check here.'''

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        image_resized = cv2.resize(image,
                                   (self.input_shape[3], self.input_shape[2]))
        img_processed = np.transpose(np.expand_dims(image_resized, axis=0),
                                     (0, 3, 1, 2))
        return img_processed

    def preprocess_output(self, outputs, prob_threshold):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        coords = []
        outs = outputs[self.output_names][0][0]
        for out in outs:
            conf = out[2]
            if conf > prob_threshold:
                x_min = out[3]
                y_min = out[4]
                x_max = out[5]
                y_max = out[6]
                coords.append([x_min, y_min, x_max, y_max])
        return coords
Example #10
class Model_HPE:
    '''
    Class for the Head Pose Estimation Model.
    '''

    def __init__(self, model_name, device, extensions):

        self.plugin = None
        self.net = None
        self.input_blob = None
        self.output_blob = None
        self.exec_net = None
        self.infer_request = None
        self.input_shape = None
        self.output_shape = None
        self.input_name = None
        self.device = device
        self.extension = extensions
        self.model = model_name
        self.output = None

    def load_model(self):
        '''
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        model_xml = self.model + ".xml"
        model_weights = self.model + ".bin"

        self.plugin = IECore()
        self.net = IENetwork(model_xml, model_weights)

        self.exec_net = self.plugin.load_network(network=self.net, device_name=self.device,
                                                 num_requests=1)

        if self.extension and 'CPU' in self.device:
            self.plugin.add_cpu_extension(self.extension)
        self.check_model()

        self.input_blob = next(iter(self.net.inputs))
        self.input_shape = self.net.inputs[self.input_blob].shape
        self.output_blob = next(iter(self.net.outputs))
        self.output_shape = self.net.outputs[self.output_blob].shape

    def predict(self, image, benchmark_timing):
        self.exec_net.start_async(request_id=0, inputs={self.input_blob: image})
        if self.exec_net.requests[0].wait(-1) == 0:
            self.result = self.exec_net.requests[0].outputs

            if benchmark_timing:
                pp = PrettyPrinter(indent=4)
                print('Benchmark Timing for Head_Pose_Estimation')
                pp.pprint(self.exec_net.requests[0].get_perf_counts())
                # Write get_perf_counts() data to a text file
                data = (self.exec_net.requests[0].get_perf_counts())
                self.write_benchmark('Benchmark Timing for Head_Pose_Estimation', data)

        return self.result

    def check_model(self):
        supported_layers = self.plugin.query_network(network=self.net, device_name=self.device)
        unsupported_layers = [l for l in self.net.layers.keys() if l not in supported_layers]
        if len(unsupported_layers) != 0:
            log.error("Unsupported layers found: {}".format(unsupported_layers))
            log.error("Check whether extensions are available to add to IECore.")
            exit(1)

    def preprocess_input(self, image):

        temp = image.copy()
        temp = cv2.resize(temp, (self.input_shape[3], self.input_shape[2]))  # n,c,h,w
        temp = temp.transpose((2, 0, 1))
        temp = temp.reshape(1, *temp.shape)
        return temp

    def preprocess_output(self, image, outputs, facebox, face, display):

        output = []
        output.append(outputs['angle_y_fc'].tolist()[0][0])
        output.append(outputs['angle_p_fc'].tolist()[0][0])
        output.append(outputs['angle_r_fc'].tolist()[0][0])

        pitch = np.squeeze(outputs['angle_p_fc'])
        roll = np.squeeze(outputs['angle_r_fc'])
        yaw = np.squeeze(outputs['angle_y_fc'])
        axes_op = np.array([pitch, roll, yaw])

        if display:
            xmin, ymin, _, _ = facebox
            face_center = (xmin + face.shape[1] / 2, ymin + face.shape[0] / 2, 0)
            self.draw_axes(image, face_center, yaw, pitch, roll)

        return axes_op

    # code source: https://knowledge.udacity.com/questions/171017
    def draw_axes(self, frame, center_of_face, yaw, pitch, roll):
        focal_length = 950.0
        scale = 100

        yaw *= np.pi / 180.0
        pitch *= np.pi / 180.0
        roll *= np.pi / 180.0
        cx = int(center_of_face[0])
        cy = int(center_of_face[1])
        Rx = np.array([[1, 0, 0],
                       [0, math.cos(pitch), -math.sin(pitch)],
                       [0, math.sin(pitch), math.cos(pitch)]])
        Ry = np.array([[math.cos(yaw), 0, -math.sin(yaw)],
                       [0, 1, 0],
                       [math.sin(yaw), 0, math.cos(yaw)]])
        Rz = np.array([[math.cos(roll), -math.sin(roll), 0],
                       [math.sin(roll), math.cos(roll), 0],
                       [0, 0, 1]])
        # ref: https://www.learnopencv.com/rotation-matrix-to-euler-angles/
        R = Rz @ Ry @ Rx
        camera_matrix = self.build_camera_matrix(center_of_face, focal_length)
        xaxis = np.array(([1 * scale, 0, 0]), dtype='float32').reshape(3, 1)
        yaxis = np.array(([0, -1 * scale, 0]), dtype='float32').reshape(3, 1)
        zaxis = np.array(([0, 0, -1 * scale]), dtype='float32').reshape(3, 1)
        zaxis1 = np.array(([0, 0, 1 * scale]), dtype='float32').reshape(3, 1)
        o = np.array(([0, 0, 0]), dtype='float32').reshape(3, 1)
        o[2] = camera_matrix[0][0]
        xaxis = np.dot(R, xaxis) + o
        yaxis = np.dot(R, yaxis) + o
        zaxis = np.dot(R, zaxis) + o
        zaxis1 = np.dot(R, zaxis1) + o
        xp2 = (xaxis[0] / xaxis[2] * camera_matrix[0][0]) + cx
        yp2 = (xaxis[1] / xaxis[2] * camera_matrix[1][1]) + cy
        p2 = (int(xp2), int(yp2))
        cv2.line(frame, (cx, cy), p2, (0, 0, 255), 2)
        xp2 = (yaxis[0] / yaxis[2] * camera_matrix[0][0]) + cx
        yp2 = (yaxis[1] / yaxis[2] * camera_matrix[1][1]) + cy
        p2 = (int(xp2), int(yp2))
        cv2.line(frame, (cx, cy), p2, (0, 255, 0), 2)
        xp1 = (zaxis1[0] / zaxis1[2] * camera_matrix[0][0]) + cx
        yp1 = (zaxis1[1] / zaxis1[2] * camera_matrix[1][1]) + cy
        p1 = (int(xp1), int(yp1))
        xp2 = (zaxis[0] / zaxis[2] * camera_matrix[0][0]) + cx
        yp2 = (zaxis[1] / zaxis[2] * camera_matrix[1][1]) + cy
        p2 = (int(xp2), int(yp2))
        cv2.line(frame, p1, p2, (255, 0, 0), 2)
        cv2.circle(frame, p2, 3, (255, 0, 0), 2)
        return frame

    def build_camera_matrix(self, center_of_face, focal_length):
        cx = int(center_of_face[0])
        cy = int(center_of_face[1])
        camera_matrix = np.zeros((3, 3), dtype='float32')
        camera_matrix[0][0] = focal_length
        camera_matrix[0][2] = cx
        camera_matrix[1][1] = focal_length
        camera_matrix[1][2] = cy
        camera_matrix[2][2] = 1
        return camera_matrix

    def get_model_name(self):
        return self.model

    def write_benchmark(self, title, data):

        # 'with' closes the file automatically; an explicit close() is redundant
        with open("headpose_benchmark_timing.txt", "a") as f:
            f.write(str(title) + "\n")
            f.write(str(data) + "\n")
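
# The draw_axes()/build_camera_matrix() pair above projects 3D axes through a
# pinhole model. A minimal standalone sketch (not part of the class) that
# rebuilds the same Z-Y-X Euler composition and checks it is a proper rotation:
import math
import numpy as np

def euler_to_rotation(yaw_deg, pitch_deg, roll_deg):
    # Convert degrees to radians, then compose R = Rz @ Ry @ Rx as draw_axes does
    yaw, pitch, roll = (a * math.pi / 180.0 for a in (yaw_deg, pitch_deg, roll_deg))
    Rx = np.array([[1, 0, 0],
                   [0, math.cos(pitch), -math.sin(pitch)],
                   [0, math.sin(pitch), math.cos(pitch)]])
    Ry = np.array([[math.cos(yaw), 0, -math.sin(yaw)],
                   [0, 1, 0],
                   [math.sin(yaw), 0, math.cos(yaw)]])
    Rz = np.array([[math.cos(roll), -math.sin(roll), 0],
                   [math.sin(roll), math.cos(roll), 0],
                   [0, 0, 1]])
    return Rz @ Ry @ Rx

R = euler_to_rotation(30, 10, -5)
# A proper rotation matrix is orthonormal with determinant 1
assert np.allclose(R @ R.T, np.eye(3)) and np.isclose(np.linalg.det(R), 1.0)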
Example #11
def main():
    args = build_argparser().parse_args()

    # ------------- 1. Plugin initialization for specified device and load extensions library if specified -------------
    log.info("Creating Inference Engine...")
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # -------------------- 2. Reading the IR generated by the Model Optimizer (.xml and .bin files) --------------------
    log.info("Loading network")
    net = ie.read_network(args.model, os.path.splitext(args.model)[0] + ".bin")

    # ---------------------------------- 3. Load CPU extension for support specific layer ------------------------------
    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)

    assert len(net.inputs.keys()) == 1, \
        "Sample supports only YOLO V3 based single input topologies"

    # ---------------------------------------------- 4. Preparing inputs -----------------------------------------------
    log.info("Preparing inputs")
    input_blob = next(iter(net.inputs))

    # Default batch_size is 1
    net.batch_size = 1

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape

    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    input_stream = 0 if args.input == "cam" else args.input

    is_async_mode = True
    cap = cv2.VideoCapture(input_stream)
    number_input_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # Cameras and streams report a non-positive frame count: keep -1 (endless
    # stream) as-is, but treat any other negative value as a single frame
    number_input_frames = 1 if number_input_frames != -1 and number_input_frames < 0 else number_input_frames

    wait_key_code = 1

    # A still image has exactly one frame, which is processed once; sync mode is the default in that case
    if number_input_frames != 1:
        ret, frame = cap.read()
    else:
        is_async_mode = False
        wait_key_code = 0

    # ----------------------------------------- 5. Loading model to the plugin -----------------------------------------
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net,
                               num_requests=2,
                               device_name=args.device)

    cur_request_id = 0
    next_request_id = 1
    render_time = 0
    parsing_time = 0

    # ----------------------------------------------- 6. Doing inference -----------------------------------------------
    log.info("Starting inference...")
    print(
        "To close the application, press 'CTRL+C' here or switch to the output window and press ESC key"
    )
    print(
        "To switch between sync/async modes, press TAB key in the output window"
    )
    while cap.isOpened():
        # Here is the first asynchronous point: in the Async mode, we capture frame to populate the NEXT infer request
        # in the regular mode, we capture frame to the CURRENT infer request
        if is_async_mode:
            ret, next_frame = cap.read()
        else:
            ret, frame = cap.read()

        if not ret:
            break

        if is_async_mode:
            request_id = next_request_id
            in_frame = cv2.resize(next_frame, (w, h))
        else:
            request_id = cur_request_id
            in_frame = cv2.resize(frame, (w, h))

        # resize input_frame to network size
        in_frame = in_frame.transpose(
            (2, 0, 1))  # Change data layout from HWC to CHW
        in_frame = in_frame.reshape((n, c, h, w))

        # Start inference
        start_time = time()
        exec_net.start_async(request_id=request_id,
                             inputs={input_blob: in_frame})
        det_time = time() - start_time

        # Collecting object detection results
        objects = list()
        if exec_net.requests[cur_request_id].wait(-1) == 0:
            output = exec_net.requests[cur_request_id].outputs
            start_time = time()
            for layer_name, out_blob in output.items():
                out_blob = out_blob.reshape(
                    net.layers[net.layers[layer_name].parents[0]].shape)
                layer_params = YoloParams(net.layers[layer_name].params,
                                          out_blob.shape[2])
                log.info("Layer {} parameters: ".format(layer_name))
                layer_params.log_params()
                objects += parse_yolo_region(out_blob, in_frame.shape[2:],
                                             frame.shape[:-1], layer_params,
                                             args.prob_threshold)
            parsing_time = time() - start_time

        # Filtering overlapping boxes with respect to the --iou_threshold CLI parameter
        objects = sorted(objects,
                         key=lambda obj: obj['confidence'],
                         reverse=True)
        for i in range(len(objects)):
            if objects[i]['confidence'] == 0:
                continue
            for j in range(i + 1, len(objects)):
                if intersection_over_union(objects[i],
                                           objects[j]) > args.iou_threshold:
                    objects[j]['confidence'] = 0

        # Drawing objects with respect to the --prob_threshold CLI parameter
        objects = [
            obj for obj in objects if obj['confidence'] >= args.prob_threshold
        ]

        if len(objects) and args.raw_output_message:
            log.info("\nDetected boxes for batch {}:".format(1))
            log.info(
                " Class ID | Confidence | XMIN | YMIN | XMAX | YMAX | COLOR ")

        origin_im_size = frame.shape[:-1]
        for obj in objects:
            # Validation bbox of detected object
            if obj['xmax'] > origin_im_size[1] or obj['ymax'] > origin_im_size[
                    0] or obj['xmin'] < 0 or obj['ymin'] < 0:
                continue
            color = (int(min(obj['class_id'] * 12.5,
                             255)), min(obj['class_id'] * 7,
                                        255), min(obj['class_id'] * 5, 255))
            det_label = labels_map[obj['class_id']] if labels_map and len(labels_map) >= obj['class_id'] else \
                str(obj['class_id'])

            if args.raw_output_message:
                log.info(
                    "{:^9} | {:10f} | {:4} | {:4} | {:4} | {:4} | {} ".format(
                        det_label, obj['confidence'], obj['xmin'], obj['ymin'],
                        obj['xmax'], obj['ymax'], color))

            cv2.rectangle(frame, (obj['xmin'], obj['ymin']),
                          (obj['xmax'], obj['ymax']), color, 2)
            cv2.putText(
                frame, "#" + det_label + ' ' +
                str(round(obj['confidence'] * 100, 1)) + ' %',
                (obj['xmin'], obj['ymin'] - 7), cv2.FONT_HERSHEY_COMPLEX, 0.6,
                color, 1)

        # Draw performance stats over frame
        inf_time_message = "Inference time: N\A for async mode" if is_async_mode else \
            "Inference time: {:.3f} ms".format(det_time * 1e3)
        render_time_message = "OpenCV rendering time: {:.3f} ms".format(
            render_time * 1e3)
        async_mode_message = "Async mode is on. Processing request {}".format(cur_request_id) if is_async_mode else \
            "Async mode is off. Processing request {}".format(cur_request_id)
        parsing_message = "YOLO parsing time is {:.3f} ms".format(
            parsing_time * 1e3)

        cv2.putText(frame, inf_time_message, (15, 15),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
        cv2.putText(frame, render_time_message, (15, 45),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
        cv2.putText(frame, async_mode_message,
                    (10, int(origin_im_size[0] - 20)),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
        cv2.putText(frame, parsing_message, (15, 30), cv2.FONT_HERSHEY_COMPLEX,
                    0.5, (10, 10, 200), 1)

        start_time = time()
        if not args.no_show:
            cv2.imshow("DetectionResults", frame)
        render_time = time() - start_time

        if is_async_mode:
            cur_request_id, next_request_id = next_request_id, cur_request_id
            frame = next_frame

        if not args.no_show:
            key = cv2.waitKey(wait_key_code)

            # ESC key
            if key == 27:
                break
            # Tab key
            if key == 9:
                exec_net.requests[cur_request_id].wait()
                is_async_mode = not is_async_mode
                log.info("Switched to {} mode".format(
                    "async" if is_async_mode else "sync"))

    cv2.destroyAllWindows()
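
# The suppression loop above relies on an intersection_over_union() helper defined
# elsewhere in the demo. A minimal sketch of such a helper, assuming each object
# dict carries the same xmin/ymin/xmax/ymax keys used above:
def intersection_over_union(box_1, box_2):
    # Width/height of the overlap rectangle; negative values mean the boxes are disjoint
    width_of_overlap_area = min(box_1['xmax'], box_2['xmax']) - max(box_1['xmin'], box_2['xmin'])
    height_of_overlap_area = min(box_1['ymax'], box_2['ymax']) - max(box_1['ymin'], box_2['ymin'])
    if width_of_overlap_area < 0 or height_of_overlap_area < 0:
        area_of_overlap = 0
    else:
        area_of_overlap = width_of_overlap_area * height_of_overlap_area
    box_1_area = (box_1['ymax'] - box_1['ymin']) * (box_1['xmax'] - box_1['xmin'])
    box_2_area = (box_2['ymax'] - box_2['ymin']) * (box_2['xmax'] - box_2['xmin'])
    area_of_union = box_1_area + box_2_area - area_of_overlap
    if area_of_union == 0:
        return 0
    return area_of_overlap / area_of_union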
Example #12
class Facedetection:
    def __init__(self, model_name, threshold, device, extension, version):
        # Load all relevant variables into the class
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extension = extension
        self.threshold = threshold
        self.version = version

        print("--------")
        print("START Facedetection")
        print("--------")

    def load_model(self):
        # Loads the model

        # Initialise the network and save it in the self.network variables
        try:
            self.core = IECore()
            #self.network = self.core.read_network(self.model_structure, self.model_weights) #new version
            self.network = IENetwork(model=self.model_structure,
                                     weights=self.model_weights)
            #log.info("Model is loaded as: ", self.network)
            self.input_name = next(iter(self.network.inputs))
        except Exception as e:
            log.error("Could not initialise the network")
            raise ValueError("Could not initialise the network")
        print("--------")
        print("Model is loaded as self.network : " + str(self.network))

        # Add extension
        if "CPU" in self.device and (self.version == 2019):
            log.info("Add extension: ({})".format(str(self.extension)))
            self.core.add_extension(self.extension, self.device)

        # Check supported layers
        self.check_model()
        # Load the network into an executable network
        self.exec_network = self.core.load_network(network=self.network,
                                                   device_name=self.device,
                                                   num_requests=1)
        #log.info("Exec_network is loaded as:" + str(self.exec_network))
        #print("Exec_network is loaded as:" + str(self.exec_network))
        #print("--------")

        model_data = [
            self.model_weights, self.model_structure, self.device,
            self.extension, self.threshold
        ]
        modellayers = self.getmodellayers()

        return model_data, modellayers

    def getmodellayers(self):
        # Get all necessary model values.
        self.input_name = next(iter(self.network.inputs))
        self.output_name = next(iter(self.network.outputs))
        self.input_shape = self.network.inputs[self.input_name].shape

        # Gets all input and outputs. Just for information.
        self.input_name_all = [i for i in self.network.inputs.keys()]
        self.input_name_all_02 = self.network.inputs.keys()
        self.input_name_first_entry = self.input_name_all[0]

        self.output_name_type = self.network.outputs[self.output_name]
        self.output_names = [i for i in self.network.outputs.keys()]
        self.output_names_total_entries = len(self.output_names)

        self.output_shape = self.network.outputs[self.output_name].shape
        self.output_shape_second_entry = self.network.outputs[
            self.output_name].shape[1]
        modellayers = [self.input_name, self.input_name_all, self.input_name_all_02,  self.input_name_first_entry, self.input_shape, self.output_name, self.output_name_type, \
            self.output_names, self.output_names_total_entries, self.output_shape, self.output_shape_second_entry]

        return modellayers

    def check_model(self):

        # Check for supported layers
        log.info("Checking for unsupported layers")
        if "CPU" in self.device:
            supported_layers = self.core.query_network(self.network, "CPU")
            print("--------")
            print("Check for supported layers")
            #print("supported_layers: " + str(supported_layers))
            not_supported_layers = [
                l for l in self.network.layers.keys()
                if l not in supported_layers
            ]
            print("--------")
            if len(not_supported_layers) != 0:
                log.error("Following layers are not supported:",
                          not_supported_layers)
                #print("Sorry, not all layers are supported")
                sys.exit(1)
        log.info("All layers are supported")

    def predict(self, frame):
        # Starts predictions face_detection
        print("--------")
        print("Starts predictions for face_detection")

        # Pre-process the image
        preprocessed_image = self.preprocess_input(frame)

        # Starts synchronous inference
        print("Start syncro inference")
        log.info("Start syncro inference face detection")

        outputs = self.exec_network.infer(
            {self.input_name: preprocessed_image})
        print("Output of the inference request: " + str(outputs))

        requestid = 0
        outputs = self.exec_network.requests[requestid].outputs[
            self.output_name]
        print("Output of the inference request (self.output_name): " +
              str(outputs))
        processed_image, frame_cropped, coords = self.preprocess_output(
            outputs, frame)
        #cv2.imwrite("output/cropped_image_02.png", frame_cropped)
        print("End predictions face_detection")
        print("--------")

        return processed_image, frame_cropped, coords

    def preprocess_input(self, frame):
        # In this function the original image is resized, transposed and reshaped to fit the model requirements.
        print("--------")
        print("Start preprocess image")
        log.info("Start preprocess image face detection")
        n, c, h, w = self.input_shape
        print(w, h)
        preprocessed_image = cv2.resize(frame, (w, h))
        preprocessed_image = preprocessed_image.transpose((2, 0, 1))
        preprocessed_image = preprocessed_image.reshape((n, c, h, w))
        print(
            "The input shape from the face detection is n= ({})  c= ({})  h= ({})  w= ({})"
            .format(str(n), str(c), str(h), str(w)))
        log.info(
            "The input shape from the face detection is n= ({})  c= ({})  h= ({})  w= ({})"
            .format(str(n), str(c), str(h), str(w)))
        print("Image is now [BxCxHxW]: " + str(preprocessed_image.shape))
        log.info("Image is now [BxCxHxW]: " + str(preprocessed_image.shape))
        print("End: preprocess image")
        print("--------")

        return preprocessed_image

    def preprocess_output(self, outputs, frame):

        coords = []
        print("--------")
        print("Start: preprocess_output")
        log.info("Start preprocess_output face_detection")
        print("Bounding box input: " + str(outputs))
        self.initial_w = frame.shape[1]
        self.initial_h = frame.shape[0]
        print("Original image size is (W x H): " + str(self.initial_w) + "x" +
              str(self.initial_h))
        for obj in outputs[0][0]:
            confidence = obj[2]
            if confidence >= self.threshold:
                obj[3] = int(obj[3] * self.initial_w)
                obj[4] = int(obj[4] * self.initial_h)
                obj[5] = int(obj[5] * self.initial_w)
                obj[6] = int(obj[6] * self.initial_h)
                coords.append([obj[3], obj[4], obj[5], obj[6]])
                print("Bounding box coordinates face detection: " +
                      str(obj[3]) + " x " + str(obj[4]) + " x " + str(obj[5]) +
                      " x " + str(obj[6]))
                log.info("Bounding box coordinates face detection: " +
                         str(obj[3]) + " x " + str(obj[4]) + " x " +
                         str(obj[5]) + " x " + str(obj[6]))
                self.xmin = int(obj[3])
                self.ymin = int(obj[4])
                self.xmax = int(obj[5])
                self.ymax = int(obj[6])
                cv2.rectangle(frame, ((self.xmin + 10), (self.ymin + 10)),
                              ((self.xmax - 10), (self.ymax - 10)), (0, 0, 0),
                              1)

                # draw line (just for fun)
                cv2.line(frame, (self.xmin, self.ymin),
                         (self.xmin, self.ymin + 20), (0, 0, 0), 3)
                cv2.line(frame, (self.xmin, self.ymin),
                         (self.xmin + 20, self.ymin), (0, 0, 0), 3)

                cv2.line(frame, (self.xmax, self.ymax),
                         (self.xmax, self.ymax - 20), (0, 0, 0), 3)
                cv2.line(frame, (self.xmax, self.ymax),
                         (self.xmax - 20, self.ymax), (0, 0, 0), 3)

                cv2.line(frame, (self.xmax, self.ymin),
                         (self.xmax, self.ymin + 20), (0, 0, 0), 3)
                cv2.line(frame, (self.xmax, self.ymin),
                         (self.xmax - 20, self.ymin), (0, 0, 0), 3)

                cv2.line(frame, (self.xmin, self.ymax),
                         (self.xmin, self.ymax - 20), (0, 0, 0), 3)
                cv2.line(frame, (self.xmin, self.ymax),
                         (self.xmin + 20, self.ymax), (0, 0, 0), 3)

                print("Bounding box coordinates face detection: " +
                      str(self.xmin) + " x " + str(self.ymin) + " x " +
                      str(self.xmax) + " x " + str(self.ymax))
                log.info(
                    "Bounding box coordinates face detection (int)xmin/ymin/xmax/ymax: "
                    + str(self.xmin) + " x " + str(self.ymin) + " x " +
                    str(self.xmax) + " x " + str(self.ymax))

        print("End: boundingbox")
        print("--------")
        frame_cropped = frame.copy()
        frame_cropped = frame_cropped[self.ymin:(self.ymax + 1),
                                      self.xmin:(self.xmax + 1)]
        cv2.imwrite("output/Face_cropped image.png", frame_cropped)
        cv2.imwrite("output/Face_image.png", frame)

        return frame, frame_cropped, coords

    def load_data(self, input_type, input_file):

        print("Start load_data from InputFeeder")
        if input_type == 'video':
            cap = cv2.VideoCapture(input_file)
            print("Input = video")
            log.info("Input = video")
        elif input_type == 'cam':
            cap = cv2.VideoCapture(0)
            print("Input = cam")
            log.info("Input = cam")
        else:
            cap = cv2.imread(input_file)
            print("Input = image")
            log.info("Input = image")

        return cap

    def start(self, cap, frame, inputtype):
        # Start predictions; `cap` is the capture object returned by load_data()
        if inputtype in ('video', 'cam'):
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame = self.predict(frame)
                cap.release()
            except Exception as e:
                print("Could not run Inference: ", e)
                log.error("Could not run Inference: {}".format(e))

        if inputtype == 'image':
            print("Image")
            #image = '/home/pi/KeyBox/face_test.jpg'
            #frame=cv2.imread(image)
            frame = self.predict(frame)
            path = '/home/pi/KeyBox/Face_cropped image.png'
            image = cv2.imread(path)
            cv2.imshow("test", image)
            cv2.waitKey(0)
        cv2.destroyAllWindows()
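
# A sketch of how the Facedetection class might be driven end to end; the model
# name, threshold and video path below are placeholders, not values from this repo.
fd = Facedetection(model_name='face-detection-adas-0001', threshold=0.6,
                   device='CPU', extension=None, version=2020)
model_data, modellayers = fd.load_model()
cap = fd.load_data('video', 'input_video.mp4')
fd.start(cap, None, 'video')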
Example #13
class Network:
    """
    Load and configure inference plugins for the specified target devices 
    and performs synchronous and asynchronous modes for the specified infer requests.
    """
    def __init__(self):
        ### TODO: Initialize any class variables desired ###
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None

    def load_model(self, model, device="CPU", cpu_extension=None):
        ### TODO: Load the model ###
        ### Load the Inference Engine API
        self.plugin = IECore()

        ### Load IR files into their related class
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        self.network = IENetwork(model=model_xml, weights=model_bin)

        ### Add a CPU extension, if applicable.
        if cpu_extension and os.path.isfile(cpu_extension) and "CPU" in device:
            self.plugin.add_extension(cpu_extension, device)

        ### Get the supported layers of the network
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=device)

        ### Check for any unsupported layers, and let the user know if anything is missing. Exit the program, if so.
        unsupported_layers = [l for l in self.network.layers.keys()
                              if l not in supported_layers]
        if len(unsupported_layers) != 0:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Check whether the extensions are available to add to IECore.")
            exit(1)

        ### Load the network into the Inference Engine
        self.exec_network = self.plugin.load_network(self.network, device)
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))

        #print("IR is successfully loaded into Inference Engine.")
        #print

        return
        ### Note: You may need to update the function parameters. ###

    def get_input_shape(self):
        ### TODO: Return the shape of the input layer ###
        #it returns the shape of the input layer
        return self.network.inputs[self.input_blob].shape

    def exec_net(self, request_id, image):
        #start acynchronous request
        self.infer_request_handle = self.exec_network.start_async(
            request_id=request_id, inputs={self.input_blob: image})
        return self.exec_network

    def wait(self, request_id):
        ### TODO: Wait for the request to be complete. ###
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
        wait_n = self.exec_network.requests[request_id].wait(-1)
        return wait_n

    def get_output(self, request_id):
        ### TODO: Extract and return the output results
        ### Note: You may need to update the function parameters. ###
        return self.exec_network.requests[request_id].outputs[self.output_blob]
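
# A sketch of the request cycle this wrapper exposes; the model path is a
# placeholder and the zero tensor stands in for a real preprocessed frame.
import numpy as np

net = Network()
net.load_model('model.xml', device='CPU', cpu_extension=None)
n, c, h, w = net.get_input_shape()
dummy = np.zeros((n, c, h, w), dtype=np.float32)
net.exec_net(request_id=0, image=dummy)
if net.wait(request_id=0) == 0:  # 0 == StatusCode.OK
    result = net.get_output(request_id=0)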
Example #14
class Gaze_Estimator:
    '''
    Class for the Gaze Estimator model.
    '''
    def __init__(self, model, weights, device, extensions=None):
        self.device = device
        self.extensions = extensions
        self.model = model
        self.weights = weights
        self.plugin = None
        self.network = None
        self.input = None
        self.inputBlob = None
        self.output = None
        self.outputBlob = None
        self.execNetwork = None
        self.inferRequest = None

    def load_model(self):
        self.plugin = IECore()

        if self.extensions:
            self.plugin.add_extension(self.extensions, self.device)

        self.network = self.plugin.read_network(model=self.model,
                                                weights=self.weights)
        supported_layers = self.plugin.query_network(self.network, self.device)
        unsupported_layers = []

        for layer in self.network.layers.keys():
            if layer not in supported_layers:
                unsupported_layers.append(layer)

        if len(unsupported_layers) != 0:
            log.info(
                'Please add Extension as some unsupported layers currently exist'
            )

        self.execNetwork = self.plugin.load_network(self.network,
                                                    self.device,
                                                    num_requests=1)
        self.inputBlob = [i for i in self.network.inputs.keys()]
        self.outputBlob = [i for i in self.network.outputs.keys()]
        self.input_shape = self.network.inputs[self.inputBlob[1]].shape

    def predict(self, left_eye_image, right_eye_image, head_pose_angle):
        le_img_processed, re_img_processed = self.preprocess_input(
            left_eye_image, right_eye_image)
        outputs = self.execNetwork.infer({
            'head_pose_angles': head_pose_angle,
            'left_eye_image': le_img_processed,
            'right_eye_image': re_img_processed
        })
        mouse_coords, gaze_vec = self.preprocess_output(
            outputs, head_pose_angle)

        return mouse_coords, gaze_vec

    def preprocess_input(self, left_eye_image, right_eye_image):
        le_img_resized = cv2.resize(left_eye_image,
                                    (self.input_shape[3], self.input_shape[2]))
        le_img_processed = np.transpose(np.expand_dims(le_img_resized, axis=0),
                                        (0, 3, 1, 2))

        re_img_resized = cv2.resize(right_eye_image,
                                    (self.input_shape[3], self.input_shape[2]))
        re_img_processed = np.transpose(np.expand_dims(re_img_resized, axis=0),
                                        (0, 3, 1, 2))

        return le_img_processed, re_img_processed

    def preprocess_output(self, outputs, head_pose_angle):
        gaze_vec = outputs[self.outputBlob[0]].tolist()[0]
        angle_r_fc = head_pose_angle[2]
        cosine = math.cos(angle_r_fc * math.pi / 180)
        sine = math.sin(angle_r_fc * math.pi / 180)

        # Rotate the 2D gaze vector by the roll angle to compensate for head tilt
        x_val = gaze_vec[0] * cosine + gaze_vec[1] * sine
        y_val = -gaze_vec[0] * sine + gaze_vec[1] * cosine

        return (x_val, y_val), gaze_vec
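
# preprocess_output() above compensates head roll by rotating the 2D gaze vector.
# A standalone sketch of that rotation under the same assumption:
import math
import numpy as np

def rotate_gaze(gaze_vec, roll_deg):
    # Plain 2D rotation; equivalent to the x_val/y_val lines above
    theta = roll_deg * math.pi / 180
    rot = np.array([[math.cos(theta), math.sin(theta)],
                    [-math.sin(theta), math.cos(theta)]])
    return rot @ np.asarray(gaze_vec[:2])

# A gaze straight along +x with the head rolled 90 degrees maps to roughly (0, -1)
print(rotate_gaze([1.0, 0.0, 0.0], 90.0))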
Example #15
    def draw_inference_from_video(self):
        """
        Call this functions after creating object of VideoInfer
        class by passing all the required parameters to draw inference.
        """

        log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
        log.info("Creating Inference Engine...")
        ie = IECore()
        if self.extention_lib_path and 'CPU' in self.device:
            ie.add_extension(self.extention_lib_path, "CPU")
        # Read IR
        log.info("Loading network files:\n\t{}\n\t{}".format(self.model_xml, self.model_path))
        net = IENetwork(model=self.model_xml, weights=self.model_path)

        if "CPU" in self.device:
            supported_layers = ie.query_network(net, "CPU")
            not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
            if len(not_supported_layers) != 0:
                log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                          format(self.device, ', '.join(not_supported_layers)))
                log.error("Please try to specify cpu extensions library path in config")
                sys.exit(1)

        img_info_input_blob = None
        feed_dict = {}
        for blob_name in net.inputs:
            if len(net.inputs[blob_name].shape) == 4:
                input_blob = blob_name
            elif len(net.inputs[blob_name].shape) == 2:
                img_info_input_blob = blob_name
            else:
                raise RuntimeError("Unsupported {}D input layer '{}'. Only 2D and 4D input layers are supported"
                                   .format(len(net.inputs[blob_name].shape), blob_name))

        assert len(net.outputs) == 1, "Demo supports only single output topologies"

        out_blob = next(iter(net.outputs))
        log.info("Loading IR to the plugin...")
        exec_net = ie.load_network(network=net, num_requests=2, device_name=self.device)
        # Read and pre-process input image
        n, c, h, w = net.inputs[input_blob].shape
        if img_info_input_blob:
            feed_dict[img_info_input_blob] = [h, w, 1]

        if self.input_stream == 'cam':
            input_stream = 0
        elif self.input_stream.startswith('rtsp'):
            log.info('Using RTSP feed')
            input_stream = self.input_stream
        else:
            input_stream = self.input_stream
            assert os.path.isfile(self.input_stream), "Specified input file doesn't exist"
        if self.labels:
            with open(self.labels, 'r') as f:
                labels_map = [x.strip() for x in f]
        else:
            labels_map = None

        cap = cv2.VideoCapture(input_stream)

        cur_request_id = 0
        next_request_id = 1

        log.info("Starting inference in async mode...")
        
        render_time = 0
        ret, frame = cap.read()

        print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")
        print("To switch between sync/async modes, press TAB key in the output window")
        
        while cap.isOpened():
            if self.async_mode:
                ret, next_frame = cap.read()
            else:
                ret, frame = cap.read()
            if not ret:
                break
            initial_w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
            initial_h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
            # Main sync point:
            # in the truly Async mode we start the NEXT infer request, while waiting for the CURRENT to complete
            # in the regular mode we start the CURRENT request and immediately wait for its completion
            inf_start = time.time()
            if self.async_mode:
                in_frame = cv2.resize(next_frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((n, c, h, w))
                feed_dict[input_blob] = in_frame
                exec_net.start_async(request_id=next_request_id, inputs=feed_dict)
            else:
                in_frame = cv2.resize(frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((n, c, h, w))
                feed_dict[input_blob] = in_frame
                exec_net.start_async(request_id=cur_request_id, inputs=feed_dict)
            if exec_net.requests[cur_request_id].wait(-1) == 0:
                inf_end = time.time()
                det_time = inf_end - inf_start

                # Parse detection results of the current request
                res = exec_net.requests[cur_request_id].outputs[out_blob]
                detections = list()
                for obj in res[0][0]:
                    # Draw only objects when probability more than specified threshold
                    if obj[2] > self.prob_thresh:
                        detection_data = dict()
                        xmin = int(obj[3] * initial_w)
                        ymin = int(obj[4] * initial_h)
                        xmax = int(obj[5] * initial_w)
                        ymax = int(obj[6] * initial_h)
                        class_id = int(obj[1])
                        detection_data['class'] = class_id
                        detection_data['bbox'] = [(xmin, ymin), (xmax, ymax)]
                        detections.append(detection_data)
                        # Draw box and label/class_id
                        color = (min(class_id * 12.5, 255),
                                 min(class_id * 7, 255),
                                 min(class_id * 5, 255))
                        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                        det_label = labels_map[class_id] if labels_map else str(class_id)
                        cv2.putText(frame, det_label + ' ' + str(round(obj[2] * 100, 1)) + ' %', (xmin, ymin - 7),
                                    cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)
                if len(detections):
                    yield detections
                # Draw performance stats
                inf_time_message = "Inference time: N/A for async mode" if self.async_mode else \
                    "Inference time: {:.3f} ms".format(det_time * 1000)
                render_time_message = "OpenCV rendering time: {:.3f} ms".format(render_time * 1000)
                async_mode_message = "Async mode is on. Processing request {}".format(cur_request_id) if self.async_mode else \
                    "Async mode is off. Processing request {}".format(cur_request_id)

                print('fps', 1 / (render_time + det_time))
                cv2.putText(frame, inf_time_message, (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                cv2.putText(frame, render_time_message, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
                cv2.putText(frame, async_mode_message, (10, int(initial_h - 20)), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                            (10, 10, 200), 1)

            render_start = time.time()
            cv2.imshow("Detection Results", frame) # Comment this line to stop rendering output
            render_end = time.time()
            render_time = render_end - render_start
            if self.async_mode:
                cur_request_id, next_request_id = next_request_id, cur_request_id
                frame = next_frame

            key = cv2.waitKey(1)
            if key == 27:
                break
            if key == 9:
                self.async_mode = not self.async_mode
                log.info("Switched to {} mode".format("async" if self.async_mode else "sync"))

        cv2.destroyAllWindows()
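
# draw_inference_from_video() is a generator, so a caller consumes detections frame
# by frame. The VideoInfer constructor is not shown in this snippet; the keyword
# arguments below are hypothetical stand-ins for the attributes the method reads.
infer = VideoInfer(model_xml='model.xml', model_path='model.bin', device='CPU',
                   extention_lib_path=None, labels=None, input_stream='cam',
                   prob_thresh=0.5, async_mode=True)
for detections in infer.draw_inference_from_video():
    for det in detections:
        print(det['class'], det['bbox'])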
Example #16
class FacialLandmarksDetection:
    '''
    Class for the Facial Landmarks Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        #From params
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extensions = extensions

        #For application
        self.core = None
        self.network = None
        self.exec_net = None
        self.unsupported_layers = None
        self.image = None

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.core = IECore()
        self.network = self.core.read_network(model=self.model_structure, weights=self.model_weights)
        supported_layers = self.core.query_network(network=self.network, device_name=self.device)
        self.unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]

        self.check_model()
        logging.info("Checked facial-landmark-dection model")

        self.exec_net = self.core.load_network(network=self.network, device_name=self.device,num_requests=1)
        self.input = next(iter(self.network.inputs))
        self.output = next(iter(self.network.outputs))

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        img_processed = self.preprocess_input(image.copy())
        self.image = image
        self.exec_net.start_async(request_id=0, inputs={self.input: img_processed})
        if self.exec_net.requests[0].wait(-1) == 0:
            result = self.exec_net.requests[0].outputs[self.output]
            return self.preprocess_output(result[0])

    def check_model(self):
        if len(self.unsupported_layers)!=0 :
            self.core.add_extension(self.extensions, self.device)
            supported_layers = self.core.query_network(network = self.network, device_name=self.device)
            self.unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
            if len(self.unsupported_layers)!=0:
                logging.error("Unsupported layers")
                exit(1)

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        net_input_shape = self.network.inputs[self.input].shape
        p_frame = cv2.resize(image, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose(2, 0, 1)
        p_frame = p_frame.reshape(1, *p_frame.shape)
        return p_frame


    def preprocess_output(self, outputs):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        both_eye_coors = []
        both_eye_coors.append(outputs[0].tolist()[0][0] * self.image.shape[1])
        both_eye_coors.append(outputs[1].tolist()[0][0] * self.image.shape[0])
        both_eye_coors.append(outputs[2].tolist()[0][0] * self.image.shape[1])
        both_eye_coors.append(outputs[3].tolist()[0][0] * self.image.shape[0])
        both_eye_coors = [round(x) for x in both_eye_coors]
        left_x, left_y, right_x, right_y = both_eye_coors
        # Crop a 40x40 patch centred on each eye
        left_eye = self.image[left_y - 20:left_y + 20, left_x - 20:left_x + 20]
        right_eye = self.image[right_y - 20:right_y + 20, right_x - 20:right_x + 20]
        return left_eye, right_eye, both_eye_coors
Example #17
class Model_HeadPoseEstimation:

    def __init__(self, model_name, device='CPU', extensions=None):
        
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.in_name = None
        self.in_shape = None
        self.out_name = None

    def load_model(self):
  
        model_structure = self.model_name
        model_weights = os.path.splitext(self.model_name)[0] + '.bin'

        self.plugin = IECore()
      
        if self.extensions and 'CPU' in self.device:
            self.plugin.add_extension(self.extensions,self.device)

        self.network = IENetwork(model=model_structure, weights=model_weights)

        self.check_model()

        self.exec_net = self.plugin.load_network(network=self.network, device_name=self.device,num_requests=1)
        
        self.in_name = next(iter(self.network.inputs))
        self.in_shape = self.network.inputs[self.in_name].shape
        self.out_name = [i for i in self.network.outputs.keys()]

    def predict(self, image):
    
        processed_image = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.in_name:processed_image})
        final = self.preprocess_output(outputs)
        return final

    def check_model(self):  

        if self.device == "CPU":     
            supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)  
            notsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]

            if len(notsupported_layers) != 0:
                logging.error("[ERROR] Unsupported layers found: {}".format(notsupported_layers))
                sys.exit(1)

    def preprocess_input(self, image):

        image_processed = cv2.resize(image,(self.in_shape[3], self.in_shape[2]))
        image_processed = image_processed.transpose(2, 0, 1)
        image_processed = image_processed.reshape(1, *image_processed.shape)
        return image_processed

    def preprocess_output(self, outputs):

        preprocessed_outputs = []
        preprocessed_outputs.append(outputs['angle_y_fc'].tolist()[0][0])
        preprocessed_outputs.append(outputs['angle_p_fc'].tolist()[0][0])
        preprocessed_outputs.append(outputs['angle_r_fc'].tolist()[0][0])
        return preprocessed_outputs
Example #18
class Network:
    """
    Load and configure inference plugins for the specified target devices 
    and performs synchronous and asynchronous modes for the specified infer requests.
    """
    def __init__(self):
        ### TODO: Initialize any class variables desired ###

        self.plugin = None
        self.net = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None
        return

    def load_model(self, model, CPU_EXTENSION, DEVICE, console_output=False):
        ### TODO: Load the model ###
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        self.plugin = IECore()
        self.net = IENetwork(model=model_xml, weights=model_bin)
        ### TODO: Check for supported layers ###
        # Add the CPU extension if unsupported layers are found

        supported_layers = self.plugin.query_network(network=self.net,
                                                     device_name=DEVICE)
        unsupported_layers = [
            l for l in self.net.layers.keys() if l not in supported_layers
        ]
        if len(unsupported_layers) != 0:
            self.plugin.add_extension(CPU_EXTENSION, DEVICE)
            print("CPU Extension added")

        ### TODO: Add any necessary extensions ###
        ### TODO: Return the loaded inference plugin ###

        self.exec_network = self.plugin.load_network(self.net, DEVICE)

        ### input and output layer
        self.input_blob = next(iter(self.net.inputs))
        self.output_blob = next(iter(self.net.outputs))

        return

    def get_input_shape(self):
        ### TODO: Return the shape of the input layer ###

        return self.net.inputs[self.input_blob].shape

    def exec_net(self, frame):
        ### TODO: Start an asynchronous request ###
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###

        self.exec_network.start_async(request_id=0,
                                      inputs={self.input_blob: frame})

        return

    def wait(self):
        ### TODO: Wait for the request to be complete. ###
        ### TODO: Return any necessary information ###
        ### Note: You may need to update the function parameters. ###
        status = self.exec_network.requests[0].wait(-1)
        return status

    def get_output(self):
        ### TODO: Extract and return the output results
        ### Note: You may need to update the function parameters. ###

        return self.exec_network.requests[0].outputs
Example #19
class Benchmark:
    def __init__(self, device: str, number_infer_requests: int = None, number_iterations: int = None,
                 duration_seconds: int = None, api_type: str = 'async'):
        self.device = device
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type

    def __del__(self):
        del self.ie

    def add_extension(self, path_to_extension: str=None, path_to_cldnn_config: str=None):
        if path_to_cldnn_config:
            self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
            logger.info('GPU extension is loaded: {}'.format(path_to_cldnn_config))

        if path_to_extension:
            self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
            logger.info('CPU extension is loaded: {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version', version.major,
                                                               version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
        return version_string

    def set_config(self, config = {}):
        for device in config.keys():
            self.ie.set_config(config[device], device)

    def read_network(self, path_to_model: str):
        model_filename = os.path.abspath(path_to_model)
        head, ext = os.path.splitext(model_filename)
        weights_filename = os.path.abspath(head + BIN_EXTENSION) if ext == XML_EXTENSION else ""
        ie_network = self.ie.read_network(model_filename, weights_filename)
        return ie_network

    def load_network(self, ie_network: IENetwork, config = {}):
        exe_network = self.ie.load_network(ie_network,
                                           self.device,
                                           config=config,
                                           num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
        # Number of requests
        self.nireq = len(exe_network.requests)

        return exe_network

    def import_network(self, path_to_file : str, config = {}):
        exe_network = self.ie.import_network(model_file=path_to_file,
                                             device_name=self.device,
                                             config=config,
                                             num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
        # Number of requests
        self.nireq = len(exe_network.requests)
        return exe_network

    def infer(self, exe_network, batch_size, progress_bar=None):
        progress_count = 0
        infer_requests = exe_network.requests

        # warming up - out of scope
        if self.api_type == 'sync':
            infer_requests[0].infer()
        else:
            infer_requests[0].async_infer()
            status = exe_network.wait()
            if status != StatusCode.OK:
                raise Exception("Wait for all requests is failed with status code {}!".format(status))

        start_time = datetime.utcnow()
        exec_time = 0
        iteration = 0

        times = []
        in_fly = set()
        # Start inference & calculate performance
        # align the number of iterations to guarantee that the last infer requests are executed in the same conditions
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (self.api_type == 'async' and iteration % self.nireq):
            if self.api_type == 'sync':
                infer_requests[0].infer()
                times.append(infer_requests[0].latency)
            else:
                infer_request_id = exe_network.get_idle_request_id()
                if infer_request_id < 0:
                    status = exe_network.wait(num_requests=1)
                    if status != StatusCode.OK:
                        raise Exception("Wait for idle request failed!")
                    infer_request_id = exe_network.get_idle_request_id()
                    if infer_request_id < 0:
                        raise Exception("Invalid request id!")
                if infer_request_id in in_fly:
                    times.append(infer_requests[infer_request_id].latency)
                else:
                    in_fly.add(infer_request_id)
                infer_requests[infer_request_id].async_infer()
            iteration += 1

            exec_time = (datetime.utcnow() - start_time).total_seconds()

            if progress_bar:
              if self.duration_seconds:
                  # calculate how many progress intervals are covered by current iteration.
                  # depends on the current iteration time and time of each progress interval.
                  # Previously covered progress intervals must be skipped.
                  progress_interval_time = self.duration_seconds / progress_bar.total_num
                  new_progress = int(exec_time / progress_interval_time - progress_count)
                  progress_bar.add_progress(new_progress)
                  progress_count += new_progress
              elif self.niter:
                  progress_bar.add_progress(1)

        # wait the latest inference executions
        status = exe_network.wait()
        if status != StatusCode.OK:
            raise Exception("Wait for all requests is failed with status code {}!".format(status))

        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_requests[infer_request_id].latency)
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if self.api_type == 'sync' else batch_size * iteration / total_duration_sec
        if progress_bar:
            progress_bar.finish()
        return fps, latency_ms, total_duration_sec, iteration
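
# The FPS formula at the end of infer() differs by API type: sync throughput is
# derived from the median latency, async throughput from wall-clock time. A worked
# sketch with made-up numbers:
from statistics import median

times_ms = [12.0, 13.5, 12.8, 14.1, 12.4]          # per-request latencies
batch_size, iterations, total_duration_sec = 1, 400, 10.0

latency_ms = median(times_ms)                      # 12.8 ms
sync_fps = batch_size * 1000 / latency_ms          # ~78.1 FPS, latency-bound
async_fps = batch_size * iterations / total_duration_sec  # 40.0 FPS, throughput-bound
print(latency_ms, round(sync_fps, 1), async_fps)
Example #20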
class Model_HeadPoseEstimation:
    '''
    Class for the Head Pose Estimation Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.output_shape = None
        self.plugin = None
        self.network = None
        self.exec_net = None

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_name,
                                                weights=self.model_weights)
        self.supported_layers = self.plugin.query_network(
            network=self.network, device_name=self.device)
        self.unsupported_layers = [
            layer for layer in self.network.layers.keys()
            if layer not in self.supported_layers
        ]

        if (not self.check_model()):
            exit(1)

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = [i for i in self.network.outputs.keys()]

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        img2 = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: img2})
        return self.preprocess_output(outputs)

    def check_model(self):
        # check for unsupported layers
        if len(self.unsupported_layers) != 0 and self.device == 'CPU':
            print("unsupported layers :{}".format(self.unsupported_layers))
            if self.extensions is not None:
                self.plugin.add_extension(self.extensions, self.device)
                self.supported_layers = self.plugin.query_network(
                    network=self.network, device_name=self.device)
                self.unsupported_layers = [
                    layer for layer in self.network.layers.keys()
                    if layer not in self.supported_layers
                ]
                if len(self.unsupported_layers) != 0:
                    print("unsupported layers found")
                    return False
            else:
                print("cpu extension path not found")
                return False
        return True

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        resized = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        return np.transpose(np.expand_dims(resized, axis=0), (0, 3, 1, 2))

    def preprocess_output(self, outputs):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        return [
            outputs['angle_y_fc'].tolist()[0][0],
            outputs['angle_p_fc'].tolist()[0][0],
            outputs['angle_r_fc'].tolist()[0][0]
        ]
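
# A sketch of driving the class above on a single face crop; the IR path and
# image file below are placeholders.
import cv2

hpe = Model_HeadPoseEstimation('head-pose-estimation-adas-0001.xml')
hpe.load_model()
frame = cv2.imread('face_crop.png')
yaw, pitch, roll = hpe.predict(frame)  # degrees, in the order y, p, r
print(yaw, pitch, roll)
Example #21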
def main():  # noqa
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()

    # ---------------------------Step 1. Initialize inference engine core--------------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()

    if args.extension and args.device == 'CPU':
        log.info(f'Loading the {args.device} extension: {args.extension}')
        ie.add_extension(args.extension, args.device)

    if args.config and args.device in ('GPU', 'MYRIAD', 'HDDL'):
        log.info(f'Loading the {args.device} configuration: {args.config}')
        ie.set_config({'CONFIG_FILE': args.config}, args.device)

    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation or ONNX format---------------
    log.info(f'Reading the network: {args.model}')
    # (.xml and .bin files) or (.onnx file)
    net = ie.read_network(model=args.model)

    if len(net.input_info) != 1:
        log.error('The sample supports only single input topologies')
        return -1

    if len(net.outputs) != 1 and not ('boxes' in net.outputs and 'labels' in net.outputs):
        log.error('The sample supports models with 1 output or with 2 outputs named "boxes" and "labels"')
        return -1

    # ---------------------------Step 3. Configure input & output----------------------------------------------------------
    log.info('Configuring input and output blobs')
    # Get name of input blob
    input_blob = next(iter(net.input_info))

    # Set input and output precision manually
    net.input_info[input_blob].precision = 'U8'

    if len(net.outputs) == 1:
        output_blob = next(iter(net.outputs))
        net.outputs[output_blob].precision = 'FP32'
    else:
        net.outputs['boxes'].precision = 'FP32'
        net.outputs['labels'].precision = 'U16'

    # ---------------------------Step 4. Loading model to the device-------------------------------------------------------
    log.info('Loading the model to the plugin')
    exec_net = ie.load_network(network=net, device_name=args.device)

    # ---------------------------Step 5. Create infer request--------------------------------------------------------------
    # load_network() method of the IECore class with a specified number of requests (default 1) returns an ExecutableNetwork
    # instance which stores infer requests. So you already created Infer requests in the previous step.
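    # A hedged sketch, not part of the original sample: with num_requests=2 the
    # same ExecutableNetwork could pipeline two frames asynchronously
    # (image_0 / image_1 are hypothetical preprocessed frames):
    #   exec_net = ie.load_network(network=net, device_name=args.device, num_requests=2)
    #   exec_net.requests[0].async_infer({input_blob: image_0})
    #   exec_net.requests[1].async_infer({input_blob: image_1})
    #   if exec_net.requests[0].wait(-1) == 0:
    #       res_0 = exec_net.requests[0].outputs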

    # ---------------------------Step 6. Prepare input---------------------------------------------------------------------
    original_image = cv2.imread(args.input)
    image = original_image.copy()
    _, _, net_h, net_w = net.input_info[input_blob].input_data.shape

    if image.shape[:-1] != (net_h, net_w):
        log.warning(f'Image {args.input} is resized from {image.shape[:-1]} to {(net_h, net_w)}')
        image = cv2.resize(image, (net_w, net_h))

    # Change data layout from HWC to CHW
    image = image.transpose((2, 0, 1))
    # Add N dimension to transform to NCHW
    image = np.expand_dims(image, axis=0)

    # ---------------------------Step 7. Do inference----------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    res = exec_net.infer(inputs={input_blob: image})

    # ---------------------------Step 8. Process output--------------------------------------------------------------------
    # Generate a label list
    if args.labels:
        with open(args.labels, 'r') as f:
            labels = [line.split(',')[0].strip() for line in f]

    output_image = original_image.copy()
    h, w, _ = output_image.shape

    if len(net.outputs) == 1:
        res = res[output_blob]
        # Change a shape of a numpy.ndarray with results ([1, 1, N, 7]) to get another one ([N, 7]),
        # where N is the number of detected bounding boxes
        detections = res.reshape(-1, 7)
    else:
        detections = res['boxes']
        detected_labels = res['labels']
        # Redefine scale coefficients: these boxes are in network-input pixels
        w, h = w / net_w, h / net_h

    for i, detection in enumerate(detections):
        if len(net.outputs) == 1:
            _, class_id, confidence, xmin, ymin, xmax, ymax = detection
        else:
            class_id = detected_labels[i]
            xmin, ymin, xmax, ymax, confidence = detection

        if confidence > 0.5:
            label = labels[int(class_id)] if args.labels else int(class_id)

            xmin = int(xmin * w)
            ymin = int(ymin * h)
            xmax = int(xmax * w)
            ymax = int(ymax * h)

            log.info(f'Found: label = {label}, confidence = {confidence:.2f}, ' f'coords = ({xmin}, {ymin}), ({xmax}, {ymax})')

            # Draw a bounding box on the output image
            cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

    cv2.imwrite('out.bmp', output_image)
    if os.path.exists('out.bmp'):
        log.info('Image out.bmp created!')
    else:
        log.error('Image out.bmp was not created. Check your permissions.')

    # ----------------------------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0
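# A typical invocation of this sample (hypothetical file and flag names;
# parse_args() is defined elsewhere in the source):
#   python object_detection_sample.py -m model.xml -i image.jpg -d CPU --labels labels.csv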
Example #22
class Model_FacialLandmarksDetection:
    '''
    Class for the Facial Landmark Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extension=None):

        self.net = None
        self.net_plug = None
        self.inp_name = None
        self.out_name = None
        self.inp_shape = None
        self.out_shape = None

        self.model = model_name
        self.device = device
        self.ext = extension
        self.weights = self.model.split('.')[0] + '.bin'

    def load_model(self, plugin=None):

        if not plugin:
            self.plugin = IECore()
        else:
            self.plugin = plugin

        self.net = IENetwork(model=self.model, weights=self.weights)

        self.net_plug = self.plugin.load_network(network=self.net,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.inp_name = next(iter(self.net.inputs))
        self.out_name = next(iter(self.net.outputs))
        self.inp_shape = self.net.inputs[self.inp_name].shape

    def predict(self, frame):
        processed_frame = self.preprocess_input(frame.copy())
        out_img = self.net_plug.infer({self.inp_name: processed_frame})
        out_img = self.preprocess_output(out_img)
        ht = frame.shape[0]
        wd = frame.shape[1]
        out_img = out_img * np.array([wd, ht, wd, ht])
        out_img = out_img.astype(np.int32)
        lxmin = out_img[0] - 15
        lymin = out_img[1] - 15
        lxmax = out_img[0] + 15
        lymax = out_img[1] + 15
        rxmin = out_img[2] - 15
        rymin = out_img[3] - 15
        rxmax = out_img[2] + 15
        rymax = out_img[3] + 15
        l = frame[lymin:lymax, lxmin:lxmax]
        r = frame[rymin:rymax, rxmin:rxmax]
        eye_dim = [[lxmin, lymin, lxmax, lymax], [rxmin, rymin, rxmax, rymax]]
        return l, r, eye_dim

    def check_model(self):
        pass

    def preprocess_input(self, frame):

        h = self.inp_shape[2]
        w = self.inp_shape[3]
        reshaped_frame = cv2.resize(frame, (w, h))
        reshaped_frame = reshaped_frame.transpose((2, 0, 1))
        reshaped_frame = reshaped_frame.reshape(1, 3, h, w)
        return reshaped_frame

    def preprocess_output(self, out):

        cell = out[self.out_name][0]
        return (cell[0][0][0], cell[1][0][0], cell[2][0][0], cell[3][0][0])
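# A minimal usage sketch for the class above (hypothetical model path and face
# crop; the landmark model is assumed to be landmarks-regression-retail-0009):
#   fld = Model_FacialLandmarksDetection('landmarks-regression-retail-0009.xml')
#   fld.load_model()
#   left_eye, right_eye, eye_boxes = fld.predict(face_crop)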
def main():
    path = os.getcwd()
    print("Welcome to Blindspot Assistance")

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()

    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    log.info("Creating Inference Engine...")
    ie = IECore()
    if args.cpu_threads:
        ie.set_config({'CPU_THREADS_NUM': args.cpu_threads}, args.device)
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)

    img_info_input_blob = None
    feed_dict = {}
    for blob_name in net.inputs:
        if len(net.inputs[blob_name].shape) == 4:
            input_blob = blob_name
        elif len(net.inputs[blob_name].shape) == 2:
            img_info_input_blob = blob_name
        else:
            raise RuntimeError(
                "Unsupported {}D input layer '{}'. Only 2D and 4D input layers are supported"
                .format(len(net.inputs[blob_name].shape), blob_name))

    assert len(net.outputs) == 1, "Demo supports only single output topologies"

    out_blob = next(iter(net.outputs))
    log.info("Loading IR to the plugin...")
    exec_net = ie.load_network(network=net,
                               num_requests=2,
                               device_name=args.device)
    # Read and pre-process input image
    n, c, h, w = net.inputs[input_blob].shape
    if img_info_input_blob:
        feed_dict[img_info_input_blob] = [h, w, 1]

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"
    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    cap = cv2.VideoCapture(input_stream)

    if args.output:
        FILE_OUTPUT = args.output
        if os.path.isfile(FILE_OUTPUT):
            os.remove(FILE_OUTPUT)
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        fps = cap.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(FILE_OUTPUT, fourcc, fps,
                              (int(cap.get(3)), int(cap.get(4))))

    cur_request_id = 0
    next_request_id = 1

    log.info("Starting inference in async mode...")
    is_async_mode = True
    render_time = 0
    ret, frame = cap.read()

    roi = [0, 0, int(cap.get(3) * 0.25),
           int(cap.get(4))]  # ROI: autoselected 25% of the left side

    print(
        "To close the application, press 'CTRL+C' here or switch to the output window and press ESC key"
    )
    print(
        "To switch between sync/async modes, press TAB key in the output window"
    )

    object_time = 0
    alarm = False
    object_detected = False

    while cap.isOpened():
        if is_async_mode:
            ret, next_frame = cap.read()
        else:
            ret, frame = cap.read()
        if not ret:
            break
        initial_w = cap.get(3)
        initial_h = cap.get(4)

        # Selected rectangle overlay
        overlay = frame.copy()
        cv2.rectangle(overlay, (roi[0], roi[1]),
                      (roi[0] + roi[2], roi[1] + roi[3]), (0, 0, 0),
                      -1)  # A filled rectangle
        alpha = 0.3  # Transparency factor.
        cv2.addWeighted(
            overlay, alpha, frame, 1 - alpha, 0, frame
        )  # Following line overlays transparent rectangle over the image

        # Main sync point:
        # in the truly Async mode we start the NEXT infer request, while waiting for the CURRENT to complete
        # in the regular mode we start the CURRENT request and immediately wait for its completion
        inf_start = time.time()
        if is_async_mode:
            in_frame = cv2.resize(next_frame, (w, h))
            in_frame = in_frame.transpose(
                (2, 0, 1))  # Change data layout from HWC to CHW
            in_frame = in_frame.reshape((n, c, h, w))
            feed_dict[input_blob] = in_frame
            exec_net.start_async(request_id=next_request_id, inputs=feed_dict)
        else:
            in_frame = cv2.resize(frame, (w, h))
            in_frame = in_frame.transpose(
                (2, 0, 1))  # Change data layout from HWC to CHW
            in_frame = in_frame.reshape((n, c, h, w))
            feed_dict[input_blob] = in_frame
            exec_net.start_async(request_id=cur_request_id, inputs=feed_dict)
        if exec_net.requests[cur_request_id].wait(-1) == 0:
            inf_end = time.time()
            det_time = inf_end - inf_start

            # Parse detection results of the current request
            # output_blob = [image_id, label, conf, x_min, y_min, x_max, y_max]
            res = exec_net.requests[cur_request_id].outputs[out_blob]
            for obj in res[0][0]:
                # Draw only objects when probability more than specified threshold
                if obj[2] > args.prob_threshold:
                    xmin = int(obj[3] * initial_w)
                    ymin = int(obj[4] * initial_h)
                    xmax = int(obj[5] * initial_w)
                    ymax = int(obj[6] * initial_h)
                    class_id = int(obj[1])
                    # Draw box and label\class_id
                    if (class_id == 1):
                        color = (0, 255, 0)
                    else:
                        color = (255, 0, 0)
                    #color = (min(class_id * 12.5, 255), min(class_id * 7, 255), min(class_id * 5, 255))
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 1)
                    det_label = labels_map[class_id] if labels_map else str(
                        switch_class(class_id))
                    cv2.putText(
                        frame,
                        det_label + ' ' + str(round(obj[2] * 100, 1)) + ' %',
                        (xmin, ymin - 7), cv2.FONT_HERSHEY_COMPLEX, 0.5, color,
                        1)

                    if (xmin > roi[0] and xmin < roi[0] + roi[2]) or (
                            xmax > roi[0] and xmax < roi[0] + roi[2]) or (
                                xmin < roi[0] and xmax > roi[0] + roi[2]):
                        if (ymin > roi[1] and ymin < roi[1] + roi[3]) or (
                                ymax > roi[1] and ymax < roi[1] + roi[3]) or (
                                    ymin < roi[1] and ymax > roi[1] + roi[3]):
                            object_detected = True
                            last_object = str(switch_class(class_id))

        if object_detected:
            object_time = time.time()
            object_detected = False
            alarm = True
        else:
            if (time.time() - object_time > 2):
                alarm = False
        if alarm:
            cv2.circle(frame, (25, 50), 10, (0, 0, 255), -1)
            cv2.putText(frame, "Last object detected: " + last_object,
                        (40, 55), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255),
                        1)
        else:
            cv2.circle(frame, (25, 50), 10, (0, 255, 0), -1)
            cv2.putText(frame, "Nothing detected", (40, 55),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 255, 0), 1)

        # Draw performance stats
        inf_time_message = "Inference time: N/A for async mode" if is_async_mode else \
            "Inference time: {:.3f} ms".format(det_time * 1000)
        render_time_message = "OpenCV rendering time: {:.3f} ms".format(
            render_time * 1000)
        async_mode_message = "Async mode is on. Processing request {}".format(cur_request_id) if is_async_mode else \
            "Async mode is off. Processing request {}".format(cur_request_id)

        cv2.putText(frame, inf_time_message, (15, 15),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
        cv2.putText(frame, render_time_message, (15, 30),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
        cv2.putText(frame, async_mode_message, (10, int(initial_h - 20)),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

        render_start = time.time()

        if args.output:
            out.write(frame)
        if not args.hide_output:
            cv2.imshow("Detection Results", frame)

        render_end = time.time()
        render_time = render_end - render_start

        if is_async_mode:
            cur_request_id, next_request_id = next_request_id, cur_request_id
            frame = next_frame

        key = cv2.waitKey(1)
        if key == ord('l'):
            showCrosshair = False
            fromCenter = True

            roi = cv2.selectROI("Detection Results", frame, fromCenter,
                                showCrosshair)
        if key == 27:
            break
        if key == 9:  # TAB key
            is_async_mode = not is_async_mode
            log.info("Switched to {} mode".format(
                "async" if is_async_mode else "sync"))

    cv2.destroyAllWindows()
class HeadPoseEstimationModel:
    '''
    Class for the Head Pose Estimation Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name
        self.model_weights = self.model_name.split(".")[0]+'.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_names = None

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure, weights=self.model_weights)
        supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
        unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
        
        
        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("Unsupported layers found: {}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding CPU extension")
                self.plugin.add_extension(self.extensions, self.device)
                supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
                unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
                if len(unsupported_layers) != 0:
                    print("Extension was ineffective; unsupported layers remain")
                    exit(1)
                print("CPU extension loaded successfully")
            else:
                print("A CPU extension is required but none was provided")
                exit(1)
        self.exec_net = self.plugin.load_network(network=self.network, device_name=self.device, num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = [i for i in self.network.outputs.keys()]
        
    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        processed_img = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: processed_img})
        return self.preprocess_output(outputs)
        

    def check_model(self):
        supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
        unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]

        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("Unsupported layers found: {}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding CPU extension")
                self.plugin.add_extension(self.extensions, self.device)
                supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
                unsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
                if len(unsupported_layers) != 0:
                    print("CPU extension was ineffective; unsupported layers remain")
                    exit(1)
                print("CPU extension loaded successfully")
            else:
                print("A CPU extension is required but none was provided")
                exit(1)

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        resized_img = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        processed_img = np.transpose(np.expand_dims(resized_img, axis=0), (0, 3, 1, 2))
        return processed_img
            

    def preprocess_output(self, outputs):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        outs = []
        outs.append(outputs['angle_y_fc'].tolist()[0][0])
        outs.append(outputs['angle_p_fc'].tolist()[0][0])
        outs.append(outputs['angle_r_fc'].tolist()[0][0])
        return outs
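# A minimal usage sketch for the class above (hypothetical model path and face
# crop):
#   hpe = HeadPoseEstimationModel('head-pose-estimation-adas-0001.xml')
#   hpe.load_model()
#   yaw, pitch, roll = hpe.predict(face_crop)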
Example #25
def main():
    # log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    trfm = transforms.Compose([
        transforms.Lambda(lambd=cut_pil_image),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    # Plugin initialization for specified device and load extensions library if specified
    print("Creating Inference Engine")
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
    # Read IR
    print("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            print("Following layers are not supported by the plugin for specified device {}:\n {}".
                  format(args.device, ', '.join(not_supported_layers)))
            print("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                  "or --cpu_extension command line argument")
            sys.exit(1)

    assert len(net.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(net.outputs) == 1, "Sample supports only single output topologies"

    print("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape
    print(f'input {input_blob}: {net.inputs[input_blob].shape}')
    print(f'output {out_blob}: {net.outputs[out_blob].shape}')
    bs, ots = net.outputs[out_blob].shape
    print(f'read from:{args.input}')

    # print(f'labels:{labels}')
    # images = [os.path.join(a, b) for a, b in zip(*make_list_of_files(args.input))]

    print("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)


    attributes = AttributesDataset(args.attributes_file)
    test_dataset = CSVDataset(annotation_path=args.attributes_file,
                              images_dir=args.images_dir,
                              attributes=attributes,
                              transform=trfm)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=1)
    device = torch.device("cuda" if torch.cuda.is_available() and args.device == 'cuda' else "cpu")
    model = MultiOutputModel(trained_labels=attributes.fld_names,
                             attrbts=attributes).to(device)
    statedict = torch.load(args.checkpoint, map_location=device)
    model.load_state_dict(statedict)
    model.eval()
    with torch.no_grad():
        for image in test_dataloader:
            img = cv2.imread(image['img_path'][0])
            print(f'image shape={img.shape}: {image["img_path"][0]}')
            cv2.imshow('input image', img)
            img = cv2.resize(img, (w, h), interpolation=cv2.INTER_CUBIC)
            img = img.transpose((2, 0, 1))  # Change data layout from HWC to CHW
            res = exec_net.infer(inputs={input_blob: img})[out_blob]
            res2 = model(image['img'].to(device))
            for il, (v, v2) in enumerate(zip(res[0], res2['label'][0])):
                l = attributes.labels_id_to_name["label"][il]
                print(f'{il} {l:.<18} {v:+.4f} ... {v2:+.4f}')
            k = cv2.waitKey(0)
            if k == ord('q'):
                exit()
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s',
                    level=log.INFO,
                    stream=sys.stdout)
    args = parse_args()

    # ---------------------------Step 1. Initialize inference engine core--------------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()

    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation------------------------------
    log.info(
        f'Loading the network using ngraph function with weights from {args.model}'
    )
    ngraph_function = create_ngraph_function(args)
    net = IENetwork(ngraph.impl.Function.to_capsule(ngraph_function))

    # ---------------------------Step 3. Configure input & output----------------------------------------------------------
    log.info('Configuring input and output blobs')
    # Get names of input and output blobs
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))

    # Set input and output precision manually
    net.input_info[input_blob].precision = 'U8'
    net.outputs[out_blob].precision = 'FP32'

    # Set the batch size equal to the number of input images
    net.batch_size = len(args.input)

    # ---------------------------Step 4. Loading model to the device-------------------------------------------------------
    log.info('Loading the model to the plugin')
    exec_net = ie.load_network(network=net, device_name=args.device)

    # ---------------------------Step 5. Create infer request--------------------------------------------------------------
    # load_network() method of the IECore class with a specified number of requests (default 1) returns an ExecutableNetwork
    # instance which stores infer requests. So you already created Infer requests in the previous step.

    # ---------------------------Step 6. Prepare input---------------------------------------------------------------------
    n, c, h, w = net.input_info[input_blob].input_data.shape
    input_data = np.ndarray(shape=(n, c, h, w))

    for i in range(n):
        image = read_image(args.input[i])

        light_pixel_count = np.count_nonzero(image > 127)
        dark_pixel_count = np.count_nonzero(image < 127)
        is_light_image = (light_pixel_count - dark_pixel_count) > 0

        if is_light_image:
            log.warning(
                f'Image {args.input[i]} is inverted to white over black')
            image = cv2.bitwise_not(image)

        if image.shape != (h, w):
            log.warning(
                f'Image {args.input[i]} is resized from {image.shape} to {(h, w)}'
            )
            image = cv2.resize(image, (w, h))

        input_data[i] = image

    # ---------------------------Step 7. Do inference----------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    res = exec_net.infer(inputs={input_blob: input_data})

    # ---------------------------Step 8. Process output--------------------------------------------------------------------
    # Generate a label list
    if args.labels:
        with open(args.labels, 'r') as f:
            labels = [line.split(',')[0].strip() for line in f]

    res = res[out_blob]

    for i in range(n):
        probs = res[i]
        # Get an array of args.number_top class IDs in descending order of probability
        top_n_indexes = np.argsort(probs)[-args.number_top:][::-1]

        header = 'classid probability'
        header = header + ' label' if args.labels else header

        log.info(f'Image path: {args.input[i]}')
        log.info(f'Top {args.number_top} results: ')
        log.info(header)
        log.info('-' * len(header))

        for class_id in top_n_indexes:
            probability_indent = ' ' * (len('classid') - len(str(class_id)) +
                                        1)
            label_indent = ' ' * (len('probability') -
                                  8) if args.labels else ''
            label = labels[class_id] if args.labels else ''
            log.info(
                f'{class_id}{probability_indent}{probs[class_id]:.7f}{label_indent}{label}'
            )
        log.info('')


    # ----------------------------------------------------------------------------------------------------------------------
    log.info(
        'This sample is an API example, '
        'for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
Example #27
class GazeEstimationModel:
    '''
    Class for defining GazeEstimation Model and Attributes.
    '''
    def __init__(self,
                 model_name,
                 threshold,
                 device='CPU',
                 extensions=None,
                 async_mode=True,
                 plugin=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.out_shape = None
        self.exec_network = None
        self.threshold = threshold
        self.device = device
        self.async_mode = async_mode
        self.infer_request = None
        self.net_plugin = None
        self.net = None
        self.model_xml = model_name
        self.extensions = extensions

    def load_model(self,
                   model_xml,
                   gaze_angles,
                   input_gaze_angles,
                   cpu_extension=None):
        '''
        TODO: load models
        '''
        self.model_xml = model_xml
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        # Initializing the plugin
        self.plugin = IECore()

        # Add any necessary CPU extension
        if cpu_extension and "CPU" in self.device:
            self.plugin.add_extension(cpu_extension, self.device)

        # Reading the Intermediate Representation (IR) model as an IENetwork
        # (the IENetwork constructor is deprecated since the 2020 release)
        self.network = self.plugin.read_network(model=model_xml,
                                                weights=model_bin)

        self.check_plugin(self.plugin)

        ## check for unsupported layers
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        unsupported_layers = [
            l for l in self.network.layers.keys()
            if l not in supported_layers
        ]
        if len(unsupported_layers) != 0:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Please check for supported extensions.")
            exit(1)

        # Loading the IENetwork into the plugin
        self.exec_network = self.plugin.load_network(self.network, self.device)

        # Get the input layer; the pose-angle input is named 'head_pose_angles'
        # in the gaze-estimation-adas-0002 documentation referenced below
        self.input_gaze_angles = self.network.inputs['head_pose_angles']
        self.output_blob = next(iter(self.network.outputs))
        self.out_shape = self.network.outputs[self.output_blob].shape
        logging.info("Gaze estimation model output shape: %s", self.out_shape)
        return

    def predict(self, l_eye_img, r_eye_img, target_gaze, img_frame, width,
                height):
        '''
        TODO: 
        The accuracy of gaze direction prediction is evaluated through the use of "mean absolute error (MAE)" of the angle
        (in degrees) between the ground truth and predicted gaze direction.
        Input_blob
        Blob in the format [BxCxHxW] where 
        B = batch size
        C = number of channels
        H = image height
        W = image width
        with the name right_eye_image and the shape[1x3x60x60]
        Blob in the format [BxC] where:
        B = batch size
        C = number of channels
        with the name head_pose_angles and the shape[1x3]

        outputs_blob
        The net outputs a blob with the shape: [1x3], containing cartesian coordinates of
        gaze direction vector. Please note that output vector is not normalized and has non-unit length.
        Output layer name in Inference Engine format: gaze_vector

        Ref: https://docs.openvinotoolkit.org/latest/omz_models_intel_gaze_estimation_adas_0002_description_gaze_estimation_adas_0002.html

        '''
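        # Per the docstring above, the network's input blobs are named
        # 'left_eye_image', 'right_eye_image' and 'head_pose_angles';
        # the feed dictionaries below use those documented names.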
        l_eye_img, r_eye_img = self.preprocess_input(l_eye_img, r_eye_img)

        # perform inference on the preprocessed eye images and head pose angles
        # ref: https://github.com/gauravshelangia/computer-pointer-controller/blob/master/src/facial_landmark_detection.py
        if self.async_mode:
            self.exec_network.requests[0].async_infer(
                inputs={
                    "head_pose_angles": target_gaze,
                    "left_eye_image": l_eye_img,
                    "right_eye_image": r_eye_img
                })
        else:
            self.exec_network.requests[0].infer(
                inputs={
                    "head_pose_angles": target_gaze,
                    "left_eye_image": l_eye_img,
                    "right_eye_image": r_eye_img
                })

        if self.exec_network.requests[0].wait(-1) == 0:
            outputs = self.exec_network.requests[0].outputs[self.output_blob]
            return self.preprocess_output(l_eye_img, r_eye_img, target_gaze,
                                          outputs)

    def preprocess_input(self, l_eye_img, r_eye_img):
        '''
        TODO: You will need to complete this method.
        Here I preprocess the data before feeding it into the model for inference.
        '''
        # left eye input shape [1, 3, 60, 60]
        l_eye_img = cv2.resize(l_eye_img, (60, 60))
        l_eye_img = l_eye_img.transpose((2, 0, 1))
        l_eye_img = l_eye_img.reshape((1, 3, 60, 60))
        # right eye input shape [1, 3, 60, 60]
        r_eye_img = cv2.resize(r_eye_img, (60, 60))
        r_eye_img = r_eye_img.transpose((2, 0, 1))
        r_eye_img = r_eye_img.reshape((1, 3, 60, 60))

        return l_eye_img, r_eye_img

    def preprocess_output(self, l_eye_img, r_eye_img, target_gaze, outputs):
        '''
        TODO: You will need to complete this method.
        Here I postprocess the gaze vector before passing it to the next stage.
        '''
        # Rotate the gaze vector by the head's roll angle so that pointer
        # movement stays aligned with the screen.
        # Ref: https://knowledge.udacity.com/questions/254779
        gaze_vector = outputs[0]
        roll = target_gaze[2]
        gaze_vector = gaze_vector / np.linalg.norm(gaze_vector)
        cs = math.cos(roll * math.pi / 180.0)
        sn = math.sin(roll * math.pi / 180.0)

        tmp_x = gaze_vector[0] * cs + gaze_vector[1] * sn
        tmp_y = -gaze_vector[0] * sn + gaze_vector[1] * cs

        return (tmp_x, tmp_y), gaze_vector

    def clean(self):
        """
        This function deletes all the open instances
        :return: None
        """
        del self.plugin
        del self.network
        del self.exec_network
        del self.net
        del self.device
Example #28
    def draw_inference_from_image(self):

        log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
        # Plugin initialization for specified device and load extensions library if specified
        log.info("Creating Inference Engine")
        ie = IECore()
        if self.extention_lib_path and 'CPU' in self.device:
            ie.add_extension(self.extention_lib_path, "CPU")
        # Read IR
        log.info("Loading network files:\n\t{}\n\t{}".format(self.model_xml, self.model_path))
        net = IENetwork(model=self.model_xml, weights=self.model_path)

        if "CPU" in self.device:
            supported_layers = ie.query_network(net, "CPU")
            not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
            if len(not_supported_layers) != 0:
                log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                          format(self.device, ', '.join(not_supported_layers)))
                log.error("Please try to specify cpu extensions library path in config")
                sys.exit(1)

        assert len(net.inputs.keys()) == 1, "Sample supports only single input topologies"
        assert len(net.outputs) == 1, "Sample supports only single output topologies"

        log.info("Preparing input blobs")
        input_blob = next(iter(net.inputs))
        out_blob = next(iter(net.outputs))
        net.batch_size = 1

        # Read and pre-process input images
        n, c, h, w = net.inputs[input_blob].shape
        
        
        image = cv2.imread(self.input_stream)
        initial_h, initial_w = image.shape[:2]

        if image.shape[:-1] != (h, w):
            log.warning("Image {} is resized from {} to {}".format(self.input_stream, image.shape[:-1], (h, w)))
            input_image = cv2.resize(image, (w, h))
        else:
            input_image = image
        input_image = input_image.transpose((2, 0, 1))  # Change data layout from HWC to CHW

        # Loading model to the plugin
        log.info("Loading model to the plugin")
        exec_net = ie.load_network(network=net, device_name=self.device)

        if self.labels:
            with open(self.labels, 'r') as f:
                labels_map = [x.strip() for x in f]
        else:
            labels_map = None

        # Start sync inference
        log.info("Starting inference in synchronous mode")
        res = exec_net.infer(inputs={input_blob: input_image})

        # Processing output blob
        log.info("Processing output blob")
        res = res[out_blob]
        
        detections = list()
        for obj in res[0][0]:
            if obj[2] > self.prob_thresh:
                detection_data = dict()
                xmin = int(obj[3] * initial_w)
                ymin = int(obj[4] * initial_h)
                xmax = int(obj[5] * initial_w)
                ymax = int(obj[6] * initial_h)
                class_id = int(obj[1])
                detection_data['class'] = class_id
                detection_data['bbox'] = [(xmin, ymin), (xmax, ymax)]
                detections.append(detection_data)
                # cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
                # Draw box and label\class_id
                color = (min(class_id * 12.5, 255),
                         min(class_id * 7, 255),
                         min(class_id * 5, 255))
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
                det_label = labels_map[class_id] if labels_map else str(class_id)
                cv2.putText(image, det_label + ' ' + str(round(obj[2] * 100, 1)) + ' %', (xmin, ymin - 7),
                            cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)
        # comment out the next two lines to stop rendering detection results
        cv2.imshow("Detection Result(s)", image)
        cv2.waitKey(0)
        return detections
Example #29
class Model_HeadPose:
    '''
    Class for the Head Pose Estimation Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extensions = extensions
#         self.check_model()
#         try:
#             self.input_name = next(iter(self.model.inputs))
#             self.input_shape = self.model.inputs[self.input_name].shape
#             self.output_name = next(iter(self.model.outputs))
#             self.output_shape = self.model.outputs[self.output_name].shape
#             print('Initialise.. completed.')
#         except Exception as e:
#             raise ValueError('Something is wrong with input and output values..')

    def load_model(self):
        '''
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        try:
            print('Model is loading...')
            self.core = IECore()
            self.net = self.core.read_network(model=self.model_structure,
                                              weights=self.model_weights)
            supported = self.core.query_network(self.net, self.device)
            not_supported = [
                layer for layer in self.net.layers.keys()
                if layer not in supported
            ]
            if len(not_supported) != 0 and self.device == 'CPU':
                print('Unsupported layers:', not_supported)
                if self.extensions is not None:
                    print('***Quick fix.\n ~CPU Extension added')
                    self.core.add_extension(self.extensions, self.device)
                    supported = self.core.query_network(self.net, self.device)
                    not_supported = [
                        layer for layer in self.net.layers.keys()
                        if layer not in supported
                    ]
                    if len(not_supported) != 0:
                        print('***Quick fix failed.')
                else:
                    print('Check the extension path.')
            self.net_exec = self.core.load_network(network=self.net,
                                                   device_name=self.device)
        except Exception as e:
            raise RuntimeError('Something is wrong.. ~debug load model~') from e

        try:
            self.input_name = next(iter(self.net.inputs))
            self.input_shape = self.net.inputs[self.input_name].shape
            self.output_name = next(iter(self.net.outputs))
            self.output_shape = self.net.outputs[self.output_name].shape
            print('Initialise.. completed.')
        except Exception as e:
            raise ValueError(
                'Something is wrong with input and output values..')

    def predict(self, image):
        '''
        This method is meant for running predictions on the input image.
        '''
        self.image = image
        print('HeadPose predict..')
        pre_image = self.preprocess_input(self.image)
        input_name = self.input_name
        input_dict = {input_name: pre_image}
        #         infer = self.net_exec.start_async(request_id=0, inputs=input_dict)
        #         status = infer.wait()
        results = self.net_exec.infer(input_dict)
        outputs = self.preprocess_output(results)
        #         if status == 0:
        #             results = infer.outputs[self.output_name]
        #             print(results)
        #             print(self.input_name)
        #             outputs = self.preprocess_output(results)
        return outputs

    def check_model(self):
        '''
        Check - initialise the model
        '''
        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception as e:
            raise ValueError(
                "Could not initialise the network. Have you entered the correct model path?"
            ) from e

    def preprocess_input(self, image):
        '''
        An input image in [BxCxHxW] format.

        B - batch size
        C - number of channels
        H - image height
        W - image width
        '''
        image = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        image = image.transpose((2, 0, 1))
        image = image.reshape(1, *image.shape)
        return image

    def preprocess_output(self, outputs):
        '''
        Output layer names in Inference Engine format:

            name: "angle_y_fc", shape: [1, 1] - Estimated yaw (in degrees).
            name: "angle_p_fc", shape: [1, 1] - Estimated pitch (in degrees).
            name: "angle_r_fc", shape: [1, 1] - Estimated roll (in degrees).
        '''
        object_list = []
        print('PreOutput-headpose..')
        #         print(outputs)
        object_list.append(outputs['angle_y_fc'].tolist()[0][0])
        object_list.append(outputs['angle_p_fc'].tolist()[0][0])
        object_list.append(outputs['angle_r_fc'].tolist()[0][0])

        return object_list
Example #30
class PoseDetector:
    '''
    Class for the Head Pose Estimation Model.
    '''
    def __init__(
            self,
            model_name='models/intel/head-pose-estimation-adas-0001/FP32-INT8/head-pose-estimation-adas-0001',
            device='CPU',
            extensions=None):
        self.model_name = model_name
        self.model_w = model_name + '.bin'
        self.model_s = model_name + '.xml'
        self.device = device
        self.extension = extensions
        self.inference_results = None
        self.pre_image = None
        self.post_image = None
        self.network = None
        self.input_name = None
        self.input_shape = None
        self.output_shape = None
        self.output_name = None
        self.plugin = None
        self.exec_network = None

    def load_model(self):
        self.plugin = IECore()
        log.info("Attempting to load network for model:")
        log.info(self.model_name)
        self.network = self.plugin.read_network(model=self.model_s,
                                                weights=self.model_w)
        self.exec_network = self.plugin.load_network(self.network, self.device)
        self.input_name = next(iter(self.network.inputs))
        self.output_name = next(iter(self.network.outputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_shape = self.network.outputs[self.output_name].shape

    def predict(self, image):
        self.pre_image = self.preprocess_input(image)
        self.inference_results = self.exec_network.infer(
            {self.input_name: self.pre_image})
        pose = self.preprocess_output(self.inference_results, image)
        return pose

    def check_model(self):
        self.plugin = IECore()
        log.info("Checking model layers for model:")
        log.info(self.model_name)
        self.network = self.plugin.read_network(model=self.model_s,
                                                weights=self.model_w)
        # double check supported network layers
        # code taken from Project-01 (ND131)
        if "CPU" in self.device:
            supported_layers = self.plugin.query_network(self.network, "CPU")
            not_supported_layers = [
                l for l in self.network.layers.keys()
                if l not in supported_layers
            ]
            if len(not_supported_layers) != 0:
                log.error(
                    "Following layers are not supported by the plugin for specified device {}:\n {}"
                    .format(self.device, ', '.join(not_supported_layers)))
                log.error(
                    "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                    "or --cpu_extension command line argument")
        versions = self.plugin.get_versions(self.device)
        log.info("{}{}".format(" " * 8, self.device))
        log.info("{}MKLDNNPlugin version ......... {}.{}".format(
            " " * 8, versions[self.device].major, versions[self.device].minor))
        log.info("{}Build ........... {}".format(
            " " * 8, versions[self.device].build_number))

    def preprocess_input(self, image):
        post_frame = cv2.resize(image,
                                (self.input_shape[3], self.input_shape[2]))
        post_frame = post_frame.transpose((2, 0, 1))
        post_frame = post_frame.reshape(1, *post_frame.shape)
        log.debug("input width x height: %d x %d", self.input_shape[3],
                  self.input_shape[2])
        return post_frame

    def preprocess_output(self, outputs, image):
        width = image.shape[1]
        height = image.shape[0]
        pose = []
        pose.append(outputs['angle_y_fc'].tolist()[0][0])
        pose.append(outputs['angle_p_fc'].tolist()[0][0])
        pose.append(outputs['angle_r_fc'].tolist()[0][0])
        return pose
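# A minimal usage sketch for PoseDetector (relies on the default model path in
# __init__; the input image is hypothetical):
#   detector = PoseDetector()
#   detector.check_model()
#   detector.load_model()
#   yaw, pitch, roll = detector.predict(cv2.imread('face.jpg'))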