def prepare_inputs(self, frame: np.ndarray, frame_input_name: str = None) \
        -> Tuple[OV_INPUT_TYPE, Resize]:
    """A helper method to create an OpenVINO input like {input_name: array}

    This method takes a frame and resizes it to fit the network's input,
    then returns two things: the input dict and the Resize information.
    The Resize information records every operation that was applied to the
    frame, allowing users to map detections from the resized frame back to
    the coordinate space of the original frame.

    :param frame: The image, BGR ordered
    :param frame_input_name: Set this value to force a certain node to be
        used as the frame input. Useful if you still want to use the
        default implementation from a subclass whose network has multiple
        inputs.
    :returns: ({input_name: resized_frame}, Resize)
    """
    if not frame_input_name and len(self.net.inputs) > 1:
        raise ValueError("More than one input was expected for the model, "
                         "but the default prepare_inputs implementation "
                         "was used.")

    input_blob_name = frame_input_name or self.input_blob_names[0]
    input_blob = self.net.inputs[input_blob_name]

    _, _, h, w = input_blob.shape
    resize = Resize(frame).resize(w, h, Resize.ResizeType.EXACT)

    # Change the data layout from HWC to CHW
    in_frame = np.transpose(resize.frame.copy(), (2, 0, 1))

    return {input_blob_name: in_frame}, resize
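# A minimal usage sketch (an assumption, not from the original source): a
# detection backend calls prepare_inputs on the full frame, then uses the
# returned Resize to map detections from the resized frame back to the
# original frame's coordinate space. `parse_detection_results` is a
# hypothetical helper standing in for model-specific output parsing.
def process_frame_sketch(self, frame: np.ndarray):
    input_dict, resize = self.prepare_inputs(frame)
    prediction = self.send_to_batch(input_dict).get()
    detections = parse_detection_results(prediction)  # hypothetical helper
    # Undo the EXACT resize so coords land on the original frame
    resize.scale_and_offset_detection_nodes(detections)
    return detections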
def test_resize_pad():
    input_width, input_height = 10, 5
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Pad bottom/right, then top/left
    frame_resize = Resize(frame) \
        .pad(13, 9, 255, Resize.CropPadType.RIGHT_BOTTOM) \
        .pad(17, 11, 254, Resize.CropPadType.LEFT_TOP) \
        .frame
    frame_expected = np.pad(
        np.arange(50, dtype=np.uint8).reshape((input_height, input_width)),
        ((0, 4), (0, 3)), 'constant', constant_values=255)
    frame_expected = np.pad(frame_expected, ((2, 0), (4, 0)), 'constant',
                            constant_values=254)
    assert frame_resize.shape[1] == 17
    assert frame_resize.shape[0] == 11
    assert (frame_resize == frame_expected).all()

    # Pad all around
    frame_resize = Resize(frame) \
        .pad(13, 9, 255, Resize.CropPadType.ALL) \
        .frame
    frame_expected = np.pad(
        np.arange(50, dtype=np.uint8).reshape((input_height, input_width)),
        ((2, 2), (1, 2)), 'constant', constant_values=255)
    assert frame_resize.shape[1] == 13
    assert frame_resize.shape[0] == 9
    # noinspection PyUnresolvedReferences
    assert (frame_resize == frame_expected).all()

    # Padding to a size smaller than the frame should return the frame
    # unchanged
    frame_resize = Resize(frame).pad(9, 4, -1, Resize.CropPadType.ALL).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()

    # CropPadType.NONE should be a no-op
    frame_resize = Resize(frame).pad(-1, -1, -1,
                                     Resize.CropPadType.NONE).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame

    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()

    max_color = config.colors[prediction["color"].argmax()]
    max_type = config.vehicle_types[prediction["type"].argmax()]

    detection_node.attributes["color"] = max_color
    detection_node.attributes["type"] = max_type
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame

    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()

    emotion_id = int(prediction["prob_emotion"].argmax())
    emotion = EMOTION_TYPES[emotion_id]
    emotion_score = float(prediction["prob_emotion"].flatten()[emotion_id])

    detection_node.attributes["emotion"] = emotion
    detection_node.extra_data["emotion_confidence"] = emotion_score
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame

    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()

    # Convert the prediction to a label: int(probability > threshold) is
    # 0 or 1, which indexes into LABELS
    probability = prediction["fc5"].flatten()[0]
    threshold = options["threshold"]
    label = self.LABELS[int(probability > threshold)]

    detection_node.attributes["mask"] = label
    detection_node.extra_data["mask_confidence"] = float(probability)
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame

    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()

    # The network outputs age scaled to [0, 1], so multiply by 100
    age = int(prediction['age_conv3'] * 100)
    gender_id = prediction['prob'].argmax()
    gender = config.genders[gender_id]
    gender_confidence = float(prediction['prob'].flatten()[gender_id])

    detection_node.extra_data['age'] = age
    detection_node.attributes['gender'] = gender
    detection_node.extra_data['gender_confidence'] = gender_confidence
    detection_node.attributes['age'] = _get_age_bin(age)
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame

    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()
    prediction = prediction['453'].flatten()

    # Iterate over the predictions and add attributes accordingly
    for attribute_key, confidence in zip(ATTRIBUTES.keys(), prediction):
        attribute = ATTRIBUTES[attribute_key][
            0 if confidence >= 0.5 else 1]
        option_key = f"{attribute}_confidence"

        # Remap the raw confidence so that it measures distance from the
        # 0.5 decision boundary, rescaled to [0, 1]. This gives a single
        # threshold that works for both the upper range (>= 0.5) and the
        # lower range (< 0.5) of the raw output.
        remapped_confidence = abs(confidence - 0.5) * 2

        float_option = options[option_key]
        detection_node.attributes[attribute_key] = (
            attribute if remapped_confidence > float_option
            else ATTRIBUTES[attribute_key][2])
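# Worked example of the confidence remap above (plain arithmetic, runnable
# on its own; the sample values are illustrative, not from the original
# capsule). Raw sigmoid outputs near 0.5 are ambiguous, so the remap
# measures distance from 0.5 and rescales it to [0, 1].
for raw in (0.95, 0.6, 0.5, 0.1):
    remapped = abs(raw - 0.5) * 2
    print(f"raw={raw:.2f} -> remapped={remapped:.2f}")
# raw=0.95 -> remapped=0.90
# raw=0.60 -> remapped=0.20
# raw=0.50 -> remapped=0.00
# raw=0.10 -> remapped=0.80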
def test_resize_crop():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Simple right/bottom crop
    frame_resize = Resize(frame) \
        .crop(3, 4, Resize.CropPadType.RIGHT_BOTTOM) \
        .frame
    # noinspection PyUnresolvedReferences
    assert (frame[:4, :3] == frame_resize).all()

    # Simple top/left crop
    frame_resize = Resize(frame).crop(4, 3, Resize.CropPadType.LEFT_TOP).frame
    # noinspection PyUnresolvedReferences
    assert (frame[-3:, -4:] == frame_resize).all()

    # Crop starting at a point
    frame_resize = Resize(frame) \
        .crop(2, 7, Resize.CropPadType.CROP_START_POINT,
              top_left=(1, 3)).frame
    # noinspection PyUnresolvedReferences
    assert (frame[3:10, 1:3] == frame_resize).all()

    # Crop all sides (keep the center)
    frame_resize = Resize(frame).crop(2, 3, Resize.CropPadType.ALL).frame
    # noinspection PyUnresolvedReferences
    assert (frame[3:6, 1:3] == frame_resize).all()

    # A crop larger than the frame should return the frame unchanged
    frame_resize = Resize(frame).crop(6, 11, Resize.CropPadType.ALL).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()

    # CropPadType.NONE should be a no-op
    frame_resize = Resize(frame).crop(-1, -1, Resize.CropPadType.NONE).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()
def test_resize():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Basic resize up
    frame_resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 10
    assert frame_resize.shape[0] == 20

    frame_resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.FIT_ONE).frame
    assert frame_resize.shape[1] == 10
    assert frame_resize.shape[0] == 20

    # Resize up where the target aspect ratio is wider than the source
    frame_resize = Resize(frame) \
        .resize(30, 30, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 15
    assert frame_resize.shape[0] == 30

    frame_resize = Resize(frame) \
        .resize(30, 30, Resize.ResizeType.FIT_ONE).frame
    assert frame_resize.shape[1] == 30
    assert frame_resize.shape[0] == 60

    # Resize up where the target aspect ratio is taller than the source
    frame_resize = Resize(frame) \
        .resize(10, 30, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 10
    assert frame_resize.shape[0] == 20

    frame_resize = Resize(frame) \
        .resize(10, 30, Resize.ResizeType.FIT_ONE).frame
    assert frame_resize.shape[1] == 15
    assert frame_resize.shape[0] == 30

    # Resize to width
    frame_resize = Resize(frame).resize(30, -1, Resize.ResizeType.WIDTH).frame
    assert frame_resize.shape[1] == 30
    assert frame_resize.shape[0] == 60

    # Resize to height
    frame_resize = Resize(frame).resize(-1, 30,
                                        Resize.ResizeType.HEIGHT).frame
    assert frame_resize.shape[1] == 15
    assert frame_resize.shape[0] == 30

    # Resize exactly
    frame_resize = Resize(frame).resize(8, 7, Resize.ResizeType.EXACT).frame
    assert frame_resize.shape[1] == 8
    assert frame_resize.shape[0] == 7

    # Resize where the scaling is not an integer
    # Round up
    frame_resize = Resize(frame) \
        .resize(10, 15, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 8
    assert frame_resize.shape[0] == 15

    # Round down
    input_width, input_height = 15, 4
    frame = np.arange(60, dtype=np.uint8).reshape((input_height, input_width))
    frame_resize = Resize(frame) \
        .resize(20, 10, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 20
    assert frame_resize.shape[0] == 5
def test_resize_scale():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Single integer resize
    resize = Resize(frame).resize(10, 20, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([10, 10, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(5, 5, 10, 10)

    # Double integer resize (note that coords are rounded in node.bbox
    # output)
    resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.EXACT) \
        .resize(20, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(4, 4, 5, 5)

    # Single crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 25, 25, 30)

    # Two affecting crops plus one that should not change the offset
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .crop(10, 15, Resize.CropPadType.RIGHT_BOTTOM) \
        .crop(8, 5, Resize.CropPadType.ALL)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(21, 30, 26, 35)

    # Crop then resize
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .resize(30, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(13, 18, 15, 20)

    # Resize then crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .resize(30, 40, Resize.ResizeType.EXACT) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 26, 23, 30)
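# Hand-computed trace of the final "resize then crop" case above. This
# assumes (based on the asserted BoundingBox, not on the Resize internals)
# that the inverse mapping adds back the crop offset and then divides by
# the resize scale, in reverse order of the operations.
scale_x, scale_y = 30 / 20, 40 / 30   # EXACT resize: 20x30 -> 30x40
off_x, off_y = 30 - 15, 40 - 20       # LEFT_TOP crop drops 15 left cols, 20 top rows
coords = [15, 15, 20, 20]             # detection in the final (cropped) frame
x1 = (coords[0] + off_x) / scale_x    # (15 + 15) / 1.5  = 20.0
y1 = (coords[1] + off_y) / scale_y    # (15 + 20) / 1.33 = 26.25
x2 = (coords[2] + off_x) / scale_x    # (20 + 15) / 1.5  ~ 23.3
y2 = (coords[3] + off_y) / scale_y    # (20 + 20) / 1.33 = 30.0
print([round(v) for v in (x1, y1, x2, y2)])  # -> [20, 26, 23, 30]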