    def parse_detection_results(
            self,
            results: np.ndarray,
            resize: Resize,
            label_map: Dict[int, str],
            min_confidence: float = 0.0,
            boxes_output_name: Optional[str] = None,
            frame_input_name: Optional[str] = None) -> List[DetectionNode]:
        """A helper method to take results from a detection-type network.
        :param results: The inference results from the network
        :param resize: A Resize object that was used to resize the image to
        fit into the network originally.
        :param label_map: A dictionary mapping integers to class_names.
        :param min_confidence: Filter out detections that have a confidence
        less than this number.
        :param boxes_output_name: The name of output that carries the bounding
        box information to be parsed. Default=self.output_blob_names[0]
        :param frame_input_name: The name of the input that took the frame in.

        :returns: A list of DetectionNodes, in this case representing bounding
        boxes.
        """
        output_blob_name = boxes_output_name or self.output_blob_names[0]
        inference_results = results[output_blob_name]

        input_name = frame_input_name or self.input_blob_names[0]
        _, _, h, w = self.net.input_info[input_name].input_data.shape

        nodes: List[DetectionNode] = []
        for result in inference_results[0][0]:
            # An image_id (first index) other than 0 marks the end of real
            # predictions. The network always outputs an array of length 200
            # even if it does not have that many predictions.
            if result[0] != 0:
                break

            confidence = float(result[2])
            if confidence <= min_confidence:
                continue

            x_min, y_min, x_max, y_max = result[3:7]
            # Box coordinates are normalized to [0, 1]; scale them to the
            # network input's width/height in pixels
            x_min, x_max = x_min * w, x_max * w
            y_min, y_max = y_min * h, y_max * h
            coords = [[x_min, y_min], [x_max, y_min], [x_max, y_max],
                      [x_min, y_max]]

            class_id = round(result[1])
            res = DetectionNode(
                name=label_map[class_id],
                coords=coords,
                extra_data={"detection_confidence": confidence})
            nodes.append(res)

        # Convert the coordinate space of the detections from the resized
        # frame back to the original frame
        resize.scale_and_offset_detection_nodes(nodes)
        return nodes
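
For reference, here is the coordinate math from the loop above worked through by hand. The row layout ([image_id, class_id, confidence, x_min, y_min, x_max, y_max], with box coordinates normalized to [0, 1]) is the one the method indexes into; the concrete numbers below are hypothetical:

# One hypothetical SSD-style output row for a 300x300 network input
result = [0, 1, 0.9, 0.25, 0.25, 0.75, 0.5]
w, h = 300, 300

x_min, y_min, x_max, y_max = result[3:7]
x_min, x_max = x_min * w, x_max * w  # 75.0, 225.0
y_min, y_max = y_min * h, y_max * h  # 75.0, 150.0

# The same clockwise polygon that parse_detection_results builds
coords = [[x_min, y_min], [x_max, y_min], [x_max, y_max], [x_min, y_max]]
print(coords)  # [[75.0, 75.0], [225.0, 75.0], [225.0, 150.0], [75.0, 150.0]]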
Example 2
    def prepare_inputs(self, frame: np.ndarray,
                       frame_input_name: Optional[str] = None) \
            -> Tuple[OV_INPUT_TYPE, Resize]:
        """A helper method to create an OpenVINO input like {input_name: array}

        This method takes a frame, resizes it to fit the network inputs, then
        returns two things: The input, and the Resize information. The
        Resize information contains all of the operations that were done on
        the frame, allowing users to then map the detections from a resized
        frame to the coordinate space of the original frame.

        :param frame: The image. BGR ordered.
        :param frame_input_name: Set this value to force a certain node to be
            used as the frame input. Useful if you still want to use the
            default implementation from a subclass whose network has multiple
            inputs
        :returns: ({input_name: resized_frame}, Resize)
        """

        if not frame_input_name and len(self.net.input_info) > 1:
            raise ValueError("More than one input was expected for the model, "
                             "but the default prepare_inputs implementation "
                             "was used.")

        input_blob_name = frame_input_name or self.input_blob_names[0]
        input_blob = self.net.input_info[input_blob_name]

        _, _, h, w = input_blob.input_data.shape
        resize = Resize(frame).resize(w, h, Resize.ResizeType.EXACT)

        # Change data layout from HWC to CHW
        in_frame = np.transpose(resize.frame.copy(), (2, 0, 1))

        return {input_blob_name: in_frame}, resize
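
A minimal sketch of how the two helpers above combine in a detector-style process_frame (mirroring the classifier examples later in this listing). self.label_map and the "threshold" option are hypothetical stand-ins for values a real capsule would define:

    def process_frame(self, frame: np.ndarray,
                      detection_node: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        input_dict, resize = self.prepare_inputs(frame)
        prediction = self.send_to_batch(input_dict).get()

        # parse_detection_results applies
        # resize.scale_and_offset_detection_nodes internally, so the returned
        # boxes are already in the original frame's coordinate space
        return self.parse_detection_results(
            prediction, resize, self.label_map,
            min_confidence=options["threshold"])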
Example 3
def test_resize_pad():
    input_width, input_height = 10, 5
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Pad bottom/right, then top/left
    frame_resize = Resize(frame) \
        .pad(13, 9, 255, Resize.CropPadType.RIGHT_BOTTOM) \
        .pad(17, 11, 254, Resize.CropPadType.LEFT_TOP) \
        .frame
    frame_expected = np.pad(frame, ((0, 4), (0, 3)),
                            'constant', constant_values=255)
    frame_expected = np.pad(frame_expected, ((2, 0), (4, 0)),
                            'constant', constant_values=254)
    assert frame_resize.shape[1] == 17
    assert frame_resize.shape[0] == 11
    assert (frame_resize == frame_expected).all()

    # Pad all around (the odd leftover column goes to the right)
    frame_resize = Resize(frame) \
        .pad(13, 9, 255, Resize.CropPadType.ALL) \
        .frame
    frame_expected = np.pad(frame, ((2, 2), (1, 2)),
                            'constant', constant_values=255)
    assert frame_resize.shape[1] == 13
    assert frame_resize.shape[0] == 9
    # noinspection PyUnresolvedReferences
    assert (frame_resize == frame_expected).all()

    # Padding to a size smaller than the frame should return it unchanged
    frame_resize = Resize(frame).pad(9, 4, -1, Resize.CropPadType.ALL).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()

    # CropPadType.NONE should be a no-op
    frame_resize = Resize(frame).pad(-1, -1, -1, Resize.CropPadType.NONE).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()
Example 4
    def process_frame(self, frame: np.ndarray,
                      detection_node: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        crop = Resize(frame).crop_bbox(detection_node.bbox).frame

        input_dict, _ = self.prepare_inputs(crop)
        prediction = self.send_to_batch(input_dict).get()

        max_color = config.colors[prediction["color"].argmax()]
        max_type = config.vehicle_types[prediction["type"].argmax()]

        detection_node.attributes["color"] = max_color
Example 5
    def process_frame(self, frame: np.ndarray,
                      detection_node: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        crop = Resize(frame).crop_bbox(detection_node.bbox).frame

        input_dict, _ = self.prepare_inputs(crop)
        prediction = self.send_to_batch(input_dict).get()

        emotion_id = int(prediction["prob_emotion"].argmax())
        emotion = EMOTION_TYPES[emotion_id]
        emotion_score = float(prediction["prob_emotion"].flatten()[emotion_id])

        detection_node.attributes["emotion"] = emotion
        detection_node.extra_data["emotion_confidence"] = emotion_score
Example 6
    def process_frame(self, frame: np.ndarray,
                      detection_node: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        crop = Resize(frame).crop_bbox(detection_node.bbox).frame

        input_dict, _ = self.prepare_inputs(crop)
        prediction = self.send_to_batch(input_dict).get()

        # Convert prediction to a label
        probability = prediction["fc5"].flatten()[0]
        threshold = options["threshold"]
        label = self.LABELS[int(probability > threshold)]

        detection_node.attributes["mask"] = label
        detection_node.extra_data["mask_confidence"] = float(probability)
Example 7
    def process_frame(self, frame: np.ndarray,
                      detection_node: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        crop = Resize(frame).crop_bbox(detection_node.bbox).frame

        input_dict, _ = self.prepare_inputs(crop)
        prediction = self.send_to_batch(input_dict).get()

        age = int(prediction['age_conv3'] * 100)
        gender_id = prediction['prob'].argmax()
        gender = config.genders[gender_id]
        gender_confidence = float(prediction['prob'].flatten()[gender_id])

        detection_node.extra_data['age'] = age
        detection_node.attributes['gender'] = gender
        detection_node.extra_data['gender_confidence'] = gender_confidence
        detection_node.attributes['age'] = _get_age_bin(age)
Example 8
    def process_frame(self, frame: np.ndarray,
                      detection_node: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        crop = Resize(frame).crop_bbox(detection_node.bbox).frame
        input_dict, _ = self.prepare_inputs(crop)
        prediction = self.send_to_batch(input_dict).get()

        prediction = prediction['453'].flatten()

        # Iterate over predictions and add attributes accordingly
        for attribute_key, confidence in zip(ATTRIBUTES.keys(), prediction):
            attribute = ATTRIBUTES[attribute_key][
                0 if confidence >= 0.5 else 1]
            option_key = f"{attribute}_confidence"

            # The confidence value is remapped to create 2 confidence
            # thresholds for the attribute; one for how confident it is in the
            # upper range, the other for the confidence in the lower range.
            remapped_confidence = abs(confidence - 0.5) * 2
            float_option = options[option_key]
            detection_node.attributes[attribute_key] = (
                attribute if remapped_confidence > float_option else
                ATTRIBUTES[attribute_key][2])
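
The remapping above folds each per-attribute score in [0, 1] into a single distance-from-uncertainty value, so one option threshold works for both labels. A standalone illustration of the same arithmetic with hypothetical confidences:

# abs(confidence - 0.5) * 2 maps 0.5 -> 0.0 (no information) and both
# extremes (0.0 and 1.0) -> 1.0, giving one threshold scale for either label
for confidence in (0.9, 0.6, 0.5, 0.1):
    remapped = abs(confidence - 0.5) * 2
    side = "first" if confidence >= 0.5 else "second"
    print(f"{confidence:.1f}: {side} label, remapped confidence {remapped:.1f}")
# 0.9: first label, remapped confidence 0.8
# 0.6: first label, remapped confidence 0.2
# 0.5: first label, remapped confidence 0.0
# 0.1: second label, remapped confidence 0.8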
Example 9
def test_resize_crop():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Simple right/bottom crop
    frame_resize = Resize(frame) \
        .crop(3, 4, Resize.CropPadType.RIGHT_BOTTOM) \
        .frame
    # noinspection PyUnresolvedReferences
    assert (frame[:4, :3] == frame_resize).all()

    # Simple top/left crop
    frame_resize = Resize(frame).crop(4, 3, Resize.CropPadType.LEFT_TOP).frame
    # noinspection PyUnresolvedReferences
    assert (frame[-3:, -4:] == frame_resize).all()

    # Crop starting at a point
    frame_resize = Resize(frame) \
        .crop(2, 7, Resize.CropPadType.CROP_START_POINT, top_left=(1, 3)).frame
    # noinspection PyUnresolvedReferences
    assert (frame[3:10, 1:3] == frame_resize).all()

    # Crop all sides (keep center)
    frame_resize = Resize(frame).crop(2, 3, Resize.CropPadType.ALL).frame
    # noinspection PyUnresolvedReferences
    assert (frame[3:6, 1:3] == frame_resize).all()

    # Cropping to a size larger than the frame should return it unchanged
    frame_resize = Resize(frame).crop(6, 11, Resize.CropPadType.ALL).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()

    # CropPadType.NONE should be a no-op
    frame_resize = Resize(frame).crop(-1, -1, Resize.CropPadType.NONE).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()
Example 10
def test_resize():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Basic resize up
    frame_resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 10
    assert frame_resize.shape[0] == 20

    frame_resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.FIT_ONE).frame
    assert frame_resize.shape[1] == 10
    assert frame_resize.shape[0] == 20

    # Resize up where target aspect ratio is wider than source
    frame_resize = Resize(frame) \
        .resize(30, 30, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 15
    assert frame_resize.shape[0] == 30

    frame_resize = Resize(frame) \
        .resize(30, 30, Resize.ResizeType.FIT_ONE).frame
    assert frame_resize.shape[1] == 30
    assert frame_resize.shape[0] == 60

    # Resize up where target aspect ratio is taller than source
    frame_resize = Resize(frame) \
        .resize(10, 30, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 10
    assert frame_resize.shape[0] == 20

    frame_resize = Resize(frame) \
        .resize(10, 30, Resize.ResizeType.FIT_ONE).frame
    assert frame_resize.shape[1] == 15
    assert frame_resize.shape[0] == 30

    # Resize to width
    frame_resize = Resize(frame).resize(30, -1, Resize.ResizeType.WIDTH).frame
    assert frame_resize.shape[1] == 30
    assert frame_resize.shape[0] == 60

    # Resize to height
    frame_resize = Resize(frame).resize(-1, 30, Resize.ResizeType.HEIGHT).frame
    assert frame_resize.shape[1] == 15
    assert frame_resize.shape[0] == 30

    # Resize exactly
    frame_resize = Resize(frame).resize(8, 7, Resize.ResizeType.EXACT).frame
    assert frame_resize.shape[1] == 8
    assert frame_resize.shape[0] == 7

    # Resize where the scaling is not an integer
    # Round up
    frame_resize = Resize(frame) \
        .resize(10, 15, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 8
    assert frame_resize.shape[0] == 15
    # Round down
    input_width, input_height = 15, 4
    frame = np.arange(60, dtype=np.uint8).reshape((input_height, input_width))
    frame_resize = Resize(frame) \
        .resize(20, 10, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 20
    assert frame_resize.shape[0] == 5
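
Judging by the cases above, FIT_BOTH appears to pick the smaller of the two per-axis scale factors (the result fits inside the target box), while FIT_ONE picks the larger (one dimension matches the target exactly and the other overshoots). A quick check against the 5x10 -> 30x30 case:

src_w, src_h = 5, 10
scale_w, scale_h = 30 / src_w, 30 / src_h  # 6.0, 3.0

fit_both = min(scale_w, scale_h)  # 3.0 -> 15 x 30 (both dims <= target)
fit_one = max(scale_w, scale_h)   # 6.0 -> 30 x 60 (width matches exactly)
print(fit_both * src_w, fit_both * src_h)  # 15.0 30.0
print(fit_one * src_w, fit_one * src_h)    # 30.0 60.0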
Example 11
def test_resize_scale():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Single integer resize
    resize = Resize(frame).resize(10, 20, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([10, 10, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(5, 5, 10, 10)

    # Double integer resize (note that coords are rounded in node.bbox output)
    resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.EXACT) \
        .resize(20, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(4, 4, 5, 5)

    # Single crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape((input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 25, 25, 30)

    # Two affecting crops plus one that should not change the offset
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape((input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .crop(10, 15, Resize.CropPadType.RIGHT_BOTTOM) \
        .crop(8, 5, Resize.CropPadType.ALL)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(21, 30, 26, 35)

    # Crop then resize
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape((input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .resize(30, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(13, 18, 15, 20)

    # Resize then crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape((input_height, input_width))
    resize = Resize(frame) \
        .resize(30, 40, Resize.ResizeType.EXACT) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 26, 23, 30)
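
The "crop then resize" case above can be reproduced by hand. A hypothetical standalone recomputation of what scale_and_offset_detection_nodes is expected to do:

# bbox [15, 15, 20, 20] was reported in the final 30x40 frame
x1, y1, x2, y2 = 15, 15, 20, 20

# Undo the EXACT resize 15x20 -> 30x40 (a factor of 2 on both axes)
sx, sy = 15 / 30, 20 / 40
x1, x2 = x1 * sx, x2 * sx  # 7.5, 10.0
y1, y2 = y1 * sy, y2 * sy  # 7.5, 10.0

# Undo the LEFT_TOP crop 20x30 -> 15x20, which removed a 5 px margin on
# the left and a 10 px margin on the top, by adding those margins back
x1, x2 = x1 + 5, x2 + 5
y1, y2 = y1 + 10, y2 + 10
print(x1, y1, x2, y2)  # 12.5 17.5 15.0 20.0 -> rounds to (13, 18, 15, 20)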