Example No. 1
class Detector(object):
    def __init__(self,
                 model_path=DEFAULT_MODEL_PATH,
                 labels_path=DEFAULT_LABEL_PATH):

        self.labels = self.load_labels(labels_path)
        self.interpreter = Interpreter(model_path)
        self.interpreter.allocate_tensors()

    def set_input_tensor(self, image):
        tensor_index = self.interpreter.get_input_details()[0]["index"]
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(
            self.interpreter.get_tensor(output_details["index"]))
        return tensor

    def detect_objects(self, image, threshold=0.5):
        self.set_input_tensor(image)
        self.interpreter.invoke()

        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))
        results = []
        for i in range(count):
            if scores[i] >= threshold:
                result = {
                    'bounding_box': boxes[i],
                    'class_id': classes[i],
                    'class': self.labels[classes[i]],
                    'score': scores[i]
                }
                results.append(result)
        print(results)
        return results

    def load_labels(self, path):
        """Loads the labels file. Supports files with or without index numbers."""
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            labels = {}
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    labels[int(pair[0])] = pair[1].strip()
                else:
                    labels[row_number] = pair[0].strip()
            return labels
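A minimal usage sketch for the Detector above, assuming its imports (re, numpy as np, the TFLite Interpreter) are in scope; the model, label, and image paths are placeholders, and the image must already be resized to the model's input size:

import numpy as np
from PIL import Image

detector = Detector(model_path='detect.tflite', labels_path='coco_labels.txt')
_, height, width, _ = detector.interpreter.get_input_details()[0]['shape']
img = np.asarray(Image.open('cat.jpg').convert('RGB').resize((width, height)))
for obj in detector.detect_objects(img, threshold=0.5):
    print(obj['class'], obj['score'], obj['bounding_box'])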
Example No. 2
class Detector(object):
    def __init__(self, path_to_model, threshold):
        self.threshold = threshold
        self.__camera = picamera.PiCamera(resolution=(640, 480), framerate=30)
        self.__camera.start_preview()

        self.__is_cat_detected = False

        self.__stream = io.BytesIO()

        self.__interpreter = Interpreter(path_to_model)
        self.__interpreter.allocate_tensors()
        input_shape = self.__interpreter.get_input_details()[0]['shape']
        # PIL resize() expects (width, height); the tensor shape is [1, height, width, 3].
        self.__img_input_size = (input_shape[2], input_shape[1])

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.__camera.close()

    def check(self):
        result = True
        try:
            self.__stream.seek(0)
            self.__camera.capture(self.__stream, 'jpeg')
            image = Image.open(self.__stream).convert('RGB').resize(
                self.__img_input_size, Image.ANTIALIAS)

            self.__is_cat_detected = self.__is_cat(image)

            self.__stream.seek(0)
            self.__stream.truncate()
        except BaseException as err:
            print(err)
            result = False
        finally:
            self.__camera.stop_preview()
        return result

    def is_detect(self):
        return self.__is_cat_detected

    def __is_cat(self, image):
        tensor_index = self.__interpreter.get_input_details()[0]['index']
        self.__interpreter.tensor(tensor_index)()[0][:, :] = image

        self.__interpreter.invoke()
        output_details = self.__interpreter.get_output_details()[0]
        output = np.squeeze(
            self.__interpreter.get_tensor(output_details['index']))

        return output < self.threshold if self.threshold < 0 else output > self.threshold
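A short usage sketch, assuming the class above and its imports (picamera, io, PIL, numpy, Interpreter) are available on a Raspberry Pi; the model path and threshold are hypothetical:

with Detector('cat_model.tflite', threshold=0.6) as detector:
    if detector.check():
        print('cat detected' if detector.is_detect() else 'no cat')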
Example No. 3
class ImageClassifier:
    def __init__(self):
        try:
            self.interpreter = Interpreter(model_path='model.tflite')
            self.interpreter.allocate_tensors()
            _, self.height, self.width, _ = self.interpreter.get_input_details(
            )[0]['shape']
            print('model successfully loaded')
        except Exception as e:
            print("error:", e)
            return

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(
            self.interpreter.get_tensor(output_details['index']))

        # # If the model is quantized (uint8 data), then dequantize the results
        # if output_details['dtype'] == np.uint8:
        #     scale, zero_point = output_details['quantization']
        #     tensor = scale * (tensor - zero_point)

        return tensor

    def set_input_tensor(self, image):
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def __call__(self, stream, top_k=1):
        # start_time = time()
        image = Image.open(stream).convert('RGB').resize(
            (self.width, self.height), Image.ANTIALIAS)
        image = np.array(image).astype('float') / 255.0

        self.set_input_tensor(image)
        self.interpreter.invoke()

        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)

        # print('classes', classes)
        # print('scores', scores)

        index = np.argmax(scores)
        # elapsed_ms = (time() - start_time) * 1000
        return int(classes[index]), scores[index]
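A usage sketch for the callable classifier above; the model path 'model.tflite' is hard-coded in __init__, and 'test.jpg' is a hypothetical input:

classifier = ImageClassifier()
with open('test.jpg', 'rb') as stream:
    class_id, score = classifier(stream)
print('class id:', class_id, 'score:', score)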
Example No. 4
class Classifier:
    def __init__(self, label_file, model_file):
        self.labels = self.load_labels(label_file)
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        _, self.height, self.width, _ = self.interpreter.get_input_details(
        )[0]['shape']

    def load_labels(self, path):
        with open(path, 'r') as f:
            return {i: line.strip() for i, line in enumerate(f.readlines())}

    def set_input_tensor(self, image):
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def classify(self, original_image, top_k=1):
        """Returns a sorted array of classification results."""
        start_time = time.time()
        # cv2.resize expects (width, height)
        image = cv2.resize(original_image, (self.width, self.height))
        self.set_input_tensor(image)
        self.interpreter.invoke()
        output_details = self.interpreter.get_output_details()[0]
        output = np.squeeze(
            self.interpreter.get_tensor(output_details['index']))

        # If the model is quantized (uint8 data), then dequantize the results
        if output_details['dtype'] == np.uint8:
            scale, zero_point = output_details['quantization']
            output = scale * (output - zero_point)

        ordered = np.argpartition(-output, top_k)
        results = [(i, output[i]) for i in ordered[:top_k]]

        elapsed_ms = (time.time() - start_time) * 1000
        label_id, prob = results[0]
        text = 'Class: %s Confidence: %.2f  TIME: %.1fms' % (
            self.labels[label_id], prob, elapsed_ms)
        cv2.putText(original_image, text, (10, 20), cv2.FONT_HERSHEY_SIMPLEX,
                    self.width / 400, (0, 0, 255), 2, True)

        return cv2.imencode('.jpg', original_image)[1].tobytes()
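A usage sketch assuming the class above with its imports (cv2, numpy, time, Interpreter) and hypothetical label/model/image paths; classify() returns the annotated frame as JPEG bytes:

import cv2

classifier = Classifier('labels.txt', 'model.tflite')
frame = cv2.imread('test.jpg')
jpeg_bytes = classifier.classify(frame)
with open('annotated.jpg', 'wb') as f:
    f.write(jpeg_bytes)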
Example No. 5
def compute(model_path):
    now = time.monotonic()
    intp = Interpreter(model_path)
    x = intp.tensor(intp.get_input_details()[0]['index'])
    iy = intp.get_output_details()[0]['index']
    intp.allocate_tensors()
    t1 = time.monotonic() - now
    now = time.monotonic()
    
    for i in range(WARMUP):
        # Fill the input tensor with a random value before each run.
        x().fill(np.random.rand())
        intp.invoke()
        y = intp.get_tensor(iy)
    t2 = time.monotonic() - now
    now = time.monotonic()
    for i in range(ITER):
        x().fill(np.random.rand())
        intp.invoke()
        y = intp.get_tensor(iy)
    t3 = time.monotonic() - now
    return t1, t2 / float(WARMUP), t3 / float(ITER)
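A hypothetical driver for the benchmark above, assuming it lives in the same module as the function and its imports (time, numpy as np, Interpreter); WARMUP and ITER are the module-level constants the function expects, and the model path is a placeholder:

WARMUP = 10
ITER = 100

alloc_s, warmup_s, steady_s = compute('model.tflite')
print('allocate: %.1f ms  warmup: %.1f ms/iter  steady: %.1f ms/iter' %
      (alloc_s * 1000, warmup_s * 1000, steady_s * 1000))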
Example No. 6
class Classifier:
    def __init__(self, model, labels):
        self.labels = labels
        self.model = Interpreter(model)
        self.model.allocate_tensors()

    def set_input_tensor(self, image):
        tensor_index = self.model.get_input_details()[0]['index']
        input_tensor = self.model.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def classify_image(self, image, top_k=1):
        """Returns a sorted array of classification results."""
        self.set_input_tensor(image)
        self.model.invoke()
        output_details = self.model.get_output_details()[0]
        output = np.squeeze(self.model.get_tensor(output_details['index']))

        # If the model is quantized (uint8 data), then dequantize the results
        if output_details['dtype'] == np.uint8:
            scale, zero_point = output_details['quantization']
            output = scale * (output - zero_point)

        ordered = np.argpartition(-output, top_k)
        return [(i, output[i]) for i in ordered[:top_k]]
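A usage sketch under the assumption that labels is a dict mapping class ids to names and that numpy, PIL, and the Interpreter import are in scope; paths and the label map are placeholders:

import numpy as np
from PIL import Image

labels = {0: 'cat', 1: 'dog'}  # placeholder label map
clf = Classifier('model.tflite', labels)
_, h, w, _ = clf.model.get_input_details()[0]['shape']
img = np.asarray(Image.open('test.jpg').convert('RGB').resize((w, h)))
for class_id, score in clf.classify_image(img, top_k=3):
    print(labels.get(class_id, class_id), score)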
Example No. 7
class Segnet(object):
    def __init__(self, model_file, label_file, overlay):
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details(
        )[0]['shape']
        self.labels = self.load_labels(label_file)
        self.class_colors = [(random.randint(0, 255), random.randint(0, 255),
                              random.randint(0, 255)) for _ in range(5000)]
        self.legend_img = self.get_legends(self.labels)
        self.overlay = overlay

    def load_labels(self, path):
        with open(path, 'r') as f:
            return [
                line.strip() for line in f.read().replace('"', '').split(',')
            ]

    def preprocess(self, img):
        img = img.astype(np.float32)
        img[:, :, 0] -= 103.939
        img[:, :, 1] -= 116.779
        img[:, :, 2] -= 123.68
        image = img[:, :, ::-1]
        return image

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(
            self.interpreter.get_tensor(output_details['index']))
        return tensor

    def get_legends(self, class_names):
        colors = self.class_colors
        n_classes = len(class_names)
        legend = np.zeros(
            ((len(class_names) * 25) + 25, 125, 3), dtype="uint8") + 255

        for (i, (class_name, color)) in enumerate(zip(class_names, colors)):
            color = [int(c) for c in color]
            cv2.putText(legend, class_name, (5, (i * 25) + 17),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 1)
            cv2.rectangle(legend, (100, (i * 25)), (125, (i * 25) + 25),
                          tuple(color), -1)

        return legend

    def overlay_seg_image(self, inp_img, seg_img):
        original_h = inp_img.shape[0]
        original_w = inp_img.shape[1]
        seg_img = cv2.resize(seg_img, (original_w, original_h))

        fused_img = (inp_img / 2 + seg_img / 2).astype('uint8')
        return fused_img

    def concat_legends(self, seg_img, legend_img):

        new_h = np.maximum(seg_img.shape[0], legend_img.shape[0])
        new_w = seg_img.shape[1] + legend_img.shape[1]

        out_img = np.zeros(
            (new_h, new_w, 3)).astype('uint8') + legend_img[0, 0, 0]

        out_img[:legend_img.shape[0], :legend_img.shape[1]] = np.copy(
            legend_img)
        out_img[:seg_img.shape[0], legend_img.shape[1]:] = np.copy(seg_img)

        return out_img

    def segment_objects(self, image):
        """Returns the raw segmentation output for the given image."""
        img = cv2.resize(image, (self.input_width, self.input_height))
        img = self.preprocess(img)
        self.set_input_tensor(img)
        self.interpreter.invoke()
        # Get all output details
        seg_arr = self.get_output_tensor(0)
        return seg_arr

    def segment(self, original_image):
        self.output_height, self.output_width = original_image.shape[0:2]
        start_time = time.time()
        image = cv2.resize(original_image,
                           (self.input_width, self.input_height))
        #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.segment_objects(image)
        elapsed_ms = (time.time() - start_time) * 1000

        fps = 1 / elapsed_ms * 1000
        print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".
              format(fps, elapsed_ms))

        seg_arr = results.argmax(axis=2)

        output_height = results.shape[0]
        output_width = results.shape[1]

        seg_img = np.zeros((output_height, output_width, 3))

        for c in range(20):
            seg_img[:, :, 0] += ((seg_arr[:, :] == c) *
                                 (self.class_colors[c][0])).astype('uint8')
            seg_img[:, :, 1] += ((seg_arr[:, :] == c) *
                                 (self.class_colors[c][1])).astype('uint8')
            seg_img[:, :, 2] += ((seg_arr[:, :] == c) *
                                 (self.class_colors[c][2])).astype('uint8')

        seg_img = cv2.resize(seg_img, (self.output_width, self.output_height))
        if self.overlay:
            seg_img = self.overlay_seg_image(original_image, seg_img)
        seg_img = self.concat_legends(seg_img, self.legend_img)

        return cv2.imencode('.jpg', seg_img)[1].tobytes()
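A usage sketch for the Segnet wrapper above, with hypothetical model, label, and image paths; the label file is assumed to be a comma-separated list of class names, as load_labels expects:

import cv2

segnet = Segnet('segnet.tflite', 'seg_labels.txt', overlay=True)
frame = cv2.imread('street.jpg')
jpeg_bytes = segnet.segment(frame)
with open('segmented.jpg', 'wb') as f:
    f.write(jpeg_bytes)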
Example No. 8
class PoseEngine:
    """Engine used for pose tasks."""

    def __init__(self, model_path, mirror=False):
        """Creates a PoseEngine with given model.

        Args:
          model_path: String, path to TF-Lite Flatbuffer file.
          mirror: Flip keypoints horizontally

        Raises:
          ValueError: An error occurred when model output is invalid.
        """
        self._mirror = mirror

        edgetpu_delegate = load_delegate(EDGETPU_SHARED_LIB)
        posenet_decoder_delegate = load_delegate(POSENET_SHARED_LIB)
        self._interpreter = Interpreter(
            model_path, experimental_delegates=[edgetpu_delegate, posenet_decoder_delegate])
        self._interpreter.allocate_tensors()
        self._input_tensor_shape = self._interpreter.get_input_details()[0]['shape']
        self._input_details = self._interpreter.get_input_details()
        if (self._input_tensor_shape.size != 4 or
                self._input_tensor_shape[3] != 3 or
                self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self.image_height, self.image_width, self.image_depth = self._input_tensor_shape


        # Auto-detect stride size
        def calcStride(h,w,L):
          return int((2*h*w)/(math.sqrt(h**2 + 4*h*L*w - 2*h*w + w**2) - h - w))

        details = self._interpreter.get_output_details()[4]
        self.heatmap_zero_point = details['quantization_parameters']['zero_points'][0]
        self.heatmap_scale = details['quantization_parameters']['scales'][0]
        heatmap_size = self._interpreter.tensor(details['index'])().nbytes
        self.stride = calcStride(self.image_height, self.image_width, heatmap_size)
        self.heatmap_size = (self.image_width // self.stride + 1, self.image_height // self.stride + 1)
        details = self._interpreter.get_output_details()[5]
        self.parts_zero_point = details['quantization_parameters']['zero_points'][0]
        self.parts_scale = details['quantization_parameters']['scales'][0]

        print("Heatmap size: ", self.heatmap_size)
        print("Stride: ", self.stride, self.heatmap_size)


    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

           For ideal results make sure the image fed to this function is close to the
           expected input size - it is the caller's responsibility to resize the
           image accordingly.

        Args:
          img: numpy array containing image
        """

        # Extend or crop the input to match the input shape of the network.
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            pads = [[0, max(0, self.image_height - img.shape[0])],
                    [0, max(0, self.image_width - img.shape[1])], [0, 0]]
            img = np.pad(img, pads, mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert (img.shape == tuple(self._input_tensor_shape[1:]))

        # Run the inference
        inference_time, outputs = self.run_inference(img)
        poses = self._parse_poses(outputs)
        heatmap, bodyparts = self._parse_heatmaps(outputs)
        return inference_time, poses, heatmap, bodyparts

    def ParseOutputs(self, outputs):
        poses = self._parse_poses(outputs)
        heatmap, bodyparts = self._parse_heatmaps(outputs)
        return poses, heatmap, bodyparts

    def _parse_poses(self, outputs):
        keypoints = outputs[0].reshape(-1, len(KEYPOINTS), 2)
        keypoint_scores = outputs[1].reshape(-1, len(KEYPOINTS))
        pose_scores = outputs[2].flatten()
        nposes = int(outputs[3][0])

        # Convert the poses to a friendlier format of keypoints with associated
        # scores.
        poses = []
        for pose_i in range(nposes):
            keypoint_dict = {}
            for point_i, point in enumerate(keypoints[pose_i]):
                keypoint = Keypoint(KEYPOINTS[point_i], point,
                                    keypoint_scores[pose_i, point_i])
                if self._mirror: keypoint.yx[1] = self.image_width - keypoint.yx[1]
                keypoint_dict[KEYPOINTS[point_i]] = keypoint
            poses.append(Pose(keypoint_dict, pose_scores[pose_i]))

        return poses

    def softmax(self, y, axis):
        y = y - np.expand_dims(np.max(y, axis = axis), axis)
        y = np.exp(y)
        return y / np.expand_dims(np.sum(y, axis = axis), axis)

    def _parse_heatmaps(self, outputs):
        # Heatmaps are really float32.
        heatmap = (outputs[4].astype(np.float32) - self.heatmap_zero_point) * self.heatmap_scale
        heatmap = np.reshape(heatmap, [self.heatmap_size[1], self.heatmap_size[0]])
        part_heatmap = (outputs[5].astype(np.float32) - self.parts_zero_point) * self.parts_scale
        part_heatmap = np.reshape(part_heatmap, [self.heatmap_size[1], self.heatmap_size[0], -1])
        part_heatmap = self.softmax(part_heatmap, axis=2)
        return heatmap, part_heatmap

    def run_inference(self, input_data):
        start_time = time.monotonic()
        self._interpreter.set_tensor(self._input_details[0]['index'], np.expand_dims(input_data, axis=0))
        self._interpreter.invoke()
        duration_ms = (time.monotonic() - start_time) * 1000

        output = []
        for details in self._interpreter.get_output_details():
            tensor = self._interpreter.get_tensor(details['index'])
            output.append(tensor)

        return (duration_ms, output)
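A usage sketch for the Edge TPU PoseEngine above, assuming the supporting names it references (load_delegate, EDGETPU_SHARED_LIB, POSENET_SHARED_LIB, KEYPOINTS, Keypoint, Pose) are defined by the surrounding module; the model and image paths are placeholders:

import numpy as np
from PIL import Image

engine = PoseEngine('posenet_decoder_edgetpu.tflite')
img = np.asarray(Image.open('person.jpg').convert('RGB'))
inference_ms, poses, heatmap, bodyparts = engine.DetectPosesInImage(img)
print('%d pose(s) in %.1f ms' % (len(poses), inference_ms))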
Example No. 9
class PoseEngine:
    """Engine used for pose tasks."""
    def __init__(self,
                 model_path,
                 mirror=False,
                 offsetRefineStep=2,
                 scoreThreshold=0.8,
                 maxPoseDetections=5,
                 nmsRadius=30,
                 minPoseConfidence=0.15):
        """Creates a PoseEngine with given model.

        Args:
          model_path: String, path to TF-Lite Flatbuffer file.
          mirror: Flip keypoints horizontally

        Raises:
          ValueError: An error occurred when model output is invalid.
        """
        self.interpreter = Interpreter(model_path)
        self.interpreter.allocate_tensors()

        self._mirror = mirror

        self._input_tensor_shape = self.get_input_tensor_shape()
        if (self._input_tensor_shape.size != 4
                or self._input_tensor_shape[3] != 3
                or self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self.image_height, self.image_width, self.image_depth = self.get_input_tensor_shape(
        )

        self.heatmaps_nx = self.interpreter.get_output_details()[0]['shape'][2]
        self.heatmaps_ny = self.interpreter.get_output_details()[0]['shape'][1]
        self.heatmaps_stride_x = self.getStride(self.image_width,
                                                self.heatmaps_nx)
        self.heatmaps_stride_y = self.getStride(self.image_height,
                                                self.heatmaps_ny)
        self.quant_heatmaps_r, self.quant_heatmaps_off = self.interpreter.get_output_details(
        )[0]['quantization']
        self.quant_offsets_short_r, self.quant_offsets_short_off = self.interpreter.get_output_details(
        )[1]['quantization']
        self.quant_offsets_mid_r, self.quant_offsets_mid_off = self.interpreter.get_output_details(
        )[2]['quantization']

        self.offsetRefineStep = offsetRefineStep
        self.scoreThreshold = scoreThreshold
        self.maxPoseDetections = maxPoseDetections
        self.nmsRadius = nmsRadius
        self.sqRadius = self.nmsRadius * self.nmsRadius
        self.minPoseConfidence = minPoseConfidence

        # The API returns all the output tensors flattened and concatenated. We
        # have to figure out the boundaries from the tensor shapes & sizes.
        offset = 0
        self._output_offsets = [0]
        for size in self.get_all_output_tensors_sizes():
            offset += size
            self._output_offsets.append(offset)

    def getStride(self, l, n):
        strides = (8, 16, 32)
        return strides[np.argmin(np.abs(strides - l / n))]

    def get_input_tensor_shape(self):
        return self.interpreter.get_input_details()[0]['shape']

    def get_all_output_tensors_sizes(self):
        sizes = np.array([], dtype='int32')
        for d in self.interpreter.get_output_details():
            s = np.squeeze(self.interpreter.get_tensor(
                d['index'])).flatten().size
            sizes = np.append(sizes, int(s))
        return sizes

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

           For ideal results make sure the image fed to this function is close to the
           expected input size - it is the caller's responsibility to resize the
           image accordingly.

        Args:
          img: numpy array containing image
        """

        # Extend or crop the input to match the input shape of the network.
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            img = np.pad(
                img, [[0, max(0, self.image_height - img.shape[0])],
                      [0, max(0, self.image_width - img.shape[1])], [0, 0]],
                mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert (img.shape == tuple(self._input_tensor_shape[1:]))

        # Run the inference (API expects the data to be flattened)
        return self.ParseOutput(self.run_inference(img))

    def run_inference(self, img):
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            img = np.pad(
                img, [[0, max(0, self.image_height - img.shape[0])],
                      [0, max(0, self.image_width - img.shape[1])], [0, 0]],
                mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert (img.shape == tuple(self._input_tensor_shape[1:]))

        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)
        input_tensor()[:, :, :, :] = img
        start_time = time.monotonic()
        self.interpreter.invoke()
        elapsed_ms = (time.monotonic() - start_time) * 1000
        out = np.empty(0)
        for d in self.interpreter.get_output_details():
            o = np.squeeze(self.interpreter.get_tensor(d['index'])).flatten()
            out = np.append(out, o)
        return (elapsed_ms, out)

    def logistic(self, x):
        return 1 / (1 + np.exp(-x))

    def isPeak(self, heatmaps_flat, index):
        maxindex = index // len(KEYPOINTS)
        maxkeypoint = index % len(KEYPOINTS)

        y_index = maxindex // self.heatmaps_nx
        x_index = maxindex % self.heatmaps_nx

        y_index_min = np.max((y_index - 1, 0))
        y_index_max = np.min((y_index + 1, self.heatmaps_ny - 1))
        x_index_min = np.max((x_index - 1, 0))
        x_index_max = np.min((x_index + 1, self.heatmaps_nx - 1))

        for y_current in range(y_index_min, y_index_max + 1):
            for x_current in range(x_index_min, x_index_max + 1):
                index_current = len(KEYPOINTS) * (
                    y_current * self.heatmaps_nx + x_current) + maxkeypoint
                if (heatmaps_flat[index_current] >
                        heatmaps_flat[index]) and (index_current != index):
                    return False
        return True

    def ParseOutput(self, output):
        inference_time, output = output
        outputs = [
            output[i:j]
            for i, j in zip(self._output_offsets, self._output_offsets[1:])
        ]

        heatmaps = outputs[0].reshape(-1, len(KEYPOINTS))
        offsets_short_y = outputs[1].reshape(
            -1, 2 * len(KEYPOINTS))[:, 0:len(KEYPOINTS)]
        offsets_short_x = outputs[1].reshape(
            -1, 2 * len(KEYPOINTS))[:, len(KEYPOINTS):2 * len(KEYPOINTS)]
        offsets_mid_fwd_y = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, 0:len(poseChain)]
        offsets_mid_fwd_x = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, len(poseChain):2 * len(poseChain)]
        offsets_mid_bwd_y = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, 2 * len(poseChain):3 * len(poseChain)]
        offsets_mid_bwd_x = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, 3 * len(poseChain):4 * len(poseChain)]
        heatmaps = self.logistic(
            (heatmaps - self.quant_heatmaps_off) * self.quant_heatmaps_r)
        heatmaps_flat = heatmaps.flatten()
        offsets_short_y = (offsets_short_y - self.quant_offsets_short_off
                           ) * self.quant_offsets_short_r
        offsets_short_x = (offsets_short_x - self.quant_offsets_short_off
                           ) * self.quant_offsets_short_r
        offsets_mid_fwd_y = (offsets_mid_fwd_y - self.quant_offsets_mid_off
                             ) * self.quant_offsets_mid_r
        offsets_mid_fwd_x = (offsets_mid_fwd_x - self.quant_offsets_mid_off
                             ) * self.quant_offsets_mid_r
        offsets_mid_bwd_y = (offsets_mid_bwd_y - self.quant_offsets_mid_off
                             ) * self.quant_offsets_mid_r
        offsets_mid_bwd_x = (offsets_mid_bwd_x - self.quant_offsets_mid_off
                             ) * self.quant_offsets_mid_r

        # Obtaining the peaks of heatmaps larger than scoreThreshold
        orderedindices = np.argsort(heatmaps_flat)[::-1]
        largeheatmaps_indices = np.empty(0, dtype='int32')
        for i in range(len(orderedindices)):
            if heatmaps_flat[orderedindices[i]] < self.scoreThreshold:
                break
            if self.isPeak(heatmaps_flat, orderedindices[i]):
                largeheatmaps_indices = np.append(largeheatmaps_indices,
                                                  orderedindices[i])

        pose_list = np.full(self.maxPoseDetections * 2 * len(KEYPOINTS),
                            0.0,
                            dtype='float32').reshape(-1, len(KEYPOINTS), 2)
        maxindex_list = np.full(self.maxPoseDetections * len(KEYPOINTS),
                                -1,
                                dtype='int32').reshape(-1, len(KEYPOINTS))
        score_list = np.full(self.maxPoseDetections * len(KEYPOINTS),
                             0.0,
                             dtype='float32').reshape(-1, len(KEYPOINTS))
        pose_score_list = np.full(self.maxPoseDetections, 0.0, dtype='float32')

        nPoses = 0
        # obtaining at most maxPoseDetections poses
        for point in range(len(largeheatmaps_indices)):
            if nPoses >= self.maxPoseDetections:
                break

            # obtain a root candidate
            maxindex = largeheatmaps_indices[point] // len(KEYPOINTS)
            maxkeypoint = largeheatmaps_indices[point] % len(KEYPOINTS)
            y = self.heatmaps_stride_y * (maxindex // self.heatmaps_nx)
            x = self.heatmaps_stride_x * (maxindex % self.heatmaps_nx)
            y += offsets_short_y[maxindex, maxkeypoint]
            x += offsets_short_x[maxindex, maxkeypoint]

            # skip keypoint with (x, y) that is close to the existing keypoints
            skip = 0
            for p in range(nPoses):
                y_exist = pose_list[p, maxkeypoint, 0]
                x_exist = pose_list[p, maxkeypoint, 1]
                if (y_exist - y) * (y_exist - y) + (x_exist - x) * (
                        x_exist - x) < self.sqRadius:
                    skip = 1
                    break
            if skip == 1:
                continue

            # setting the maxkeypoint as root
            pose_list[nPoses, maxkeypoint, 0] = y
            pose_list[nPoses, maxkeypoint, 1] = x
            maxindex_list[nPoses, maxkeypoint] = maxindex
            score_list[nPoses, maxkeypoint] = heatmaps[maxindex, maxkeypoint]

            # backward decoding
            for edge in reversed(range(len(poseChain))):
                sourceKeypointId = parentToChildEdges[edge]
                targetKeypointId = childToParentEdges[edge]
                if maxindex_list[nPoses,
                                 sourceKeypointId] != -1 and maxindex_list[
                                     nPoses, targetKeypointId] == -1:
                    maxindex = maxindex_list[nPoses, sourceKeypointId]
                    y = pose_list[nPoses, sourceKeypointId, 0]
                    x = pose_list[nPoses, sourceKeypointId, 1]
                    y += offsets_mid_bwd_y[maxindex, edge]
                    x += offsets_mid_bwd_x[maxindex, edge]

                    y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                      self.heatmaps_ny - 1)
                    x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                      self.heatmaps_nx - 1)
                    maxindex_list[
                        nPoses,
                        targetKeypointId] = self.heatmaps_nx * y_index + x_index
                    for i in range(self.offsetRefineStep):
                        y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                          self.heatmaps_ny - 1)
                        x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                          self.heatmaps_nx - 1)
                        maxindex_list[
                            nPoses,
                            targetKeypointId] = self.heatmaps_nx * y_index + x_index
                        y = self.heatmaps_stride_y * y_index
                        x = self.heatmaps_stride_x * x_index
                        y += offsets_short_y[maxindex_list[nPoses,
                                                           targetKeypointId],
                                             targetKeypointId]
                        x += offsets_short_x[maxindex_list[nPoses,
                                                           targetKeypointId],
                                             targetKeypointId]

                    pose_list[nPoses, targetKeypointId, 0] = y
                    pose_list[nPoses, targetKeypointId, 1] = x
                    score_list[nPoses, targetKeypointId] = heatmaps[
                        maxindex_list[nPoses,
                                      targetKeypointId], targetKeypointId]

            # forward decoding
            for edge in range(len(poseChain)):
                sourceKeypointId = childToParentEdges[edge]
                targetKeypointId = parentToChildEdges[edge]
                if maxindex_list[nPoses,
                                 sourceKeypointId] != -1 and maxindex_list[
                                     nPoses, targetKeypointId] == -1:
                    maxindex = maxindex_list[nPoses, sourceKeypointId]
                    y = pose_list[nPoses, sourceKeypointId, 0]
                    x = pose_list[nPoses, sourceKeypointId, 1]
                    y += offsets_mid_fwd_y[maxindex, edge]
                    x += offsets_mid_fwd_x[maxindex, edge]

                    y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                      self.heatmaps_ny - 1)
                    x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                      self.heatmaps_nx - 1)
                    maxindex_list[
                        nPoses,
                        targetKeypointId] = self.heatmaps_nx * y_index + x_index
                    for i in range(self.offsetRefineStep):
                        y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                          self.heatmaps_ny - 1)
                        x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                          self.heatmaps_nx - 1)
                        maxindex_list[
                            nPoses,
                            targetKeypointId] = self.heatmaps_nx * y_index + x_index
                        y = self.heatmaps_stride_y * y_index
                        x = self.heatmaps_stride_x * x_index
                        y += offsets_short_y[maxindex_list[nPoses,
                                                           targetKeypointId],
                                             targetKeypointId]
                        x += offsets_short_x[maxindex_list[nPoses,
                                                           targetKeypointId],
                                             targetKeypointId]

                    pose_list[nPoses, targetKeypointId, 0] = y
                    pose_list[nPoses, targetKeypointId, 1] = x
                    score_list[nPoses, targetKeypointId] = heatmaps[
                        maxindex_list[nPoses,
                                      targetKeypointId], targetKeypointId]

            # calculate pose score
            score = 0
            for k in range(len(KEYPOINTS)):
                y = pose_list[nPoses, k, 0]
                x = pose_list[nPoses, k, 1]
                closekeypoint_exists = False
                for p in range(nPoses):
                    y_exist = pose_list[p, k, 0]
                    x_exist = pose_list[p, k, 1]
                    if (y_exist - y) * (y_exist - y) + (x_exist - x) * (
                            x_exist - x) < self.sqRadius:
                        closekeypoint_exists = True
                        break
                if not closekeypoint_exists:
                    score += score_list[nPoses, k]
            score /= len(KEYPOINTS)

            if score > self.minPoseConfidence:
                pose_score_list[nPoses] = score
                nPoses += 1
            else:
                for k in range(len(KEYPOINTS)):
                    maxindex_list[nPoses, k] = -1

        # Convert the poses to a friendlier format of keypoints with associated
        # scores.
        poses = []
        for pose_i in range(nPoses):
            keypoint_dict = {}
            for point_i, point in enumerate(pose_list[pose_i]):
                keypoint = Keypoint(KEYPOINTS[point_i], point,
                                    score_list[pose_i, point_i])
                if self._mirror:
                    keypoint.yx[1] = self.image_width - keypoint.yx[1]
                keypoint_dict[KEYPOINTS[point_i]] = keypoint
            poses.append(Pose(keypoint_dict, pose_score_list[pose_i]))

        return poses, inference_time
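A usage sketch for the pure-TFLite PoseEngine above (no Edge TPU delegate), again assuming the supporting globals it references (KEYPOINTS, poseChain, parentToChildEdges, childToParentEdges, Keypoint, Pose) exist in the surrounding module; paths are placeholders:

import numpy as np
from PIL import Image

engine = PoseEngine('posenet_quant.tflite', maxPoseDetections=3)
img = np.asarray(Image.open('person.jpg').convert('RGB'))
poses, inference_ms = engine.DetectPosesInImage(img)
print('%d pose(s) in %.1f ms' % (len(poses), inference_ms))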
Example No. 10
class PoseEngine():
    """Engine used for pose tasks."""
    def __init__(self, model_path, mirror=False):
        """Creates a PoseEngine with given model.

        Args:
          model_path: String, path to TF-Lite Flatbuffer file.
          mirror: Flip keypoints horizontally.

        Raises:
          ValueError: An error occurred when model output is invalid.
        """
        edgetpu_delegate = load_delegate(EDGETPU_SHARED_LIB)
        posenet_decoder_delegate = load_delegate(POSENET_SHARED_LIB)
        self._interpreter = Interpreter(model_path,
                                        experimental_delegates=[
                                            edgetpu_delegate,
                                            posenet_decoder_delegate
                                        ])
        self._interpreter.allocate_tensors()

        self._mirror = mirror

        self._input_tensor_shape = self.get_input_tensor_shape()
        if (self._input_tensor_shape.size != 4
                or self._input_tensor_shape[3] != 3
                or self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self._input_height, self._input_width, self._input_depth = self.get_input_tensor_shape(
        )
        self._input_type = self._interpreter.get_input_details()[0]['dtype']
        self._inf_time = 0

    def run_inference(self, input_data):
        """Run inference using the zero copy feature from pycoral and returns inference time in ms.
        """
        start = time.monotonic()
        edgetpu.run_inference(self._interpreter, input_data)
        self._inf_time = time.monotonic() - start
        return (self._inf_time * 1000)

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

           For ideal results make sure the image fed to this function is close to the
           expected input size - it is the caller's responsibility to resize the
           image accordingly.

        Args:
          img: numpy array containing image
        """
        input_details = self._interpreter.get_input_details()
        image_width, image_height = img.size
        resized_image = img.resize((self._input_width, self._input_height),
                                   Image.NEAREST)
        if self._input_type == np.float32:
            # Floating point versions of posenet take image data in [-1, 1] range.
            input_data = np.float32(resized_image) / 128.0 - 1.0
        else:
            # Assuming the input tensor is uint8
            input_data = np.asarray(resized_image)
        self.run_inference(input_data.flatten())
        return self.ParseOutput()

    def get_input_tensor_shape(self):
        """Returns input tensor shape."""
        return self._interpreter.get_input_details()[0]['shape']

    def get_output_tensor(self, idx):
        """Returns output tensor view."""
        return np.squeeze(
            self._interpreter.tensor(
                self._interpreter.get_output_details()[idx]['index'])())

    def ParseOutput(self):
        """Parses interpreter output tensors and returns decoded poses."""
        keypoints = self.get_output_tensor(0)
        keypoint_scores = self.get_output_tensor(1)
        pose_scores = self.get_output_tensor(2)
        num_poses = self.get_output_tensor(3)
        poses = []
        for i in range(int(num_poses)):
            pose_score = pose_scores[i]
            pose_keypoints = {}
            for j, point in enumerate(keypoints[i]):
                y, x = point
                if self._mirror:
                    y = self._input_width - y
                pose_keypoints[KeypointType(j)] = Keypoint(
                    Point(x, y), keypoint_scores[i, j])
            poses.append(Pose(pose_keypoints, pose_score))
        return poses, self._inf_time
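A usage sketch for the zero-copy variant above, which takes a PIL image and relies on pycoral's edgetpu.run_inference plus the KeypointType/Point/Keypoint/Pose helpers imported by the surrounding module; the paths are placeholders:

from PIL import Image

engine = PoseEngine('posenet_decoder_edgetpu.tflite')
poses, inference_time = engine.DetectPosesInImage(Image.open('person.jpg'))
print('%d pose(s) in %.1f ms' % (len(poses), inference_time * 1000))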
Example No. 11
class Detector(object):
    def __init__(self, label_file, model_file, threshold):
        self._threshold = float(threshold)
        self.labels = self.load_labels(label_file)
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details(
        )[0]['shape']

    def load_labels(self, path):
        with open(path, 'r') as f:
            return {
                i: line.strip()
                for i, line in enumerate(f.read().replace('"', '').split(','))
            }

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(
            self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Returns a list of detection results, each a dictionary of object info."""
        self.set_input_tensor(image)
        self.interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        return boxes

    def detect(self, original_image):
        self.output_height, self.output_width = original_image.shape[0:2]
        start_time = time.time()
        image = cv2.resize(original_image,
                           (self.input_width, self.input_height))
        #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.detect_objects(image)
        elapsed_ms = (time.time() - start_time) * 1000

        fps = 1 / elapsed_ms * 1000
        print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".
              format(fps, elapsed_ms))

        def _to_original_scale(boxes):
            minmax_boxes = to_minmax(boxes)
            minmax_boxes[:, 0] *= self.output_width
            minmax_boxes[:, 2] *= self.output_width
            minmax_boxes[:, 1] *= self.output_height
            minmax_boxes[:, 3] *= self.output_height
            return minmax_boxes.astype(int)  # np.int was removed from recent NumPy releases

        boxes, probs = self.run(results)
        print(boxes)
        if len(boxes) > 0:
            boxes = _to_original_scale(boxes)
            original_image = draw_boxes(original_image, boxes, probs,
                                        self.labels)
        return cv2.imencode('.jpg', original_image)[1].tobytes()

    def run(self, netout):
        """Convert YOLO network output to bounding boxes.

        # Args
            netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)
                YOLO neural network output array

        # Returns
            boxes : array, shape of (N, 4)
                coordinate scale is normalized [0, 1]
            probs : array, shape of (N, nb_classes)
        """
        anchors = [
            0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282,
            3.52778, 9.77052, 9.16828
        ]
        nms_threshold = 0.2
        grid_h, grid_w, nb_box = netout.shape[:3]
        boxes = []

        # decode the output by the network
        netout[..., 4] = _sigmoid(netout[..., 4])
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(
            netout[..., 5:])
        netout[..., 5:] *= netout[..., 5:] > self._threshold

        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # elements 5 onwards are the class probabilities
                    classes = netout[row, col, b, 5:]

                    if np.sum(classes) > 0:
                        # first 4 elements are x, y, w, and h
                        x, y, w, h = netout[row, col, b, :4]

                        x = (col + _sigmoid(x)
                             ) / grid_w  # center position, unit: image width
                        y = (row + _sigmoid(y)
                             ) / grid_h  # center position, unit: image height
                        w = anchors[2 * b + 0] * np.exp(
                            w) / grid_w  # unit: image width
                        h = anchors[2 * b + 1] * np.exp(
                            h) / grid_h  # unit: image height
                        confidence = netout[row, col, b, 4]
                        box = BoundBox(x, y, w, h, confidence, classes)
                        boxes.append(box)

        boxes = nms_boxes(boxes, len(classes), nms_threshold, self._threshold)
        boxes, probs = boxes_to_array(boxes)
        return boxes, probs
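A usage sketch for the YOLO-style detector above with an OpenCV frame, assuming the helper functions it references (_sigmoid, _softmax, to_minmax, nms_boxes, boxes_to_array, BoundBox, draw_boxes) are importable from the surrounding project; the label file is a comma-separated list, and the paths are placeholders:

import cv2

detector = Detector('labels.txt', 'yolo.tflite', threshold=0.3)
frame = cv2.imread('street.jpg')
jpeg_bytes = detector.detect(frame)
with open('detections.jpg', 'wb') as f:
    f.write(jpeg_bytes)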
Example No. 12
class detector():
    def __init__(self, modelPath='objectdetect.tflite',
                 classLabelPath='objectlabelmap.txt',
                 threshold=0.7):

        self.modelPath = modelPath
        self.classLabelPath = classLabelPath
        self.inputImg = None
        self.threshold = threshold

        self.labels = {}
        self.load_labels()

        self.interpreter = Interpreter(self.modelPath)
        self.interpreter.allocate_tensors()

    def load_labels(self):
        with open(self.classLabelPath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    self.labels[int(pair[0])] = pair[1].strip()
                else:
                    self.labels[row_number] = pair[0].strip()

    def setTensors(self, Img):
        """Sets the input tensor to Img.

        Img MUST already be resized to the model's input size before being
        passed to this method.
        """
        self.inputImg = Img
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = self.inputImg

    def get_output_tensor(self, index):
        """Get output from tensor given an index"""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(
            self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self):
        """Returns a list of detection results, each a dictionary of object info."""
        self.interpreter.invoke()

        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))

        results = []
        items = (0, 1, 2, 3, 5, 6, 7, 9, 10, 11)
        for i in range(count):
            if scores[i] >= self.threshold and int(classes[i]) in items:
                result = {'bounding_box': boxes[i], \
                          'class_id': int(classes[i]), \
                          'score': scores[i] \
                          }
                results.append(result)
        return results
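A usage sketch for the detector above; the image must be resized to the model input size and passed through setTensors() before detect_objects() is called, and the default model/label paths plus the test image are placeholders:

import numpy as np
from PIL import Image

det = detector()
_, h, w, _ = det.interpreter.get_input_details()[0]['shape']
img = np.asarray(Image.open('street.jpg').convert('RGB').resize((w, h)))
det.setTensors(img)
for obj in det.detect_objects():
    print(det.labels.get(obj['class_id']), obj['score'])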
Example No. 13
class ObjDetect:
    _labels = None
    _interpreter = None
    _threshold = 0.5
    _input_width = 0
    _input_height = 0
    _jpeg_quality = 95  # OpenCV default value

    def __init__(self, model_file, label_file, threshold, jpeg_quality):
        self._threshold = threshold
        self._labels = self.load_labels(label_file)
        self._interpreter = Interpreter(model_file)
        self._interpreter.allocate_tensors()
        _, self._input_height, self._input_width, _ = self._interpreter.get_input_details(
        )[0]['shape']
        self._jpeg_quality = jpeg_quality

    def Detect(self, image_bytes, detect_list):
        original_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
        image = original_image.resize((self._input_width, self._input_height),
                                      Image.ANTIALIAS)
        results = self.detect_objects(image)
        CAMERA_WIDTH, CAMERA_HEIGHT = original_image.size
        draw = ImageDraw.Draw(original_image)
        has_objects = False
        for obj in results:
            # Check whether class_name is in detect_list
            class_name = self._labels[obj['class_id']]
            if class_name not in detect_list:
                # Skip drawing the bounding box
                continue
            else:
                # Mark that at least one matching object was found
                has_objects = True

            # Convert the bounding box figures from relative coordinates
            # to absolute coordinates based on the original resolution
            ymin, xmin, ymax, xmax = obj['bounding_box']
            xmin = int(xmin * CAMERA_WIDTH)
            xmax = int(xmax * CAMERA_WIDTH)
            ymin = int(ymin * CAMERA_HEIGHT)
            ymax = int(ymax * CAMERA_HEIGHT)

            # Overlay the box, label, and score on the camera preview
            score = obj['score'] * 100
            draw.rectangle([xmin, ymin, xmax, ymax], outline="red")
            draw.text([xmin, ymax],
                      '{}:{:.2f}%'.format(self._labels[obj['class_id']],
                                          score),
                      fill="red")
        del draw

        if has_objects:
            image_bytes_array = io.BytesIO()
            original_image.save(image_bytes_array,
                                format='JPEG',
                                quality=self._jpeg_quality,
                                subsampling=0)
            return image_bytes_array.getvalue()
        else:
            return None

    def load_labels(self, path):
        """Loads the labels file. Supports files with or without index numbers."""
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            labels = {}
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    labels[int(pair[0])] = pair[1].strip()
                else:
                    labels[row_number] = pair[0].strip()
        return labels

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self._interpreter.get_input_details()[0]['index']
        input_tensor = self._interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self._interpreter.get_output_details()[index]
        tensor = np.squeeze(
            self._interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Returns a list of detection results, each a dictionary of object info."""
        self.set_input_tensor(image)
        self._interpreter.invoke()

        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))

        results = []
        for i in range(count):
            if scores[i] >= self._threshold:
                result = {
                    'bounding_box': boxes[i],
                    'class_id': classes[i],
                    'score': scores[i]
                }
                results.append(result)
        return results
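A usage sketch for ObjDetect with raw JPEG bytes; only objects whose label appears in detect_list get drawn, and None is returned when nothing matches. The paths and the detect_list entries are placeholders:

detector = ObjDetect('detect.tflite', 'coco_labels.txt', threshold=0.5, jpeg_quality=90)
with open('frame.jpg', 'rb') as f:
    annotated = detector.Detect(f.read(), detect_list=['person', 'car'])
if annotated is not None:
    with open('annotated.jpg', 'wb') as f:
        f.write(annotated)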
Example No. 14
class Detector(object):
    """Detector class which acts as a wrapper for the tensor flow library API"""
    def __init__(self, model, path_to_label_file, threshold=0.4):
        self.interpreter = Interpreter(model)
        self.interpreter.allocate_tensors()
        self.labels = self.load_labels(path_to_label_file)
        self.threshold = threshold
        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details(
        )[0]['shape']

    @staticmethod
    def load_labels(path):
        """Loads the labels file. Supports files with or without index numbers."""
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
            labels = {}
            for row_number, content in enumerate(lines):
                pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
                if len(pair) == 2 and pair[0].strip().isdigit():
                    labels[int(pair[0])] = pair[1].strip()
                else:
                    labels[row_number] = pair[0].strip()
        return labels

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(
            self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Returns a list of detection results, each a dictionary of object info."""
        self.set_input_tensor(image)
        self.interpreter.invoke()

        # Get all output details
        boxes = self.get_output_tensor(0)
        classes = self.get_output_tensor(1)
        scores = self.get_output_tensor(2)
        count = int(self.get_output_tensor(3))

        results = []
        for i in range(count):
            if scores[i] >= self.threshold and classes[i] == 0:
                result = {
                    'bounding_box': boxes[i],
                    'class_id': classes[i],
                    'score': scores[i]
                }
                results.append(result)
        return results

    def annotate_objects(self, annotator, results):
        """Draws the bounding box and label for each object in the results."""
        for obj in results:
            # Convert the bounding box figures from relative coordinates
            # to absolute coordinates based on the original resolution
            ymin, xmin, ymax, xmax = obj['bounding_box']
            xmin = int(xmin * CAMERA_WIDTH)
            xmax = int(xmax * CAMERA_WIDTH)
            ymin = int(ymin * CAMERA_HEIGHT)
            ymax = int(ymax * CAMERA_HEIGHT)

            # Overlay the box, label, and score on the camera preview
            annotator.bounding_box([xmin, ymin, xmax, ymax])
            annotator.text([xmin, ymin], '%s\n%.2f' %
                           (self.labels[obj['class_id']], obj['score']))
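A usage sketch for the person detector above; annotate_objects expects an annotator helper and the CAMERA_WIDTH / CAMERA_HEIGHT globals from the original picamera example, so this sketch only runs the detection step. Paths are placeholders:

import numpy as np
from PIL import Image

det = Detector('detect.tflite', 'coco_labels.txt', threshold=0.5)
img = Image.open('frame.jpg').convert('RGB').resize((det.input_width, det.input_height))
for obj in det.detect_objects(np.asarray(img)):
    print(det.labels[int(obj['class_id'])], obj['score'])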
Example No. 15
for image_path in images:
    image_name = image_path.split('\\')[-1]

    # Load image and resize to expected shape [1xHxWx3]
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.tensor(interpreter.get_input_details()[0]["index"])
    interpreter.set_tensor(input_details[0]["index"], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]["index"])[0]    # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]["index"])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]["index"])[0]   # Confidence of detected objects
    num = interpreter.get_tensor(output_details[3]['index'])[0]      # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
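        # NOTE: the original listing is truncated at this point. The body below is an
        # assumed continuation (the 0.5 confidence threshold and the drawing step are
        # hypothetical), sketching what this kind of loop typically does.
        if scores[i] > 0.5 and scores[i] <= 1.0:
            # Scale the normalized box coordinates back to the original image size.
            ymin = int(max(1, boxes[i][0] * imH))
            xmin = int(max(1, boxes[i][1] * imW))
            ymax = int(min(imH, boxes[i][2] * imH))
            xmax = int(min(imW, boxes[i][3] * imW))
            # Draw the detection on the original BGR image.
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)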
Example No. 16
def set_input_tensor(interpreter: tflite.Interpreter, frame: Image) -> None:
    """Sets the input tensor of the model to the current frame"""
    tensor_index = interpreter.get_input_details()[0]['index']
    input_tensor = interpreter.tensor(tensor_index)()[0]
    input_tensor[:, :] = frame
Example No. 17
class ImageCapture(Thread):
  def __init__(self):
    super().__init__()

    self.logger = logging.getLogger(__name__)
    self.logger.debug('Init image capture')

    self.WIDTH=640
    self.HEIGHT=480

    # Initialize the camera
    self.camera = PiCamera() 
    # Set the camera resolution
    self.camera.resolution = (self.WIDTH, self.HEIGHT)
    # Set the number of frames per second
    self.camera.framerate = 32 
    # Generates a 3D RGB array and stores it in rawCapture
    self.raw_capture = PiRGBArray(self.camera, size=(self.WIDTH, self.HEIGHT))
    # Wait a certain number of seconds to allow the camera time to warmup
    time.sleep(0.1)

    # load COCO labels
    self.labels = {}
    self.load_labels("./models/coco_labels.txt")
    # init the tf interpreter
    self.interpreter = Interpreter("./models/detect.tflite")
    self.interpreter.allocate_tensors()
    _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']

    # current image
    self.image=None
    # loop bool
    self.running=True

  def run(self):
    self.logger.debug('starting capture thread')
    while self.running:
      # process the next camera frame
      self.nextFrame()
      time.sleep(0.05)

  def nextFrame(self):
    self.logger.debug('Capturing next frame')
    # Capture frames continuously from the camera
    self.camera.capture(self.raw_capture, format="bgr", use_video_port=True)
    # analyse the raw image to detect objects
    self.analyse()
    # convert raw image to jpeg    
    res, self.image=cv2.imencode('.JPEG', self.raw_capture.array)
    # Clear the stream in preparation for the next frame
    self.raw_capture.truncate(0)

  def getEncodedImage(self):
    self.logger.debug('Returning current jpeg image base64 encoded')
    if self.image is not None:
      return base64.b64encode(self.image.tobytes()).decode('utf-8')
    return None

  def analyse(self):
    self.logger.debug('analyse raw frame for common objects')
    # resize the image
    resized = cv2.resize(self.raw_capture.array, (self.input_width,self.input_height), interpolation = cv2.INTER_AREA)
    results = self.detect_objects(resized, 0.4)
    self.annotate_objects(results)

  def load_labels(self, path):
    """Loads the labels file. Supports files with or without index numbers."""
    self.logger.debug('loading labels from '+path)
    with open(path, 'r', encoding='utf-8') as f:
      lines = f.readlines()
      for row_number, content in enumerate(lines):
        pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
        if len(pair) == 2 and pair[0].strip().isdigit():
          self.labels[int(pair[0])] = pair[1].strip()
        else:
          self.labels[row_number] = pair[0].strip()

  def set_input_tensor(self, image):
    """Sets the input tensor."""
    self.logger.debug('setting input tensor')
    tensor_index = self.interpreter.get_input_details()[0]['index']
    input_tensor = self.interpreter.tensor(tensor_index)()[0]
    input_tensor[:, :] = image

  def get_output_tensor(self, index):
    """Returns the output tensor at the given index."""
    self.logger.debug('getting output tensor')
    output_details = self.interpreter.get_output_details()[index]
    tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
    return tensor

  def detect_objects(self, image, threshold):
    """Returns a list of detection results, each a dictionary of object info."""
    self.logger.debug('starting to detect objects')

    self.set_input_tensor(image)
    self.interpreter.invoke()

    # Get all output details
    boxes = self.get_output_tensor(0)
    classes = self.get_output_tensor(1)
    scores = self.get_output_tensor(2)
    count = int(self.get_output_tensor(3))

    results = []
    for i in range(count):
      if scores[i] >= threshold:
        result = {
            'bounding_box': boxes[i],
            'class_id': classes[i],
            'score': scores[i]
        }
        results.append(result)
    return results

  def annotate_objects(self, results):
    """Draws the bounding box and label for each object in the results."""
    self.logger.debug('annotate objects')
    for obj in results:
      # Convert the bounding box figures from relative coordinates
      # to absolute coordinates based on the original resolution
      ymin, xmin, ymax, xmax = obj['bounding_box']
      xmin = int(xmin * self.WIDTH)
      xmax = int(xmax * self.WIDTH)
      ymin = int(ymin * self.HEIGHT)
      ymax = int(ymax * self.HEIGHT)

      cv2.rectangle(self.raw_capture.array, (xmin,ymin), (xmax, ymax), (0,255,0), 2)
      cv2.putText(self.raw_capture.array, self.labels[obj['class_id']], (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
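A usage sketch for the capture thread above, assuming a Raspberry Pi camera and the model/label files under ./models are present; it starts the thread, fetches one base64-encoded frame, then stops the loop:

import time

capture = ImageCapture()
capture.start()
time.sleep(2)  # allow a few frames to be captured and analysed
encoded = capture.getEncodedImage()
if encoded:
    print('got %d base64 characters' % len(encoded))
capture.running = False
capture.join()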