    # Grab frame from video stream
    frame1 = videostream.read()
    # Acquire frame and resize to expected shape [1xHxWx3]
    frame = frame1.copy()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[
        0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(
        output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(
        output_details[2]['index'])[0]  # Confidence of detected objects
    #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

            # Get bounding box coordinates and draw box
def main():
    # If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow
    pkg = importlib.util.find_spec('tensorflow')
    if pkg is None:
        from tflite_runtime.interpreter import Interpreter
    else:
        from tensorflow.lite.python.interpreter import Interpreter

    args = getParameters()
    MODEL_NAME = args.modeldir
    GRAPH_NAME = args.graph
    LABELMAP_NAME = args.labels
    SLEEP_TIME = args.sleep
    CAMERA_IP = args.cameraip
    SHOW_LOG = args.showlog
    MIN_CONF_THRESHOLD = args.threshold

    resW, resH = args.resolution.split('x')
    imW, imH = int(resW), int(resH)

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del(labels[0])

    # Load the Tensorflow Lite model and get details
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    freq = cv2.getTickFrequency()

    # Initialize video stream
    videostream = VideoStream(
        resolution=(imW, imH), framerate=30, camera_ip=CAMERA_IP).start()
    time.sleep(1)

    # Start time
    startTime = time.time()

    # Mean of people
    people_mean = 0
    while True:
        # Grab frame from video stream
        frame1 = videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        # Bounding box coordinates of detected objects
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]
        classes = interpreter.get_tensor(output_details[1]['index'])[
            0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[
            0]  # Confidence of detected objects

        # Current people detected
        current_num_people = 0

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if scores[i] > MIN_CONF_THRESHOLD and scores[i] <= 1.0 and labels[int(classes[i])] == 'person':
                current_num_people += 1

        # Update people mean
        people_mean = (people_mean + current_num_people) / 2

        if SHOW_LOG:
            print("Current people mean: " + str(people_mean))

        # If should send information to Firebase
        if time.time() - startTime > SLEEP_TIME:
            # Update start time
            startTime = time.time()
            sendDataToFirebase(round(people_mean))

    # Clean up
    videostream.stop()
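
main() above calls a getParameters() helper that is not part of this snippet. A minimal argparse-based sketch covering the attributes main() reads might look like the following; every flag name mirrors an attribute used above, and the default values are only illustrative.

import argparse

def getParameters():
    # Hypothetical sketch of the parser main() assumes; defaults are illustrative.
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldir', required=True, help='Folder containing the .tflite model')
    parser.add_argument('--graph', default='detect.tflite', help='Name of the .tflite file')
    parser.add_argument('--labels', default='labelmap.txt', help='Name of the label map file')
    parser.add_argument('--sleep', type=float, default=60.0, help='Seconds between Firebase updates')
    parser.add_argument('--cameraip', default='', help='IP camera address')
    parser.add_argument('--showlog', action='store_true', help='Print the running people mean')
    parser.add_argument('--threshold', type=float, default=0.5, help='Minimum confidence threshold')
    parser.add_argument('--resolution', default='1280x720', help='Stream resolution as WxH')
    return parser.parse_args()
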
Example #3
class Detector:
    """
    Perform object detection with the given model. The model is a quantized tflite
    file which if the detector can not find it at the path it will download it
    from neuralet repository automatically.

    :param config: Is a ConfigEngine instance which provides necessary parameters.
    """

    def __init__(self, config, model_name, variables):
        self.config = config
        self.model_name = model_name
        self.model_variables = variables
        # Frames Per Second
        self.fps = None
        self.model_file = 'ped_ssd_mobilenet_v2_quantized_edgetpu.tflite'
        self.model_path = '/repo/data/edgetpu/' + self.model_file

        # Get the model .tflite file path from the config.
        # If there is no .tflite file at that path, it will be downloaded automatically from base_url
        user_model_path = self.model_variables['ModelPath']
        if len(user_model_path) > 0:
            print('using %s as model' % user_model_path)
            self.model_path = user_model_path
        else:
            base_url = 'https://media.githubusercontent.com/media/neuralet/neuralet-models/master/edge-tpu/'
            url = base_url + self.model_name + '/' + self.model_file

            if not os.path.isfile(self.model_path):
                print('model does not exist under: ', self.model_path, 'downloading from ', url)
                wget.download(url, self.model_path)

        # Load TFLite model and allocate tensors
        device_id = self.config.get_section_dict("Detector").get("DeviceId")
        if device_id:
            self.interpreter = Interpreter(
                self.model_path, experimental_delegates=[load_delegate("libedgetpu.so.1", options={"device": device_id})]
            )
        else:
            self.interpreter = Interpreter(self.model_path, experimental_delegates=[load_delegate("libedgetpu.so.1")])

        self.interpreter.allocate_tensors()
        # Get the model input and output tensor details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Get class id from config
        self.class_id = int(self.model_variables['ClassID'])
        self.score_threshold = float(self.model_variables['MinScore'])

    def inference(self, resized_rgb_image):
        """
        inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding detection output which is used for creating result
        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a dictionary contains of [{"id": 0, "bbox": [x1, y1, x2, y2], "score":s%}, {...}, {...}, ...]
        """
        input_image = np.expand_dims(resized_rgb_image, axis=0)
        # Fill input tensor with input_image
        self.interpreter.set_tensor(self.input_details[0]["index"], input_image)
        t_begin = time.perf_counter()
        self.interpreter.invoke()
        inference_time = time.perf_counter() - t_begin  # Second
        self.fps = convert_infr_time_to_fps(inference_time)
        # The function `get_tensor()` returns a copy of the tensor data.
        # Use `tensor()` in order to get a pointer to the tensor.
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        labels = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        # TODO: will be used for getting number of objects
        # num = self.interpreter.get_tensor(self.output_details[3]['index'])

        result = []
        for i in range(boxes.shape[1]):  # number of boxes
            if labels[0, i] == self.class_id and scores[0, i] > self.score_threshold:
                result.append({"id": str(self.class_id) + '-' + str(i), "bbox": boxes[0, i, :], "score": scores[0, i]})

        return result
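
A hedged usage sketch of the Detector above: the config object, model name, variables dict and image path are placeholders, and the 300x300 input size is only an assumption about the SSD model.

import cv2

# Hypothetical usage; `config` is assumed to behave like a ConfigEngine and
# `variables` to contain 'ModelPath', 'ClassID' and 'MinScore' keys.
detector = Detector(config, 'ped_ssd_mobilenet_v2', variables)
frame = cv2.imread('frame.jpg')
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
resized = cv2.resize(rgb, (300, 300))  # assumed model input size
for obj in detector.inference(resized):
    print(obj['id'], float(obj['score']), obj['bbox'])
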
Example #4
        #         object_name2 = door_labels[int(door_classes[i])] # Look up object name from "labels" array using class index
        #         door_label = '%s: %d%%' % (object_name2, int(door_scores[i]*100)) # Example: 'person: 72%'
        #         door_labelSize, door_baseLine = cv2.getTextSize(door_label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
        #         door_label_ymin = max(door_ymin, door_labelSize[1] + 10) # Make sure not to draw label too close to top of window
        #         cv2.rectangle(frame, (door_xmin, door_label_ymin-door_labelSize[1]-10), (door_xmin+door_labelSize[0], door_label_ymin+door_baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
        #         cv2.putText(frame, door_label, (door_xmin, door_label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text
        #         '''
        #         object_name2 = door_labels[int(door_classes[i])] # Look up object name from "labels" array using class index
        #         if object_name2=='open':
        #             door=1

        # #loitering
        # loitering = loitering_prediction(loitering_interpreter,loitering_input_details,loitering_output_details,frame,CLASSIFIER_CKPT)

        animal_interpreter.set_tensor(input_details[0]['index'], input_data)
        animal_interpreter.invoke()

        # Retrieve detection results
        animal_boxes = animal_interpreter.get_tensor(
            animal_output_details[0]['index'])[
                0]  # Bounding box coordinates of detected objects
        animal_classes = animal_interpreter.get_tensor(
            animal_output_details[1]['index'])[
                0]  # Class index of detected objects
        animal_scores = animal_interpreter.get_tensor(
            animal_output_details[2]['index'])[
                0]  # Confidence of detected objects
        animal_output_data = animal_interpreter.get_tensor(
            animal_output_details[0]['index'])
        num = animal_interpreter.get_tensor(animal_output_details[3]['index'])[
            0]  # Total number of detected objects (inaccurate and not needed)
if len(sys.argv) < 3:
  print('Usage:', sys.argv[0], '<model_path> <test_image_dir>')
  exit()

model_path = str(sys.argv[1])

# Creates tflite interpreter
if 'edgetpu' in model_path:
  interpreter = Interpreter(model_path, experimental_delegates=[
      load_delegate(EDGETPU_SHARED_LIB)])
else:
  interpreter = Interpreter(model_path)

interpreter.allocate_tensors()
interpreter.invoke()  # warmup
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
width = input_details[0]['shape'][2]
height = input_details[0]['shape'][1]


def run_inference(interpreter, image):
  interpreter.set_tensor(input_details[0]['index'], image)
  interpreter.invoke()
  boxes = interpreter.get_tensor(output_details[0]['index'])[0]
  classes = interpreter.get_tensor(output_details[1]['index'])[0]
  scores = interpreter.get_tensor(output_details[2]['index'])[0]
  # num_detections = interpreter.get_tensor(output_details[3]['index'])[0]
  return boxes, classes, scores
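
A short sketch of driving run_inference() on a single test image; the preprocessing assumes a uint8 quantized model, and the 0.5 score cut-off is illustrative.

import cv2
import numpy as np

# Illustrative only: load one image, resize it to the model input size obtained
# above, and keep detections above an assumed 0.5 score threshold.
image = cv2.imread('test.jpg')
resized = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), (width, height))
input_data = np.expand_dims(resized, axis=0).astype(np.uint8)
boxes, classes, scores = run_inference(interpreter, input_data)
for box, cls, score in zip(boxes, classes, scores):
    if score > 0.5:
        print(int(cls), float(score), box)
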
Example #6
class ObjectDetector(object):
    def __init__(self, model_path, label_path, use_coral_flag, use_tpu_flag,
                 res_x, res_y, min_conf_threshold):

        self.res_y = res_y
        self.res_x = res_x
        self.use_coral_flag = use_coral_flag
        if use_coral_flag:
            from edgetpu.detection.engine import DetectionEngine
            from edgetpu.utils import dataset_utils
        self.min_conf_threshold = min_conf_threshold

        # Load the label map
        with open(label_path, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        if self.labels[0] == '???':
            del (self.labels[0])

        if use_tpu_flag:
            self.interpreter = Interpreter(
                model_path=model_path,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        else:
            self.interpreter = Interpreter(model_path=model_path)

        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.is_floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5

        #Coral
        if use_coral_flag:
            self.engine = DetectionEngine(model_path)
            self.labels = dataset_utils.read_label_file(label_path)
            _, height, width, _ = self.engine.get_input_tensor_shape()

    def apply_coral_model(self, input_data):
        print("here")
        ans = self.engine.detect_with_input_tensor(input_data,
                                                   threshold=0.05,
                                                   top_k=10)
        print("here2")
        for obj in ans:
            if self.labels:
                print(self.labels[obj.label_id])
            print('score = ', obj.score)
            box = obj.bounding_box.flatten().tolist()
            print('box = ', box)

    def apply_tflite_model(self, input_data):
        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[
            0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[
            0]  # Confidence of detected objects

        return (boxes, classes, scores)

    def process_frame(self, frame):
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.is_floating_model:
            input_data = (np.float32(input_data) -
                          self.input_mean) / self.input_std

        if self.use_coral_flag:
            self.apply_coral_model(input_data)
            # apply_coral_model only prints its detections; return empty lists
            # so the unpacking below does not fail on undefined names.
            boxes, classes, scores = [], [], []
        else:
            (boxes, classes, scores) = self.apply_tflite_model(input_data)

        return (frame, boxes, classes, scores)

    def is_interesting_object(self, scores, classes):
        is_interesting_object = False
        interesting_classes = []
        for i in range(len(scores)):
            if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):
                is_interesting_object = True
                interesting_classes.append(self.labels[int(classes[i])])
        return is_interesting_object, interesting_classes

    def draw_frame(self, frame, boxes, classes, scores):
        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):

                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1, (boxes[i][0] * self.res_y)))
                xmin = int(max(1, (boxes[i][1] * self.res_x)))
                ymax = int(min(self.res_y, (boxes[i][2] * self.res_y)))
                xmax = int(min(self.res_x, (boxes[i][3] * self.res_x)))

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              4)

                # Draw label
                object_name = self.labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                      )  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label,
                                                      cv2.FONT_HERSHEY_SIMPLEX,
                                                      0.7, 2)  # Get font size
                label_ymin = max(
                    ymin, labelSize[1] + 10
                )  # Make sure not to draw label too close to top of window
                cv2.rectangle(
                    frame, (xmin, label_ymin - labelSize[1] - 10),
                    (xmin + labelSize[0], label_ymin + baseLine - 10),
                    (255, 255, 255),
                    cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                            2)  # Draw label text
        (flag, encodedImage) = cv2.imencode(".jpg", frame)
        return encodedImage
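
A hedged sketch of wiring the ObjectDetector together with a webcam; every path, resolution and threshold below is a placeholder rather than a value from the snippet.

import cv2

# Placeholder paths and parameters; adjust to your own model and camera.
detector = ObjectDetector(model_path='model/detect.tflite',
                          label_path='model/labelmap.txt',
                          use_coral_flag=False, use_tpu_flag=False,
                          res_x=640, res_y=480, min_conf_threshold=0.5)

cap = cv2.VideoCapture(0)
ok, frame = cap.read()
if ok:
    frame, boxes, classes, scores = detector.process_frame(frame)
    interesting, names = detector.is_interesting_object(scores, classes)
    if interesting:
        print('Detected:', names)
    jpeg = detector.draw_frame(frame, boxes, classes, scores)
cap.release()
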
Example #7
class TFInferenceEngine:
    """Thin wrapper around TFLite Interpreter.

    The official TFLite API is moving fast and still changes frequently.
    This class intends to abstract away underlying TF changes to some extent.

    It dynamically detects whether an EdgeTPU is available and uses it;
    otherwise it falls back to the TFLite runtime.
    """
    def __init__(self,
                 model=None,
                 labels=None,
                 confidence_threshold=0.8,
                 top_k=10):
        """Create an instance of Tensorflow inference engine.

        :Parameters:
        ----------
        model: dict
            {
                'tflite': path,
                'edgetpu': path,
            }
            Where path is of type string and points to the
            location of frozen graph file (AI model).
        labels : string
            Location of file with model labels.
        confidence_threshold : float
            Inference confidence threshold.
        top_k : int
            Inference top-k threshold.

        """
        assert model
        assert model['tflite'], 'TFLite AI model path required.'
        model_tflite = model['tflite']
        assert os.path.isfile(model_tflite), \
            'TFLite AI model file does not exist: {}' \
            .format(model_tflite)
        self._model_tflite_path = model_tflite
        model_edgetpu = model.get('edgetpu', None)
        if model_edgetpu:
            assert os.path.isfile(model_edgetpu), \
                'EdgeTPU AI model file does not exist: {}' \
                .format(model_edgetpu)
        self._model_edgetpu_path = model_edgetpu
        assert labels, 'AI model labels path required.'
        assert os.path.isfile(labels), \
            'AI model labels file does not exist: {}' \
            .format(labels)
        self._model_labels_path = labels
        self._confidence_threshold = confidence_threshold
        self._top_k = top_k
        log.debug(
            'Loading AI model:\n'
            'TFLite graph: %r\n'
            'EdgeTPU graph: %r\n'
            'Labels: %r\n'
            'Confidence threshold: %.0f%%\n'
            'top-k: %d', model_tflite, model_edgetpu, labels,
            confidence_threshold * 100, top_k)
        # EdgeTPU is not available in testing and other environments
        # load dynamically as needed
        #        edgetpu_class = 'DetectionEngine'
        #        module_object = import_module('edgetpu.detection.engine',
        #                                      package=edgetpu_class)
        #        target_class = getattr(module_object, edgetpu_class)
        self._tf_interpreter = _get_edgetpu_interpreter(model=model_edgetpu)
        if not self._tf_interpreter:
            log.debug('EdgeTPU not available. Will use TFLite CPU runtime.')
            self._tf_interpreter = Interpreter(model_path=model_tflite)
        assert self._tf_interpreter
        self._tf_interpreter.allocate_tensors()
        # check the type of the input tensor
        self._tf_input_details = self._tf_interpreter.get_input_details()
        self._tf_output_details = self._tf_interpreter.get_output_details()
        self._tf_is_quantized_model = \
            self.input_details[0]['dtype'] != np.float32

    @property
    def input_details(self):
        return self._tf_input_details

    @property
    def output_details(self):
        return self._tf_output_details

    @property
    def is_quantized(self):
        return self._tf_is_quantized_model

    @property
    def labels_path(self):
        """
        Location of labels file.

        :Returns:
        -------
        string
            Path to AI model labels.

        """
        return self._model_labels_path

    @property
    def confidence_threshold(self):
        """
        Inference confidence threshold.

        :Returns:
        -------
        float
            Confidence threshold for inference results.
            Only results at or above
            this threshold should be returned by each engine inference.

        """
        return self._confidence_threshold

    @property
    def top_k(self):
        """
        Inference top-k threshold.

        :Returns:
        -------
        int
            Max number of results to be returned by each inference.
            Ordered by confidence score.

        """
        return self._top_k

    def infer(self):
        """Invoke model inference on current input tensor."""
        return self._tf_interpreter.invoke()

    def set_tensor(self, index=None, tensor_data=None):
        """Set tensor data at given reference index."""
        assert isinstance(index, int)
        self._tf_interpreter.set_tensor(index, tensor_data)

    def get_tensor(self, index=None):
        """Return tensor data at given reference index."""
        assert isinstance(index, int)
        return self._tf_interpreter.get_tensor(index)
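
A hedged usage sketch of TFInferenceEngine; the model and label paths are placeholders, and the dummy input assumes a quantized uint8 detection model.

import numpy as np

# Placeholder paths; the 'edgetpu' entry may be omitted when no EdgeTPU model exists.
engine = TFInferenceEngine(
    model={'tflite': 'ai_models/detect.tflite',
           'edgetpu': 'ai_models/detect_edgetpu.tflite'},
    labels='ai_models/labels.txt',
    confidence_threshold=0.6)

_, in_h, in_w, _ = engine.input_details[0]['shape']
dummy = np.zeros((1, in_h, in_w, 3), dtype=np.uint8)  # stand-in for a real frame
engine.set_tensor(engine.input_details[0]['index'], dummy)
engine.infer()
boxes = engine.get_tensor(engine.output_details[0]['index'])
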
class PalmDetection:
    def __init__(self, palm_model_path, anchors_path):
        self.interp_palm = Interpreter(palm_model_path)
        self.interp_palm.allocate_tensors()

        output_details = self.interp_palm.get_output_details()
        input_details = self.interp_palm.get_input_details()

        self.in_idx = input_details[0]['index']
        self.out_reg_idx = output_details[0]['index']
        self.out_clf_idx = output_details[1]['index']
        # reading the SSD anchors
        with open(anchors_path, "r") as csv_f:
            self.anchors = np.r_[[
                x for x in csv.reader(csv_f, quoting=csv.QUOTE_NONNUMERIC)
            ]]

        # 90° rotation matrix used to create the alignment triangle
        self.R90 = np.r_[[[0, 1], [-1, 0]]]

        # triangle target coordinates used to move the detected hand
        # into the right position
        self._target_triangle = np.float32([[128, 128], [128, 0], [0, 128]])
        self._target_box = np.float32([
            [0, 0, 1],
            [256, 0, 1],
            [256, 256, 1],
            [0, 256, 1],
        ])

    @staticmethod
    def _sigm(x):
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def _im_normalize(img):
        return np.ascontiguousarray(2 * ((img / 255) - 0.5).astype('float32'))

    def preprocess_img(self, img):
        # fit the image into a 256x256 square
        shape = np.r_[img.shape]
        self.rgb_shape = shape
        pad = (shape.max() - shape[:2]).astype('uint32') // 2
        img_pad = np.pad(img, ((pad[0], pad[0]), (pad[1], pad[1]), (0, 0)),
                         mode='constant')
        img_small = cv2.resize(img_pad, (256, 256))
        img_small = np.ascontiguousarray(img_small)

        img_norm = self._im_normalize(img_small)
        return img_pad, img_norm, pad

    def predict_hand_boxes(self, img_norm, pad):
        self.interp_palm.set_tensor(self.in_idx,
                                    img_norm.reshape(1, 256, 256, 3))
        self.interp_palm.invoke()
        out_reg = self.interp_palm.get_tensor(self.out_reg_idx)[0]  # bbox
        out_clf = self.interp_palm.get_tensor(self.out_clf_idx)[0, :,
                                                                0]  # scores

        detection_mask = self._sigm(out_clf) > 0.7
        print(np.sum(detection_mask))
        candidate_detect = out_reg[detection_mask]
        candidate_anchors = self.anchors[detection_mask]

        if candidate_detect.shape[0] == 0:
            return None

        keep = nms(candidate_detect, 0.5)
        bboxes = []

        for idx in keep:
            dx, dy, w, h = candidate_detect[idx, :4]
            center_wo_offst = candidate_anchors[idx, :2] * 256
            dx += center_wo_offst[0] - pad[1]
            dy += center_wo_offst[1] - pad[0]
            bboxes.append((dx, dy, w, h))

        print('keep', keep)
        return bboxes

    def __call__(self, img):
        img_pad, img_norm, pad = self.preprocess_img(img)
        return self.predict_hand_boxes(img_norm, pad)
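
A hedged sketch of calling the palm detector above; the model and anchor file names are placeholders borrowed from the usual MediaPipe hand-tracking setup.

import cv2

# Placeholder file names; the detector expects an RGB image.
palm = PalmDetection('palm_detection.tflite', 'anchors.csv')
img = cv2.cvtColor(cv2.imread('hand.jpg'), cv2.COLOR_BGR2RGB)
boxes = palm(img)
if boxes:
    for dx, dy, w, h in boxes:
        print('palm at', dx, dy, 'size', w, h)
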
Example #9
class object_detector:
    def __init__(self):
        PATH_TO_CKPT = rospy.get_param("/object_detector/weights_path")
        PATH_TO_LABELS = rospy.get_param("/object_detector/labels_path")
        camera_input = rospy.get_param("/object_detector/cam_feed")
        use_tpu = int(rospy.get_param("/object_detector/tpu"))
        self.min_conf_threshold = float(
            rospy.get_param("/object_detector/threshold"))
        self.imW = int(rospy.get_param("/object_detector/imW"))
        self.imH = int(rospy.get_param("/object_detector/imH"))
        pkg = importlib.util.find_spec('tflite_runtime')
        if pkg:
            from tflite_runtime.interpreter import Interpreter
            if use_tpu:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if use_tpu:
                from tensorflow.lite.python.interpreter import load_delegate
        if use_tpu:
            # If the weights file is the default 'detect.tflite', switch to the
            # EdgeTPU-compiled 'edgetpu.tflite' in the same directory.
            if os.path.basename(PATH_TO_CKPT) == 'detect.tflite':
                PATH_TO_CKPT = os.path.join(
                    os.path.dirname(PATH_TO_CKPT), 'edgetpu.tflite')
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del (self.labels[0])

        # Load the Tensorflow Lite model.
        # If using Edge TPU, use special load_delegate argument
        if use_tpu:
            self.interpreter = Interpreter(
                model_path=PATH_TO_CKPT,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5
        # Initialize frame rate calculation
        self.frame_rate_calc = 1
        self.freq = cv2.getTickFrequency()

        self.image_pub = rospy.Publisher("/detected_image",
                                         Image,
                                         queue_size=10)
        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber(camera_input, Image, self.callback)

    def callback(self, data):
        t1 = cv2.getTickCount()
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)
            return

        frame = cv_image.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) -
                          self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()
        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[
            0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[
            0]  # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        output = [
            x for x in zip(classes, boxes, scores)
            if x[2] > self.min_conf_threshold and x[2] <= 1.0
        ]

        print("Output", output)
Example #10
class ImageCapture(Thread):
  def __init__(self):
    super().__init__()

    self.logger = logging.getLogger(__name__)
    self.logger.debug('Init image capture')

    self.WIDTH=640
    self.HEIGHT=480

    # Initialize the camera
    self.camera = PiCamera() 
    # Set the camera resolution
    self.camera.resolution = (self.WIDTH, self.HEIGHT)
    # Set the number of frames per second
    self.camera.framerate = 32 
    # Generates a 3D RGB array and stores it in rawCapture
    self.raw_capture = PiRGBArray(self.camera, size=(self.WIDTH, self.HEIGHT))
    # Wait a certain number of seconds to allow the camera time to warmup
    time.sleep(0.1)

    # load COCO labels
    self.labels = {}
    self.load_labels("./models/coco_labels.txt")
    # init the tf interpreter
    self.interpreter = Interpreter("./models/detect.tflite")
    self.interpreter.allocate_tensors()
    _, self.input_height, self.input_width, _ = self.interpreter.get_input_details()[0]['shape']

    # current image
    self.image=None
    # loop bool
    self.running=True

  def run(self):
    self.logger.debug('starting capture thread')
    while self.running:
      # process the next camera frame
      self.nextFrame()
      time.sleep(0.05)

  def nextFrame(self):
    self.logger.debug('Capturing next frame')
    # Capture frames continuously from the camera
    self.camera.capture(self.raw_capture, format="bgr", use_video_port=True)
    # analyse the raw image to detect objects
    self.analyse()
    # convert raw image to jpeg    
    res, self.image=cv2.imencode('.JPEG', self.raw_capture.array)
    # Clear the stream in preparation for the next frame
    self.raw_capture.truncate(0)

  def getEncodedImage(self):
    self.logger.debug('Returning current jpeg image base64 encoded')
    if self.image is not None:
      return base64.b64encode(self.image.tobytes()).decode('utf-8')
    return None

  def analyse(self):
    self.logger.debug('analyse raw frame for common objects')
    # resize the image
    resized = cv2.resize(self.raw_capture.array, (self.input_width,self.input_height), interpolation = cv2.INTER_AREA)
    results = self.detect_objects(resized, 0.4)
    self.annotate_objects(results)

  def load_labels(self, path):
    """Loads the labels file. Supports files with or without index numbers."""
    self.logger.debug('loading labels from '+path)
    with open(path, 'r', encoding='utf-8') as f:
      lines = f.readlines()
      for row_number, content in enumerate(lines):
        pair = re.split(r'[:\s]+', content.strip(), maxsplit=1)
        if len(pair) == 2 and pair[0].strip().isdigit():
          self.labels[int(pair[0])] = pair[1].strip()
        else:
          self.labels[row_number] = pair[0].strip()

  def set_input_tensor(self, image):
    """Sets the input tensor."""
    self.logger.debug('setting input tensor')
    tensor_index = self.interpreter.get_input_details()[0]['index']
    input_tensor = self.interpreter.tensor(tensor_index)()[0]
    input_tensor[:, :] = image

  def get_output_tensor(self, index):
    """Returns the output tensor at the given index."""
    self.logger.debug('getting output tensor')
    output_details = self.interpreter.get_output_details()[index]
    tensor = np.squeeze(self.interpreter.get_tensor(output_details['index']))
    return tensor

  def detect_objects(self, image, threshold):
    """Returns a list of detection results, each a dictionary of object info."""
    self.logger.debug('starting to detect objects')

    self.set_input_tensor(image)
    self.interpreter.invoke()

    # Get all output details
    boxes = self.get_output_tensor(0)
    classes = self.get_output_tensor(1)
    scores = self.get_output_tensor(2)
    count = int(self.get_output_tensor(3))

    results = []
    for i in range(count):
      if scores[i] >= threshold:
        result = {
            'bounding_box': boxes[i],
            'class_id': classes[i],
            'score': scores[i]
        }
        results.append(result)
    return results

  def annotate_objects(self, results):
    """Draws the bounding box and label for each object in the results."""
    self.logger.debug('annotate objects')
    for obj in results:
      # Convert the bounding box figures from relative coordinates
      # to absolute coordinates based on the original resolution
      ymin, xmin, ymax, xmax = obj['bounding_box']
      xmin = int(xmin * self.WIDTH)
      xmax = int(xmax * self.WIDTH)
      ymin = int(ymin * self.HEIGHT)
      ymax = int(ymax * self.HEIGHT)

      cv2.rectangle(self.raw_capture.array, (xmin,ymin), (xmax, ymax), (0,255,0), 2)
      cv2.putText(self.raw_capture.array, self.labels[obj['class_id']], (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
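
A short sketch of driving the capture thread; stopping it by clearing the running flag follows from the loop in run(), while everything else is illustrative.

import time

capture = ImageCapture()
capture.start()          # runs nextFrame() in a background thread
time.sleep(2)            # give the camera a moment to produce frames
encoded = capture.getEncodedImage()
if encoded:
    print('got a base64-encoded JPEG of length', len(encoded))
capture.running = False  # assumption: clearing the flag lets run() exit
capture.join()
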
Example #11
def predict():

    MODEL_NAME = "model"
    GRAPH_NAME = 'glyphs.tflite'
    LABELMAP_NAME = "labelmap.txt"
    min_conf_threshold = float(0.3)
    use_TPU = False

    IM_NAME = None
    IM_DIR = "images"

    if (IM_NAME and IM_DIR):
        print(
            'Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.'
        )
        sys.exit()

    if (not IM_NAME and not IM_DIR):
        IM_NAME = 'test1.jpg'

    pkg = importlib.util.find_spec('tensorflow')
    if pkg is None:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    if use_TPU:
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'

    CWD_PATH = os.getcwd()

    if IM_DIR:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
        images = glob.glob(PATH_TO_IMAGES + '/*')

    elif IM_NAME:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
        images = glob.glob(PATH_TO_IMAGES)

    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    if labels[0] == '???':
        del (labels[0])

    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    results = {}

    for image_path in images:

        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imH, imW, _ = image.shape
        image_resized = cv2.resize(image_rgb, (width, height))
        input_data = np.expand_dims(image_resized, axis=0)

        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        boxes = interpreter.get_tensor(output_details[0]['index'])[0]
        classes = interpreter.get_tensor(output_details[1]['index'])[0]
        scores = interpreter.get_tensor(output_details[2]['index'])[0]
        num = interpreter.get_tensor(output_details[3]['index'])[0]

        results['boxes'] = boxes
        results['classes'] = classes
        results['scores'] = scores
        results['num'] = num

    return results

    cv2.destroyAllWindows()
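
A brief, hedged sketch of consuming predict(); reusing the 0.3 threshold from inside the function is an assumption, since predict() returns the raw tensors of the last image only.

results = predict()
for box, cls, score in zip(results['boxes'], results['classes'], results['scores']):
    if score > 0.3:  # same value as min_conf_threshold above (assumption)
        print(int(cls), float(score), box)
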
Example #12
class NanoDetTFLite(object):
    # Constant definition for post process
    STRIDES = (8, 16, 32)
    REG_MAX = 7
    PROJECT = np.arange(REG_MAX + 1)

    # Constant definition for Standardization
    MEAN = np.array([103.53, 116.28, 123.675], dtype=np.float32)
    MEAN = MEAN.reshape(1, 1, 3)
    STD = np.array([57.375, 57.12, 58.395], dtype=np.float32)
    STD = STD.reshape(1, 1, 3)

    def __init__(
        self,
        model_path='model_float16_quant.tflite',
        input_shape=320,
        class_score_th=0.35,
        nms_th=0.6,
        num_threads=1,
    ):
        self.input_shape = (input_shape, input_shape)

        self.class_score_th = class_score_th
        self.nms_th = nms_th

        # load model
        self.interpreter = Interpreter(model_path=model_path,
                                       num_threads=num_threads)
        self.interpreter.allocate_tensors()

        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Calculate grid points for each stride
        self.grid_points = []
        for index in range(len(self.STRIDES)):
            grid_point = self._make_grid_point(
                (int(self.input_shape[0] / self.STRIDES[index]),
                 int(self.input_shape[1] / self.STRIDES[index])),
                self.STRIDES[index],
            )
            self.grid_points.append(grid_point)

    def inference(self, image):
        temp_image = copy.deepcopy(image)
        image_height, image_width = image.shape[0], image.shape[1]

        # Pre process: Standardization, Reshape
        resize_image, new_height, new_width, top, left = self._resize_image(
            temp_image)
        x = self._pre_process(resize_image)

        # Inference execution
        self.interpreter.set_tensor(self.input_details[0]['index'], x)
        self.interpreter.invoke()

        results = []
        results.append(
            self.interpreter.get_tensor(
                self.output_details[5]['index']))  # cls_pred_stride_8
        results.append(
            self.interpreter.get_tensor(
                self.output_details[4]['index']))  # dis_pred_stride_8
        results.append(
            self.interpreter.get_tensor(
                self.output_details[3]['index']))  # cls_pred_stride_16
        results.append(
            self.interpreter.get_tensor(
                self.output_details[2]['index']))  # dis_pred_stride_16
        results.append(
            self.interpreter.get_tensor(
                self.output_details[1]['index']))  # cls_pred_stride_32
        results.append(
            self.interpreter.get_tensor(
                self.output_details[0]['index']))  # dis_pred_stride_32

        # Post-process: NMS, grid -> coordinate transformation
        bboxes, scores, class_ids = self._post_process(results)

        # Post-process: Convert coordinates to fit image size
        ratio_height = image_height / new_height
        ratio_width = image_width / new_width
        for i in range(bboxes.shape[0]):
            bboxes[i, 0] = max(int((bboxes[i, 0] - left) * ratio_width), 0)
            bboxes[i, 1] = max(int((bboxes[i, 1] - top) * ratio_height), 0)
            bboxes[i, 2] = min(
                int((bboxes[i, 2] - left) * ratio_width),
                image_width,
            )
            bboxes[i, 3] = min(
                int((bboxes[i, 3] - top) * ratio_height),
                image_height,
            )
        return bboxes, scores, class_ids

    def _make_grid_point(self, grid_size, stride):
        grid_height, grid_width = grid_size

        shift_x = np.arange(0, grid_width) * stride
        shift_y = np.arange(0, grid_height) * stride

        xv, yv = np.meshgrid(shift_x, shift_y)
        xv = xv.flatten()
        yv = yv.flatten()

        cx = xv + 0.5 * (stride - 1)
        cy = yv + 0.5 * (stride - 1)

        return np.stack((cx, cy), axis=-1)

    def _resize_image(self, image, keep_ratio=True):
        top, left = 0, 0
        new_height, new_width = self.input_shape[0], self.input_shape[1]

        if keep_ratio and image.shape[0] != image.shape[1]:
            hw_scale = image.shape[0] / image.shape[1]
            if hw_scale > 1:
                new_height = self.input_shape[0]
                new_width = int(self.input_shape[1] / hw_scale)

                resize_image = cv2.resize(
                    image,
                    (new_width, new_height),
                    interpolation=cv2.INTER_AREA,
                )

                left = int((self.input_shape[1] - new_width) * 0.5)

                resize_image = cv2.copyMakeBorder(
                    resize_image,
                    0,
                    0,
                    left,
                    self.input_shape[1] - new_width - left,
                    cv2.BORDER_CONSTANT,
                    value=0,
                )
            else:
                new_height = int(self.input_shape[0] * hw_scale)
                new_width = self.input_shape[1]

                resize_image = cv2.resize(
                    image,
                    (new_width, new_height),
                    interpolation=cv2.INTER_AREA,
                )

                top = int((self.input_shape[0] - new_height) * 0.5)

                resize_image = cv2.copyMakeBorder(
                    resize_image,
                    top,
                    self.input_shape[0] - new_height - top,
                    0,
                    0,
                    cv2.BORDER_CONSTANT,
                    value=0,
                )
        else:
            resize_image = cv2.resize(
                image,
                self.input_shape,
                interpolation=cv2.INTER_AREA,
            )

        return resize_image, new_height, new_width, top, left

    def _pre_process(self, image):
        # Standardization
        image = image.astype(np.float32)
        image = (image - self.MEAN) / self.STD

        # Reshape
        image = image.reshape(-1, self.input_shape[0], self.input_shape[1], 3)

        return image

    def _softmax(self, x, axis=1):
        x_exp = np.exp(x)
        x_sum = np.sum(x_exp, axis=axis, keepdims=True)
        s = x_exp / x_sum
        return s

    def _post_process(self, predict_results):
        class_scores = predict_results[::2]
        bbox_predicts = predict_results[1::2]

        bboxes, scores, class_ids = self._get_bboxes_single(
            class_scores,
            bbox_predicts,
            1,
            rescale=False,
        )

        return bboxes.astype(np.int32), scores, class_ids

    def _get_bboxes_single(
        self,
        class_scores,
        bbox_predicts,
        scale_factor,
        rescale=False,
        topk=1000,
    ):
        bboxes = []
        scores = []

        # Convert bounding box coordinates for each stride
        for stride, class_score, bbox_predict, grid_point in zip(
                self.STRIDES, class_scores, bbox_predicts, self.grid_points):
            # Dimension adjustment
            if class_score.ndim == 3:
                class_score = class_score.squeeze(axis=0)
            if bbox_predict.ndim == 3:
                bbox_predict = bbox_predict.squeeze(axis=0)

            # Convert the bounding box to relative coordinates and relative distance
            bbox_predict = bbox_predict.reshape(-1, self.REG_MAX + 1)
            bbox_predict = self._softmax(bbox_predict, axis=1)
            bbox_predict = np.dot(bbox_predict, self.PROJECT).reshape(-1, 4)
            bbox_predict *= stride

            # Target in descending order of score
            if 0 < topk < class_score.shape[0]:
                max_scores = class_score.max(axis=1)
                topk_indexes = max_scores.argsort()[::-1][0:topk]

                grid_point = grid_point[topk_indexes, :]
                bbox_predict = bbox_predict[topk_indexes, :]
                class_score = class_score[topk_indexes, :]

            # Convert the bounding box to absolute coordinates
            x1 = grid_point[:, 0] - bbox_predict[:, 0]
            y1 = grid_point[:, 1] - bbox_predict[:, 1]
            x2 = grid_point[:, 0] + bbox_predict[:, 2]
            y2 = grid_point[:, 1] + bbox_predict[:, 3]
            x1 = np.clip(x1, 0, self.input_shape[1])
            y1 = np.clip(y1, 0, self.input_shape[0])
            x2 = np.clip(x2, 0, self.input_shape[1])
            y2 = np.clip(y2, 0, self.input_shape[0])
            bbox = np.stack([x1, y1, x2, y2], axis=-1)

            bboxes.append(bbox)
            scores.append(class_score)

        # Scale adjustment
        bboxes = np.concatenate(bboxes, axis=0)
        if rescale:
            bboxes /= scale_factor
        scores = np.concatenate(scores, axis=0)

        # Non-Maximum Suppression
        bboxes_wh = bboxes.copy()
        bboxes_wh[:, 2:4] = bboxes_wh[:, 2:4] - bboxes_wh[:, 0:2]
        class_ids = np.argmax(scores, axis=1)
        scores = np.max(scores, axis=1)

        indexes = cv2.dnn.NMSBoxes(
            bboxes_wh.tolist(),
            scores.tolist(),
            self.class_score_th,
            self.nms_th,
        )

        # Check the number of cases after NMS processing
        if len(indexes) > 0:
            bboxes = bboxes[indexes[:, 0]]
            scores = scores[indexes[:, 0]]
            class_ids = class_ids[indexes[:, 0]]
        else:
            bboxes = np.array([])
            scores = np.array([])
            class_ids = np.array([])

        return bboxes, scores, class_ids
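
A hedged sketch of running the NanoDet wrapper on one image; the model file name is the constructor default and the drawing code is only illustrative.

import cv2

nanodet = NanoDetTFLite(model_path='model_float16_quant.tflite')
image = cv2.imread('street.jpg')  # placeholder image path
bboxes, scores, class_ids = nanodet.inference(image)
for (x1, y1, x2, y2), score, class_id in zip(bboxes, scores, class_ids):
    cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
    print(int(class_id), float(score))
cv2.imwrite('street_out.jpg', image)
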
Example #13
class DetectMask():
    def __init__(self):
        print("init")
        self.MODEL_NAME = "D:\TP_PROGS\Projects\TeProjSahara\model_faceMask\FaceMask_tflite"
        self.GRAPH_NAME = "model4.tflite"
        self.LABELMAP_NAME = "lbl.txt"
        self.min_conf_threshold = 0.2
        # use_TPU = args.edgetpu
        # self.listOfObjDetec = []

        # Path to label map file
        self.PATH_TO_LABELS = os.path.join(self.MODEL_NAME, self.LABELMAP_NAME)
        print(self.PATH_TO_LABELS)
        # Path to .tflite file, which contains the model that is used for object detection
        self.PATH_TO_CKPT = os.path.join(self.MODEL_NAME, self.GRAPH_NAME)

        # Load the label map
        with open(self.PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del (self.labels[0])

        # Load the Tensorflow Lite model.
        self.interpreter = Interpreter(model_path=self.PATH_TO_CKPT)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

    def masKDetect(self, img):
        # clearing list of prev objs
        # self.listOfObjDetec.clear()
        imH, imW, _ = img.shape
        image_resized = cv2.resize(img, (self.width, self.height))
        input_data = np.expand_dims(image_resized, axis=0)
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[
            0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[
            0]  # Confidence of detected objects
        for i in range(len(scores)):
            if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):
                # getting label/class
                object_name = self.labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index

                print("detected:", object_name, ":", int(scores[i] * 100))
                # self.listOfObjDetec.append(object_name)

                # debug
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))

                cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                # Draw label
                # object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class
                label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                      )  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label,
                                                      cv2.FONT_HERSHEY_SIMPLEX,
                                                      0.7, 2)  # Get font size
                label_ymin = max(
                    ymin, labelSize[1] + 10
                )  # Make sure not to draw label too close to top of window
                cv2.rectangle(
                    img, (xmin, label_ymin - labelSize[1] - 10),
                    (xmin + labelSize[0], label_ymin + baseLine - 10),
                    (255, 255, 255),
                    cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(img, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                            2)  # Draw label text

                # print(self.listOfObjDetec)
                # objDict = dict(Counter(self.listOfObjDetec))
                # print(objDict)
                # strg = ""
                # for i in objDict:
                #     print(i)
                #     strg += "Detected " + str(objDict[i]) + " " + i + "\n"
                #     print(strg)
                # All the results have been drawn on the image, now display the image
                # print(object_name)
                cv2.imshow('Mask detector', img)

                cv2.waitKey(0)
                cv2.destroyAllWindows()
                if object_name == "Nomask":
                    object_name = "No mask"
                strg = "Person is wearing " + object_name
                # Press any key to continue to next image, or press 'q' to quit
                return strg
            else:
                return "No Face Detected"
Example #14
class Classifier:
    """
    Perform image classification with the given model. The model is a .h5 file
    which if the classifier can not find it at the path it will download it
    from neuralet repository automatically.
    :param config: Is a Config instance which provides necessary parameters.
    """

    def __init__(self, config):
        self.config = config
        self.model_name = "OFMClassifier_edgetpu.tflite"
        if os.path.isfile(config.CLASSIFIER_MODEL_PATH):
            self.model_path = config.CLASSIFIER_MODEL_PATH
        else:
            self.model_path = 'data/classifiers/edgetpu/'
            if not os.path.isdir(self.model_path):
                os.makedirs(self.model_path)
            self.model_path = self.model_path + self.model_name

        # Frames Per Second
        self.fps = None
        if not os.path.isfile(self.model_path):
            url = "https://raw.githubusercontent.com/neuralet/neuralet-models/master/edge-tpu/OFMClassifier/OFMClassifier_edgetpu.tflite"
            print("model does not exist under: ", self.model_path, "downloading from ", url)
            wget.download(url, self.model_path)

        # Load TFLite model and allocate tensors
        self.interpreter = Interpreter(self.model_path, experimental_delegates=[load_delegate("libedgetpu.so.1")])
        self.interpreter.allocate_tensors()
        # Get the model input and output tensor details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def inference(self, resized_rgb_image) -> list:
        """
        Inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding class id output which is used for creating result
        Args:
            resized_rgb_image: Array of images with shape (no_images, img_height, img_width, channels)
        Returns:
            result: List of class id for each input image. ex: [0, 0, 1, 1, 0]
            scores: The classification confidence for each class. ex: [.99, .75, .80, 1.0]
        """
        if np.shape(resized_rgb_image)[0] == 0:
            return [], []
        resized_rgb_image = (resized_rgb_image * 255).astype("uint8")
        result = []
        net_results = []
        for img in resized_rgb_image:
            img = np.expand_dims(img, axis=0)
            self.interpreter.set_tensor(self.input_details[0]["index"], img)
            t_begin = time.perf_counter()
            self.interpreter.invoke()
            inference_time = time.perf_counter() - t_begin  # Second
            self.fps = convert_infr_time_to_fps(inference_time)
            net_output = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
            net_results.append(net_output)
            result.append(np.argmax(net_output))  # returns class id

        # TODO: optimized without for
        scores = []
        for i, itm in enumerate(net_results):
            scores.append(1)

        return result, scores
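
A hedged sketch of feeding face crops to the classifier; the config object is assumed to expose CLASSIFIER_MODEL_PATH, the crop size must match the model input, and the [0, 1] range matters because inference() rescales the images to uint8.

import numpy as np

classifier = Classifier(config)  # `config` is a placeholder Config-like object
# Two dummy crops in [0, 1]; a real pipeline would pass resized face images
# whose height and width match the model's expected input size.
crops = np.random.rand(2, 45, 45, 3).astype(np.float32)
class_ids, scores = classifier.inference(crops)
print(class_ids, scores)
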
Example #15
class Detector(object):
    def __init__(self, label_file, model_file, threshold):
        self._threshold = float(threshold)
        self.labels = self.load_labels(label_file)
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        input_details = self.interpreter.get_input_details()[0]
        _, self.input_height, self.input_width, _ = input_details['shape']
        self.tensor_index = input_details['index']

    def load_labels(self, path):
        # The label file is a single comma-separated list; surrounding quotes are stripped
        with open(path, 'r') as f:
            return {
                i: line.strip()
                for i, line in enumerate(f.read().replace('"', '').split(','))
            }

    def preprocess(self, img):
        # Resize to the model's expected input size
        img = cv2.resize(img, (self.input_width, self.input_height))
        # Scale pixel values from [0, 255] to [-1, 1]
        img = img.astype(np.float32)
        img = img / 255.
        img = img - 0.5
        img = img * 2.
        # Swap BGR (OpenCV default) to RGB
        img = img[:, :, ::-1]
        # Add the batch dimension: [1, H, W, 3]
        img = np.expand_dims(img, 0)
        return img

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(
            self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Returns a list of detection results, each a dictionary of object info."""
        img = self.preprocess(image)
        self.interpreter.set_tensor(self.tensor_index, img)
        self.interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        return boxes

    def detect(self, original_image):
        self.output_height, self.output_width = original_image.shape[0:2]
        start_time = time.time()
        results = self.detect_objects(original_image)
        elapsed_ms = (time.time() - start_time) * 1000
        fps = 1000 / elapsed_ms
        print("Estimated frames per second : {0:.2f} Inference time: {1:.2f} ms".
              format(fps, elapsed_ms))

        def _to_original_scale(boxes):
            minmax_boxes = to_minmax(boxes)
            minmax_boxes[:, 0] *= self.output_width
            minmax_boxes[:, 2] *= self.output_width
            minmax_boxes[:, 1] *= self.output_height
            minmax_boxes[:, 3] *= self.output_height
            return minmax_boxes.astype(int)

        boxes, probs = self.run(results)
        print(boxes)
        if len(boxes) > 0:
            boxes = _to_original_scale(boxes)
            original_image = draw_boxes(original_image, boxes, probs,
                                        self.labels)
        return cv2.imencode('.jpg', original_image)[1].tobytes()

    def run(self, netout):
        """Convert YOLO network output to bounding boxes.

        # Args
            netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)
                YOLO neural network output array

        # Returns
            boxes : array, shape of (N, 4)
                coordinate scale is normalized [0, 1]
            probs : array, shape of (N, nb_classes)
        """
        anchors = [
            0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282,
            3.52778, 9.77052, 9.16828
        ]
        nms_threshold = 0.2
        grid_h, grid_w, nb_box = netout.shape[:3]
        boxes = []

        # decode the output by the network
        netout[..., 4] = _sigmoid(netout[..., 4])
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(
            netout[..., 5:])
        netout[..., 5:] *= netout[..., 5:] > self._threshold

        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # elements 5 onwards are the (objectness-scaled) class probabilities
                    classes = netout[row, col, b, 5:]

                    if np.sum(classes) > 0:
                        # first 4 elements are x, y, w, and h
                        x, y, w, h = netout[row, col, b, :4]

                        x = (col + _sigmoid(x)
                             ) / grid_w  # center position, unit: image width
                        y = (row + _sigmoid(y)
                             ) / grid_h  # center position, unit: image height
                        w = anchors[2 * b + 0] * np.exp(
                            w) / grid_w  # unit: image width
                        h = anchors[2 * b + 1] * np.exp(
                            h) / grid_h  # unit: image height
                        confidence = netout[row, col, b, 4]
                        box = BoundBox(x, y, w, h, confidence, classes)
                        boxes.append(box)

        boxes = nms_boxes(boxes, len(classes), nms_threshold, self._threshold)
        boxes, probs = boxes_to_array(boxes)
        return boxes, probs
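
A short usage sketch for the Detector above (the file names and threshold are placeholders; it assumes a comma-separated label file and that the helpers to_minmax, draw_boxes, nms_boxes, and boxes_to_array from the original project are available):

# Hypothetical usage; paths and threshold are placeholders
detector = Detector(label_file="labels.csv",
                    model_file="yolo_detector.tflite",
                    threshold=0.3)

frame = cv2.imread("test.jpg")       # BGR image read with OpenCV
jpeg_bytes = detector.detect(frame)  # runs detection and draws the boxes
with open("result.jpg", "wb") as f:
    f.write(jpeg_bytes)              # detect() returns an encoded JPEG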
Example No. 16
    def analyzing(self):

        CWD_PATH = os.getcwd()
        output_path = os.path.join(CWD_PATH, 'analyze')
        preProcess = PreProcess()
        index = preProcess.nameImage('analyze')

        # If both an image AND a folder are specified, throw an error
        if (self.IM_NAME and self.IM_DIR):
            print('Error: specify either IM_NAME or IM_DIR, not both')
            sys.exit()

        # If neither an image nor a folder is specified, default to analyzing the 'new' folder
        if (not self.IM_NAME and not self.IM_DIR):
            self.IM_DIR = 'new'

        # Import TensorFlow libraries
        # If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow
        # If using Coral Edge TPU, import the load_delegate library
        pkg = importlib.util.find_spec('tensorflow')
        if pkg is None:
            from tflite_runtime.interpreter import Interpreter
        else:
            from tensorflow.lite.python.interpreter import Interpreter

        # Define path to images and grab all image filenames
        if self.IM_DIR:
            PATH_TO_IMAGES = os.path.join(CWD_PATH, self.IM_DIR)
            images = glob.glob(PATH_TO_IMAGES + '/*')

        elif self.IM_NAME:
            PATH_TO_IMAGES = os.path.join(CWD_PATH, self.IM_NAME)
            images = glob.glob(PATH_TO_IMAGES)

        # Path to .tflite file, which contains the model that is used for object detection
        PATH_TO_CKPT = os.path.join(CWD_PATH, self.MODEL_NAME, self.GRAPH_NAME)

        # Path to label map file
        PATH_TO_LABELS = os.path.join(CWD_PATH, self.MODEL_NAME,
                                      self.LABELMAP_NAME)

        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if labels[0] == '???':
            del (labels[0])

        # Load the Tensorflow Lite model.
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

        interpreter.allocate_tensors()

        # Get model details
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        height = input_details[0]['shape'][1]
        width = input_details[0]['shape'][2]
        print(width, height)

        floating_model = (input_details[0]['dtype'] == np.float32)

        input_mean = 127.5
        input_std = 127.5

        # Loop over every image and perform detection
        for image_path in images:
            leaf = flower = melon = 0
            # Load image and resize to expected shape [1xHxWx3]
            image = cv2.imread(image_path)
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            imH, imW, _ = image.shape
            image_resized = cv2.resize(image_rgb, (width, height),
                                       interpolation=cv2.INTER_AREA)
            input_data = np.expand_dims(image_resized, axis=0)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            if floating_model:
                input_data = (np.float32(input_data) - input_mean) / input_std

            # Perform the actual detection by running the model with the image as input
            interpreter.set_tensor(input_details[0]['index'], input_data)
            interpreter.invoke()

            # Retrieve detection results
            boxes = interpreter.get_tensor(output_details[0]['index'])[
                0]  # Bounding box coordinates of detected objects
            classes = interpreter.get_tensor(output_details[1]['index'])[
                0]  # Class index of detected objects
            scores = interpreter.get_tensor(output_details[2]['index'])[
                0]  # Confidence of detected objects
            #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            for i in range(len(scores)):
                if ((scores[i] > self.min_conf_threshold)
                        and (scores[i] <= 1.0)):
                    # Get bounding box coordinates and draw box
                    # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                    ymin = int(max(1, (boxes[i][0] * imH)))
                    xmin = int(max(1, (boxes[i][1] * imW)))
                    ymax = int(min(imH, (boxes[i][2] * imH)))
                    xmax = int(min(imW, (boxes[i][3] * imW)))

                    cv2.rectangle(image, (xmin, ymin), (xmax, ymax),
                                  (10, 255, 0), 2)

                    # Draw label
                    object_name = labels[int(
                        classes[i]
                    )]  # Look up object name from "labels" array using class index
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                          )  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        2)  # Get font size
                    label_ymin = max(
                        ymin, labelSize[1] + 10
                    )  # Make sure not to draw label too close to top of window
                    cv2.rectangle(
                        image, (xmin, label_ymin - labelSize[1] - 10),
                        (xmin + labelSize[0], label_ymin + baseLine - 10),
                        (255, 255, 255),
                        cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(image, label, (xmin, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                                2)  # Draw label text
                    if (object_name == 'leaf'):
                        leaf = leaf + 1
                    elif (object_name == 'flower'):
                        flower = flower + 1
                    else:
                        # Any remaining detection class is counted as a melon
                        melon = melon + 1
            # All the results have been drawn on the image, now display the image
            print('image', index, ':')
            print('leaf:', leaf)
            print('flower:', flower)
            print('melon:', melon)
            uploadToFirebase = DbFirebase(leaves=leaf,
                                          flowers=flower,
                                          melons=melon)
            uploadToFirebase.add()
            cv2.imshow('Object detector', image)
            out = os.path.join(output_path, str(index) + ".jpg")
            cv2.imwrite(out, image)

            index = index + 1
            # Brief delay so the image window refreshes before moving on to the next image
            cv2.waitKey(1)
        preProcess.moveImage()
        # Clean up
        cv2.destroyAllWindows()
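
The analyzing() method reads its settings from attributes of the enclosing object (IM_NAME, IM_DIR, MODEL_NAME, GRAPH_NAME, LABELMAP_NAME, min_conf_threshold). A minimal, hypothetical driver might look like the following; the class name Analyzer and every value below are assumptions for illustration only:

# Hypothetical driver; Analyzer stands for whichever class defines analyzing()
analyzer = Analyzer()
analyzer.IM_NAME = None                  # analyze a folder rather than a single image
analyzer.IM_DIR = 'new'                  # folder containing the images to analyze
analyzer.MODEL_NAME = 'model'
analyzer.GRAPH_NAME = 'detect.tflite'
analyzer.LABELMAP_NAME = 'labelmap.txt'
analyzer.min_conf_threshold = 0.5
analyzer.analyzing()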