Example 1
class Inference:
    def __init__(self):

        self.interpreter = None
        self.input_details = None
        self.output_details = None
        self.height_for_model = None
        self.width_for_model = None
        self.floating_model = None
        self.min_conf_threshold = None
        self.labels = None

    def get_interpreter(self,
                        model,
                        enable_tpu,
                        labels,
                        min_conf_threshold=0.5):
        """ Returns interpreter from the model
            Args:
                  model -> full path of the .tflite file ,
                  enable_tpu-> whether you want to use Edge TPU or not
        Returns:
                  interpreter object
        Raises:
                Raises exception if the Runtime Library for Edge TPU
                is not found

        """
        if enable_tpu == 'true':
            try:
                # loading the Edge TPU Runtime

                model, *device = model.split('@')
                if os.path.exists(model):
                    load_delegate(EDGETPU_SHARED_LIB)
                    self.interpreter = Interpreter(
                        model_path=model,
                        experimental_delegates=[
                            load_delegate(
                                EDGETPU_SHARED_LIB,
                                {'device': device[0]} if device else {})
                        ])
                else:
                    _LOGGER.error(
                        "Please make sure the model file exists")

            except OSError:
                _LOGGER.exception("Please install runtime for edge tpu ")

            except ValueError:
                _LOGGER.exception("Make sure edge tpu is plugged in ")

        else:

            self.interpreter = Interpreter(model_path=model)

        self.interpreter.allocate_tensors()

        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height_for_model = self.input_details[0]['shape'][1]
        self.width_for_model = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)
        self.min_conf_threshold = min_conf_threshold
        self.labels = labels
        return self.interpreter

    def perform_inference(self, input_data):
        """ Returns bounding box , class , score
            Args:
                   input_data ->  the input data to be fed into the model
            Returns:
                     boxes -> an array of bounding box of the objects detected
                     classes-> an array of the class of objects detected
                     scores-> an array of scores(0 to 1) of the objects detected
            Raises: None
        """

        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()
        # Retrieve detection results

        # Bounding box coordinates of detected objects
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]

        # Class index of detected objects
        classes = self.interpreter.get_tensor(
            self.output_details[1]['index'])[0]

        # Confidence of detected objects
        scores = self.interpreter.get_tensor(
            self.output_details[2]['index'])[0]

        return boxes, classes, scores
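
A minimal usage sketch for the Inference class above (the model path, label list and image are placeholders; it assumes cv2, numpy as np and the interpreter imports used by the class are available):

inference = Inference()
interpreter = inference.get_interpreter(
    model='detect.tflite',          # placeholder model path
    enable_tpu='false',             # pass 'true' only with an Edge TPU attached
    labels=['person', 'car'],       # placeholder label list
    min_conf_threshold=0.5)

frame = cv2.cvtColor(cv2.imread('image.jpg'), cv2.COLOR_BGR2RGB)  # placeholder image
frame = cv2.resize(frame, (inference.width_for_model, inference.height_for_model))
input_data = np.expand_dims(frame, axis=0)
if inference.floating_model:
    input_data = (np.float32(input_data) - 127.5) / 127.5
boxes, classes, scores = inference.perform_inference(input_data)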
Example 2
    def __init__(self,
                 weights='yolov5s.pt',
                 device=None,
                 dnn=False,
                 data=None):
        # Usage:
        #   PyTorch:      weights = *.pt
        #   TorchScript:            *.torchscript
        #   CoreML:                 *.mlmodel
        #   OpenVINO:               *.xml
        #   TensorFlow:             *_saved_model
        #   TensorFlow:             *.pb
        #   TensorFlow Lite:        *.tflite
        #   TensorFlow Edge TPU:    *_edgetpu.tflite
        #   ONNX Runtime:           *.onnx
        #   OpenCV DNN:             *.onnx with dnn=True
        #   TensorRT:               *.engine
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        suffix = Path(w).suffix.lower()
        suffixes = [
            '.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '',
            '.mlmodel', '.xml'
        ]
        check_suffix(w, suffixes)  # check weights have acceptable suffix
        pt, jit, onnx, engine, tflite, pb, saved_model, coreml, xml = (
            suffix == x for x in suffixes)  # backends
        stride, names = 64, [f'class{i}'
                             for i in range(1000)]  # assign defaults
        w = attempt_download(w)  # download if not local
        if data:  # data.yaml path (optional)
            with open(data, errors='ignore') as f:
                names = yaml.safe_load(f)['names']  # class names

        if pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w,
                                 map_location=device)
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(
                model, 'module') else model.names  # get class names
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            extra_files = {'config.txt': ''}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files)
            if extra_files['config.txt']:
                d = json.loads(extra_files['config.txt'])  # extra_files dict
                stride, names = int(d['stride']), d['names']
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
            check_requirements(('opencv-python>=4.5.4', ))
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            cuda = torch.cuda.is_available()
            check_requirements(
                ('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
            import onnxruntime
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'
                         ] if cuda else ['CPUExecutionProvider']
            session = onnxruntime.InferenceSession(w, providers=providers)
        elif xml:  # OpenVINO
            LOGGER.info(f'Loading {w} for OpenVINO inference...')
            check_requirements(
                ('openvino-dev', )
            )  # requires openvino-dev: https://pypi.org/project/openvino-dev/
            import openvino.inference_engine as ie
            core = ie.IECore()
            network = core.read_network(
                model=w,
                weights=Path(w).with_suffix('.bin'))  # *.xml, *.bin paths
            executable_network = core.load_network(network,
                                                   device_name='CPU',
                                                   num_requests=1)
        elif engine:  # TensorRT
            LOGGER.info(f'Loading {w} for TensorRT inference...')
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
            check_version(trt.__version__, '7.0.0',
                          hard=True)  # require tensorrt>=7.0.0
            Binding = namedtuple('Binding',
                                 ('name', 'dtype', 'shape', 'data', 'ptr'))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            bindings = OrderedDict()
            for index in range(model.num_bindings):
                name = model.get_binding_name(index)
                dtype = trt.nptype(model.get_binding_dtype(index))
                shape = tuple(model.get_binding_shape(index))
                data = torch.from_numpy(np.empty(
                    shape, dtype=np.dtype(dtype))).to(device)
                bindings[name] = Binding(name, dtype, shape, data,
                                         int(data.data_ptr()))
            binding_addrs = OrderedDict(
                (n, d.ptr) for n, d in bindings.items())
            context = model.create_execution_context()
            batch_size = bindings['images'].shape[0]
        elif coreml:  # CoreML
            LOGGER.info(f'Loading {w} for CoreML inference...')
            import coremltools as ct
            model = ct.models.MLModel(w)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            if saved_model:  # SavedModel
                LOGGER.info(
                    f'Loading {w} for TensorFlow SavedModel inference...')
                import tensorflow as tf
                model = tf.keras.models.load_model(w)
            elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
                LOGGER.info(
                    f'Loading {w} for TensorFlow GraphDef inference...')
                import tensorflow as tf

                def wrap_frozen_graph(gd, inputs, outputs):
                    x = tf.compat.v1.wrap_function(
                        lambda: tf.compat.v1.import_graph_def(gd, name=""),
                        [])  # wrapped
                    return x.prune(
                        tf.nest.map_structure(x.graph.as_graph_element,
                                              inputs),
                        tf.nest.map_structure(x.graph.as_graph_element,
                                              outputs))

                graph_def = tf.Graph().as_graph_def()
                graph_def.ParseFromString(open(w, 'rb').read())
                frozen_func = wrap_frozen_graph(gd=graph_def,
                                                inputs="x:0",
                                                outputs="Identity:0")
            elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
                try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                    from tflite_runtime.interpreter import Interpreter, load_delegate
                except ImportError:
                    import tensorflow as tf
                    Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
                if 'edgetpu' in w.lower(
                ):  # Edge TPU https://coral.ai/software/#edgetpu-runtime
                    LOGGER.info(
                        f'Loading {w} for TensorFlow Lite Edge TPU inference...'
                    )
                    delegate = {
                        'Linux': 'libedgetpu.so.1',
                        'Darwin': 'libedgetpu.1.dylib',
                        'Windows': 'edgetpu.dll'
                    }[platform.system()]
                    interpreter = Interpreter(
                        model_path=w,
                        experimental_delegates=[load_delegate(delegate)])
                else:  # Lite
                    LOGGER.info(
                        f'Loading {w} for TensorFlow Lite inference...')
                    interpreter = Interpreter(
                        model_path=w)  # load TFLite model
                interpreter.allocate_tensors()  # allocate
                input_details = interpreter.get_input_details()  # inputs
                output_details = interpreter.get_output_details()  # outputs
        self.__dict__.update(locals())  # assign all variables to self
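
A small standalone sketch of the suffix-based backend dispatch used above (the weights file name is hypothetical; only pathlib is needed):

from pathlib import Path

suffixes = ['.pt', '.torchscript', '.onnx', '.engine', '.tflite', '.pb', '', '.mlmodel', '.xml']
suffix = Path('yolov5s.tflite').suffix.lower()  # hypothetical weights file
pt, jit, onnx, engine, tflite, pb, saved_model, coreml, xml = (suffix == s for s in suffixes)
print(tflite)  # True -> the TensorFlow Lite branch above would be selected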
Example 3
# Load the TensorFlow Lite model.
# If using Edge TPU, use special load_delegate argument
if use_TPU:
    interpreter = Interpreter(
        model_path=PATH_TO_CKPT,
        experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    print(PATH_TO_CKPT)
else:
    interpreter = Interpreter(model_path=PATH_TO_CKPT)

interpreter.allocate_tensors()

# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

floating_model = (input_details[0]['dtype'] == np.float32)

input_mean = 127.5
input_std = 127.5

# Loop over every image and perform detection
for image_path in images:
    # Load image and resize to expected shape [1xHxWx3]
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
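    # The snippet above stops after resizing. A hedged sketch of the remaining
    # steps, following the same pattern as the other examples in this collection
    # (any thresholding or drawing code is assumed to live elsewhere):
    input_data = np.expand_dims(image_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Run detection and read back boxes, class indices and scores
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]
    classes = interpreter.get_tensor(output_details[1]['index'])[0]
    scores = interpreter.get_tensor(output_details[2]['index'])[0]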
Example 4
class PoseEngine():
    """Engine used for pose tasks."""

    def __init__(self, model_path, mirror=False):
        """Creates a PoseEngine with given model.
        Args:
          model_path: String, path to TF-Lite Flatbuffer file.
          mirror: Flip keypoints horizontally.
        Raises:
          ValueError: An error occurred when model output is invalid.
        """
        edgetpu_delegate = load_delegate(EDGETPU_SHARED_LIB)
        posenet_decoder_delegate = load_delegate(POSENET_SHARED_LIB)
        self._interpreter = Interpreter(
            model_path, experimental_delegates=[edgetpu_delegate, posenet_decoder_delegate])
        self._interpreter.allocate_tensors()

        self._mirror = mirror

        self._input_tensor_shape = self.get_input_tensor_shape()
        if (self._input_tensor_shape.size != 4 or
                self._input_tensor_shape[3] != 3 or
                self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self._input_height, self._input_width, self._input_depth = self.get_input_tensor_shape()
        self._input_type = self._interpreter.get_input_details()[0]['dtype']
        self._inf_time = 0

    def run_inference(self, input_data):
        """Run inference using the zero copy feature from pycoral and returns inference time in ms.
        """
        start = time.monotonic()
        edgetpu.run_inference(self._interpreter, input_data)
        self._inf_time = time.monotonic() - start
        return (self._inf_time * 1000)

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.
           For ideal results make sure the image fed to this function is close to the
           expected input size - it is the caller's responsibility to resize the
           image accordingly.
        Args:
          img: PIL Image containing the image (the engine resizes it with PIL)
        """
        input_details = self._interpreter.get_input_details()
        image_width, image_height = img.size
        resized_image = img.resize(
            (self._input_width, self._input_height), Image.NEAREST)
        input_data = np.expand_dims(resized_image, axis=0)
        if self._input_type is np.float32:
            # Floating point versions of posenet take image data in [-1,1] range.
            input_data = np.float32(resized_image) / 128.0 - 1.0
        else:
            # Assuming to be uint8
            input_data = np.asarray(resized_image)
        self.run_inference(input_data.flatten())
        return self.ParseOutput()

    def get_input_tensor_shape(self):
        """Returns input tensor shape."""
        return self._interpreter.get_input_details()[0]['shape']

    def get_output_tensor(self, idx):
        """Returns output tensor view."""
        return np.squeeze(self._interpreter.tensor(
            self._interpreter.get_output_details()[idx]['index'])())

    def ParseOutput(self):
        """Parses interpreter output tensors and returns decoded poses."""
        keypoints = self.get_output_tensor(0)
        keypoint_scores = self.get_output_tensor(1)
        pose_scores = self.get_output_tensor(2)
        num_poses = self.get_output_tensor(3)
        poses = []
        for i in range(int(num_poses)):
            pose_score = pose_scores[i]
            pose_keypoints = {}
            for j, point in enumerate(keypoints[i]):
                y, x = point
                if self._mirror:
                    y = self._input_width - y
                pose_keypoints[KeypointType(j)] = Keypoint(
                    Point(x, y), keypoint_scores[i, j])
            poses.append(Pose(pose_keypoints, pose_score))
        return poses, self._inf_time
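
A hedged usage sketch for the PoseEngine above (the model path is a placeholder, and the Edge TPU runtime plus the posenet decoder delegate must be installed, as the constructor assumes):

from PIL import Image

engine = PoseEngine('posenet_decoder_edgetpu.tflite')  # placeholder model path
img = Image.open('people.jpg')                         # placeholder image; resized internally
poses, inference_time = engine.DetectPosesInImage(img)
print(len(poses), 'poses detected in', inference_time, 's')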
Example 5
def process_frame(frame):

    global entry, lime_count, marker_count, lime_sizes, found_list, total_marker_width, pixel_per_metric
    interpreter = Interpreter(model_path=PATH_TO_CKPT, num_threads=4)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    #frame_rgb = frame
    frame_resized = cv2.resize(frame_rgb, (width, height))
    #frame_resized = cv2.resize(frame_rgb, (480, 320))
    input_data = np.expand_dims(frame_resized, axis=0)
    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    try:
        start_time = time.time()
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        elapsed_time.append(time.time() - start_time)
    except Exception:
        print('Thread error: interpreter not referenced')
    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[
        0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(
        output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(
        output_details[2]['index'])[0]  # Confidence of detected objects
    #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))

            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 4)

            # Draw label
            object_name = labels[int(
                classes[i]
            )]  # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                  )  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label,
                                                  cv2.FONT_HERSHEY_SIMPLEX,
                                                  0.7, 2)  # Get font size
            label_ymin = max(
                ymin, labelSize[1] +
                10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                          (xmin + labelSize[0], label_ymin + baseLine - 10),
                          (255, 255, 255),
                          cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                        2)  # Draw label text

            # count objects and measure the diameter of each lime
            if xmin < LINE2 and xmax > LINE1 and not entry:
                entry = True

            if entry and xmax <= LINE1:
                entry = False

                if (int(classes[i]) + 1 == 1):
                    lime_found = time.time() - start_total_time

                    try:
                        lime_count += 1
                        lime_diameter = (
                            (xmax - xmin) +
                            (ymax - ymin)) / (2 * pixel_per_metric)
                        lime_sizes.append(lime_diameter)
                        found_list.append(lime_found)
                        print(
                            f'lime {lime_count} is found at {lime_found}, Diameter(size): {lime_diameter * 1000:.3f} mm'
                        )
                    except Exception:
                        # the marker must come first for calculating pixel/metric
                        lime_count -= 1
                        marker_count += 1
                        total_marker_width += ((xmax - xmin) +
                                               (ymax - ymin)) / 2
                        pixel_per_metric = (total_marker_width /
                                            marker_count) / MARKER_DIAMETER

                elif (int(classes[i]) + 1 == 2):
                    marker_count += 1
                    total_marker_width += ((xmax - xmin) + (ymax - ymin)) / 2

                    pixel_per_metric = (total_marker_width /
                                        marker_count) / MARKER_DIAMETER
    # insert Lime Count information text
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(
        frame,
        'Lime Count: ' + str(lime_count),
        (10, 35),
        font,
        0.8,
        (0, 0xFF, 0xFF),
        2,
        cv2.LINE_AA,
    )

    # insert Marker Count information text
    cv2.putText(
        frame,
        'Marker Count: ' + str(marker_count),
        (10, 55),
        font,
        0.8,
        (0, 0xFF, 0xFF),
        2,
        cv2.LINE_AA,
    )

    # overlay with line
    pt1 = (LINE1, 0)
    pt2 = (LINE1, int(sqsize))
    cv2.line(frame, pt1, pt2, (0, 0, 255), 2)

    pt1 = (LINE2, 0)
    pt2 = (LINE2, int(sqsize))
    cv2.line(frame, pt1, pt2, (0, 0, 255), 2)

    frame = cv2.resize(frame, (480, 320))

    return frame
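
The lime sizing above hinges on a pixels-per-metric calibration from a marker of known diameter; a standalone sketch of that arithmetic with hypothetical numbers:

MARKER_DIAMETER = 0.025                 # known marker diameter in metres (hypothetical)
marker_box_px = 125.0                   # average of the marker box's width and height, in pixels
pixel_per_metric = marker_box_px / MARKER_DIAMETER   # pixels per metre
lime_box_px = 240.0                     # average side of a lime's bounding box, in pixels
lime_diameter = lime_box_px / pixel_per_metric       # metres
print(f'Diameter(size): {lime_diameter * 1000:.3f} mm')  # -> Diameter(size): 48.000 mm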
Example 6
class DetectorTFLite:
    def __init__(self,
                 path_to_checkpoint,
                 path_to_labelmap,
                 filter_labels=None):
        self.filter_labels = filter_labels

        with open(path_to_labelmap, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del self.labels[0]

        self.interpreter = Interpreter(model_path=path_to_checkpoint)
        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.tf_height = self.input_details[0]['shape'][1]
        self.tf_width = self.input_details[0]['shape'][2]

        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5

    def ExtractBoxes(self, imH, imW, boxes, classes, scores):
        det_boxes = []
        for i in range(len(scores)):
            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            miny = int(max(1, (boxes[i][0] * imH)))
            minx = int(max(1, (boxes[i][1] * imW)))
            maxy = int(min(imH, (boxes[i][2] * imH)))
            maxx = int(min(imW, (boxes[i][3] * imW)))
            label = self.labels[int(classes[i])]
            det_boxes.append((minx, miny, maxx, maxy, label, float(scores[i])))
        return det_boxes

    def DetectFromImage(self, img):
        imH, imW, _ = img.shape
        # Acquire frame and resize to expected shape [1xHxWx3]
        frame_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.tf_width, self.tf_height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) -
                          self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[
            0]  # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[
            0]  # Confidence of detected objects

        return self.ExtractBoxes(imH, imW, boxes, classes, scores)

    def DisplayDetection(self, image, box, det_time=None):
        img = image.copy()

        x_min = box[0]
        y_min = box[1]
        x_max = box[2]
        y_max = box[3]
        cls = str(box[4])
        score = str(np.round(box[-1], 2))

        text = cls + ": " + score
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), (0, 255, 0), 1)
        cv2.rectangle(img, (x_min, y_min - 20), (x_max, y_min),
                      (255, 255, 255), -1)  # white strip behind the label text
        cv2.putText(img, text, (x_min + 5, y_min - 7),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        if det_time is not None:
            fps = round(1000. / det_time, 1)
            fps_txt = str(fps) + " FPS"
            cv2.putText(img, fps_txt, (25, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                        (0, 0, 0), 2)

        return img
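
A hedged usage sketch for DetectorTFLite (file names are placeholders; cv2, numpy and the Interpreter import used by the class are assumed to be available):

detector = DetectorTFLite('detect.tflite', 'labelmap.txt')   # placeholder paths
img = cv2.imread('street.jpg')                               # placeholder BGR image
for box in detector.DetectFromImage(img):
    xmin, ymin, xmax, ymax, label, score = box
    if score > 0.5:
        img = detector.DisplayDetection(img, box)
cv2.imwrite('out.jpg', img)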
Example 7
    def startStream(self, modeldir, graph, labels, threshold, resolution,
                    edgetpu):
        MODEL_NAME = modeldir
        GRAPH_NAME = graph
        LABELMAP_NAME = labels
        min_conf_threshold = float(threshold)
        resW, resH = resolution.split('x')
        imW, imH = int(resW), int(resH)
        use_TPU = edgetpu

        # Import TensorFlow libraries
        # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
        # If using Coral Edge TPU, import the load_delegate library
        pkg = importlib.util.find_spec('tflite_runtime')
        if pkg:
            from tflite_runtime.interpreter import Interpreter
            if use_TPU:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if use_TPU:
                from tensorflow.lite.python.interpreter import load_delegate

        # If using Edge TPU, assign filename for Edge TPU model
        if use_TPU:
            # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
            if (GRAPH_NAME == 'detect.tflite'):
                GRAPH_NAME = 'edgetpu.tflite'

        # Get path to current working directory
        CWD_PATH = os.getcwd()

        # Path to .tflite file, which contains the model that is used for object detection
        PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

        # Path to label map file
        PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if labels[0] == '???':
            del labels[0]

        # Load the TensorFlow Lite model.
        # If using Edge TPU, use special load_delegate argument
        if use_TPU:
            interpreter = Interpreter(
                model_path=PATH_TO_CKPT,
                experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            interpreter = Interpreter(model_path=PATH_TO_CKPT)

        interpreter.allocate_tensors()

        # Get model details
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        height = input_details[0]['shape'][1]
        width = input_details[0]['shape'][2]

        floating_model = (input_details[0]['dtype'] == np.float32)

        input_mean = 127.5
        input_std = 127.5

        # Initialize frame rate calculation
        frame_rate_calc = 1
        freq = cv2.getTickFrequency()

        # Initialize video stream
        videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
        time.sleep(1)

        # Create window
        cv2.namedWindow('Object detector', cv2.WINDOW_NORMAL)

        #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
        while True:

            # Start timer (for calculating frame rate)
            t1 = cv2.getTickCount()

            # Grab frame from video stream
            frame1 = videostream.read()

            # Acquire frame and resize to expected shape [1xHxWx3]
            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (width, height))
            input_data = np.expand_dims(frame_resized, axis=0)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            if floating_model:
                input_data = (np.float32(input_data) - input_mean) / input_std

            # Perform the actual detection by running the model with the image as input
            interpreter.set_tensor(input_details[0]['index'], input_data)
            interpreter.invoke()

            # Retrieve detection results
            boxes = interpreter.get_tensor(output_details[0]['index'])[
                0]  # Bounding box coordinates of detected objects
            classes = interpreter.get_tensor(output_details[1]['index'])[
                0]  # Class index of detected objects
            scores = interpreter.get_tensor(output_details[2]['index'])[
                0]  # Confidence of detected objects
            #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            for i in range(len(scores)):
                if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)
                        and (labels[int(classes[i])] == 'person')):

                    # Get bounding box coordinates and draw box
                    # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                    ymin = int(max(1, (boxes[i][0] * imH)))
                    xmin = int(max(1, (boxes[i][1] * imW)))
                    ymax = int(min(imH, (boxes[i][2] * imH)))
                    xmax = int(min(imW, (boxes[i][3] * imW)))

                    # print(self.detect)

                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (10, 255, 0), 2)

                    # Draw label
                    object_name = labels[int(
                        classes[i]
                    )]  # Look up object name from "labels" array using class index
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                          )  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        2)  # Get font size
                    label_ymin = max(
                        ymin, labelSize[1] + 10
                    )  # Make sure not to draw label too close to top of window
                    cv2.rectangle(
                        frame, (xmin, label_ymin - labelSize[1] - 10),
                        (xmin + labelSize[0], label_ymin + baseLine - 10),
                        (255, 255, 255),
                        cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, label, (xmin, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                                2)  # Draw label text

                    # Draw circle in center
                    xcenter = xmin + (int(round((xmax - xmin) / 2)))
                    ycenter = ymin + (int(round((ymax - ymin) / 2)))
                    self.detect = setDetect(xcenter, ycenter, imH, imW)

                    cv2.circle(frame, (xcenter, ycenter),
                               5, (0, 0, 255),
                               thickness=-1)

                    # Print info
                    # print('Object ' + str(i) + ': ' + object_name + ' at (' + str(xcenter) + ', ' + str(ycenter) + ')')

            # Draw framerate in corner of frame
            cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc),
                        (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0),
                        2, cv2.LINE_AA)
            # All the results have been drawn on the frame, so it's time to display it.
            cv2.imshow('Object detector', frame)

            # Calculate framerate
            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / freq
            frame_rate_calc = 1 / time1

            # Press 'q' to quit
            if cv2.waitKey(1) == ord('q'):
                break

        # Clean up
        cv2.destroyAllWindows()
        videostream.stop()
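
Several of these examples normalize float-model inputs with mean 127.5 and std 127.5; a one-line check of what that does to the 8-bit pixel range:

import numpy as np

pixels = np.array([0, 127.5, 255], dtype=np.float32)
print((pixels - 127.5) / 127.5)  # -> [-1.  0.  1.], i.e. uint8 values mapped into [-1, 1]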
Example 8
class object_detector:

  def __init__(self):
    PATH_TO_CKPT = rospy.get_param("/object_detector/weights_path")
    PATH_TO_LABELS = rospy.get_param("/object_detector/labels_path")
    camera_input = rospy.get_param("/object_detector/cam_feed")
    use_tpu = int(rospy.get_param("/object_detector/tpu"))
    self.min_conf_threshold = float(rospy.get_param("/object_detector/threshold"))
    self.imW = int(rospy.get_param("/object_detector/imW"))
    self.imH = int(rospy.get_param("/object_detector/imH"))
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_tpu:
          from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_tpu:
          from tensorflow.lite.python.interpreter import load_delegate
    if use_tpu:
      # If the weights path points at the default CPU model, switch to the Edge TPU variant
      if os.path.basename(PATH_TO_CKPT) == 'detect.tflite':
        PATH_TO_CKPT = os.path.join(os.path.dirname(PATH_TO_CKPT), 'edgetpu.tflite')
    with open(PATH_TO_LABELS, 'r') as f:
        self.labels = [line.strip() for line in f.readlines()]
    if self.labels[0] == '???':
        del self.labels[0]

    # Load the TensorFlow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_tpu:
        self.interpreter = Interpreter(model_path=PATH_TO_CKPT,
                              experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
    else:
        self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
    self.interpreter.allocate_tensors()

    # Get model details
    self.input_details = self.interpreter.get_input_details()
    self.output_details = self.interpreter.get_output_details()
    self.height = self.input_details[0]['shape'][1]
    self.width = self.input_details[0]['shape'][2]

    self.floating_model = (self.input_details[0]['dtype'] == np.float32)

    self.input_mean = 127.5
    self.input_std = 127.5
    # Initialize frame rate calculation
    self.frame_rate_calc = 1
    self.freq = cv2.getTickFrequency()

    self.image_pub = rospy.Publisher("/detected_image", Image, queue_size=10)
    self.bridge = CvBridge()
    self.image_sub = rospy.Subscriber(camera_input, Image, self.callback)

  def callback(self, data):
    t1 = cv2.getTickCount()
    try:
      cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
    except CvBridgeError as e:
      print(e)
      return  # skip this frame if the image conversion fails

    frame = cv_image.copy()
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
    input_data = np.expand_dims(frame_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if self.floating_model:
        input_data = (np.float32(input_data) - self.input_mean) / self.input_std

    # Perform the actual detection by running the model with the image as input
    self.interpreter.set_tensor(self.input_details[0]['index'],input_data)
    self.interpreter.invoke()
    # Retrieve detection results
    boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0] # Bounding box coordinates of detected objects
    classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0] # Class index of detected objects
    scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0] # Confidence of detected objects
    #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):

            # Get bounding box coordinates and draw box
            # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
            ymin = int(max(1,(boxes[i][0] * self.imH)))
            xmin = int(max(1,(boxes[i][1] * self.imW)))
            ymax = int(min(self.imH,(boxes[i][2] * self.imH)))
            xmax = int(min(self.imW,(boxes[i][3] * self.imW)))
            
            cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)

            # Draw label
            object_name = self.labels[int(classes[i])] # Look up object name from "labels" array using class index
            label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
            label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
            cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
            cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text

    # All the results have been drawn on the frame, so it's time to display it.

    # Draw framerate in corner of frame
    # cv2.putText(frame,'FPS: {0:.2f}'.format(frame_rate_calc),(30,50),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),2,cv2.LINE_AA)
    t2 = cv2.getTickCount()
    time1 = (t2 - t1) / self.freq
    frame_rate_calc = 1 / time1

    cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

    try:
      self.image_pub.publish(self.bridge.cv2_to_imgmsg(frame, "bgr8"))
    except CvBridgeError as e:
      print(e)
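
A hedged sketch of running the class above as a ROS node (the node name is an assumption based on the /object_detector parameter namespace; rospy is assumed to be imported as in the class):

if __name__ == '__main__':
  rospy.init_node('object_detector', anonymous=True)
  detector = object_detector()   # reads the /object_detector/* parameters at construction
  try:
    rospy.spin()                 # hand control to the image subscriber callback
  except KeyboardInterrupt:
    pass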
Example 9
class TeachableMachine:
    """
Functions:
:meth:`~openpibo.vision.TeachableMachine.load`
:meth:`~openpibo.vision.TeachableMachine.predict`

  파이보의 카메라 Teachable Machine 기능을 사용합니다.

  * ``이미지 프로젝트``의 ``표준 이미지 모델``을 사용합니다.
  * ``Teachable Machine``에서 학습한 모델을 적용하여 추론할 수 있습니다.

  example::

    from openpibo.vision import TeachableMachine

    tm = TeachableMachine()
    # 아래의 모든 예제 이전에 위 코드를 먼저 사용합니다.
  """
    def __init__(self):
        pass

    def load(self, model_path, label_path):
        """
    Teachable Machine 모델을 초기화 합니다.

    example::

      tm.load('model_keras.h5', 'labels.txt')

    :param str model_path: Teachable Machine에서 학습한 모델 파일

    :param str label_path: Teachable Machine에서 학습한 라벨 파일
    """
        with open(label_path, 'r') as f:
            c = f.readlines()
            class_names = [item.split(maxsplit=1)[1].strip('\n') for item in c]

        # Load TFLite model and allocate tensors
        self.interpreter = Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()

        # Get input and output tensors.
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # check the type of the input tensor
        self.floating_model = self.input_details[0]['dtype'] == np.float32

        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.class_names = class_names

    def predict(self, img):
        """
    적용한 Teachable Machine 모델을 기반으로 추론합니

    example::

      cm = Camera()
      img = cm.read()
      tm.predict(img)

    :param numpy.ndarray img: 이미지 객체

    :returns: 추론 결과, 가장 높은 확률을 가진 클래스 명
    """

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.width, self.height))
        image = Image.fromarray(img)

        # Add a batch dimension
        input_data = np.expand_dims(image, axis=0)

        if self.floating_model:
            input_data = (np.float32(input_data) - 127.5) / 127.5

        # feed data to input tensor and run the interpreter
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Obtain results and map them to the classes
        preds = self.interpreter.get_tensor(self.output_details[0]['index'])
        preds = np.squeeze(preds)
        return self.class_names[np.argmax(preds)], preds
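
An alternative hedged usage sketch that feeds predict an image read with OpenCV instead of the openpibo Camera (file names are placeholders; the model must be a TFLite export, since load wraps a TFLite Interpreter):

tm = TeachableMachine()
tm.load('model_unquant.tflite', 'labels.txt')   # placeholder paths
img = cv2.imread('sample.jpg')                  # placeholder BGR image
name, preds = tm.predict(img)
print(name, float(np.max(preds)))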
class PoseEngine:
    """Engine used for pose tasks."""
    def __init__(self,
                 model_path,
                 mirror=False,
                 offsetRefineStep=2,
                 scoreThreshold=0.8,
                 maxPoseDetections=5,
                 nmsRadius=30,
                 minPoseConfidence=0.15):
        """Creates a PoseEngine with given model.

        Args:
          model_path: String, path to TF-Lite Flatbuffer file.
          mirror: Flip keypoints horizontally

        Raises:
          ValueError: An error occurred when model output is invalid.
        """
        self.interpreter = Interpreter(model_path)
        self.interpreter.allocate_tensors()

        self._mirror = mirror

        self._input_tensor_shape = self.get_input_tensor_shape()
        if (self._input_tensor_shape.size != 4
                or self._input_tensor_shape[3] != 3
                or self._input_tensor_shape[0] != 1):
            raise ValueError(
                ('Image model should have input shape [1, height, width, 3]!'
                 ' This model has {}.'.format(self._input_tensor_shape)))
        _, self.image_height, self.image_width, self.image_depth = self.get_input_tensor_shape(
        )

        self.heatmaps_nx = self.interpreter.get_output_details()[0]['shape'][2]
        self.heatmaps_ny = self.interpreter.get_output_details()[0]['shape'][1]
        self.heatmaps_stride_x = self.getStride(self.image_width,
                                                self.heatmaps_nx)
        self.heatmaps_stride_y = self.getStride(self.image_height,
                                                self.heatmaps_ny)
        self.quant_heatmaps_r, self.quant_heatmaps_off = self.interpreter.get_output_details(
        )[0]['quantization']
        self.quant_offsets_short_r, self.quant_offsets_short_off = self.interpreter.get_output_details(
        )[1]['quantization']
        self.quant_offsets_mid_r, self.quant_offsets_mid_off = self.interpreter.get_output_details(
        )[2]['quantization']

        self.offsetRefineStep = offsetRefineStep
        self.scoreThreshold = scoreThreshold
        self.maxPoseDetections = maxPoseDetections
        self.nmsRadius = nmsRadius
        self.sqRadius = self.nmsRadius * self.nmsRadius
        self.minPoseConfidence = minPoseConfidence

        # The API returns all the output tensors flattened and concatenated. We
        # have to figure out the boundaries from the tensor shapes & sizes.
        offset = 0
        self._output_offsets = [0]
        for size in self.get_all_output_tensors_sizes():
            offset += size
            self._output_offsets.append(offset)

    def getStride(self, l, n):
        strides = (8, 16, 32)
        return strides[np.argmin(np.abs(strides - l / n))]

    def get_input_tensor_shape(self):
        return self.interpreter.get_input_details()[0]['shape']

    def get_all_output_tensors_sizes(self):
        sizes = np.array([], dtype='int32')
        for d in self.interpreter.get_output_details():
            s = np.squeeze(self.interpreter.get_tensor(
                d['index'])).flatten().size
            sizes = np.append(sizes, int(s))
        return sizes

    def DetectPosesInImage(self, img):
        """Detects poses in a given image.

           For ideal results make sure the image fed to this function is close to the
           expected input size - it is the caller's responsibility to resize the
           image accordingly.

        Args:
          img: numpy array containing image
        """

        # Extend or crop the input to match the input shape of the network.
        if img.shape[0] < self.image_height or img.shape[1] < self.image_width:
            img = np.pad(
                img, [[0, max(0, self.image_height - img.shape[0])],
                      [0, max(0, self.image_width - img.shape[1])], [0, 0]],
                mode='constant')
        img = img[0:self.image_height, 0:self.image_width]
        assert (img.shape == tuple(self._input_tensor_shape[1:]))

        # Run the inference (API expects the data to be flattened)
        return self.ParseOutput(self.run_inference(img))

    def run_inference(self, img):
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)
        input_tensor()[:, :, :, :] = np.frombuffer(img, dtype='uint8').reshape(
            input_tensor().shape)
        start_time = time.monotonic()
        self.interpreter.invoke()
        elapsed_ms = (time.monotonic() - start_time) * 1000
        out = np.empty(0)
        for d in self.interpreter.get_output_details():
            o = np.squeeze(self.interpreter.get_tensor(d['index'])).flatten()
            out = np.append(out, o)
        return (elapsed_ms, out)

    def logistic(self, x):
        return 1 / (1 + np.exp(-x))

    def isPeak(self, heatmaps_flat, index):
        maxindex = index // len(KEYPOINTS)
        maxkeypoint = index % len(KEYPOINTS)

        y_index = maxindex // self.heatmaps_nx
        x_index = maxindex % self.heatmaps_nx

        y_index_min = np.max((y_index - 1, 0))
        y_index_max = np.min((y_index + 1, self.heatmaps_ny - 1))
        x_index_min = np.max((x_index - 1, 0))
        x_index_max = np.min((x_index + 1, self.heatmaps_nx - 1))

        for y_current in range(y_index_min, y_index_max + 1):
            for x_current in range(x_index_min, x_index_max + 1):
                index_current = len(KEYPOINTS) * (
                    y_current * self.heatmaps_nx + x_current) + maxkeypoint
                if (heatmaps_flat[index_current] >
                        heatmaps_flat[index]) and (index_current != index):
                    return False
        return True

    def ParseOutput(self, output):
        inference_time, output = output
        outputs = [
            output[i:j]
            for i, j in zip(self._output_offsets, self._output_offsets[1:])
        ]

        heatmaps = outputs[0].reshape(-1, len(KEYPOINTS))
        offsets_short_y = outputs[1].reshape(
            -1, 2 * len(KEYPOINTS))[:, 0:len(KEYPOINTS)]
        offsets_short_x = outputs[1].reshape(
            -1, 2 * len(KEYPOINTS))[:, len(KEYPOINTS):2 * len(KEYPOINTS)]
        offsets_mid_fwd_y = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, 0:len(poseChain)]
        offsets_mid_fwd_x = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, len(poseChain):2 * len(poseChain)]
        offsets_mid_bwd_y = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, 2 * len(poseChain):3 * len(poseChain)]
        offsets_mid_bwd_x = outputs[2].reshape(
            -1, 4 * len(poseChain))[:, 3 * len(poseChain):4 * len(poseChain)]
        heatmaps = self.logistic(
            (heatmaps - self.quant_heatmaps_off) * self.quant_heatmaps_r)
        heatmaps_flat = heatmaps.flatten()
        offsets_short_y = (offsets_short_y - self.quant_offsets_short_off
                           ) * self.quant_offsets_short_r
        offsets_short_x = (offsets_short_x - self.quant_offsets_short_off
                           ) * self.quant_offsets_short_r
        offsets_mid_fwd_y = (offsets_mid_fwd_y - self.quant_offsets_mid_off
                             ) * self.quant_offsets_mid_r
        offsets_mid_fwd_x = (offsets_mid_fwd_x - self.quant_offsets_mid_off
                             ) * self.quant_offsets_mid_r
        offsets_mid_bwd_y = (offsets_mid_bwd_y - self.quant_offsets_mid_off
                             ) * self.quant_offsets_mid_r
        offsets_mid_bwd_x = (offsets_mid_bwd_x - self.quant_offsets_mid_off
                             ) * self.quant_offsets_mid_r

        # Obtaining the peaks of heatmaps larger than scoreThreshold
        orderedindices = np.argsort(heatmaps_flat)[::-1]
        largeheatmaps_indices = np.empty(0, dtype='int32')
        for i in range(len(orderedindices)):
            if heatmaps_flat[orderedindices[i]] < self.scoreThreshold:
                break
            if self.isPeak(heatmaps_flat, orderedindices[i]):
                largeheatmaps_indices = np.append(largeheatmaps_indices,
                                                  orderedindices[i])

        pose_list = np.full(self.maxPoseDetections * 2 * len(KEYPOINTS),
                            0.0,
                            dtype='float32').reshape(-1, len(KEYPOINTS), 2)
        maxindex_list = np.full(self.maxPoseDetections * len(KEYPOINTS),
                                -1,
                                dtype='int32').reshape(-1, len(KEYPOINTS))
        score_list = np.full(self.maxPoseDetections * len(KEYPOINTS),
                             0.0,
                             dtype='float32').reshape(-1, len(KEYPOINTS))
        pose_score_list = np.full(self.maxPoseDetections, 0.0, dtype='float32')

        nPoses = 0
        # obtaining at most maxPoseDetections poses
        for point in range(len(largeheatmaps_indices)):
            if nPoses >= self.maxPoseDetections:
                break

            # obtain a root candidate
            maxindex = largeheatmaps_indices[point] // len(KEYPOINTS)
            maxkeypoint = largeheatmaps_indices[point] % len(KEYPOINTS)
            y = self.heatmaps_stride_y * (maxindex // self.heatmaps_nx)
            x = self.heatmaps_stride_x * (maxindex % self.heatmaps_nx)
            y += offsets_short_y[maxindex, maxkeypoint]
            x += offsets_short_x[maxindex, maxkeypoint]

            # skip keypoint with (x, y) that is close to the existing keypoints
            skip = 0
            for p in range(nPoses):
                y_exist = pose_list[p, maxkeypoint, 0]
                x_exist = pose_list[p, maxkeypoint, 1]
                if (y_exist - y) * (y_exist - y) + (x_exist - x) * (
                        x_exist - x) < self.sqRadius:
                    skip = 1
                    break
            if skip == 1:
                continue

            # setting the maxkeypoint as root
            pose_list[nPoses, maxkeypoint, 0] = y
            pose_list[nPoses, maxkeypoint, 1] = x
            maxindex_list[nPoses, maxkeypoint] = maxindex
            score_list[nPoses, maxkeypoint] = heatmaps[maxindex, maxkeypoint]

            # backward decoding
            for edge in reversed(range(len(poseChain))):
                sourceKeypointId = parentToChildEdges[edge]
                targetKeypointId = childToParentEdges[edge]
                if maxindex_list[nPoses,
                                 sourceKeypointId] != -1 and maxindex_list[
                                     nPoses, targetKeypointId] == -1:
                    maxindex = maxindex_list[nPoses, sourceKeypointId]
                    y = pose_list[nPoses, sourceKeypointId, 0]
                    x = pose_list[nPoses, sourceKeypointId, 1]
                    y += offsets_mid_bwd_y[maxindex, edge]
                    x += offsets_mid_bwd_x[maxindex, edge]

                    y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                      self.heatmaps_ny - 1)
                    x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                      self.heatmaps_nx - 1)
                    maxindex_list[
                        nPoses,
                        targetKeypointId] = self.heatmaps_nx * y_index + x_index
                    for i in range(self.offsetRefineStep):
                        y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                          self.heatmaps_ny - 1)
                        x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                          self.heatmaps_nx - 1)
                        maxindex_list[
                            nPoses,
                            targetKeypointId] = self.heatmaps_nx * y_index + x_index
                        y = self.heatmaps_stride_y * y_index
                        x = self.heatmaps_stride_x * x_index
                        y += offsets_short_y[maxindex_list[nPoses,
                                                           targetKeypointId],
                                             targetKeypointId]
                        x += offsets_short_x[maxindex_list[nPoses,
                                                           targetKeypointId],
                                             targetKeypointId]

                    pose_list[nPoses, targetKeypointId, 0] = y
                    pose_list[nPoses, targetKeypointId, 1] = x
                    score_list[nPoses, targetKeypointId] = heatmaps[
                        maxindex_list[nPoses,
                                      targetKeypointId], targetKeypointId]

            # forward decoding
            for edge in range(len(poseChain)):
                sourceKeypointId = childToParentEdges[edge]
                targetKeypointId = parentToChildEdges[edge]
                if maxindex_list[nPoses,
                                 sourceKeypointId] != -1 and maxindex_list[
                                     nPoses, targetKeypointId] == -1:
                    maxindex = maxindex_list[nPoses, sourceKeypointId]
                    y = pose_list[nPoses, sourceKeypointId, 0]
                    x = pose_list[nPoses, sourceKeypointId, 1]
                    y += offsets_mid_fwd_y[maxindex, edge]
                    x += offsets_mid_fwd_x[maxindex, edge]

                    y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                      self.heatmaps_ny - 1)
                    x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                      self.heatmaps_nx - 1)
                    maxindex_list[
                        nPoses,
                        targetKeypointId] = self.heatmaps_nx * y_index + x_index
                    for i in range(self.offsetRefineStep):
                        y_index = np.clip(round(y / self.heatmaps_stride_y), 0,
                                          self.heatmaps_ny - 1)
                        x_index = np.clip(round(x / self.heatmaps_stride_x), 0,
                                          self.heatmaps_nx - 1)
                        maxindex_list[
                            nPoses,
                            targetKeypointId] = self.heatmaps_nx * y_index + x_index
                        y = self.heatmaps_stride_y * y_index
                        x = self.heatmaps_stride_x * x_index
                        y += offsets_short_y[maxindex_list[nPoses,
                                                           targetKeypointId],
                                             targetKeypointId]
                        x += offsets_short_x[maxindex_list[nPoses,
                                                           targetKeypointId],
                                             targetKeypointId]

                    pose_list[nPoses, targetKeypointId, 0] = y
                    pose_list[nPoses, targetKeypointId, 1] = x
                    score_list[nPoses, targetKeypointId] = heatmaps[
                        maxindex_list[nPoses,
                                      targetKeypointId], targetKeypointId]

            # calculate pose score
            score = 0
            for k in range(len(KEYPOINTS)):
                y = pose_list[nPoses, k, 0]
                x = pose_list[nPoses, k, 1]
                closekeypoint_exists = False
                for p in range(nPoses):
                    y_exist = pose_list[p, k, 0]
                    x_exist = pose_list[p, k, 1]
                    if (y_exist - y) * (y_exist - y) + (x_exist - x) * (
                            x_exist - x) < self.sqRadius:
                        closekeypoint_exists = True
                        break
                if not closekeypoint_exists:
                    score += score_list[nPoses, k]
            score /= len(KEYPOINTS)

            if score > self.minPoseConfidence:
                pose_score_list[nPoses] = score
                nPoses += 1
            else:
                for k in range(len(KEYPOINTS)):
                    maxindex_list[nPoses, k] = -1

        # Convert the poses to a friendlier format of keypoints with associated
        # scores.
        poses = []
        for pose_i in range(nPoses):
            keypoint_dict = {}
            for point_i, point in enumerate(pose_list[pose_i]):
                keypoint = Keypoint(KEYPOINTS[point_i], point,
                                    score_list[pose_i, point_i])
                if self._mirror:
                    keypoint.yx[1] = self.image_width - keypoint.yx[1]
                keypoint_dict[KEYPOINTS[point_i]] = keypoint
            poses.append(Pose(keypoint_dict, pose_score_list[pose_i]))

        return poses, inference_time
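A minimal sketch of how the returned poses might be consumed for drawing, assuming the Pose and Keypoint containers built above expose keypoints, score and yx attributes (the attribute names are inferred from the decoder code; the helper below is hypothetical):

import cv2

def draw_poses(frame, poses, min_keypoint_score=0.3):
    # Hypothetical helper: 'keypoints', 'score' and 'yx' are attribute names inferred
    # from how Pose and Keypoint are constructed in the decoder above.
    for pose in poses:
        for name, keypoint in pose.keypoints.items():
            if keypoint.score < min_keypoint_score:
                continue
            y, x = keypoint.yx
            cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), -1)
            cv2.putText(frame, name, (int(x) + 4, int(y)), cv2.FONT_HERSHEY_SIMPLEX,
                        0.4, (0, 255, 0), 1)
    return frame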
class Detector:
    """
    Perform object detection with the given model. The model is a quantized tflite
    file which if the detector can not find it at the path it will download it
    from neuralet repository automatically.

    :param config: Is a ConfigEngine instance which provides necessary parameters.
    """
    def __init__(self, config):
        self.config = config
        # Get the model name from the config
        self.model_name = self.config.get_section_dict('Detector')['Name']
        # Frames Per Second
        self.fps = None
        self.model_file = 'ped_ssdlite_mobilenet_v2_quantized_edgetpu.tflite'
        self.model_path = 'libs/detectors/edgetpu/data/' + self.model_file

        # Get the model .tflite file path from the config.
        # If there is no .tflite file in the path it will be downloaded automatically from base_url
        user_model_path = self.config.get_section_dict('Detector')['ModelPath']
        if len(user_model_path) > 0:
            print('using %s as model' % user_model_path)
            self.model_path = user_model_path
        else:
            base_url = 'https://raw.githubusercontent.com/neuralet/neuralet-models/master/edge-tpu/'
            url = base_url + self.model_name + '/' + self.model_file

            if not os.path.isfile(self.model_path):
                print('model does not exist under: ', self.model_path,
                      'downloading from ', url)
                wget.download(url, self.model_path)

        # Load TFLite model and allocate tensors
        self.interpreter = Interpreter(
            self.model_path,
            experimental_delegates=[load_delegate("libedgetpu.so.1")])
        self.interpreter.allocate_tensors()
        # Get the model input and output tensor details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Get class id from config
        self.class_id = int(
            self.config.get_section_dict('Detector')['ClassID'])
        self.score_threshold = float(
            self.config.get_section_dict('Detector')['MinScore'])

    def inference(self, resized_rgb_image):
        """
        inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding detection output which is used for creating result
        Args:
            resized_rgb_image: uint8 numpy array with shape (img_height, img_width, channels)

        Returns:
            result: a dictionary contains of [{"id": 0, "bbox": [x1, y1, x2, y2], "score":s%}, {...}, {...}, ...]
        """
        input_image = np.expand_dims(resized_rgb_image, axis=0)
        # Fill input tensor with input_image
        self.interpreter.set_tensor(self.input_details[0]["index"],
                                    input_image)
        t_begin = time.perf_counter()
        self.interpreter.invoke()
        inference_time = time.perf_counter() - t_begin  # Second
        self.fps = convert_infr_time_to_fps(inference_time)
        # The function `get_tensor()` returns a copy of the tensor data.
        # Use `tensor()` in order to get a pointer to the tensor.
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        labels = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        # TODO: will be used for getting number of objects
        # num = self.interpreter.get_tensor(self.output_details[3]['index'])

        result = []
        for i in range(boxes.shape[1]):  # number of boxes
            if labels[0,
                      i] == self.class_id and scores[0,
                                                     i] > self.score_threshold:
                result.append({
                    "id": str(self.class_id) + '-' + str(i),
                    "bbox": boxes[0, i, :],
                    "score": scores[0, i]
                })

        return result
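A hedged usage sketch for the Detector above. ConfigEngine comes from the surrounding project, and the config file name and image path are placeholders:

import cv2
import numpy as np
# from libs.config_engine import ConfigEngine   # hypothetical import path

config = ConfigEngine("config-edgetpu.ini")      # placeholder config file
detector = Detector(config)

frame = cv2.imread("sample.jpg")                 # placeholder input image
# Read the expected input size from the loaded model instead of hard-coding it.
_, in_h, in_w, _ = detector.input_details[0]["shape"]
resized = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (in_w, in_h))
for obj in detector.inference(resized.astype(np.uint8)):
    print(obj["id"], float(obj["score"]), obj["bbox"])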
Example No. 12
0
class Detector(object):
    def __init__(self, label_file, model_file, threshold):
        self._threshold = float(threshold)
        self.labels = self.load_labels(label_file)
        self.interpreter = Interpreter(model_file)
        self.interpreter.allocate_tensors()
        _, self.input_height, self.input_width, _ = self.interpreter.get_input_details(
        )[0]['shape']

    def load_labels(self, path):
        with open(path, 'r') as f:
            return {
                i: line.strip()
                for i, line in enumerate(f.read().replace('"', '').split(','))
            }

    def set_input_tensor(self, image):
        """Sets the input tensor."""
        tensor_index = self.interpreter.get_input_details()[0]['index']
        input_tensor = self.interpreter.tensor(tensor_index)()[0]
        input_tensor[:, :] = image

    def get_output_tensor(self, index):
        """Returns the output tensor at the given index."""
        output_details = self.interpreter.get_output_details()[index]
        tensor = np.squeeze(
            self.interpreter.get_tensor(output_details['index']))
        return tensor

    def detect_objects(self, image):
        """Returns a list of detection results, each a dictionary of object info."""
        self.set_input_tensor(image)
        self.interpreter.invoke()
        # Get all output details
        boxes = self.get_output_tensor(0)
        return boxes

    def detect(self, original_image):
        self.output_width, self.output_height = original_image.shape[0:2]
        start_time = time.time()
        image = cv2.resize(original_image,
                           (self.input_width, self.input_height))
        #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.detect_objects(image)
        elapsed_ms = (time.time() - start_time) * 1000

        fps = 1 / elapsed_ms * 1000
        print("Estimated frames per second : {0:.2f} Inference time: {1:.2f}".
              format(fps, elapsed_ms))

        def _to_original_scale(boxes):
            minmax_boxes = to_minmax(boxes)
            minmax_boxes[:, 0] *= self.output_width
            minmax_boxes[:, 2] *= self.output_width
            minmax_boxes[:, 1] *= self.output_height
            minmax_boxes[:, 3] *= self.output_height
            return minmax_boxes.astype(int)  # np.int was removed from NumPy; the builtin int is equivalent

        boxes, probs = self.run(results)
        print(boxes)
        if len(boxes) > 0:
            boxes = _to_original_scale(boxes)
            original_image = draw_boxes(original_image, boxes, probs,
                                        self.labels)
        return cv2.imencode('.jpg', original_image)[1].tobytes()

    def run(self, netout):
        """Convert YOLO network output to bounding boxes.

        # Args
            netout : 4d-array, shape of (grid_h, grid_w, num of boxes per grid, 5 + n_classes)
                YOLO neural network output array

        # Returns
            boxes : array, shape of (N, 4)
                coordinate scale is normalized [0, 1]
            probs : array, shape of (N, nb_classes)
        """
        anchors = [
            0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282,
            3.52778, 9.77052, 9.16828
        ]
        nms_threshold = 0.2
        grid_h, grid_w, nb_box = netout.shape[:3]
        boxes = []

        # decode the output by the network
        netout[..., 4] = _sigmoid(netout[..., 4])
        netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(
            netout[..., 5:])
        netout[..., 5:] *= netout[..., 5:] > self._threshold

        for row in range(grid_h):
            for col in range(grid_w):
                for b in range(nb_box):
                    # elements from index 5 onwards hold the class probabilities (index 4 is the objectness confidence)
                    classes = netout[row, col, b, 5:]

                    if np.sum(classes) > 0:
                        # first 4 elements are x, y, w, and h
                        x, y, w, h = netout[row, col, b, :4]

                        x = (col + _sigmoid(x)
                             ) / grid_w  # center position, unit: image width
                        y = (row + _sigmoid(y)
                             ) / grid_h  # center position, unit: image height
                        w = anchors[2 * b + 0] * np.exp(
                            w) / grid_w  # unit: image width
                        h = anchors[2 * b + 1] * np.exp(
                            h) / grid_h  # unit: image height
                        confidence = netout[row, col, b, 4]
                        box = BoundBox(x, y, w, h, confidence, classes)
                        boxes.append(box)

        boxes = nms_boxes(boxes, len(classes), nms_threshold, self._threshold)
        boxes, probs = boxes_to_array(boxes)
        return boxes, probs
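A possible way to drive this Detector, assuming its helper functions (to_minmax, draw_boxes, nms_boxes, boxes_to_array, _sigmoid, _softmax, BoundBox) are available from the surrounding module; the file names are placeholders:

import cv2

detector = Detector(label_file="labels.csv",        # comma-separated class names (see load_labels)
                    model_file="yolov2_tiny.tflite",
                    threshold=0.3)
image = cv2.imread("street.jpg")
jpeg_bytes = detector.detect(image)                 # annotated frame encoded as JPEG bytes
with open("annotated.jpg", "wb") as f:
    f.write(jpeg_bytes)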
Example No. 13
0
class ObjectDetectorLite():
    def __init__(self, model_path='detect.tflite', threads_num=4):
        try:
            self.interpreter = Interpreter(model_path=model_path)
            #self.interpreter.set_num_threads(threads_num)
        except Exception:
            self.interpreter = tf.lite.Interpreter(model_path=model_path)
            self.interpreter.set_num_threads(threads_num)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def _boxes_coordinates(self,
                           image,
                           boxes,
                           classes,
                           scores,
                           max_boxes_to_draw=20,
                           min_score_thresh=.5):

        if not max_boxes_to_draw:
            max_boxes_to_draw = boxes.shape[0]
        number_boxes = min(max_boxes_to_draw, boxes.shape[0])
        person_boxes = []
        for i in range(number_boxes):
            if scores is None or scores[i] > min_score_thresh:
                box = tuple(boxes[i].tolist())
                ymin, xmin, ymax, xmax = box
                im_height, im_width, _ = image.shape
                left, right, top, bottom = [
                    int(z) for z in (xmin * im_width, xmax * im_width,
                                     ymin * im_height, ymax * im_height)
                ]
                person_boxes.append([(left, top), (right, bottom), scores[i],
                                     LABELS[classes[i]]])
        return person_boxes

    def detect(self, image, threshold=0.1):
        # Resize and normalize image for network input
        frame = cv2.resize(image, (300, 300))
        frame = np.expand_dims(frame, axis=0)
        frame = frame.astype('uint8')

        # run model
        self.interpreter.set_tensor(self.input_details[0]['index'], frame)
        start_time = time.time()
        self.interpreter.invoke()
        stop_time = time.time()
        print("time: ", stop_time - start_time)

        # get results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        num = self.interpreter.get_tensor(self.output_details[3]['index'])

        # Find detected boxes coordinates
        return self._boxes_coordinates(image,
                                       np.squeeze(boxes[0]),
                                       np.squeeze(classes[0] + 1).astype(
                                           np.int32),
                                       np.squeeze(scores[0]),
                                       min_score_thresh=threshold)
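A brief usage sketch; the module-level LABELS mapping used by _boxes_coordinates is assumed to be defined, and the image path is a placeholder:

import cv2

detector = ObjectDetectorLite(model_path="detect.tflite")
image = cv2.imread("room.jpg")
for (top_left, bottom_right, score, label) in detector.detect(image, threshold=0.4):
    print(label, float(score), top_left, bottom_right)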
Example No. 14
0
def obj_center(args, objX, objY, centerX, centerY):
    # signal trap to handle keyboard interrupt
    signal.signal(signal.SIGINT, signal_handler)

    # Define and parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldir',
                        help='Folder the .tflite file is located in',
                        required=True)
    parser.add_argument(
        '--graph',
        help='Name of the .tflite file, if different than detect.tflite',
        default='detect.tflite')
    parser.add_argument(
        '--labels',
        help='Name of the labelmap file, if different than labelmap.txt',
        default='labelmap.txt')
    parser.add_argument(
        '--threshold',
        help='Minimum confidence threshold for displaying detected objects',
        default=0.5)
    parser.add_argument(
        '--resolution',
        help=
        'Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
        default='1280x720')

    args = parser.parse_args()

    MODEL_NAME = args.modeldir
    GRAPH_NAME = args.graph
    LABELMAP_NAME = args.labels
    min_conf_threshold = float(args.threshold)
    resW, resH = args.resolution.split('x')
    imW, imH = int(resW), int(resH)

    # Import the TFLite interpreter.
    # This snippet always imports it from tflite_runtime rather than from regular TensorFlow.
    from tflite_runtime.interpreter import Interpreter
    print("using tflite_runtime, not tensorflow")

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    frame_rate_calc = 1
    freq = cv2.getTickFrequency()

    # Initialize video stream
    videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    while True:
        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()
        print(t1)

        # Grab frame from video stream
        frame1 = videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(
            output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(
            output_details[2]['index'])[0]  # Confidence of detected objects
        # num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and
                (scores[i] <= 1.0)) and labels[int(classes[i])] == "book":
                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))

                #Center Coordinates of the found Object
                bookX = (xmin + xmax) / 2
                bookY = (ymin + ymax) / 2

                objX.value = bookX
                objY.value = bookY

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              2)

                # Draw label
                object_name = labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                      )  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label,
                                                      cv2.FONT_HERSHEY_SIMPLEX,
                                                      0.7, 2)  # Get font size
                label_ymin = max(
                    ymin, labelSize[1] + 10
                )  # Make sure not to draw label too close to top of window
                cv2.rectangle(
                    frame, (xmin, label_ymin - labelSize[1] - 10),
                    (xmin + labelSize[0], label_ymin + baseLine - 10),
                    (255, 255, 255),
                    cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                            2)  # Draw label text

        # Draw framerate in corner of frame
        cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        # Calculate framerate
        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    # Clean up
    cv2.destroyAllWindows()
    videostream.stop()
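obj_center writes the detected book's centre into the shared objX/objY values, so it is typically launched in its own process. A minimal sketch follows; note that the function re-parses sys.argv itself, so --modeldir must still be supplied on the command line, and the first positional argument is unused here:

from multiprocessing import Process, Value

if __name__ == "__main__":
    objX = Value("f", 0.0)       # shared centre coordinates written by the detector loop
    objY = Value("f", 0.0)
    centerX = Value("f", 0.0)    # part of the signature, unused by the snippet above
    centerY = Value("f", 0.0)
    tracker = Process(target=obj_center, args=(None, objX, objY, centerX, centerY))
    tracker.start()
    tracker.join()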
def setup_ssd_edgetpu(modelParas):
    # Get Args
    MODEL_NAME = modelParas[0]
    GRAPH_NAME = modelParas[1]
    LABELMAP_NAME = modelParas[2]
    min_conf_threshold = float(modelParas[3])
    resW, resH = modelParas[4:6]
    imW, imH = int(resW), int(resH)
    use_TPU = modelParas[6]

    # Interpreter and load_delegate are assumed to be imported at module level
    # (from tflite_runtime or tensorflow.lite); this function does not import them itself.

    # If using Edge TPU, assign filename for Edge TPU model
    if use_TPU:
        # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    tfParas = [
        height, width, floating_model, labels, input_mean, input_std,
        input_details, min_conf_threshold, imH, imW, interpreter,
        output_details
    ]

    return tfParas
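A hedged example of calling setup_ssd_edgetpu and unpacking tfParas in the order it is built above; the folder and file names are placeholders, and Interpreter/load_delegate are assumed to be imported at module level:

modelParas = ["Sample_TFLite_model",    # MODEL_NAME
              "detect.tflite",          # GRAPH_NAME
              "labelmap.txt",           # LABELMAP_NAME
              0.5,                      # min_conf_threshold
              "1280", "720",            # resW, resH
              False]                    # use_TPU
(height, width, floating_model, labels, input_mean, input_std,
 input_details, min_conf_threshold, imH, imW, interpreter,
 output_details) = setup_ssd_edgetpu(modelParas)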
Example No. 16
0
class Classifier:
    """
    Perform image classification with the given model. The model is an int8 quantized tflite
    file which if the classifier can not find it at the path it will download it
    from neuralet repository automatically.

    :param config: Is a ConfigEngine instance which provides necessary parameters.
    """
    def __init__(self, config):
        self.config = config
        self.model_name = "OFMClassifier_edgetpu.tflite"
        self.model_path = '/repo/data/edgetpu/' + self.model_name
        self.fps = None
        if not os.path.isfile(self.model_path):
            url = "https://raw.githubusercontent.com/neuralet/neuralet-models/master/edge-tpu/OFMClassifier/OFMClassifier_edgetpu.tflite"  # noqa
            print("model does not exist under: ", self.model_path,
                  "downloading from ", url)
            wget.download(url, self.model_path)

        # Load TFLite model and allocate tensors
        self.interpreter = Interpreter(
            self.model_path,
            experimental_delegates=[load_delegate("libedgetpu.so.1")])
        self.interpreter.allocate_tensors()
        # Get the model input and output tensor details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def inference(self, resized_rgb_images) -> list:
        """
        Inference function sets input tensor to input image and gets the output.
        The interpreter instance provides corresponding class id output which is used for creating result
        Args:
            resized_rgb_images: Array of images with shape (no_images, img_height, img_width, channels)
        Returns:
            result: List of class id for each input image. ex: [0, 0, 1, 1, 0]
            scores: The classification confidence for each class. ex: [.99, .75, .80, 1.0]
        """
        if np.shape(resized_rgb_images)[0] == 0:
            return [], []
        resized_rgb_images = (resized_rgb_images * 255).astype("uint8")
        result = []
        net_results = []
        for img in resized_rgb_images:
            img = np.expand_dims(img, axis=0)
            self.interpreter.set_tensor(self.input_details[0]["index"], img)
            t_begin = time.perf_counter()
            self.interpreter.invoke()
            inference_time = time.perf_counter() - t_begin  # Second
            self.fps = convert_infr_time_to_fps(inference_time)
            net_output = self.interpreter.get_tensor(
                self.output_details[0]['index'])[0]
            net_results.append(net_output)
            result.append(np.argmax(net_output))  # returns class id

        # TODO: optimize this without a Python for loop
        scores = []
        for i, itm in enumerate(net_results):
            scores.append((itm[result[i]] - 1) / 255.0)

        return result, scores
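A short usage sketch for the Classifier, assuming a ConfigEngine instance is available from the surrounding project; the random batch stands in for real image crops scaled to [0, 1]:

import numpy as np
# from libs.config_engine import ConfigEngine   # hypothetical import path

classifier = Classifier(config)                  # config: a ConfigEngine instance
_, in_h, in_w, _ = classifier.input_details[0]["shape"]
batch = np.random.rand(4, in_h, in_w, 3).astype(np.float32)   # placeholder images in [0, 1]
class_ids, scores = classifier.inference(batch)
print(class_ids, scores)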
Example No. 17
0
CWD_PATH = os.getcwd()

EDGE_TPU = False
if EDGE_TPU:
    face_model_path = 'model/face-detector-quantized_edgetpu.tflite'
    face_interpreter = Interpreter(model_path=os.path.join(CWD_PATH, face_model_path),
                                   experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
else:
    face_model_path = 'model/face_detection_front.tflite'
    face_interpreter = Interpreter(model_path=os.path.join(CWD_PATH, face_model_path))

face_interpreter.allocate_tensors()

# Get model details
face_input_details = face_interpreter.get_input_details()[0]
face_output_details = face_interpreter.get_output_details()
height = face_input_details['shape'][1]
width = face_input_details['shape'][2]

# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
# Initialize video stream
videostream = VideoStream(resolution=(imW, imH)).start()
time.sleep(1)
anchors = np.load('anchors.npy')

while True:
    # opencv
    # Start timer (for calculating frame rate)
    t1 = cv2.getTickCount()
Example No. 18
0
    def start(self, Handler):

        self.thread = Thread(target=self.update, args=())
        self.thread.start()

        self.startTime = time.time()

        MODEL_NAME = "Sample_TFLite_model"

        GRAPH_NAME = 'edgetpu.tflite'
        LABELMAP_NAME = 'labelmap.txt'
        min_conf_threshold = float(0.5)
        resW, resH = ('1080x720').split('x')
        imW, imH = int(resW), int(resH)

        pkg = importlib.util.find_spec('tensorflow')

        if pkg is None:
            from tflite_runtime.interpreter import Interpreter
            from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            from tensorflow.lite.python.interpreter import load_delegate

        CWD_PATH = os.getcwd()
        PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

        PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            labels = [line.strip() for line in f.readlines()]

        # BUG of Tensorflow: first label is '???', which has to be removed.
        if labels[0] == '???':
            del (labels[0])

        # Load the Tensorflow Lite model.
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])

        interpreter.allocate_tensors()

        # Get model details
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        height = input_details[0]['shape'][1]
        width = input_details[0]['shape'][2]

        floating_model = (input_details[0]['dtype'] == np.float32)

        input_mean = 127.5
        input_std = 127.5

        # Initialize frame rate calculation
        frame_rate_calc = 1
        freq = cv2.getTickFrequency()

        # Save each photo of a run under a different name.
        # On the next boot the photos are overwritten so they do not consume too much space.
        pic_counter = 0

        # After several consecutive frames in which a person is spotted, take a picture
        counter = 0

        self.SMS_Flag = 0

        while True:

            # Increment time if the flag was changed to 1
            if (Handler.getTimeFlag() == 1):
                self.startTime = time.time()
                Handler.setTimeFlag(0)
                print("Time incremented")

            # Stops everything after 20 seconds from the last timer
            if (time.time() >= self.startTime + 20):
                Handler.setCameraState(0, self.MQTT)
                print("Camera STOPPED after {}".format(time.time() -
                                                       self.initialTime))
                break

            # Start timer (for calculating frame rate)
            t1 = cv2.getTickCount()

            # Grab frame from video stream
            frame1 = self.read()

            # Acquire frame and resize to expected shape [1xHxWx3]
            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (width, height))
            input_data = np.expand_dims(frame_resized, axis=0)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            if floating_model:
                input_data = (np.float32(input_data) - input_mean) / input_std

            # Perform the actual detection by running the model with the image as input
            interpreter.set_tensor(input_details[0]['index'], input_data)
            interpreter.invoke()

            # Retrieve detection results
            boxes = interpreter.get_tensor(output_details[0]['index'])[
                0]  # Bounding box coordinates of detected objects
            classes = interpreter.get_tensor(output_details[1]['index'])[
                0]  # Class index of detected objects
            scores = interpreter.get_tensor(output_details[2]['index'])[
                0]  # Confidence of detected objects
            #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

            fakeFlag = 1

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            for i in range(len(scores)):
                if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)
                        and classes[i] == 0):

                    counter += 1
                    # Get bounding box coordinates and draw box
                    # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                    ymin = int(max(1, (boxes[i][0] * imH)))
                    xmin = int(max(1, (boxes[i][1] * imW)))
                    ymax = int(min(imH, (boxes[i][2] * imH)))
                    xmax = int(min(imW, (boxes[i][3] * imW)))

                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (255, 0, 255), 2)

                    # Draw label
                    object_name = labels[int(
                        classes[i]
                    )]  # Look up object name from "labels" array using class index
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                          )  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                        1)  # Get font size
                    label_ymin = max(
                        ymin, labelSize[1] + 10
                    )  # Make sure not to draw label too close to top of window
                    cv2.rectangle(
                        frame, (xmin, label_ymin - labelSize[1] - 10),
                        (xmin + labelSize[0], label_ymin + baseLine - 10),
                        (255, 255, 255),
                        cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, label, (xmin, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0),
                                1)  # Draw label text

                # Fakeflag for the promo video
                if (fakeFlag == 0):
                    names = [
                        "Frame1.png", "Frame3.png", "Frame4.png", "Frame5.png",
                        "Frame6.png"
                    ]
                    requests.post(
                        'https://api.telegram.org/YOUR_TOKEN/sendMessage',
                        data={
                            'chat_id':
                            297501031,
                            'text':
                            "Attention, a human presence has been detected in the house!\nHere are the photos:"
                        })
                    for i in range(len(names)):

                        requests.post(
                            'https://api.telegram.org/YOUR_TOKEN/sendPhoto',
                            data={'chat_id': 297501031},
                            files={
                                'photo': open('./{}'.format(names[i]), 'rb')
                            })
                        time.sleep(2)
                    fakeFlag = 1

                # Counter for the frame that recognize a person
                elif (counter > 5):

                    print("ATTENTION, INTRUSION DETECTED!")
                    if self.SMS_Flag == 0:
                        for number in self.numbers:
                            self.MQTT.publish("SMS_ALERT_CAM",
                                              json.dumps({"number": number}))
                    self.SMS_Flag = 1

                    for ID in self.IDs:
                        if (pic_counter == 0):
                            requests.post(
                                'https://api.telegram.org/YOUR_TOKEN/sendMessage',
                                data={
                                    'chat_id':
                                    ID,
                                    'text':
                                    "ATTENTION, a human presence has been detected in the house\nHere are the photos"
                                })
                        filename = "me_{}.jpg".format(pic_counter)
                        cv2.imwrite(filename, frame)
                        requests.post(
                            'https://api.telegram.org/YOUR_TOKEN/sendPhoto',
                            data={'chat_id': ID},
                            files={
                                'photo': open('./{}'.format(filename), 'rb')
                            })
                    counter = 0
                    pic_counter += 1

            # Draw framerate in corner of frame
            cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc),
                        (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0),
                        2, cv2.LINE_AA)

            # Calculate framerate
            t2 = cv2.getTickCount()

            time1 = (t2 - t1) / freq
            frame_rate_calc = 1 / time1

        # Clean up
        self.stop()

        cv2.destroyAllWindows()
        print("Window destroyed")
        Handler.destroyCamera()
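start() drives an external Handler object. A minimal stand-in exposing only the methods the loop actually calls might look like the sketch below; the method names are taken from the code above, but the behaviour shown is an assumption:

class HandlerStub:
    """Minimal stand-in; real behaviour is project specific."""

    def __init__(self):
        self._time_flag = 0

    def getTimeFlag(self):
        # 1 tells the camera loop to extend its 20-second watchdog
        return self._time_flag

    def setTimeFlag(self, value):
        self._time_flag = value

    def setCameraState(self, state, mqtt_client):
        # 0 marks the camera as stopped; the MQTT client is passed through unused here
        print("camera state ->", state)

    def destroyCamera(self):
        print("camera released")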
Example No. 19
0
class YOLOV5:
    def __init__(self, wanted_labels=None, model_file=None, label_file=None, num_threads=None, edgetpu=False, libedgetpu=None, score_threshold=0.25):
        basedir = os.getenv('DEEPDISHHOME','.')
        if model_file is None:
          model_file = os.path.join(basedir, 'detectors/yolov5/yolov5s-int8.tflite')
        if label_file is None:
          label_file = os.path.join(basedir, 'detectors/yolov5/coco_classes.txt')
        self.cfg_file = os.path.join(basedir, 'detectors/yolov5/yolov5s.yaml')
        if wanted_labels is None:
          wanted_labels = ['person']
        self.wanted_labels = wanted_labels
        self.label_file = label_file
        self.score_threshold = score_threshold
        self.labels = self._get_labels()
        self.use_edgetpu = edgetpu
        self.int8 = False

        if 'saved_model' in model_file:
            self.mode = 'saved_model'
            if 'keras' not in sys.modules:
                print('yolov5: saved_model mode requires keras')
                sys.exit(1)
        elif '.tflite' in model_file:
            self.mode = 'tflite'
            if 'int8' in model_file: self.int8 = True
        else:
            print('unable to determine format of yolov5 model')
            sys.exit(1)

        if libedgetpu is None:
            libedgetpu = edgetpu_lib_name()

        if self.mode == 'tflite':
            # Load TFLite model and allocate tensors.
            self.interpreter = Interpreter(
                model_path=model_file,
                num_threads=num_threads,
                experimental_delegates=[load_delegate(libedgetpu)] if self.use_edgetpu else None)
            self.interpreter.allocate_tensors()
            self.num_threads = num_threads
            # Get input and output tensors.
            self.input_details = self.interpreter.get_input_details()
            self.output_details = self.interpreter.get_output_details()
            _, self.height, self.width, _ = self.input_details[0]['shape'].tolist()
        elif self.mode == 'saved_model':
            self.model = keras.models.load_model(model_file)
            self.num_threads = 1
            _, self.height, self.width, _ = self.model.inputs[0].shape.as_list()

        yaml_file = Path(self.cfg_file)
        with open(yaml_file) as f:
            cfg = yaml.load(f, Loader=yaml.FullLoader)
        self.anchors = cfg['anchors']

    def _get_labels(self):
        labels_path = os.path.expanduser(self.label_file)
        with open(labels_path) as f:
            labels = {i: line.strip() for i, line in enumerate(f.readlines())}
        return labels

    def detect_image(self, img):
        img_size = img.size
        img_resized = img.convert('RGB').resize((self.width, self.height), Image.LANCZOS)  # LANCZOS replaces the removed Image.ANTIALIAS
        input_data = np.expand_dims(img_resized, 0).astype(np.float32)

        if self.int8:
            scale, zero_point = self.input_details[0]['quantization']
            input_data = (input_data / scale + zero_point).astype(np.uint8)

        if self.mode == 'tflite':
            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
            self.interpreter.invoke()
            output_data = self.interpreter.get_tensor(self.output_details[0]['index'])
            raw = np.copy(output_data)
        elif self.mode == 'saved_model':
            input_data /= 255.0
            output_data = self.model(input_data).numpy()

        if self.int8:
            scale, zero_point = self.output_details[0]['quantization']
            output_data = output_data.astype(np.float32)
            output_data = (output_data - zero_point) * scale

        x = np.copy(output_data)
        boxes = np.copy(x[..., :4])
        boxes[..., 0] = x[..., 0] - x[..., 2] / 2
        boxes[..., 1] = x[..., 1] - x[..., 3] / 2
        boxes[..., 2] = x[..., 0] + x[..., 2] / 2
        boxes[..., 3] = x[..., 1] + x[..., 3] / 2
        x[..., 5:] *= x[..., 4:5]
        best_classes = np.expand_dims(np.argmax(x[..., 5:], axis=-1), axis=-1)
        confidences = np.take_along_axis(x, best_classes + 5, axis=-1)
        y = np.concatenate((boxes, confidences, best_classes.astype(np.float32)), axis=-1)
        y = y[np.where(y[..., 4] >= self.score_threshold)]
        y[...,:4] *= np.array([img_size[0], img_size[1], img_size[0], img_size[1]])

        return_boxs = []
        return_lbls = []
        return_scrs = []

        for *xyxy, score, labelidx in y:
            label=self.labels[int(labelidx)]
            if label in self.wanted_labels and score >= self.score_threshold:
                tlwh = np.copy(xyxy)
                tlwh[2] = xyxy[2] - xyxy[0]
                tlwh[3] = xyxy[3] - xyxy[1]
                return_boxs.append(list(tlwh))
                return_lbls.append(label)
                return_scrs.append(score)
        return (return_boxs, return_lbls, return_scrs)
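A hedged usage sketch for the YOLOV5 wrapper, assuming the default data files under detectors/yolov5/ and the module-level edgetpu_lib_name helper exist; the image path is a placeholder:

from PIL import Image

yolo = YOLOV5(model_file="detectors/yolov5/yolov5s-int8.tflite", num_threads=2)
img = Image.open("crowd.jpg")
boxes, labels, scores = yolo.detect_image(img)
for box, label, score in zip(boxes, labels, scores):
    print(label, round(float(score), 3), box)    # box is [x, y, w, h] in original-image pixels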
Example No. 20
0
class WakeWord2:
    def __init__(self):
        # Sliding window
        self.window = np.zeros(int(RECORD_DURATION * RESAMPLE_RATE) * 2)

        # Load model
        self.interpreter = Interpreter(WAKEWORD_MODEL_PATH)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        # Thread and flags
        self.ON = True
        self.running = True
        self.wakeword_flag = False
        self.wakeword_thread = threading.Thread(target=self.wakeword_run,
                                                name="wakeword_thread")
        self.wakeword_thread.start()
        print('WakeWord Initialized')

    def close(self):
        self.running = False
        self.wakeword_thread.join()
        return

    # Background loop that continuously checks for wake words
    def wakeword_run(self):
        with sd.InputStream(channels=NUM_CHANNELS,
                            samplerate=SAMPLE_RATE,
                            blocksize=int(SAMPLE_RATE * RECORD_DURATION),
                            callback=self.wakeword_process):
            while self.running:
                pass

    def __call__(self):
        if self.wakeword_flag:
            self.wakeword_flag = False
            return True
        return False

    def wakeword_process(self, rec, frames, time, error):
        # Start timing for testing
        start_time = timeit.default_timer()
        # Notify if errors
        if error:
            print("Error: ", error)

        # Remove 2nd dimension from recording sample and downsample
        rec = np.squeeze(rec)
        rec = scipy.signal.decimate(rec, DOWNSAMPLE)

        # Analyze a sliding window of the sound that overlaps the previous window by 50%,
        # to catch wake words that might span time segments
        self.window[:len(self.window) // 2] = self.window[len(self.window) //
                                                          2:]
        self.window[len(self.window) // 2:] = rec

        # Compute MFCCs (Mel Frequency Cepstral Coefficients), which scale the frequency axis
        # to match more closely what the human ear can hear
        mfccs = python_speech_features.base.mfcc(self.window,
                                                 samplerate=RESAMPLE_RATE,
                                                 winlen=0.256,
                                                 winstep=0.050,
                                                 numcep=NUM_MFCC,
                                                 nfilt=26,
                                                 nfft=2048,
                                                 preemph=0.0,
                                                 ceplifter=0,
                                                 appendEnergy=False,
                                                 winfunc=np.hanning)
        mfccs = mfccs.transpose()

        # Make prediction from model
        in_tensor = np.float32(
            mfccs.reshape(1, mfccs.shape[0], mfccs.shape[1], 1))
        self.interpreter.set_tensor(self.input_details[0]['index'], in_tensor)
        self.interpreter.invoke()
        output_data = self.interpreter.get_tensor(
            self.output_details[0]['index'])
        val = output_data[0][0]
        # test for the wake word ('go')
        if val > WORD_THRESHOLD:
            print('listening')
            self.wakeword_flag = True
        if DEBUG_ACC:  # print accuracy of each detection
            print(val)
        if DEBUG_TIME:  # print processing time for a sound clip
            print(timeit.default_timer() - start_time)
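A possible polling loop around WakeWord2, assuming the module-level constants it relies on (RECORD_DURATION, RESAMPLE_RATE, WAKEWORD_MODEL_PATH, WORD_THRESHOLD, DOWNSAMPLE, NUM_CHANNELS, SAMPLE_RATE, NUM_MFCC, DEBUG_ACC, DEBUG_TIME) are defined:

import time

wake = WakeWord2()
try:
    while True:
        if wake():                   # __call__ returns True once per detected wake word
            print("wake word detected, start listening for a command")
        time.sleep(0.1)
except KeyboardInterrupt:
    wake.close()                     # stops the background audio thread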
Example No. 21
0
class camera_interface():
    """
    The main interface for using the camera and determining the grip we need to be in.
    https://www.hackster.io/gatoninja236/scan-qr-codes-in-real-time-with-raspberry-pi-a5268b
      
    Attributes:
        count (int): Count of saved screenshots. File titles are frame'count'.jpg.
        cap (cv2 VideoCapture): The VideoCapture object.
        detector (QRCodeDetector): The QR Code detecting object.
    """

    def __init__(self,resolution=(640,480),framerate=30):
        self.count = 0
        # self.cap = cv2.VideoCapture(0)
        self.vs = VideoStream(resolution=(1280,720),framerate=30).start()
        # self.stream = cv2.VideoCapture(0)
        # ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
        # ret = self.stream.set(3,resolution[0])
        # ret = self.stream.set(4,resolution[1])

        #Wait one second for the camera to start up
        time.sleep(1)
        print("[INFO] Created video capture object")
        print("[INFO] loading model...")

        #Load the tflite model and labelmap
        # Get path to current working directory
        GRAPH_NAME = "detect.tflite"
        MODEL_NAME = "Camera_Interpreter/Coco"
        LABELMAP_NAME = "labelmap.txt"
        CWD_PATH = os.getcwd()

        # Path to .tflite file, which contains the model that is used for object detection
        PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)

        # Path to label map file
        PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)

        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del(self.labels[0])

        # Load the Tensorflow Lite model.
        # If using Edge TPU, use special load_delegate argument
        use_TPU = False
        if use_TPU:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT,
                                    experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)

        self.interpreter.allocate_tensors()

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5
        
        # QR code detection object
        # self.detector = cv2.QRCodeDetector()
        self.cam_data = ""
        self.object_spotted = False
        self.test_count = 0
        self.killed_thread = False
        self.cam_image = None
        self.cam_image_index = 0
        self.object_spotted_T0 = 0
        self.object_not_spotted_delta_req = 3

        #Initialize the paused flag to false
        self.temp_pause = False

    def camera_read_threader(self):
        #Start the read cam thread
        read_cam = threading.Thread(target=self.read_cam_thread, args=())
        read_cam.start()
        while(self.cam_image_index == 0):
            time.sleep(0.05)
        #Start the image decode thread
        decoder = threading.Thread(target=self.decode_image_thread, args=())
        decoder.start()
        while not self.killed_thread and read_cam.is_alive() and decoder.is_alive():
            time.sleep(0.25)
        #Flag is thrown or error, so ensure flag is thrown and wait for threads to join
        self.killed_thread = True
        read_cam.join()
        decoder.join()

    def decode_image_thread(self):
        previous_index = None
        while not self.killed_thread:
            #Detect and decode the stored image if it's ready
            # t = time.time()
            if(previous_index != self.cam_image_index and (not self.temp_pause)):
                previous_index = self.cam_image_index
                # data, _, _ = self.detector.detectAndDecode(self.cam_image) Deprecated QR Code reader
                data, score = self.detect_main_object(self.cam_image)
                # print("[INFO] Camera objects: " + data)
                # if(data not in grips._value2member_map_):
                #     data = grips.openGrip.value

                #If the camera sees an object, skip the time requirement
                if(data != ""):
                    self.cam_data = data
                    self.object_spotted_T0 = time.time()
                    self.object_spotted = True
                #If the camera doesn't see an object, require a delay before reporting nothing
                else:
                    if((time.time() - self.object_spotted_T0) > self.object_not_spotted_delta_req):
                        # print("[DEBUG] Delta Req passed; reporting no object now")
                        self.cam_data = data
                        self.object_spotted = False
                
                ##### Only a minimal sleep here: detecting/decoding already takes significant time, so run it as fast as possible
            # print("[INFO] Time to decode image: " + (str(time.time() - t)))
            time.sleep(0.01)

    def detect_main_object(self, frame1):
        min_conf_threshold = 0.35

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'],input_data)
        self.interpreter.invoke()

        # Retrieve detection results
        # boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0] # Bounding box coordinates of detected objects
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0] # Class index of detected objects
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0] # Confidence of detected objects

        highest_scoring_label = ""
        highest_score = 0
        for i in range(len(scores)):
            object_name = self.labels[int(classes[i])] # Look up object name from "labels" array using class index
            if((scores[i] > min_conf_threshold) and (scores[i] <= 1.0) and (scores[i] > highest_score) and (object_name in grips._value2member_map_)):
                # Draw label
                highest_scoring_label = object_name
                highest_score = scores[i]

        return (highest_scoring_label, highest_score)

    def read_cam_thread(self):
        while not self.killed_thread:
            if(not self.temp_pause):
                # t = time.time()
                #Get camera image, rescale, and store in class variable
                frame = self.vs.read()
                self.cam_image = imutils.resize(frame, width=400)
                
                #Increase index by 1
                self.cam_image_index += 1
                #Pause briefly
                time.sleep(0.2)
                # print("Time to save/resize new image: " + (str(time.time() - t)))

    # def read_cam(self):
    #     # get the image
    #     _, img = self.cap.read() #TODO: #14 Downscale the resolution for faster processing
    #     # get bounding box coords and data
    #     data, bbox, _ = self.detector.detectAndDecode(img)
    #     #Define a parameter we can easily read later if anything is detected
    #     is_object = False
    #     #Update parameter/output the data we found, if any
    #     if data:
    #         #print("data found: ", data)
    #         is_object = True
    #     #return the information we got from the camera
    #     # cv2.imwrite("frame1.jpg", img)     # save frame as JPEG file
    #     return data, bbox, img, is_object

    # def read_cam_display_out(self):
    #     #Call the standard method to get the qr data / bounding box
    #     data, bbox, img, _ = self.read_cam()
    #     # if there is a bounding box, draw one, along with the data
    #     if(bbox is not None):
    #         for i in range(len(bbox)):
    #             cv2.line(img, tuple(bbox[i][0]), tuple(bbox[(i+1) % len(bbox)][0]), color=(255,
    #                     0, 255), thickness=2)
    #         cv2.putText(img, data, (int(bbox[0][0][0]), int(bbox[0][0][1]) - 10), cv2.FONT_HERSHEY_SIMPLEX,
    #                     0.5, (0, 255, 0), 2)
    #         #if data:
    #             #print("data found: ", data)
    #     # display the image preview
    #     cv2.imshow("code detector", img)

    #     # save the image
    #     cv2.imwrite("frame1.jpg", img)     # save frame as JPEG file
    #     #self.count += 1

    def end_camera_session(self):
        #Stop the camera thread 
        self.killed_thread = True
        time.sleep(0.1)
        #Release the camera object
        self.vs.stop()
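A minimal sketch of driving camera_interface from a separate thread, assuming the model files under Camera_Interpreter/Coco and the grips enum used by detect_main_object exist in the project:

import threading
import time

cam = camera_interface()
reader = threading.Thread(target=cam.camera_read_threader)
reader.start()
try:
    for _ in range(20):
        # cam_data holds the label of the highest-scoring recognised object, or "" if none
        print("seen:", cam.cam_data or "<nothing>")
        time.sleep(0.5)
finally:
    cam.end_camera_session()
    reader.join()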
Example No. 22
0
def approximation(limit):
    detect = 0
    MODEL_NAME = 'obj_detection_tflite'
    GRAPH_NAME = 'detect.tflite'
    LABELMAP_NAME = 'labelmap.txt'
    min_conf_threshold = 0.6
    imW, imH = 1280, 720

    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter

    else:
        from tensorflow.lite.python.interpreter import Interpreter

    CWD_PATH = os.getcwd()

    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    if labels[0] == '???':
        del (labels[0])

    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    pi_camera = PiCamera(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    p_height = 0
    p_width = 0
    detections = 0
    approximation_detected = False
    timer_mark = timer_start = time.time()
    while timer_mark - timer_start < limit:
        print(timer_mark - timer_start)
        frame1 = pi_camera.read()

        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        boxes = interpreter.get_tensor(output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]  # Confidence of detected objects

        for i in range(len(scores)):

            if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
                y_min = int(max(1, (boxes[i][0] * imH)))
                x_min = int(max(1, (boxes[i][1] * imW)))
                y_max = int(min(imH, (boxes[i][2] * imH)))
                x_max = int(min(imW, (boxes[i][3] * imW)))
                object_name = labels[int(classes[i])]

                if object_name == 'car' or object_name == 'bus' or object_name == 'truck':
                    detections += 1
                    # Parenthesise the size checks: 'and' binds tighter than 'or', so
                    # without parentheses only the width-growth test was gated on detections > 1.
                    if ((y_max - y_min) > p_height * 1.15 or (x_max - x_min) > p_width * 1.15)\
                            and detections > 1:
                        play_sound_notification("waiting")
                        limit += 3

                    p_height = y_max - y_min
                    p_width = x_max - x_min
        
        timer_mark = time.time()

    cv2.destroyAllWindows()
    pi_camera.stop()
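A small aside on the growth check above: in Python 'and' binds tighter than 'or', which is why the two size comparisons need their own parentheses; a minimal illustration:

# A or B and C is parsed as A or (B and C)
A, B, C = True, True, False
print(A or B and C)      # True  -> would fire on size growth alone
print((A or B) and C)    # False -> also requires a previous detection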
Ejemplo n.º 23
0
import cv2
import numpy as np
# Interpreter from tflite_runtime if installed; otherwise use tensorflow.lite's Interpreter
from tflite_runtime.interpreter import Interpreter

img = cv2.imread('Capture.png')
h = img.shape[0]
w = img.shape[1]

img = cv2.resize(img, (256, 144))
img = np.asarray(img)
img = img / 255.
img = img.astype(np.float32)
img = img[np.newaxis, :, :, :]

# Tensorflow Lite
interpreter = Interpreter(model_path='model_float16_quant.tflite',
                          num_threads=4)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()[0]['index']
output_details = interpreter.get_output_details()[0]['index']

interpreter.set_tensor(input_details, img)
interpreter.invoke()
output = interpreter.get_tensor(output_details)

print(output.shape)
out1 = output[0][:, :, 0]
out2 = output[0][:, :, 1]

# Invert the boolean masks first, then scale to 0/255 uint8 images
out1 = (np.invert(out1 > 0.5) * 255).astype(np.uint8)
out2 = (np.invert(out2 > 0.5) * 255).astype(np.uint8)

print('out1:', out1.shape)
print('out2:', out2.shape)
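A short follow-up sketch, assuming the two output channels are masks worth inspecting visually; the output filenames are illustrative:

# Save each channel as an 8-bit image for quick visual inspection
# (out1/out2 are 0/255-valued uint8 arrays after the inversion above).
cv2.imwrite('mask_channel0.png', out1)
cv2.imwrite('mask_channel1.png', out2)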
Ejemplo n.º 24
0
def detectPenKey(img):
    # parser = argparse.ArgumentParser()
    # parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
    #                     default='models\\model_objDetec\\penKeyModel')
    # parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
    #                     default='model-9020516539576614912_tflite_2021-04-01T07_44_31.691148Z_model.tflite')
    # parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
    #                     default='labels.txt')
    # parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
    #                     default=0.5)
    # parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir',
    #                     default=None)
    # parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.',
    #                     default=None)
    # parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
    #                     action='store_true')
    #
    # args = parser.parse_args()
    listOfObjDetec = []
    MODEL_NAME = "models\\model_objDetec\\penKeyModel"
    GRAPH_NAME = "model-9020516539576614912_tflite_2021-04-01T07_44_31.691148Z_model.tflite"
    LABELMAP_NAME = "labels.txt"
    min_conf_threshold = float(0.5)
    use_TPU = False

    # # Parse input image name and directory.
    # IM_NAME = args.image
    # IM_DIR = args.imagedir
    #
    # # If both an image AND a folder are specified, throw an error
    # if (IM_NAME and IM_DIR):
    #     print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.')
    #     sys.exit()
    #
    # # If neither an image or a folder are specified, default to using 'test1.jpg' for image name
    # if (not IM_NAME and not IM_DIR):
    #     IM_NAME = 'keys11.jpg'
    #
    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    # If using Edge TPU, assign filename for Edge TPU model
    if use_TPU:
        # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Define path to images and grab all image filenames
    # if IM_DIR:
    #     PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_DIR)
    #     images = glob.glob(PATH_TO_IMAGES + '/*')
    #
    # elif IM_NAME:
    #     PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_NAME)
    #     images = glob.glob(PATH_TO_IMAGES)

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    # Loop over every image and perform detection
    # for image_path in images:

    # Load image and resize to expected shape [1xHxWx3]
    image = img
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    imH, imW, _ = image.shape
    image_resized = cv2.resize(image_rgb, (width, height))
    input_data = np.expand_dims(image_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[
        0]  # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(
        output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(
        output_details[2]['index'])[0]  # Confidence of detected objects
    #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
            # getting label/class
            object_name = labels[int(
                classes[i]
            )]  # Look up object name from "labels" array using class index

            print("detected:", object_name, ":", int(scores[i] * 100))
            listOfObjDetec.append(object_name)

            # debug
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))

            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

            # Draw label
            # object_name = labels[int(classes[i])]  # Look up object name from "labels" array using class
            label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                  )  # Example: 'person: 72%'
            labelSize, baseLine = cv2.getTextSize(label,
                                                  cv2.FONT_HERSHEY_SIMPLEX,
                                                  0.7, 2)  # Get font size
            label_ymin = max(
                ymin, labelSize[1] +
                10)  # Make sure not to draw label too close to top of window
            cv2.rectangle(img, (xmin, label_ymin - labelSize[1] - 10),
                          (xmin + labelSize[0], label_ymin + baseLine - 10),
                          (255, 255, 255),
                          cv2.FILLED)  # Draw white box to put label text in
            cv2.putText(img, label, (xmin, label_ymin - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                        2)  # Draw label text
    if listOfObjDetec:
        print(listOfObjDetec)
        objDict = dict(Counter(listOfObjDetec))
        print(objDict)
        strg = "Detected "
        for obj_name, count in objDict.items():
            strg += str(count) + " " + obj_name + ", "
        # All the results have been drawn on the image, now display the image
        cv2.imshow('Object detector', img)

        cv2.waitKey(0)
        cv2.destroyAllWindows()
        # Press any key to continue to next image, or press 'q' to quit
        return strg
    else:
        return "No Objects Detected"
Ejemplo n.º 25
0
def get_item_dictionary(): 
    # Define and parse input arguments
    parser = argparse.ArgumentParser()
    # parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
    #                     required=True)



    parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
                        default='detect.tflite')
    parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
                        default='labelmap.txt')
    parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
                        default=0.5)
    parser.add_argument('--image', help='Name of the single image to perform detection on. To run detection on multiple images, use --imagedir',
                        default=None)
    parser.add_argument('--imagedir', help='Name of the folder containing images to perform detection on. Folder must contain only images.',
                        default=None)
    parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
                        action='store_true')

    args = parser.parse_args()

    MODEL_NAME = "Sample_TFLite_model"

    GRAPH_NAME = args.graph
    LABELMAP_NAME = args.labels
    min_conf_threshold = float(args.threshold)
    use_TPU = args.edgetpu

    # Parse input image name and directory. 
    IM_NAME = args.image
    IM_DIR = args.imagedir

    # If both an image AND a folder are specified, throw an error
    if (IM_NAME and IM_DIR):
        print('Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.')
        sys.exit()

    # If neither an image nor a folder is specified, fall back to the Pi camera capture below
    if (not IM_NAME and not IM_DIR):
        import picamera

#         print("about to take a photo")
        with picamera.PiCamera() as camera:
            camera.resolution = (1280,720)
            camera.capture("/home/pi/Desktop/tflite1/test_picam.jpg")
#         print("taken photo")

        # Note: detection runs on this fixed test image, not on the photo captured above
        IM_NAME = '/home/pi/Desktop/tflite1/CS190_P2/test_image.jpg'

    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    # If using Edge TPU, assign filename for Edge TPU model
    if use_TPU:
        # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'


    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Define path to images and grab all image filenames
    if IM_DIR:
        PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_DIR)
        images = glob.glob(PATH_TO_IMAGES + '/*')

    elif IM_NAME:
        PATH_TO_IMAGES = os.path.join(CWD_PATH,IM_NAME)
        images = glob.glob(PATH_TO_IMAGES)

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = '/home/pi/Desktop/tflite1/CS190_P2/src/edge/Sample_TFLite_model/detect.tflite'

    # Path to label map file
    PATH_TO_LABELS = '/home/pi/Desktop/tflite1/CS190_P2/src/edge/Sample_TFLite_model/labelmap.txt'

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del(labels[0])

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(model_path=PATH_TO_CKPT,
                                  experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5
    result_label = []

    # Loop over every image and perform detection
    for image_path in images:

        # Load image and resize to expected shape [1xHxWx3]
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imH, imW, _ = image.shape 
        image_resized = cv2.resize(image_rgb, (width, height))
        input_data = np.expand_dims(image_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'],input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1,(boxes[i][0] * imH)))
                xmin = int(max(1,(boxes[i][1] * imW)))
                ymax = int(min(imH,(boxes[i][2] * imH)))
                xmax = int(min(imW,(boxes[i][3] * imW)))
                
                cv2.rectangle(image, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)

                # Draw label
                object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
                if object_name == 'potted plant':
                    object_name = 'pineapple'
                label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
                result_label.append(object_name)
                
                labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
                label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
                cv2.rectangle(image, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
                cv2.putText(image, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text

        # All the results have been drawn on the image, now display the image
    #     cv2.imshow('Object detector', image)
        cv2.imwrite("detected_test.jpg", image)
        print(Counter(result_label))

        # Press any key to continue to next image, or press 'q' to quit
    # #     if cv2.waitKey(0) == ord('q'):
    #         break

    # Clean up
#     cv2.destroyAllWindows()
    return dict(Counter(result_label))
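A hedged usage sketch; get_item_dictionary() parses its own command-line flags, so it is normally invoked from a script entry point (the flags shown are illustrative):

if __name__ == '__main__':
    # e.g. python detect_items.py --imagedir images --threshold 0.6
    item_counts = get_item_dictionary()
    print(item_counts)   # e.g. {'pineapple': 2, 'banana': 1} (illustrative output)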
Ejemplo n.º 26
0
class ImageDetection:
    def __init__(self, modeldir):
        GRAPH_NAME = 'detect.tflite'
        LABELMAP_NAME = 'labelmap.txt'
        CWD_PATH = os.getcwd()
        PATH_TO_CKPT = os.path.join(CWD_PATH, modeldir, GRAPH_NAME)
        PATH_TO_LABELS = os.path.join(CWD_PATH, modeldir, LABELMAP_NAME)
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        if self.labels[0] == '???':
            del (self.labels[0])

        self.min_conf_threshold = 0.6
        self.input_mean = 127.5
        self.input_std = 127.5
        self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

    def detect(self, image_path):
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        im_h, im_w, _ = image.shape
        image_resized = cv2.resize(image_rgb, (self.width, self.height))
        input_data = np.expand_dims(image_resized, axis=0)
        if self.floating_model:
            input_data = (np.float32(input_data) -
                          self.input_mean) / self.input_std
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
        classes = self.interpreter.get_tensor(
            self.output_details[1]['index'])[0]
        scores = self.interpreter.get_tensor(
            self.output_details[2]['index'])[0]
        detect_text = ""
        for i in range(len(scores)):
            if self.min_conf_threshold < scores[i] <= 1.0:
                ymin = int(max(1, (boxes[i][0] * im_h)))
                xmin = int(max(1, (boxes[i][1] * im_w)))
                ymax = int(min(im_h, (boxes[i][2] * im_h)))
                xmax = int(min(im_w, (boxes[i][3] * im_w)))
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              2)

                object_name = self.labels[int(classes[i])]
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))
                label_size, base_line = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                label_ymin = max(ymin, label_size[1] + 10)
                cv2.rectangle(
                    image, (xmin, label_ymin - label_size[1] - 10),
                    (xmin + label_size[0], label_ymin + base_line - 10),
                    (255, 255, 255), cv2.FILLED)
                cv2.putText(image, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
                detect_text = detect_text + " " + object_name
        cv2.imshow('Detector', image)
        os.system('echo %s | festival --tts & ' % detect_text)
        sleep(5)
        cv2.destroyAllWindows()
        return
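A hedged usage sketch for ImageDetection; the model folder and image path are illustrative and must point to an existing detect.tflite, labelmap.txt, and image:

detector = ImageDetection('Sample_TFLite_model')   # folder with detect.tflite + labelmap.txt
detector.detect('test1.jpg')                       # draws boxes and speaks the labels via festival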
Ejemplo n.º 27
0
def object_detection():
    label_out = []
    mid_x_out = []
    mid_y_out = []

    class VideoStream:
        """Camera object that controls video streaming from the Picamera"""
        def __init__(self, resolution=(640, 480), framerate=30):
            # Initialize the PiCamera and the camera image stream
            self.stream = cv2.VideoCapture(0)
            ret = self.stream.set(cv2.CAP_PROP_FOURCC,
                                  cv2.VideoWriter_fourcc(*'MJPG'))
            ret = self.stream.set(3, resolution[0])
            ret = self.stream.set(4, resolution[1])

            # Read first frame from the stream
            (self.grabbed, self.frame) = self.stream.read()

            # Variable to control when the camera is stopped
            self.stopped = False

        def start(self):
            # Start the thread that reads frames from the video stream
            Thread(target=self.update, args=()).start()
            return self

        def update(self):
            # Keep looping indefinitely until the thread is stopped
            while True:
                # If the camera is stopped, stop the thread
                if self.stopped:
                    # Close camera resources
                    self.stream.release()
                    return

                # Otherwise, grab the next frame from the stream
                (self.grabbed, self.frame) = self.stream.read()

        def read(self):
            # Return the most recent frame
            return self.frame

        def stop(self):
            # Indicate that the camera and thread should be stopped
            self.stopped = True

    # Define and parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldir',
                        help='Folder the .tflite file is located in',
                        required=True)
    parser.add_argument(
        '--graph',
        help='Name of the .tflite file, if different than detect.tflite',
        default='detect.tflite')
    parser.add_argument(
        '--labels',
        help='Name of the labelmap file, if different than labelmap.txt',
        default='labelmap.txt')
    parser.add_argument(
        '--threshold',
        help='Minimum confidence threshold for displaying detected objects',
        default=0.5)
    parser.add_argument(
        '--resolution',
        help=
        'Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
        default='1280x720')
    parser.add_argument(
        '--edgetpu',
        help='Use Coral Edge TPU Accelerator to speed up detection',
        action='store_true')

    args = parser.parse_args()

    MODEL_NAME = args.modeldir
    GRAPH_NAME = args.graph
    LABELMAP_NAME = args.labels
    min_conf_threshold = float(args.threshold)
    resW, resH = args.resolution.split('x')
    imW, imH = int(resW), int(resH)
    use_TPU = args.edgetpu

    # Import TensorFlow libraries
    # If tensorflow is not installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tensorflow')
    if pkg is None:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    if use_TPU:
        # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
        if (GRAPH_NAME == 'detect.tflite'):
            GRAPH_NAME = 'edgetpu.tflite'

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
        print(PATH_TO_CKPT)
    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    frame_rate_calc = 1
    freq = cv2.getTickFrequency()

    # Initialize video stream
    videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
    time.sleep(1)

    #for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
    while True:
        flag = 0
        # Start timer (for calculating frame rate)
        t1 = cv2.getTickCount()

        # Grab frame from video stream
        frame1 = videostream.read()

        # Acquire frame and resize to expected shape [1xHxWx3]
        frame = frame1.copy()
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(
            output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(
            output_details[2]['index'])[0]  # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

                # Get bounding box coordinates and draw box
                # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))

                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0),
                              2)
                cv2.circle(frame, (xmin, ymin), 5, (255, 255, 0), cv2.FILLED)
                cv2.circle(frame, (xmax, ymax), 5, (0, 255, 255), cv2.FILLED)
                x_diff = xmax - xmin
                y_diff = ymax - ymin
                mid_x = x_diff / 2 + xmin
                mid_x = math.ceil(mid_x)
                mid_y = ymin + y_diff / 2
                mid_y = math.ceil(mid_y)
                cv2.circle(frame, (0, 0), 5, (0, 0, 255), cv2.FILLED)
                cv2.circle(frame, (mid_x, mid_y), 5, (255, 255, 255),
                           cv2.FILLED)

                # Draw label
                object_name = labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                label = '%s: %d%%' % (object_name, int(scores[i] * 100)
                                      )  # Example: 'person: 72%'
                labelSize, baseLine = cv2.getTextSize(label,
                                                      cv2.FONT_HERSHEY_SIMPLEX,
                                                      0.7, 2)  # Get font size
                label_ymin = max(
                    ymin, labelSize[1] + 10
                )  # Make sure not to draw label too close to top of window
                cv2.rectangle(
                    frame, (xmin, label_ymin - labelSize[1] - 10),
                    (xmin + labelSize[0], label_ymin + baseLine - 10),
                    (255, 255, 255),
                    cv2.FILLED)  # Draw white box to put label text in
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                            2)  # Draw label text
                label_out.append(label)
                mid_x_out.append(mid_x)
                mid_y_out.append(mid_y)
        # Draw framerate in corner of frame
        cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        #cv2.imshow('Object detector', frame)

        # Calculate framerate
        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1
        (h, w) = frame.shape[:2]
        cv2.waitKey(100)
        break
    # Clean up
    cv2.destroyAllWindows()
    videostream.stop()
    return (label_out, mid_x_out, mid_y_out, h / 2, w / 2)
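A hedged usage sketch; object_detection() reads --modeldir and the other flags from the command line and processes a single frame before returning:

if __name__ == '__main__':
    # e.g. python camera_detect.py --modeldir Sample_TFLite_model --threshold 0.6
    labels_out, xs, ys, half_h, half_w = object_detection()
    for lbl, x, y in zip(labels_out, xs, ys):
        print(lbl, 'centred at', (x, y), 'frame centre:', (half_w, half_h))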
Ejemplo n.º 28
0
class Detection:
    def __init__(self):
        self.MODEL_NAME = "detect"
        self.GRAPH_NAME = "detect.tflite"
        self.LABELMAP_NAME = "label_map.txt"
        self.min_conf_threshold = 0.70
        self.resW, self.resH = (1280, 720)
        self.imW, self.imH = int(self.resW), int(self.resH)
        # self.use_TPU = (True if 'projects' in str(os.getcwd()) else False)
        self.use_TPU = False
        self.frame_rate_calc = None
        self.item_detected = False
        self.latest_item = None

        self.detection_counter = [
            {
                "name": "apple",
                "counter": 0
            },
            {
                "name": "aubergine",
                "counter": 0
            },
            {
                "name": "banana",
                "counter": 0
            },
            {
                "name": "broccoli",
                "counter": 0
            },
            {
                "name": "cucumber",
                "counter": 0
            },
            {
                "name": "orange",
                "counter": 0
            },
            {
                "name": "paprika",
                "counter": 0
            },
            {
                "name": "pear",
                "counter": 0
            }
        ]

        # Import TFLite requirements
        self.pkg = importlib.util.find_spec('tflite_runtime')
        if self.pkg:
            from tflite_runtime.interpreter import Interpreter
            if self.use_TPU:
                from tflite_runtime.interpreter import load_delegate
        else:
            from tensorflow.lite.python.interpreter import Interpreter
            if self.use_TPU:
                from tensorflow.lite.python.interpreter import load_delegate

        # If using Edge TPU, assign filename for Edge TPU model
        if self.use_TPU:
            # If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
            if (self.GRAPH_NAME == 'detect.tflite'):
                self.GRAPH_NAME = 'edgetpu.tflite'

        # Get path to current working directory
        CWD_PATH = os.getcwd()

        PATH_TO_CKPT = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format(self.GRAPH_NAME)

        PATH_TO_LABELS = "/home/pi/projects/smartcart-device/dojo/tflite/{}".format(
            self.LABELMAP_NAME)

        PATH_TO_OBJ_NAMES = "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names"
        # Load the label map
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]

        # Fix for potential label map issue
        if self.labels[0] == '???':
            del (self.labels[0])

        if self.use_TPU:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT,
                                           experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
            print(PATH_TO_CKPT)
        else:
            self.interpreter = Interpreter(model_path=PATH_TO_CKPT)

        self.interpreter.allocate_tensors()
        
        print("Model loaded and tensors allocated")

        # Get model details
        self.input_details = self.interpreter.get_input_details()
        #print("Input details: {}".format(self.input_details))
        self.output_details = self.interpreter.get_output_details()
        #print("Output detais: {}".format(self.output_details))
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.input_mean = 127.5
        self.input_std = 127.5

        # Initialize frame rate calculation
        self.frame_rate_calc = 1
        self.freq = cv2.getTickFrequency()

        # Initialize video stream
        self.videostream = VideoStream(resolution=(self.imW, self.imH))
        self.videostream = self.videostream.start()

    def filter_boxes(self, box_xywh, scores, score_threshold=0.4, input_shape=tf.constant([416, 416])):
        # Keep only the candidate boxes whose best per-class score reaches
        # score_threshold, then convert the surviving (x, y, w, h) centre/size
        # boxes into normalised (y_min, x_min, y_max, x_max) corners by
        # dividing by the model input resolution.
        scores_max = tf.math.reduce_max(scores, axis=-1)

        mask = scores_max >= score_threshold
        class_boxes = tf.boolean_mask(box_xywh, mask)
        pred_conf = tf.boolean_mask(scores, mask)
        class_boxes = tf.reshape(class_boxes, [tf.shape(scores)[0], -1, tf.shape(class_boxes)[-1]])
        pred_conf = tf.reshape(pred_conf, [tf.shape(scores)[0], -1, tf.shape(pred_conf)[-1]])

        box_xy, box_wh = tf.split(class_boxes, (2, 2), axis=-1)

        input_shape = tf.cast(input_shape, dtype=tf.float32)

        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]

        box_mins = (box_yx - (box_hw / 2.)) / input_shape
        box_maxes = (box_yx + (box_hw / 2.)) / input_shape
        boxes = tf.concat([
            box_mins[..., 0:1],  # y_min
            box_mins[..., 1:2],  # x_min
            box_maxes[..., 0:1],  # y_max
            box_maxes[..., 1:2]  # x_max
        ], axis=-1)
        # return tf.concat([boxes, pred_conf], axis=-1)
        return (boxes, pred_conf)

    def read_class_names(self, class_file_name):
        names = {}
        with open(class_file_name, 'r') as data:
            for ID, name in enumerate(data):
                names[ID] = name.strip('\n')
        return names

    # TODO: Define cfg.YOLO.CLASSES
    def draw_bbox(self, image, bboxes, classes, show_label=True):
        num_classes = len(classes)
        image_h, image_w, _ = image.shape
        hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
        colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))

        random.seed(0)
        random.shuffle(colors)
        random.seed(None)

        out_boxes, out_scores, out_classes, num_boxes = bboxes
        for i in range(num_boxes[0]):
            if int(out_classes[0][i]) < 0 or int(out_classes[0][i]) >= num_classes: continue
            coor = out_boxes[0][i]
            coor[0] = int(coor[0] * image_h)
            coor[2] = int(coor[2] * image_h)
            coor[1] = int(coor[1] * image_w)
            coor[3] = int(coor[3] * image_w)

            fontScale = 0.5
            score = out_scores[0][i]
            class_ind = int(out_classes[0][i])
            bbox_color = colors[class_ind]
            bbox_thick = int(0.6 * (image_h + image_w) / 600)
            # OpenCV drawing functions expect integer pixel coordinates
            c1, c2 = (int(coor[1]), int(coor[0])), (int(coor[3]), int(coor[2]))
            cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

            if show_label:
                bbox_mess = '%s: %.2f' % (classes[class_ind], score)
                t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
                c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
                cv2.rectangle(image, c1, (int(c3[0]), int(c3[1])), bbox_color, -1)  # filled

                cv2.putText(image, bbox_mess, (c1[0], int(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
        return image

    def perform(self):
        while True:
            t1 = cv2.getTickCount()

            frame1 = self.videostream.read()
            
            print("Frame read from stream")

            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
            # input_data = np.expand_dims(frame_resized, axis=0)

            image_data = cv2.resize(frame, (608, 608))
            image_data = image_data / 255.

            images_data = []
            for i in range(1):
                images_data.append(image_data)
            images_data = np.asarray(images_data).astype(np.float32)

            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            # if self.floating_model:
            # input_data = (np.float32(input_data) - self.input_mean) / self.input_std

            # Perform the actual detection by running the model with the image as input
            self.interpreter.set_tensor(self.input_details[0]['index'], images_data)
            print("Performing detection")
            self.interpreter.invoke()
            print("Detection performed")
            pred = [self.interpreter.get_tensor(self.output_details[i]['index']) for i in
                    range(len(self.output_details))]
            boxes, pred_conf = self.filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                 input_shape=tf.constant([608, 608]))

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=0.3,  # TODO: Make var
                score_threshold=0.3  # TODO: Make var
            )
            pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
            class_names = self.read_class_names(
                "/home/pi/projects/smartcart-device/dojo/yolo/yolov4_smartcart/tflite/coco.names")
            print("Drawing bounding boxes")
            frame = self.draw_bbox(frame, pred_bbox, class_names)
            #frame = Image.fromarray(frame.astype(np.uint8))

           # cv2.imshow('Object detector',frame.astype(np.uint8))
            time.sleep(5)
            image = cv2.cvtColor(np.array(frame), cv2.COLOR_BGR2RGB)

            if cv2.waitKey(1) == ord('x'):
                break

            if self.item_detected:
                break

        return self.item_detected, self.latest_item

    def run(self, cloud=False):
        #while True:
            # for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):

            # Start timer (for calculating frame rate)
            t1 = cv2.getTickCount()

            # Grab frame from video stream
            frame1 = self.videostream.read()

            # Acquire frame and resize to expected shape [1xHxWx3]
            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (self.width, self.height))

            if cloud:
                # TODO: Send image to cloud and get data back
                content_type = 'image/jpeg'
                headers = {'content-type': content_type}

                _, img_encoded = cv2.imencode('.jpg', frame_rgb)
                request_address = "http://a24dcb00998c.ngrok.io/api/detect"
                # send http request with image and receive response
                print("Sending image to cloud api and awaiting response")
                response = requests.post(request_address, data=img_encoded.tostring(), headers=headers)
                print("Response received:")
                print(json.loads(response.text))

            else:
                input_data = np.expand_dims(frame_resized, axis=0)

                # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
                if self.floating_model:
                    input_data = (np.float32(input_data) - self.input_mean) / self.input_std

                # Perform the actual detection by running the model with the image as input
                self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
                #print("Detection started")
                self.interpreter.invoke()
                #print("Detection complete")

                # Retrieve detection results
                #print(self.output_details)
                boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]  # Bounding coordinates of objects
                classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
                scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]  # Confidence of detected objects
                num = self.interpreter.get_tensor(self.output_details[3]['index'])[0]
                # Total number of detected objects (inaccurate and not needed)

            # NOTE: the cloud branch above does not populate boxes/classes/scores,
            # so the drawing loop below only runs meaningfully when cloud=False.
            max_score = 0

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            for i in range(len(scores)):
                if ((scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):
                    # Specify that item has been detected
                    #self.item_detected = True
                    #if scores[i] > max_score:
                        #max_score = scores[i]
                        #self.latest_item = self.labels[int(classes[i])]


                    # Get bounding box coordinates and draw box
                    # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                    ymin = int(max(1, (boxes[i][0] * self.imH)))
                    xmin = int(max(1, (boxes[i][1] * self.imW)))
                    ymax = int(min(self.imH, (boxes[i][2] * self.imH)))
                    xmax = int(min(self.imW, (boxes[i][3] * self.imW)))

                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

                    # Draw label
                    object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
                    self.increase_detection_counter(object_name, scores[i])
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                    label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                    cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                                  (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255),
                                  cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0),
                                2)  # Draw label text

            # Draw framerate in corner of frame
            cv2.putText(frame, 'FPS: {0:.2f}'.format(self.frame_rate_calc), (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 255, 0), 2,
                        cv2.LINE_AA)

            # All the results have been drawn on the frame, so it's time to display it.
            cv2.imshow('Object detector', frame)
        
            if cv2.waitKey(1) == ord('x'):
                    cv2.destroyAllWindows()
                    #break
        
            # Calculate framerate
            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / self.freq
            self.frame_rate_calc = 1 / time1

            self.item_detected, self.latest_item = self.get_object_with_score_five()

            if self.item_detected:
                self.reset_detection_counter()

            return self.item_detected, self.latest_item

    def increase_detection_counter(self, detected_item, score):
        for item in self.detection_counter:
            if item["name"] == detected_item:
                item["counter"] += score


    def get_object_with_score_five(self):
        max_score = 0
        latest_object = "None"
        detected_object = False
        for item in self.detection_counter:
            if item["counter"] >= 5 and item["counter"] > max_score:
                latest_object = item["name"]
                detected_object = True
                max_score = item["counter"]
        return detected_object, latest_object

    def reset_detection_counter(self):
        self.detection_counter = [
            {
                "name": "apple",
                "counter": 0
            },
            {
                "name": "aubergine",
                "counter": 0
            },
            {
                "name": "banana",
                "counter": 0
            },
            {
                "name": "broccoli",
                "counter": 0
            },
            {
                "name": "cucumber",
                "counter": 0
            },
            {
                "name": "orange",
                "counter": 0
            },
            {
                "name": "paprika",
                "counter": 0
            },
            {
                "name": "pear",
                "counter": 0
            }
        ]

    def destroy(self):
        # Clean up
        cv2.destroyAllWindows()
        self.videostream.stop()
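A hedged usage sketch for the Detection class; it assumes the VideoStream helper and the hard-coded model paths referenced above exist on the device:

detector = Detection()               # loads the TFLite model and starts the video stream
try:
    found, item = detector.run()     # processes one frame with the counter-based SSD path
    if found:
        print('detected:', item)
finally:
    detector.destroy()               # stops the stream and closes any OpenCV windows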
Ejemplo n.º 29
0
def objectsCount(MODEL_NAME, GRAPH_NAME, LABELMAP_NAME, min_conf_threshold,
                 use_TPU, IM_NAME, IM_DIR):
    import os
    import cv2
    import numpy as np
    import sys
    import glob
    import importlib.util

    # If both an image AND a folder are specified, throw an error
    if (IM_NAME and IM_DIR):
        print(
            'Error! Please only use the --image argument or the --imagedir argument, not both. Issue "python TFLite_detection_image.py -h" for help.'
        )
        sys.exit()

    # If neither an image nor a folder is specified, default to using 'test1.jpg' for the image name
    if (not IM_NAME and not IM_DIR):
        IM_NAME = 'test1.jpg'

    # Import TensorFlow libraries
    # If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
    # If using Coral Edge TPU, import the load_delegate library
    pkg = importlib.util.find_spec('tflite_runtime')
    if pkg:
        from tflite_runtime.interpreter import Interpreter
        if use_TPU:
            from tflite_runtime.interpreter import load_delegate
    else:
        from tensorflow.lite.python.interpreter import Interpreter
        if use_TPU:
            from tensorflow.lite.python.interpreter import load_delegate

    # Get path to current working directory
    CWD_PATH = os.getcwd()

    # Define path to images and grab all image filenames
    if IM_DIR:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_DIR)
        images = glob.glob(PATH_TO_IMAGES + '/*')

    elif IM_NAME:
        PATH_TO_IMAGES = os.path.join(CWD_PATH, IM_NAME)
        images = glob.glob(PATH_TO_IMAGES)

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]

    # Have to do a weird fix for label map if using the COCO "starter model" from
    # https://www.tensorflow.org/lite/models/object_detection/overview
    # First label is '???', which has to be removed.
    if labels[0] == '???':
        del (labels[0])

    # Load the Tensorflow Lite model.
    # If using Edge TPU, use special load_delegate argument
    if use_TPU:
        interpreter = Interpreter(
            model_path=PATH_TO_CKPT,
            experimental_delegates=[load_delegate('libedgetpu.so.1.0')])

    else:
        interpreter = Interpreter(model_path=PATH_TO_CKPT)

    interpreter.allocate_tensors()

    # Get model details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]

    floating_model = (input_details[0]['dtype'] == np.float32)

    input_mean = 127.5
    input_std = 127.5

    objects_list = {}  # dictionary mapping each image path to the number of cars detected

    # Loop over every image and perform detection
    for image_path in images:

        # Load image and resize to expected shape [1xHxWx3]
        image = cv2.imread(image_path)
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        imH, imW, _ = image.shape
        image_resized = cv2.resize(image_rgb, (width, height))
        input_data = np.expand_dims(image_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[
            0]  # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(
            output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(
            output_details[2]['index'])[0]  # Confidence of detected objects
        #num = interpreter.get_tensor(output_details[3]['index'])[0]  # Total number of detected objects (inaccurate and not needed)

        objects_count = 0  # initialize the per-image car count

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):

                # Draw label
                object_name = labels[int(
                    classes[i]
                )]  # Look up object name from "labels" array using class index
                if (object_name == 'car'):
                    objects_count = objects_count + 1  #get the count of cars detected in the image

        objects_list[image_path] = objects_count
    return (objects_list)
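A hedged usage sketch for objectsCount; the model folder and image directory names are illustrative:

car_counts = objectsCount(MODEL_NAME='Sample_TFLite_model',
                          GRAPH_NAME='detect.tflite',
                          LABELMAP_NAME='labelmap.txt',
                          min_conf_threshold=0.5,
                          use_TPU=False,
                          IM_NAME=None,
                          IM_DIR='traffic_images')
print(car_counts)   # {<image path>: <number of cars detected>, ...}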
Ejemplo n.º 30
0
class CamDetect:
    def __init__(self, camera):
        self.camera = camera
        MODEL_DIR = 'models/coco_ssd_1'
        MODEL_NAME = 'detect.tflite'
        LABELMAP_NAME = 'labelmap.txt'
        PATH_TO_MODEL = os.path.join(MODEL_DIR, MODEL_NAME)
        PATH_TO_LABELS = os.path.join(MODEL_DIR, LABELMAP_NAME)

        self.min_conf_threshold = .40

        # Load Labels
        with open(PATH_TO_LABELS, 'r') as f:
            self.labels = [line.strip() for line in f.readlines()]
        # Have to do a weird fix for label map if using the COCO "starter model" from
        # https://www.tensorflow.org/lite/models/object_detection/overview
        # First label is '???', which has to be removed.
        if self.labels[0] == '???':
            del (self.labels[0])

        # Load Model
        self.interpreter = Interpreter(model_path=PATH_TO_MODEL)
        self.interpreter.allocate_tensors()
        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]
        print('MODEL IMAGE SHAPE: ', self.input_details[0]['shape'])

        self.floating_model = (self.input_details[0]['dtype'] == np.float32)

        self.obj_flag = False  # True for found new objects
        self.new_item = None

        self.ON = True  # A False value ends the detection loop and the thread
        self.running = True
        self.detection_thread = threading.Thread(target=self.detection_loop, name="detection_loop")
        self.detection_thread.start()
        print("Object Detection initialized")

    def __call__(self):
        if self.obj_flag:
            self.obj_flag = False
            return True
        return False

    # Return the most recently detected item
    def get_item(self):
        return self.new_item

    def close(self):  # Clean up
        self.ON = False
        cv2.destroyAllWindows()

    def detection_loop(self):
        print("detection loop started")
        while self.ON:
            # Read frame from camera connection
            frame = self.camera.read_frame()

            in_height, in_width, in_channels = frame.shape
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
            input_data = np.expand_dims(frame_resized, axis=0)
            # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
            if self.floating_model:
                input_data = (np.float32(input_data) - 127.5) / 127.5
                print('Floating model')
            # Perform the actual detection by running the model with the image as input
            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
            self.interpreter.invoke()
            # Retrieve detection results
            boxes = self.interpreter.get_tensor(self.output_details[0]['index'])[0]  # Bounding box coordinates of detected objects
            classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]  # Class index of detected objects
            scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]  # Confidence of detected objects

            # Loop over all detections and draw detection box if confidence is above minimum threshold
            ilist = [51, 52, 53, 54, 55, 56, 57, 58, 59, 60]  # class indices of interest (food classes in the COCO label map)
            object_name = ' '
            top_score = 0
            top_name = ' '
            for i in range(len(scores)):
                if ((int(classes[i]) in ilist) and (scores[i] > self.min_conf_threshold) and (scores[i] <= 1.0)):
                    # Get bounding box coordinates and draw box
                    # Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
                    ymin = int(max(0, boxes[i][0]) * in_height)
                    xmin = int(max(0, boxes[i][1]) * in_width)
                    ymax = int(min(1, boxes[i][2]) * in_height)
                    xmax = int(min(1, boxes[i][3]) * in_width)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 1)

                    # Draw label on the input frame
                    object_name = self.labels[int(classes[i])]  # Look up object name from "labels" array using class index
                    item_txt = '%s: %d%%' % (object_name, int(scores[i] * 100))  # Example: 'person: 72%'
                    labelSize, baseLine = cv2.getTextSize(item_txt, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                    label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                    cv2.rectangle(frame,
                                  (xmin, label_ymin - labelSize[1] - 10),
                                  (xmin + labelSize[0], label_ymin + baseLine - 10),
                                  (255, 255, 255),
                                  cv2.FILLED)  # Draw white box to put label text in
                    cv2.putText(frame, item_txt,
                                (xmin, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX,
                                0.5, (0, 0, 0), 2)  # Draw label text

                    # Draw circle in center
                    xcenter = xmin + (int(round((xmax - xmin) / 2)))
                    ycenter = ymin + (int(round((ymax - ymin) / 2)))
                    cv2.circle(frame, (xcenter, ycenter), 5, (0, 0, 255), thickness=-1)
                    if scores[i] > top_score:
                        top_score = scores[i]
                        top_name = self.labels[int(classes[i])]
                    # Print info
                    print('Object ', str(classes[i]), ': ', object_name, 'score:', scores[i])

            # SELECT WHICH ITEM GETS SAVED
            if top_name != ' ' and top_name != last_name:  # only save a new, non-blank detection
                last_name = top_name
                object_item = parsepy.item()
                object_item.name = top_name
                object_item.upc = ' '
                object_item.imageURL = ' '
                print("writing new object", object_item.name)
                self.new_item = object_item
                self.obj_flag = True

            # All the results have been drawn on the frame, so it's time to display it.
            if SHOW_DETECTION_VIDEO and in_height > 0:
                out_scale_fct = 1
                frame = cv2.resize(frame, (int(in_width * out_scale_fct), int(in_height * out_scale_fct)))
                frame = cv2.normalize(frame, frame, 0, 255, cv2.NORM_MINMAX)
                cv2.imshow('Objects', frame)
                cv2.moveWindow('Objects', 10, 10)
                cv2.waitKey(200)
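CamDetect keeps its detection work on a background thread and exposes results through a simple poll-and-reset protocol: the loop sets obj_flag and stores new_item, the caller checks __call__() and reads get_item(), and close() stops the loop. A minimal, self-contained sketch of that pattern, with all names below as illustrative stand-ins and a sleep in place of the camera read and TFLite inference:

import threading
import time

class BackgroundDetector:
    """Illustrative stand-in for CamDetect's thread/flag pattern (not the real class)."""

    def __init__(self):
        self.ON = True
        self.obj_flag = False
        self.new_item = None
        self._thread = threading.Thread(target=self._loop, name="detection_loop")
        self._thread.start()

    def _loop(self):
        count = 0
        while self.ON:
            time.sleep(0.1)                    # stands in for grabbing a frame and running inference
            count += 1
            self.new_item = f"object-{count}"  # stands in for the parsepy.item built above
            self.obj_flag = True               # signal the caller that a new item is available

    def __call__(self):
        if self.obj_flag:                      # same poll-and-reset behaviour as CamDetect.__call__
            self.obj_flag = False
            return True
        return False

    def get_item(self):
        return self.new_item

    def close(self):
        self.ON = False
        self._thread.join()

if __name__ == '__main__':
    detector = BackgroundDetector()
    for _ in range(5):
        if detector():                         # poll: True only when something new arrived
            print("new item:", detector.get_item())
        time.sleep(0.2)
    detector.close()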